SDK Custom Dataset
Uploading a custom dataset on the platform means performing these 3 things:
Create a new empty dataset and add any classes it will hold
Create a datapoint with any combination of inputs/targets from your script and upload it
Finalize the dataset in order to let our system know it’s ready to be used!
Here’s an example of using pycocotools
to upload a COCO-format dataset to the system.
import efemarai as ef
from efemarai.dataset import DatasetFormat
from pycocotools.coco import COCO
# Root directory that holds the COCO annotations and images
root = "./data"

# Create a custom project
project = ef.Session().create_project(
    name="Example Custom",
    description="Example Custom Project",
)

# Create an empty dataset
dataset = project.create_dataset(
    name="Example Custom Dataset",
    stage=ef.DatasetStage.Validation,
    format=ef.DatasetFormat.Custom,
)

# Load your custom dataset
coco = COCO(f"{root}/annotations/instances_val2017.json")

# Create the labels and add them to the dataset
for category in coco.loadCats(coco.getCatIds()):
    dataset.add_annotation_class(
        id=category["id"],
        name=category["name"],
        # Fall back to a placeholder when the category has no supercategory
        category=category.get("supercategory", "supercategory"),
    )

# Iterate over your dataset
for cocoImg in coco.loadImgs(coco.getImgIds()):
    # Instantiate the image with an ef.Image object
    image = ef.Image(
        file_path=f"{root}/val2017/{cocoImg['file_name']}",
        width=cocoImg["width"],
        height=cocoImg["height"],
    )

    # Create a datapoint and add the image as input.
    datapoint = ef.Datapoint(
        dataset=dataset,
        inputs={"image": image},
    )

    # Load every COCO annotation that belongs to this image
    annotations = coco.loadAnns(coco.getAnnIds(imgIds=cocoImg["id"]))

    for cocoAnn in annotations:
        # Search for the true label object in the dataset by either `name` or `id`.
        # Alternatively: label = dataset.get_annotation_class(name=<category name>)
        label = dataset.get_annotation_class(id=cocoAnn["category_id"])

        # COCO stores boxes as [x_min, y_min, width, height], so convert
        # them to corner coordinates before passing them as `xyxy`.
        x_min, y_min, box_width, box_height = cocoAnn["bbox"]

        # Add a bounding box to the image
        datapoint.add_target(
            ef.BoundingBox(
                xyxy=[x_min, y_min, x_min + box_width, y_min + box_height],
                area=box_width * box_height,
                label=label,
                ref_field=[image],
            )
        )

    # Upload a single datapoint
    datapoint.upload()

# Finalize dataset
dataset.finalize()
You can now navigate to the Domain
page and create an operational domain. Afterwards navigate to Stress Test
and create a stress test as shown in Improving Models.
For a more detailed view of all annotation objects, refer to SDK Fields.