Detect and Annotate¶

Supervision provides a seamless process for annotating predictions generated by various object detection and segmentation models. This guide shows how to perform inference with the Inference, Ultralytics or Transformers packages. Following this, you'll learn how to import these predictions into Supervision and use them to annotate source image.

basic-annotation

Run Detection¶

First, you'll need to obtain predictions from your object detection or segmentation model.

InferenceUltralyticsTransformers

import cv2
from inference import get_model

model = get_model(model_id="yolov8n-640")
image = cv2.imread(<SOURCE_IMAGE_PATH>)
results = model.infer(image)[0]

import cv2
from ultralytics import YOLO

model = YOLO("yolov8n.pt")
image = cv2.imread(<SOURCE_IMAGE_PATH>)
results = model(image)[0]

import torch
from PIL import Image
from transformers import DetrImageProcessor, DetrForObjectDetection

processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")

image = Image.open(<SOURCE_IMAGE_PATH>)
inputs = processor(images=image, return_tensors="pt")

with torch.no_grad():
    outputs = model(**inputs)

width, height = image.size
target_size = torch.tensor([[height, width]])
results = processor.post_process_object_detection(
    outputs=outputs, target_sizes=target_size)[0]

Load Predictions into Supervision¶

Now that we have predictions from a model, we can load them into Supervision.

InferenceUltralyticsTransformers

We can do so using the sv.Detections.from_inference method, which accepts model results from both detection and segmentation models.

import cv2
import supervision as sv
from inference import get_model

model = get_model(model_id="yolov8n-640")
image = cv2.imread(<SOURCE_IMAGE_PATH>)
results = model.infer(image)[0]
detections = sv.Detections.from_inference(results)

We can do so using the sv.Detections.from_ultralytics method, which accepts model results from both detection and segmentation models.

import cv2
import supervision as sv
from ultralytics import YOLO

model = YOLO("yolov8n.pt")
image = cv2.imread(<SOURCE_IMAGE_PATH>)
results = model(image)[0]
detections = sv.Detections.from_ultralytics(results)

We can do so using the sv.Detections.from_transformers method, which accepts model results from both detection and segmentation models.

import torch
import supervision as sv
from PIL import Image
from transformers import DetrImageProcessor, DetrForObjectDetection

processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")

image = Image.open(<SOURCE_IMAGE_PATH>)
inputs = processor(images=image, return_tensors="pt")

with torch.no_grad():
    outputs = model(**inputs)

width, height = image.size
target_size = torch.tensor([[height, width]])
results = processor.post_process_object_detection(
    outputs=outputs, target_sizes=target_size)[0]
detections = sv.Detections.from_transformers(
    transformers_results=results,
    id2label=model.config.id2label)

You can load predictions from other computer vision frameworks and libraries using:

Annotate Image with Detections¶

Finally, we can annotate the image with the predictions. Since we are working with an object detection model, we will use the sv.BoundingBoxAnnotator and sv.LabelAnnotator classes.

InferenceUltralyticsTransformers

import cv2
import supervision as sv
from inference import get_model

model = get_model(model_id="yolov8n-640")
image = cv2.imread(<SOURCE_IMAGE_PATH>)
results = model.infer(image)[0]
detections = sv.Detections.from_inference(results)

bounding_box_annotator = sv.BoundingBoxAnnotator()
label_annotator = sv.LabelAnnotator()

annotated_image = bounding_box_annotator.annotate(
    scene=image, detections=detections)
annotated_image = label_annotator.annotate(
    scene=annotated_image, detections=detections)

import cv2
import supervision as sv
from ultralytics import YOLO

model = YOLO("yolov8n.pt")
image = cv2.imread(<SOURCE_IMAGE_PATH>)
results = model(image)[0]
detections = sv.Detections.from_ultralytics(results)

bounding_box_annotator = sv.BoundingBoxAnnotator()
label_annotator = sv.LabelAnnotator()

annotated_image = bounding_box_annotator.annotate(
    scene=image, detections=detections)
annotated_image = label_annotator.annotate(
    scene=annotated_image, detections=detections)

import torch
import supervision as sv
from PIL import Image
from transformers import DetrImageProcessor, DetrForObjectDetection

processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")

image = Image.open(<SOURCE_IMAGE_PATH>)
inputs = processor(images=image, return_tensors="pt")

with torch.no_grad():
    outputs = model(**inputs)

width, height = image.size
target_size = torch.tensor([[height, width]])
results = processor.post_process_object_detection(
    outputs=outputs, target_sizes=target_size)[0]
detections = sv.Detections.from_transformers(
    transformers_results=results,
    id2label=model.config.id2label)

bounding_box_annotator = sv.BoundingBoxAnnotator()
label_annotator = sv.LabelAnnotator()

annotated_image = bounding_box_annotator.annotate(
    scene=image, detections=detections)
annotated_image = label_annotator.annotate(
    scene=annotated_image, detections=detections)

basic-annotation

Display Custom Labels¶

By default, sv.LabelAnnotator will label each detection with its class_name (if possible) or class_id. You can override this behavior by passing a list of custom labels to the annotate method.

InferenceUltralyticsTransformers

import cv2
import supervision as sv
from inference import get_model

model = get_model(model_id="yolov8n-640")
image = cv2.imread(<SOURCE_IMAGE_PATH>)
results = model.infer(image)[0]
detections = sv.Detections.from_inference(results)

bounding_box_annotator = sv.BoundingBoxAnnotator()
label_annotator = sv.LabelAnnotator()

labels = [
    f"{class_name} {confidence:.2f}"
    for class_name, confidence
    in zip(detections['class_name'], detections.confidence)
]

annotated_image = bounding_box_annotator.annotate(
    scene=image, detections=detections)
annotated_image = label_annotator.annotate(
    scene=annotated_image, detections=detections, labels=labels)

import cv2
import supervision as sv
from ultralytics import YOLO

model = YOLO("yolov8n.pt")
image = cv2.imread(<SOURCE_IMAGE_PATH>)
results = model(image)[0]
detections = sv.Detections.from_ultralytics(results)

bounding_box_annotator = sv.BoundingBoxAnnotator()
label_annotator = sv.LabelAnnotator()

labels = [
    f"{class_name} {confidence:.2f}"
    for class_name, confidence
    in zip(detections['class_name'], detections.confidence)
]

annotated_image = bounding_box_annotator.annotate(
    scene=image, detections=detections)
annotated_image = label_annotator.annotate(
    scene=annotated_image, detections=detections, labels=labels)

import torch
import supervision as sv
from PIL import Image
from transformers import DetrImageProcessor, DetrForObjectDetection

processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")

image = Image.open(<SOURCE_IMAGE_PATH>)
inputs = processor(images=image, return_tensors="pt")

with torch.no_grad():
    outputs = model(**inputs)

width, height = image.size
target_size = torch.tensor([[height, width]])
results = processor.post_process_object_detection(
    outputs=outputs, target_sizes=target_size)[0]
detections = sv.Detections.from_transformers(
    transformers_results=results,
    id2label=model.config.id2label)

bounding_box_annotator = sv.BoundingBoxAnnotator()
label_annotator = sv.LabelAnnotator()

labels = [
    f"{class_name} {confidence:.2f}"
    for class_name, confidence
    in zip(detections['class_name'], detections.confidence)
]

annotated_image = bounding_box_annotator.annotate(
    scene=image, detections=detections)
annotated_image = label_annotator.annotate(
    scene=annotated_image, detections=detections, labels=labels)

custom-label-annotation

Annotate Image with Segmentations¶

If you are running the segmentation model sv.MaskAnnotator is a drop-in replacement for sv.BoundingBoxAnnotator that will allow you to draw masks instead of boxes.

InferenceUltralyticsTransformers

import cv2
import supervision as sv
from inference import get_model

model = get_model(model_id="yolov8n-seg-640")
image = cv2.imread(<SOURCE_IMAGE_PATH>)
results = model.infer(image)[0]
detections = sv.Detections.from_inference(results)

mask_annotator = sv.MaskAnnotator()
label_annotator = sv.LabelAnnotator(text_position=sv.Position.CENTER_OF_MASS)

annotated_image = mask_annotator.annotate(
    scene=image, detections=detections)
annotated_image = label_annotator.annotate(
    scene=annotated_image, detections=detections)

import cv2
import supervision as sv
from ultralytics import YOLO

model = YOLO("yolov8n-seg.pt")
image = cv2.imread(<SOURCE_IMAGE_PATH>)
results = model(image)[0]
detections = sv.Detections.from_ultralytics(results)

mask_annotator = sv.MaskAnnotator()
label_annotator = sv.LabelAnnotator(text_position=sv.Position.CENTER_OF_MASS)

annotated_image = mask_annotator.annotate(
    scene=image, detections=detections)
annotated_image = label_annotator.annotate(
    scene=annotated_image, detections=detections)

import torch
import supervision as sv
from PIL import Image
from transformers import DetrImageProcessor, DetrForSegmentation

processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50-panoptic")
model = DetrForSegmentation.from_pretrained("facebook/detr-resnet-50-panoptic")

image = Image.open(<SOURCE_IMAGE_PATH>)
inputs = processor(images=image, return_tensors="pt")

with torch.no_grad():
    outputs = model(**inputs)

width, height = image.size
target_size = torch.tensor([[height, width]])
results = processor.post_process_segmentation(
    outputs=outputs, target_sizes=target_size)[0]
detections = sv.Detections.from_transformers(
    transformers_results=results,
    id2label=model.config.id2label)

mask_annotator = sv.MaskAnnotator()
label_annotator = sv.LabelAnnotator(text_position=sv.Position.CENTER_OF_MASS)

labels = [
    f"{class_name} {confidence:.2f}"
    for class_name, confidence
    in zip(detections['class_name'], detections.confidence)
]

annotated_image = mask_annotator.annotate(
    scene=image, detections=detections)
annotated_image = label_annotator.annotate(
    scene=annotated_image, detections=detections, labels=labels)

segmentation-annotation

Detect and Annotate¶

Run Detection¶

Load Predictions into Supervision¶

Annotate Image with Detections¶

Display Custom Labels¶

Annotate Image with Segmentations¶

Comments