Save Detections¶
Supervision provides an easy way to save detections to .CSV and .JSON files for offline
processing. This guide demonstrates how to perform video inference using the
Inference,
Ultralytics or
Transformers packages and save their results with
sv.CSVSink
and
sv.JSONSink
.
Run Detection¶
First, you'll need to obtain predictions from your object detection or segmentation model. You can learn more on this topic in our How to Detect and Annotate guide.
import torch
import supervision as sv
from transformers import DetrImageProcessor, DetrForObjectDetection
processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")
frames_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)
for frame in frames_generator:
frame = sv.cv2_to_pillow(frame)
inputs = processor(images=frame, return_tensors="pt")
with torch.no_grad():
outputs = model(**inputs)
width, height = frame.size
target_size = torch.tensor([[height, width]])
results = processor.post_process_object_detection(
outputs=outputs, target_sizes=target_size)[0]
detections = sv.Detections.from_transformers(results)
Save Detections as CSV¶
To save detections to a .CSV
file, open our
sv.CSVSink
and then pass the
sv.Detections
object resulting from the inference to it. Its fields are parsed and saved on disk.
import supervision as sv
from inference import get_model
model = get_model(model_id="yolov8n-640")
frames_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)
with sv.CSVSink(<TARGET_CSV_PATH>) as sink:
for frame in frames_generator:
results = model.infer(image)[0]
detections = sv.Detections.from_inference(results)
sink.append(detections, {})
import supervision as sv
from ultralytics import YOLO
model = YOLO("yolov8n.pt")
frames_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)
with sv.CSVSink(<TARGET_CSV_PATH>) as sink:
for frame in frames_generator:
results = model(frame)[0]
detections = sv.Detections.from_ultralytics(results)
sink.append(detections, {})
import torch
import supervision as sv
from transformers import DetrImageProcessor, DetrForObjectDetection
processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")
frames_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)
with sv.CSVSink(<TARGET_CSV_PATH>) as sink:
for frame in frames_generator:
frame = sv.cv2_to_pillow(frame)
inputs = processor(images=frame, return_tensors="pt")
with torch.no_grad():
outputs = model(**inputs)
width, height = frame.size
target_size = torch.tensor([[height, width]])
results = processor.post_process_object_detection(
outputs=outputs, target_sizes=target_size)[0]
detections = sv.Detections.from_transformers(results)
sink.append(detections, {})
x_min | y_min | x_max | y_max | class_id | confidence | tracker_id | class_name |
---|---|---|---|---|---|---|---|
2941.14 | 1269.31 | 3220.77 | 1500.67 | 2 | 0.8517 | | car |
944.889 | 899.641 | 1235.42 | 1308.80 | 7 | 0.6752 | | truck |
1439.78 | 1077.79 | 1621.27 | 1231.40 | 2 | 0.6450 | | car |
Custom Fields¶
Besides regular fields in
sv.Detections
,
sv.CSVSink
also allows you to add custom information to each row, which can be passed via the
custom_data
dictionary. Let's utilize this feature to save information about the
frame index from which the detections originate.
import supervision as sv
from inference import get_model
model = get_model(model_id="yolov8n-640")
frames_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)
with sv.CSVSink(<TARGET_CSV_PATH>) as sink:
for frame_index, frame in enumerate(frames_generator):
results = model.infer(image)[0]
detections = sv.Detections.from_inference(results)
sink.append(detections, {"frame_index": frame_index})
import supervision as sv
from ultralytics import YOLO
model = YOLO("yolov8n.pt")
frames_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)
with sv.CSVSink(<TARGET_CSV_PATH>) as sink:
for frame_index, frame in enumerate(frames_generator):
results = model(frame)[0]
detections = sv.Detections.from_ultralytics(results)
sink.append(detections, {"frame_index": frame_index})
import torch
import supervision as sv
from transformers import DetrImageProcessor, DetrForObjectDetection
processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")
frames_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)
with sv.CSVSink(<TARGET_CSV_PATH>) as sink:
for frame_index, frame in enumerate(frames_generator):
frame = sv.cv2_to_pillow(frame)
inputs = processor(images=frame, return_tensors="pt")
with torch.no_grad():
outputs = model(**inputs)
width, height = frame.size
target_size = torch.tensor([[height, width]])
results = processor.post_process_object_detection(
outputs=outputs, target_sizes=target_size)[0]
detections = sv.Detections.from_transformers(results)
sink.append(detections, {"frame_index": frame_index})
x_min | y_min | x_max | y_max | class_id | confidence | tracker_id | class_name | frame_index |
---|---|---|---|---|---|---|---|---|
2941.14 | 1269.31 | 3220.77 | 1500.67 | 2 | 0.8517 | | car | 0 |
944.889 | 899.641 | 1235.42 | 1308.80 | 7 | 0.6752 | | truck | 0 |
1439.78 | 1077.79 | 1621.27 | 1231.40 | 2 | 0.6450 | | car | 0 |
Save Detections as JSON¶
If you prefer to save the result in a .JSON
file instead of a .CSV
file, all you
need to do is replace
sv.CSVSink
with
sv.JSONSink
.
import supervision as sv
from inference import get_model
model = get_model(model_id="yolov8n-640")
frames_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)
with sv.JSONSink(<TARGET_CSV_PATH>) as sink:
for frame_index, frame in enumerate(frames_generator):
results = model.infer(image)[0]
detections = sv.Detections.from_inference(results)
sink.append(detections, {"frame_index": frame_index})
import supervision as sv
from ultralytics import YOLO
model = YOLO("yolov8n.pt")
frames_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)
with sv.JSONSink(<TARGET_CSV_PATH>) as sink:
for frame_index, frame in enumerate(frames_generator):
results = model(frame)[0]
detections = sv.Detections.from_ultralytics(results)
sink.append(detections, {"frame_index": frame_index})
import torch
import supervision as sv
from transformers import DetrImageProcessor, DetrForObjectDetection
processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")
frames_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)
with sv.JSONSink(<TARGET_CSV_PATH>) as sink:
for frame_index, frame in enumerate(frames_generator):
frame = sv.cv2_to_pillow(frame)
inputs = processor(images=frame, return_tensors="pt")
with torch.no_grad():
outputs = model(**inputs)
width, height = frame.size
target_size = torch.tensor([[height, width]])
results = processor.post_process_object_detection(
outputs=outputs, target_sizes=target_size)[0]
detections = sv.Detections.from_transformers(results)
sink.append(detections, {"frame_index": frame_index})