Skip to content

Keypoint Detection

The sv.KeyPoints class in the Supervision library standardizes results from various keypoint detection and pose estimation models into a consistent format. This class simplifies data manipulation and filtering, providing a uniform API for integration with Supervision keypoints annotators.

Use sv.KeyPoints.from_ultralytics method, which accepts YOLOv8 pose result.

import cv2
import supervision as sv
from ultralytics import YOLO

image = cv2.imread(<SOURCE_IMAGE_PATH>)
model = YOLO('yolov8s-pose.pt')

result = model(image)[0]
key_points = sv.KeyPoints.from_ultralytics(result)

Use sv.KeyPoints.from_inference method, which accepts Inference pose result.

import cv2
import supervision as sv
from inference import get_model

image = cv2.imread(<SOURCE_IMAGE_PATH>)
model = get_model(model_id=<POSE_MODEL_ID>, api_key=<ROBOFLOW_API_KEY>)

result = model.infer(image)[0]
key_points = sv.KeyPoints.from_inference(result)

Use sv.KeyPoints.from_mediapipe method, which accepts MediaPipe pose result.

import cv2
import mediapipe as mp
import supervision as sv

image = cv2.imread(<SOURCE_IMAGE_PATH>)
image_height, image_width, _ = image.shape
mediapipe_image = mp.Image(
    image_format=mp.ImageFormat.SRGB,
    data=cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

options = mp.tasks.vision.PoseLandmarkerOptions(
    base_options=mp.tasks.BaseOptions(
        model_asset_path="pose_landmarker_heavy.task"
    ),
    running_mode=mp.tasks.vision.RunningMode.IMAGE,
    num_poses=2)

PoseLandmarker = mp.tasks.vision.PoseLandmarker
with PoseLandmarker.create_from_options(options) as landmarker:
    pose_landmarker_result = landmarker.detect(mediapipe_image)

key_points = sv.KeyPoints.from_mediapipe(
    pose_landmarker_result, (image_width, image_height))

Attributes:

Name Type Description
xy ndarray

An array of shape (n, m, 2) containing the keypoint coordinates of n detected objects with m keypoints each, in format [x, y]

confidence Optional[ndarray]

An array of shape (n, m) containing the confidence score of each keypoint.

class_id Optional[ndarray]

An array of shape (n,) containing the class id of each detected object.

data Dict[str, Union[ndarray, List]]

A dictionary containing additional data where each key is a string representing the data type, and the value is either a NumPy array or a list of corresponding data.

Source code in supervision/keypoint/core.py
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
@dataclass
class KeyPoints:
    """
    The `sv.KeyPoints` class in the Supervision library standardizes results from
    various keypoint detection and pose estimation models into a consistent format. This
    class simplifies data manipulation and filtering, providing a uniform API for
    integration with Supervision [keypoints annotators](/keypoint/annotators).

    === "Ultralytics"

        Use [`sv.KeyPoints.from_ultralytics`](/keypoint/core/#supervision.keypoint.core.KeyPoints.from_ultralytics)
        method, which accepts [YOLOv8](https://github.com/ultralytics/ultralytics)
        pose result.

        ```python
        import cv2
        import supervision as sv
        from ultralytics import YOLO

        image = cv2.imread(<SOURCE_IMAGE_PATH>)
        model = YOLO('yolov8s-pose.pt')

        result = model(image)[0]
        key_points = sv.KeyPoints.from_ultralytics(result)
        ```

    === "Inference"

        Use [`sv.KeyPoints.from_inference`](/keypoint/core/#supervision.keypoint.core.KeyPoints.from_inference)
        method, which accepts [Inference](https://inference.roboflow.com/) pose result.

        ```python
        import cv2
        import supervision as sv
        from inference import get_model

        image = cv2.imread(<SOURCE_IMAGE_PATH>)
        model = get_model(model_id=<POSE_MODEL_ID>, api_key=<ROBOFLOW_API_KEY>)

        result = model.infer(image)[0]
        key_points = sv.KeyPoints.from_inference(result)
        ```

    === "MediaPipe"

        Use [`sv.KeyPoints.from_mediapipe`](/keypoint/core/#supervision.keypoint.core.KeyPoints.from_mediapipe)
        method, which accepts [MediaPipe](https://github.com/google-ai-edge/mediapipe)
        pose result.

        ```python
        import cv2
        import mediapipe as mp
        import supervision as sv

        image = cv2.imread(<SOURCE_IMAGE_PATH>)
        image_height, image_width, _ = image.shape
        mediapipe_image = mp.Image(
            image_format=mp.ImageFormat.SRGB,
            data=cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

        options = mp.tasks.vision.PoseLandmarkerOptions(
            base_options=mp.tasks.BaseOptions(
                model_asset_path="pose_landmarker_heavy.task"
            ),
            running_mode=mp.tasks.vision.RunningMode.IMAGE,
            num_poses=2)

        PoseLandmarker = mp.tasks.vision.PoseLandmarker
        with PoseLandmarker.create_from_options(options) as landmarker:
            pose_landmarker_result = landmarker.detect(mediapipe_image)

        key_points = sv.KeyPoints.from_mediapipe(
            pose_landmarker_result, (image_width, image_height))
        ```

    Attributes:
        xy (np.ndarray): An array of shape `(n, m, 2)` containing the keypoint
            coordinates of `n` detected objects with `m` keypoints each, in
            format `[x, y]`.
        class_id (Optional[np.ndarray]): An array of shape `(n,)` containing
            the class id of each detected object.
        confidence (Optional[np.ndarray]): An array of shape `(n, m)` containing
            the confidence score of each keypoint.
        data (Dict[str, Union[np.ndarray, List]]): A dictionary containing additional
            data where each key is a string representing the data type, and the value
            is either a NumPy array or a list of corresponding data.
    """  # noqa: E501 // docs

    # NOTE: field order is part of the interface - `from_ultralytics` and external
    # callers may construct the object positionally.
    xy: npt.NDArray[np.float32]
    class_id: Optional[npt.NDArray[np.int_]] = None
    confidence: Optional[npt.NDArray[np.float32]] = None
    data: Dict[str, Union[npt.NDArray[Any], List]] = field(default_factory=dict)

    def __post_init__(self) -> None:
        """
        Pass all fields to `validate_keypoints_fields` right after construction.
        """
        validate_keypoints_fields(
            xy=self.xy,
            confidence=self.confidence,
            class_id=self.class_id,
            data=self.data,
        )

    def __len__(self) -> int:
        """
        Returns the number of entries along the first axis of `xy`, i.e. the
        number of detected objects stored in the `sv.KeyPoints` object.
        """
        return len(self.xy)

    def __iter__(
        self,
    ) -> Iterator[
        Tuple[
            np.ndarray,
            Optional[np.ndarray],
            Optional[int],
            Dict[str, Union[np.ndarray, List]],
        ]
    ]:
        """
        Iterates over the `sv.KeyPoints` object and yields a tuple of
        `(xy, confidence, class_id, data)` for each entry along the first axis.
        """
        for i, xy in enumerate(self.xy):
            yield (
                xy,
                self.confidence[i] if self.confidence is not None else None,
                self.class_id[i] if self.class_id is not None else None,
                get_data_item(self.data, i),
            )

    def __eq__(self, other: object) -> bool:
        """
        Compare two `sv.KeyPoints` objects field by field.

        Returns `NotImplemented` for operands that are not `sv.KeyPoints`, so that
        Python can fall back to its default comparison instead of raising
        `AttributeError`.
        """
        if not isinstance(other, KeyPoints):
            return NotImplemented
        return bool(
            np.array_equal(self.xy, other.xy)
            and np.array_equal(self.class_id, other.class_id)
            and np.array_equal(self.confidence, other.confidence)
            and is_data_equal(self.data, other.data)
        )

    @classmethod
    def from_inference(cls, inference_result: Union[dict, Any]) -> KeyPoints:
        """
        Create a `sv.KeyPoints` object from the [Roboflow](https://roboflow.com/)
        API inference result or the [Inference](https://inference.roboflow.com/)
        package results.

        Args:
            inference_result (dict, any): The result from the
                Roboflow API or Inference package containing predictions with keypoints.

        Returns:
            A `sv.KeyPoints` object containing the keypoint coordinates, class IDs,
                and class names, and confidences of each keypoint.

        Raises:
            ValueError: If a list of results is passed instead of a single result.

        Examples:
            ```python
            import cv2
            import supervision as sv
            from inference import get_model

            image = cv2.imread(<SOURCE_IMAGE_PATH>)
            model = get_model(model_id=<POSE_MODEL_ID>, api_key=<ROBOFLOW_API_KEY>)

            result = model.infer(image)[0]
            key_points = sv.KeyPoints.from_inference(result)
            ```

            ```python
            import cv2
            import supervision as sv
            from inference_sdk import InferenceHTTPClient

            image = cv2.imread(<SOURCE_IMAGE_PATH>)
            client = InferenceHTTPClient(
                api_url="https://detect.roboflow.com",
                api_key=<ROBOFLOW_API_KEY>
            )

            result = client.infer(image, model_id=<POSE_MODEL_ID>)
            key_points = sv.KeyPoints.from_inference(result)
            ```
        """
        if isinstance(inference_result, list):
            raise ValueError(
                "from_inference() operates on a single result at a time. "
                "You can retrieve it like so:  inference_result = model.infer(image)[0]"
            )

        # Response objects exposing `.dict()` are converted to a plain dict;
        # plain dicts have no such method and are used unchanged.
        with suppress(AttributeError):
            inference_result = inference_result.dict(exclude_none=True, by_alias=True)

        if not inference_result.get("predictions"):
            return cls.empty()

        xy = []
        confidence = []
        class_id = []
        class_names = []

        for prediction in inference_result["predictions"]:
            keypoints = prediction["keypoints"]
            xy.append([[keypoint["x"], keypoint["y"]] for keypoint in keypoints])
            confidence.append([keypoint["confidence"] for keypoint in keypoints])

            class_id.append(prediction["class_id"])
            class_names.append(prediction["class"])

        data = {CLASS_NAME_DATA_FIELD: np.array(class_names)}

        return cls(
            xy=np.array(xy, dtype=np.float32),
            confidence=np.array(confidence, dtype=np.float32),
            class_id=np.array(class_id, dtype=int),
            data=data,
        )

    @classmethod
    def from_mediapipe(
        cls, mediapipe_results, resolution_wh: Tuple[int, int]
    ) -> KeyPoints:
        """
        Creates a `sv.KeyPoints` instance from a
        [MediaPipe](https://github.com/google-ai-edge/mediapipe)
        pose landmark detection inference result.

        Args:
            mediapipe_results (Union[PoseLandmarkerResult, FaceLandmarkerResult, SolutionOutputs]):
                The output results from Mediapipe. It supports pose and face landmarks
                from `PoseLandmarker`, `FaceLandmarker` and the legacy ones
                from `Pose` and `FaceMesh`.
            resolution_wh (Tuple[int, int]): A tuple of the form `(width, height)`
                representing the resolution of the frame.

        Returns:
            A `sv.KeyPoints` object containing the keypoint coordinates and
                confidences of each keypoint.

        Raises:
            ValueError: If `mediapipe_results` exposes none of `pose_landmarks`,
                `face_landmarks` or `multi_face_landmarks`.

        !!! tip
            Before you start, download model bundles from the
            [MediaPipe website](https://ai.google.dev/edge/mediapipe/solutions/vision/pose_landmarker/index#models).

        Examples:
            ```python
            import cv2
            import mediapipe as mp
            import supervision as sv

            image = cv2.imread(<SOURCE_IMAGE_PATH>)
            image_height, image_width, _ = image.shape
            mediapipe_image = mp.Image(
                image_format=mp.ImageFormat.SRGB,
                data=cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

            options = mp.tasks.vision.PoseLandmarkerOptions(
                base_options=mp.tasks.BaseOptions(
                    model_asset_path="pose_landmarker_heavy.task"
                ),
                running_mode=mp.tasks.vision.RunningMode.IMAGE,
                num_poses=2)

            PoseLandmarker = mp.tasks.vision.PoseLandmarker
            with PoseLandmarker.create_from_options(options) as landmarker:
                pose_landmarker_result = landmarker.detect(mediapipe_image)

            key_points = sv.KeyPoints.from_mediapipe(
                pose_landmarker_result, (image_width, image_height))
            ```

            ```python
            import cv2
            import mediapipe as mp
            import supervision as sv

            image = cv2.imread(<SOURCE_IMAGE_PATH>)
            image_height, image_width, _ = image.shape
            mediapipe_image = mp.Image(
                image_format=mp.ImageFormat.SRGB,
                data=cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

            options = mp.tasks.vision.FaceLandmarkerOptions(
                base_options=mp.tasks.BaseOptions(
                    model_asset_path="face_landmarker.task"
                ),
                output_face_blendshapes=True,
                output_facial_transformation_matrixes=True,
                num_faces=2)

            FaceLandmarker = mp.tasks.vision.FaceLandmarker
            with FaceLandmarker.create_from_options(options) as landmarker:
                face_landmarker_result = landmarker.detect(mediapipe_image)

            key_points = sv.KeyPoints.from_mediapipe(
                face_landmarker_result, (image_width, image_height))
            ```
        """  # noqa: E501 // docs
        if hasattr(mediapipe_results, "pose_landmarks"):
            pose_landmarks = mediapipe_results.pose_landmarks
            if isinstance(pose_landmarks, list):
                results = pose_landmarks
            elif pose_landmarks is None:
                results = []
            else:
                # Non-list result: a single object whose `.landmark` attribute
                # holds the landmarks; wrap it so it looks like one pose.
                results = [list(pose_landmarks.landmark)]
        elif hasattr(mediapipe_results, "face_landmarks"):
            results = mediapipe_results.face_landmarks
        elif hasattr(mediapipe_results, "multi_face_landmarks"):
            if mediapipe_results.multi_face_landmarks is None:
                results = []
            else:
                results = [
                    face_landmark.landmark
                    for face_landmark in mediapipe_results.multi_face_landmarks
                ]
        else:
            # Previously `results` stayed unbound here and the method failed
            # with an opaque UnboundLocalError.
            raise ValueError(
                "Unsupported MediaPipe result: expected an object with one of "
                "`pose_landmarks`, `face_landmarks` or `multi_face_landmarks`."
            )

        if len(results) == 0:
            return cls.empty()

        width, height = resolution_wh[0], resolution_wh[1]
        xy = []
        confidence = []
        for pose in results:
            # Landmark coordinates are scaled by the frame resolution to obtain
            # pixel coordinates.
            xy.append([[landmark.x * width, landmark.y * height] for landmark in pose])
            confidence.append([landmark.visibility for landmark in pose])

        return cls(
            xy=np.array(xy, dtype=np.float32),
            confidence=np.array(confidence, dtype=np.float32),
        )

    @classmethod
    def from_ultralytics(cls, ultralytics_results) -> KeyPoints:
        """
        Creates a `sv.KeyPoints` instance from a
        [YOLOv8](https://github.com/ultralytics/ultralytics) pose inference result.

        Args:
            ultralytics_results (ultralytics.engine.results.Results):
                The output Results instance from YOLOv8. Its `keypoints`, `boxes`
                and `names` attributes are read.

        Returns:
            A `sv.KeyPoints` object containing the keypoint coordinates, class IDs,
                and class names, and confidences of each keypoint.

        Examples:
            ```python
            import cv2
            import supervision as sv
            from ultralytics import YOLO

            image = cv2.imread(<SOURCE_IMAGE_PATH>)
            model = YOLO('yolov8s-pose.pt')

            result = model(image)[0]
            key_points = sv.KeyPoints.from_ultralytics(result)
            ```
        """
        if ultralytics_results.keypoints.xy.numel() == 0:
            return cls.empty()

        xy = ultralytics_results.keypoints.xy.cpu().numpy()
        class_id = ultralytics_results.boxes.cls.cpu().numpy().astype(int)
        class_names = np.array([ultralytics_results.names[i] for i in class_id])

        confidence = ultralytics_results.keypoints.conf.cpu().numpy()
        data = {CLASS_NAME_DATA_FIELD: class_names}
        return cls(xy=xy, class_id=class_id, confidence=confidence, data=data)

    @classmethod
    def from_yolo_nas(cls, yolo_nas_results) -> KeyPoints:
        """
        Create a `sv.KeyPoints` instance from a [YOLO-NAS](https://github.com/Deci-AI/super-gradients/blob/master/YOLONAS-POSE.md)
        pose inference results.

        Args:
            yolo_nas_results (ImagePoseEstimationPrediction): The output object from
                YOLO NAS.

        Returns:
            A `sv.KeyPoints` object containing the keypoint coordinates and
                confidences of each keypoint, plus class IDs and class names when
                the prediction provides them.

        Examples:
            ```python
            import cv2
            import torch
            import supervision as sv
            import super_gradients

            image = cv2.imread(<SOURCE_IMAGE_PATH>)

            device = "cuda" if torch.cuda.is_available() else "cpu"
            model = super_gradients.training.models.get(
                "yolo_nas_pose_s", pretrained_weights="coco_pose").to(device)

            results = model.predict(image, conf=0.1)
            key_points = sv.KeyPoints.from_yolo_nas(results)
            ```
        """  # noqa: E501 // docs
        poses = yolo_nas_results.prediction.poses
        if len(poses) == 0:
            return cls.empty()

        # The last axis of `poses` holds x, y and the confidence score.
        xy = poses[:, :, :2]
        confidence = poses[:, :, 2]

        # yolo_nas_results treats params differently.
        # prediction.labels may not exist, whereas class_names might be None
        if hasattr(yolo_nas_results.prediction, "labels"):
            class_id = yolo_nas_results.prediction.labels  # np.array[int]
        else:
            class_id = None

        data = {}
        if class_id is not None and yolo_nas_results.class_names is not None:
            data[CLASS_NAME_DATA_FIELD] = [
                yolo_nas_results.class_names[c_id] for c_id in class_id
            ]

        return cls(
            xy=xy,
            confidence=confidence,
            class_id=class_id,
            data=data,
        )

    @classmethod
    def from_detectron2(cls, detectron2_results) -> KeyPoints:
        """
        Create a `sv.KeyPoints` object from the
        [Detectron2](https://github.com/facebookresearch/detectron2) inference result.

        Args:
            detectron2_results: The output of a
                Detectron2 model containing instances with prediction data.

        Returns:
            A `sv.KeyPoints` object containing the keypoint coordinates, class IDs,
                and confidences of each keypoint. An empty object is returned
                when the instances carry no `pred_keypoints`.

        Example:
            ```python
            import cv2
            import supervision as sv
            from detectron2.engine import DefaultPredictor
            from detectron2.config import get_cfg


            image = cv2.imread(<SOURCE_IMAGE_PATH>)
            cfg = get_cfg()
            cfg.merge_from_file(<CONFIG_PATH>)
            cfg.MODEL.WEIGHTS = <WEIGHTS_PATH>
            predictor = DefaultPredictor(cfg)

            result = predictor(image)
            keypoints = sv.KeyPoints.from_detectron2(result)
            ```
        """
        instances = detectron2_results["instances"]
        if not hasattr(instances, "pred_keypoints"):
            return cls.empty()

        # Convert once; the original re-ran `.cpu().numpy()` for every field.
        keypoints = instances.pred_keypoints.cpu().numpy()
        if keypoints.size == 0:
            return cls.empty()

        return cls(
            xy=keypoints[:, :, :2],
            confidence=keypoints[:, :, 2],
            class_id=instances.pred_classes.cpu().numpy().astype(int),
        )

    def __getitem__(
        self, index: Union[int, slice, List[int], np.ndarray, str]
    ) -> Union[KeyPoints, List, np.ndarray, None]:
        """
        Get a subset of the `sv.KeyPoints` object or access an item from its data field.

        When provided with an integer, slice, list of integers, or a numpy array, this
        method returns a new `sv.KeyPoints` object that represents a subset of the
        original `sv.KeyPoints`. When provided with a string, it accesses the
        corresponding item in the data dictionary.

        Args:
            index (Union[int, slice, List[int], np.ndarray, str]): The index, indices,
                or key to access a subset of the `sv.KeyPoints` or an item from the
                data.

        Returns:
            A subset of the `sv.KeyPoints` object or an item from the data field
                (`None` when the key is missing).

        Examples:
            ```python
            import supervision as sv

            key_points = sv.KeyPoints.from_ultralytics(<ULTRALYTICS_RESULT>)

            # access the first keypoint using an integer index
            key_points[0]

            # access the first 10 keypoints using index slice
            key_points[0:10]

            # access selected keypoints using a list of indices
            key_points[[0, 2, 4]]

            # access keypoints with selected class_id
            key_points[key_points.class_id == 0]

            # access keypoints with confidence greater than 0.5
            key_points[key_points.confidence > 0.5]
            ```
        """
        if isinstance(index, str):
            return self.data.get(index)
        # A bare integer (Python or NumPy scalar) would drop the leading axis;
        # wrapping it in a list keeps every field's first axis in place.
        if isinstance(index, (int, np.integer)):
            index = [index]
        return KeyPoints(
            xy=self.xy[index],
            confidence=self.confidence[index] if self.confidence is not None else None,
            class_id=self.class_id[index] if self.class_id is not None else None,
            data=get_data_item(self.data, index),
        )

    def __setitem__(self, key: str, value: Union[np.ndarray, List]):
        """
        Set a value in the data dictionary of the `sv.KeyPoints` object.

        Args:
            key (str): The key in the data dictionary to set.
            value (Union[np.ndarray, List]): The value to set for the key. Lists
                are converted to NumPy arrays before being stored.

        Raises:
            TypeError: If `value` is neither a `np.ndarray` nor a `list`.

        Examples:
            ```python
            import cv2
            import supervision as sv
            from ultralytics import YOLO

            image = cv2.imread(<SOURCE_IMAGE_PATH>)
            model = YOLO('yolov8s-pose.pt')

            result = model(image)[0]
            keypoints = sv.KeyPoints.from_ultralytics(result)

            keypoints['class_name'] = [
                 model.model.names[class_id]
                 for class_id
                 in keypoints.class_id
             ]
            ```
        """
        if not isinstance(value, (np.ndarray, list)):
            raise TypeError("Value must be a np.ndarray or a list")

        if isinstance(value, list):
            value = np.array(value)

        self.data[key] = value

    @classmethod
    def empty(cls) -> KeyPoints:
        """
        Create an empty Keypoints object with no keypoints.

        Returns:
            An empty `sv.KeyPoints` object whose `xy` has shape `(0, 0, 2)`.

        Examples:
            ```python
            import supervision as sv

            key_points = sv.KeyPoints.empty()
            ```
        """
        return cls(xy=np.empty((0, 0, 2), dtype=np.float32))

Functions

__getitem__(index)

Get a subset of the sv.KeyPoints object or access an item from its data field.

When provided with an integer, slice, list of integers, or a numpy array, this method returns a new sv.KeyPoints object that represents a subset of the original sv.KeyPoints. When provided with a string, it accesses the corresponding item in the data dictionary.

Parameters:

Name Type Description Default
index Union[int, slice, List[int], ndarray, str]

The index, indices, or key to access a subset of the sv.KeyPoints or an item from the data.

required

Returns:

Type Description
Union[KeyPoints, List, ndarray, None]

A subset of the sv.KeyPoints object or an item from the data field.

Examples:

import supervision as sv

key_points = sv.KeyPoints()

# access the first keypoint using an integer index
key_points[0]

# access the first 10 keypoints using index slice
key_points[0:10]

# access selected keypoints using a list of indices
key_points[[0, 2, 4]]

# access keypoints with selected class_id
key_points[key_points.class_id == 0]

# access keypoints with confidence greater than 0.5
key_points[key_points.confidence > 0.5]
Source code in supervision/keypoint/core.py
def __getitem__(
    self, index: Union[int, slice, List[int], np.ndarray, str]
) -> Union[KeyPoints, List, np.ndarray, None]:
    """
    Get a subset of the `sv.KeyPoints` object or access an item from its data field.

    When provided with an integer, slice, list of integers, or a numpy array, this
    method returns a new `sv.KeyPoints` object that represents a subset of the
    original `sv.KeyPoints`. When provided with a string, it accesses the
    corresponding item in the data dictionary.

    Args:
        index (Union[int, slice, List[int], np.ndarray, str]): The index, indices,
            or key to access a subset of the `sv.KeyPoints` or an item from the
            data.

    Returns:
        A subset of the `sv.KeyPoints` object or an item from the data field
            (`None` when the key is missing).

    Examples:
        ```python
        import supervision as sv

        key_points = sv.KeyPoints.from_ultralytics(<ULTRALYTICS_RESULT>)

        # access the first keypoint using an integer index
        key_points[0]

        # access the first 10 keypoints using index slice
        key_points[0:10]

        # access selected keypoints using a list of indices
        key_points[[0, 2, 4]]

        # access keypoints with selected class_id
        key_points[key_points.class_id == 0]

        # access keypoints with confidence greater than 0.5
        key_points[key_points.confidence > 0.5]
        ```
    """
    if isinstance(index, str):
        return self.data.get(index)
    # A bare integer (Python or NumPy scalar) would drop the leading axis;
    # wrapping it in a list keeps every field's first axis in place.
    if isinstance(index, (int, np.integer)):
        index = [index]
    return KeyPoints(
        xy=self.xy[index],
        confidence=self.confidence[index] if self.confidence is not None else None,
        class_id=self.class_id[index] if self.class_id is not None else None,
        data=get_data_item(self.data, index),
    )

__iter__()

Iterates over the sv.KeyPoints object and yields a tuple of (xy, confidence, class_id, data) for each keypoint detection.

Source code in supervision/keypoint/core.py
def __iter__(
    self,
) -> Iterator[
    Tuple[
        np.ndarray,
        Optional[np.ndarray],
        Optional[int],
        Dict[str, Union[np.ndarray, List]],
    ]
]:
    """
    Iterates over the `sv.KeyPoints` object and yields a tuple of
    `(xy, confidence, class_id, data)` for each entry along the first axis.
    """
    for i, xy in enumerate(self.xy):
        yield (
            xy,
            self.confidence[i] if self.confidence is not None else None,
            self.class_id[i] if self.class_id is not None else None,
            get_data_item(self.data, i),
        )

__len__()

Returns the number of keypoints in the sv.KeyPoints object.

Source code in supervision/keypoint/core.py
def __len__(self) -> int:
    """
    Report how many entries `xy` holds along its first axis.
    """
    entry_count = len(self.xy)
    return entry_count

__setitem__(key, value)

Set a value in the data dictionary of the sv.KeyPoints object.

Parameters:

Name Type Description Default
key str

The key in the data dictionary to set.

required
value Union[ndarray, List]

The value to set for the key.

required

Examples:

import cv2
import supervision as sv
from ultralytics import YOLO

image = cv2.imread(<SOURCE_IMAGE_PATH>)
model = YOLO('yolov8s.pt')

result = model(image)[0]
keypoints = sv.KeyPoints.from_ultralytics(result)

keypoints['class_name'] = [
     model.model.names[class_id]
     for class_id
     in keypoints.class_id
 ]
Source code in supervision/keypoint/core.py
def __setitem__(self, key: str, value: Union[np.ndarray, List]):
    """
    Store `value` under `key` in the data dictionary of the `sv.KeyPoints` object.

    Args:
        key (str): The key in the data dictionary to set.
        value (Union[np.ndarray, List]): The value to set for the key. A list
            is converted to a NumPy array before it is stored.

    Raises:
        TypeError: If `value` is neither a `np.ndarray` nor a `list`.

    Examples:
        ```python
        import cv2
        import supervision as sv
        from ultralytics import YOLO

        image = cv2.imread(<SOURCE_IMAGE_PATH>)
        model = YOLO('yolov8s-pose.pt')

        result = model(image)[0]
        keypoints = sv.KeyPoints.from_ultralytics(result)

        keypoints['class_name'] = [
             model.model.names[class_id]
             for class_id
             in keypoints.class_id
         ]
        ```
    """
    if isinstance(value, list):
        stored = np.array(value)
    elif isinstance(value, np.ndarray):
        stored = value
    else:
        raise TypeError("Value must be a np.ndarray or a list")

    self.data[key] = stored

empty() classmethod

Create an empty Keypoints object with no keypoints.

Returns:

Type Description
KeyPoints

An empty sv.KeyPoints object.

Examples:

import supervision as sv

key_points = sv.KeyPoints.empty()
Source code in supervision/keypoint/core.py
@classmethod
def empty(cls) -> KeyPoints:
    """
    Build a `sv.KeyPoints` object that holds no keypoints.

    The `xy` array passed to the constructor has shape `(0, 0, 2)` and
    dtype `np.float32`.

    Returns:
        An empty `sv.KeyPoints` object.

    Examples:
        ```python
        import supervision as sv

        key_points = sv.KeyPoints.empty()
        ```
    """
    no_keypoints = np.empty(shape=(0, 0, 2), dtype=np.float32)
    return cls(xy=no_keypoints)

from_detectron2(detectron2_results) classmethod

Create a sv.KeyPoints object from the Detectron2 inference result.

Parameters:

Name Type Description Default
detectron2_results

The output of a Detectron2 model containing instances with prediction data.

required

Returns:

Type Description
KeyPoints

A sv.KeyPoints object containing the keypoint coordinates, class IDs, and confidences of each keypoint.

Example
import cv2
import supervision as sv
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg


image = cv2.imread(<SOURCE_IMAGE_PATH>)
cfg = get_cfg()
cfg.merge_from_file(<CONFIG_PATH>)
cfg.MODEL.WEIGHTS = <WEIGHTS_PATH>
predictor = DefaultPredictor(cfg)

result = predictor(image)
keypoints = sv.KeyPoints.from_detectron2(result)
Source code in supervision/keypoint/core.py
@classmethod
def from_detectron2(cls, detectron2_results) -> KeyPoints:
    """
    Create a `sv.KeyPoints` object from the
    [Detectron2](https://github.com/facebookresearch/detectron2) inference result.

    Args:
        detectron2_results: The output of a
            Detectron2 model containing instances with prediction data.

    Returns:
        A `sv.KeyPoints` object containing the keypoint coordinates, class IDs,
            and confidences of each keypoint. An empty `sv.KeyPoints` object
            is returned when the instances carry no `pred_keypoints`
            attribute or when that attribute holds no elements.

    Example:
        ```python
        import cv2
        import supervision as sv
        from detectron2.engine import DefaultPredictor
        from detectron2.config import get_cfg


        image = cv2.imread(<SOURCE_IMAGE_PATH>)
        cfg = get_cfg()
        cfg.merge_from_file(<CONFIG_PATH>)
        cfg.MODEL.WEIGHTS = <WEIGHTS_PATH>
        predictor = DefaultPredictor(cfg)

        result = predictor(image)
        keypoints = sv.KeyPoints.from_detectron2(result)
        ```
    """
    instances = detectron2_results["instances"]
    if not hasattr(instances, "pred_keypoints"):
        return cls.empty()

    # Convert the keypoint tensor to NumPy once and slice the result,
    # instead of repeating the conversion for every field.
    keypoints = instances.pred_keypoints.cpu().numpy()
    if keypoints.size == 0:
        return cls.empty()

    return cls(
        # The last axis is split into two coordinate channels and one
        # confidence channel.
        xy=keypoints[:, :, :2],
        confidence=keypoints[:, :, 2],
        class_id=instances.pred_classes.cpu().numpy().astype(int),
    )

from_inference(inference_result) classmethod

Create a sv.KeyPoints object from the Roboflow API inference result or the Inference package results.

Parameters:

Name Type Description Default
inference_result (dict, any)

The result from the Roboflow API or Inference package containing predictions with keypoints.

required

Returns:

Type Description
KeyPoints

A sv.KeyPoints object containing the keypoint coordinates, class IDs, class names, and confidences of each keypoint.

Examples:

import cv2
import supervision as sv
from inference import get_model

image = cv2.imread(<SOURCE_IMAGE_PATH>)
model = get_model(model_id=<POSE_MODEL_ID>, api_key=<ROBOFLOW_API_KEY>)

result = model.infer(image)[0]
key_points = sv.KeyPoints.from_inference(result)
import cv2
import supervision as sv
from inference_sdk import InferenceHTTPClient

image = cv2.imread(<SOURCE_IMAGE_PATH>)
client = InferenceHTTPClient(
    api_url="https://detect.roboflow.com",
    api_key=<ROBOFLOW_API_KEY>
)

result = client.infer(image, model_id=<POSE_MODEL_ID>)
key_points = sv.KeyPoints.from_inference(result)
Source code in supervision/keypoint/core.py
@classmethod
def from_inference(cls, inference_result: Union[dict, Any]) -> KeyPoints:
    """
    Create a `sv.KeyPoints` object from the [Roboflow](https://roboflow.com/)
    API inference result or the [Inference](https://inference.roboflow.com/)
    package results.

    Args:
        inference_result (dict, any): The result from the
            Roboflow API or Inference package containing predictions with keypoints.

    Returns:
        A `sv.KeyPoints` object containing the keypoint coordinates, class IDs,
            class names, and confidences of each keypoint. An empty
            `sv.KeyPoints` object is returned when the result has no
            predictions.

    Raises:
        ValueError: If `inference_result` is a list rather than a single result.

    Examples:
        ```python
        import cv2
        import supervision as sv
        from inference import get_model

        image = cv2.imread(<SOURCE_IMAGE_PATH>)
        model = get_model(model_id=<POSE_MODEL_ID>, api_key=<ROBOFLOW_API_KEY>)

        result = model.infer(image)[0]
        key_points = sv.KeyPoints.from_inference(result)
        ```

        ```python
        import cv2
        import supervision as sv
        from inference_sdk import InferenceHTTPClient

        image = cv2.imread(<SOURCE_IMAGE_PATH>)
        client = InferenceHTTPClient(
            api_url="https://detect.roboflow.com",
            api_key=<ROBOFLOW_API_KEY>
        )

        result = client.infer(image, model_id=<POSE_MODEL_ID>)
        key_points = sv.KeyPoints.from_inference(result)
        ```
    """
    if isinstance(inference_result, list):
        # The trailing space keeps the two sentences apart once the adjacent
        # literals are concatenated.
        raise ValueError(
            "from_inference() operates on a single result at a time. "
            "You can retrieve it like so:  inference_result = model.infer(image)[0]"
        )

    # Results that expose `.dict()` are converted to a plain dictionary;
    # inputs without that method (such as a `dict`) are used as they are.
    with suppress(AttributeError):
        inference_result = inference_result.dict(exclude_none=True, by_alias=True)

    if not inference_result.get("predictions"):
        return cls.empty()

    xy = []
    confidence = []
    class_id = []
    class_names = []

    for prediction in inference_result["predictions"]:
        keypoints = prediction["keypoints"]
        xy.append([[keypoint["x"], keypoint["y"]] for keypoint in keypoints])
        confidence.append([keypoint["confidence"] for keypoint in keypoints])

        class_id.append(prediction["class_id"])
        class_names.append(prediction["class"])

    data = {CLASS_NAME_DATA_FIELD: np.array(class_names)}

    return cls(
        xy=np.array(xy, dtype=np.float32),
        confidence=np.array(confidence, dtype=np.float32),
        class_id=np.array(class_id, dtype=int),
        data=data,
    )

from_mediapipe(mediapipe_results, resolution_wh) classmethod

Creates a sv.KeyPoints instance from a MediaPipe pose or face landmark detection inference result.

Parameters:

Name Type Description Default
mediapipe_results Union[PoseLandmarkerResult, FaceLandmarkerResult, SolutionOutputs]

The output results from MediaPipe. It supports pose and face landmarks from PoseLandmarker, FaceLandmarker and the legacy ones from Pose and FaceMesh.

required
resolution_wh Tuple[int, int]

A tuple of the form (width, height) representing the resolution of the frame.

required

Returns:

Type Description
KeyPoints

A sv.KeyPoints object containing the keypoint coordinates and confidences of each keypoint.

Tip

Before you start, download model bundles from the MediaPipe website.

Examples:

import cv2
import mediapipe as mp
import supervision as sv

image = cv2.imread(<SOURCE_IMAGE_PATH>)
image_height, image_width, _ = image.shape
mediapipe_image = mp.Image(
    image_format=mp.ImageFormat.SRGB,
    data=cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

options = mp.tasks.vision.PoseLandmarkerOptions(
    base_options=mp.tasks.BaseOptions(
        model_asset_path="pose_landmarker_heavy.task"
    ),
    running_mode=mp.tasks.vision.RunningMode.IMAGE,
    num_poses=2)

PoseLandmarker = mp.tasks.vision.PoseLandmarker
with PoseLandmarker.create_from_options(options) as landmarker:
    pose_landmarker_result = landmarker.detect(mediapipe_image)

key_points = sv.KeyPoints.from_mediapipe(
    pose_landmarker_result, (image_width, image_height))
import cv2
import mediapipe as mp
import supervision as sv

image = cv2.imread(<SOURCE_IMAGE_PATH>)
image_height, image_width, _ = image.shape
mediapipe_image = mp.Image(
    image_format=mp.ImageFormat.SRGB,
    data=cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

options = mp.tasks.vision.FaceLandmarkerOptions(
    base_options=mp.tasks.BaseOptions(
        model_asset_path="face_landmarker.task"
    ),
    output_face_blendshapes=True,
    output_facial_transformation_matrixes=True,
    num_faces=2)

FaceLandmarker = mp.tasks.vision.FaceLandmarker
with FaceLandmarker.create_from_options(options) as landmarker:
    face_landmarker_result = landmarker.detect(mediapipe_image)

key_points = sv.KeyPoints.from_mediapipe(
    face_landmarker_result, (image_width, image_height))
Source code in supervision/keypoint/core.py
@classmethod
def from_mediapipe(
    cls, mediapipe_results, resolution_wh: Tuple[int, int]
) -> KeyPoints:
    """
    Creates a `sv.KeyPoints` instance from a
    [MediaPipe](https://github.com/google-ai-edge/mediapipe)
    pose or face landmark detection inference result.

    Args:
        mediapipe_results (Union[PoseLandmarkerResult, FaceLandmarkerResult, SolutionOutputs]):
            The output results from MediaPipe. It supports pose and face landmarks
            from `PoseLandmarker`, `FaceLandmarker` and the legacy ones
            from `Pose` and `FaceMesh`.
        resolution_wh (Tuple[int, int]): A tuple of the form `(width, height)`
            representing the resolution of the frame.

    Returns:
        A `sv.KeyPoints` object containing the keypoint coordinates and
            confidences of each keypoint. An empty `sv.KeyPoints` object is
            returned when the result holds no landmarks.

    Raises:
        TypeError: If `mediapipe_results` has none of the `pose_landmarks`,
            `face_landmarks` or `multi_face_landmarks` attributes.

    !!! tip
        Before you start, download model bundles from the
        [MediaPipe website](https://ai.google.dev/edge/mediapipe/solutions/vision/pose_landmarker/index#models).

    Examples:
        ```python
        import cv2
        import mediapipe as mp
        import supervision as sv

        image = cv2.imread(<SOURCE_IMAGE_PATH>)
        image_height, image_width, _ = image.shape
        mediapipe_image = mp.Image(
            image_format=mp.ImageFormat.SRGB,
            data=cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

        options = mp.tasks.vision.PoseLandmarkerOptions(
            base_options=mp.tasks.BaseOptions(
                model_asset_path="pose_landmarker_heavy.task"
            ),
            running_mode=mp.tasks.vision.RunningMode.IMAGE,
            num_poses=2)

        PoseLandmarker = mp.tasks.vision.PoseLandmarker
        with PoseLandmarker.create_from_options(options) as landmarker:
            pose_landmarker_result = landmarker.detect(mediapipe_image)

        key_points = sv.KeyPoints.from_mediapipe(
            pose_landmarker_result, (image_width, image_height))
        ```

        ```python
        import cv2
        import mediapipe as mp
        import supervision as sv

        image = cv2.imread(<SOURCE_IMAGE_PATH>)
        image_height, image_width, _ = image.shape
        mediapipe_image = mp.Image(
            image_format=mp.ImageFormat.SRGB,
            data=cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

        options = mp.tasks.vision.FaceLandmarkerOptions(
            base_options=mp.tasks.BaseOptions(
                model_asset_path="face_landmarker.task"
            ),
            output_face_blendshapes=True,
            output_facial_transformation_matrixes=True,
            num_faces=2)

        FaceLandmarker = mp.tasks.vision.FaceLandmarker
        with FaceLandmarker.create_from_options(options) as landmarker:
            face_landmarker_result = landmarker.detect(mediapipe_image)

        key_points = sv.KeyPoints.from_mediapipe(
            face_landmarker_result, (image_width, image_height))
        ```
    """  # noqa: E501 // docs
    if hasattr(mediapipe_results, "pose_landmarks"):
        results = mediapipe_results.pose_landmarks
        if not isinstance(results, list):
            # A non-list value is either `None` (nothing detected) or a single
            # container exposing `.landmark`; presumably the legacy `Pose`
            # output. It is wrapped so that it looks like a one-pose list.
            results = [] if results is None else [list(results.landmark)]
    elif hasattr(mediapipe_results, "face_landmarks"):
        results = mediapipe_results.face_landmarks
    elif hasattr(mediapipe_results, "multi_face_landmarks"):
        if mediapipe_results.multi_face_landmarks is None:
            results = []
        else:
            results = [
                face_landmark.landmark
                for face_landmark in mediapipe_results.multi_face_landmarks
            ]
    else:
        # Without this branch `results` would stay unbound and the code
        # below would fail with an opaque `UnboundLocalError`.
        raise TypeError(
            "Unsupported MediaPipe result: expected an object with a "
            "`pose_landmarks`, `face_landmarks` or `multi_face_landmarks` "
            f"attribute, got {type(mediapipe_results).__name__}."
        )

    if len(results) == 0:
        return cls.empty()

    width, height = resolution_wh[0], resolution_wh[1]
    xy = []
    confidence = []
    for pose in results:
        prediction_xy = []
        prediction_confidence = []
        for landmark in pose:
            # Landmark coordinates are multiplied by the frame size to
            # obtain the stored `xy` values.
            prediction_xy.append([landmark.x * width, landmark.y * height])
            prediction_confidence.append(landmark.visibility)

        xy.append(prediction_xy)
        confidence.append(prediction_confidence)

    return cls(
        xy=np.array(xy, dtype=np.float32),
        confidence=np.array(confidence, dtype=np.float32),
    )

from_ultralytics(ultralytics_results) classmethod

Creates a sv.KeyPoints instance from a YOLOv8 pose inference result.

Parameters:

Name Type Description Default
ultralytics_results Keypoints

The output Results instance from YOLOv8

required

Returns:

Type Description
KeyPoints

A sv.KeyPoints object containing the keypoint coordinates, class IDs, class names, and confidences of each keypoint.

Examples:

import cv2
import supervision as sv
from ultralytics import YOLO

image = cv2.imread(<SOURCE_IMAGE_PATH>)
model = YOLO('yolov8s-pose.pt')

result = model(image)[0]
key_points = sv.KeyPoints.from_ultralytics(result)
Source code in supervision/keypoint/core.py
@classmethod
def from_ultralytics(cls, ultralytics_results) -> KeyPoints:
    """
    Creates a `sv.KeyPoints` instance from a
    [YOLOv8](https://github.com/ultralytics/ultralytics) pose inference result.

    Args:
        ultralytics_results (ultralytics.engine.results.Results):
            The output Results instance from YOLOv8. Its `keypoints`,
            `boxes` and `names` attributes are read.

    Returns:
        A `sv.KeyPoints` object containing the keypoint coordinates, class IDs,
            class names, and confidences of each keypoint. `confidence` is
            `None` when the result carries no per-keypoint confidence. An
            empty `sv.KeyPoints` object is returned when the result has no
            keypoints.

    Examples:
        ```python
        import cv2
        import supervision as sv
        from ultralytics import YOLO

        image = cv2.imread(<SOURCE_IMAGE_PATH>)
        model = YOLO('yolov8s-pose.pt')

        result = model(image)[0]
        key_points = sv.KeyPoints.from_ultralytics(result)
        ```
    """
    keypoints = ultralytics_results.keypoints
    if keypoints.xy.numel() == 0:
        return cls.empty()

    xy = keypoints.xy.cpu().numpy()
    class_id = ultralytics_results.boxes.cls.cpu().numpy().astype(int)
    class_names = np.array([ultralytics_results.names[i] for i in class_id])

    # NOTE(review): `keypoints.conf` is expected to be `None` for models
    # whose keypoints have no confidence channel; confirm against the
    # Ultralytics `Keypoints` documentation. Guarding avoids `None.cpu()`.
    conf = keypoints.conf
    confidence = conf.cpu().numpy() if conf is not None else None

    # Keyword arguments keep this independent of the constructor's
    # positional field order.
    return cls(
        xy=xy,
        class_id=class_id,
        confidence=confidence,
        data={CLASS_NAME_DATA_FIELD: class_names},
    )

from_yolo_nas(yolo_nas_results) classmethod

Create a sv.KeyPoints instance from a YOLO-NAS pose inference results.

Parameters:

Name Type Description Default
yolo_nas_results ImagePoseEstimationPrediction

The output object from YOLO NAS.

required

Returns:

Type Description
KeyPoints

A sv.KeyPoints object containing the keypoint coordinates, class IDs, class names, and confidences of each keypoint.

Examples:

import cv2
import torch
import supervision as sv
import super_gradients

image = cv2.imread(<SOURCE_IMAGE_PATH>)

device = "cuda" if torch.cuda.is_available() else "cpu"
model = super_gradients.training.models.get(
    "yolo_nas_pose_s", pretrained_weights="coco_pose").to(device)

results = model.predict(image, conf=0.1)
key_points = sv.KeyPoints.from_yolo_nas(results)
Source code in supervision/keypoint/core.py
@classmethod
def from_yolo_nas(cls, yolo_nas_results) -> KeyPoints:
    """
    Build a `sv.KeyPoints` instance from the pose inference results of
    [YOLO-NAS](https://github.com/Deci-AI/super-gradients/blob/master/YOLONAS-POSE.md).

    Args:
        yolo_nas_results (ImagePoseEstimationPrediction): The output object from
            YOLO NAS.

    Returns:
        A `sv.KeyPoints` object containing the keypoint coordinates and
            confidences of each keypoint, plus class IDs and class names
            when the prediction provides them. An empty `sv.KeyPoints`
            object is returned when there are no poses.

    Examples:
        ```python
        import cv2
        import torch
        import supervision as sv
        import super_gradients

        image = cv2.imread(<SOURCE_IMAGE_PATH>)

        device = "cuda" if torch.cuda.is_available() else "cpu"
        model = super_gradients.training.models.get(
            "yolo_nas_pose_s", pretrained_weights="coco_pose").to(device)

        results = model.predict(image, conf=0.1)
        key_points = sv.KeyPoints.from_yolo_nas(results)
        ```
    """  # noqa: E501 // docs
    prediction = yolo_nas_results.prediction
    poses = prediction.poses
    if len(poses) == 0:
        return cls.empty()

    # The last axis of `poses` holds two coordinates followed by a confidence.
    coordinates = poses[:, :, :2]
    scores = poses[:, :, 2]

    # `prediction.labels` may be missing altogether, while `class_names`
    # exists but may be None, so each is checked in its own way.
    labels = getattr(prediction, "labels", None)

    extra = {}
    if labels is not None and yolo_nas_results.class_names is not None:
        extra[CLASS_NAME_DATA_FIELD] = [
            yolo_nas_results.class_names[label] for label in labels
        ]

    return cls(
        xy=coordinates,
        confidence=scores,
        class_id=labels,
        data=extra,
    )

Comments