Skip to content

Legacy Metrics

Starting with 0.23.0, a new metrics module is being introduced to supervision. Metrics here are part of the legacy evaluation API and will be deprecated in the future.

supervision.metrics.detection.ConfusionMatrix dataclass

Confusion matrix for object detection tasks.

Attributes:

Name Type Description
matrix NDArray[int32]

A 2D np.ndarray of shape (len(classes) + 1, len(classes) + 1) containing the number of TP, FP, FN and TN for each class.

classes list[str]

Model class names.

conf_threshold float

Detection confidence threshold between 0 and 1. Detections with lower confidence will be excluded from the matrix.

iou_threshold float

Detection IoU threshold between 0 and 1. Detections with lower IoU will be classified as FP.

metric_target MetricTarget

The type of detection data used for IoU computation. Informational metadata set by from_detections and from_tensors. Excluded from __eq__ comparisons — two ConfusionMatrix instances with identical matrix, classes, conf_threshold, and iou_threshold compare as equal regardless of metric_target.

Source code in src/supervision/metrics/detection.py
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
@dataclass
class ConfusionMatrix:
    """
    Confusion matrix for object detection tasks.

    Attributes:
        matrix: An 2D `np.ndarray` of shape `(len(classes) + 1, len(classes) + 1)`
            containing the number of `TP`, `FP`, `FN` and `TN` for each class.
        classes: Model class names.
        conf_threshold: Detection confidence threshold between `0` and `1`.
            Detections with lower confidence will be excluded from the matrix.
        iou_threshold: Detection IoU threshold between `0` and `1`.
            Detections with lower IoU will be classified as `FP`.
        metric_target: The type of detection data used for IoU computation.
            Informational metadata set by `from_detections` and `from_tensors`.
            Excluded from `__eq__` comparisons — two `ConfusionMatrix` instances
            with identical `matrix`, `classes`, `conf_threshold`, and
            `iou_threshold` compare as equal regardless of `metric_target`.
    """

    matrix: npt.NDArray[np.int32]
    classes: list[str]
    conf_threshold: float
    iou_threshold: float
    metric_target: MetricTarget = MetricTarget.BOXES

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, ConfusionMatrix):
            return NotImplemented
        return (
            np.array_equal(self.matrix, other.matrix)
            and self.classes == other.classes
            and self.conf_threshold == other.conf_threshold
            and self.iou_threshold == other.iou_threshold
        )

    __hash__ = None  # type: ignore[assignment]

    @classmethod
    def from_detections(
        cls,
        predictions: list[Detections],
        targets: list[Detections],
        classes: list[str],
        conf_threshold: float = 0.3,
        iou_threshold: float = 0.5,
        metric_target: MetricTarget = MetricTarget.BOXES,
    ) -> ConfusionMatrix:
        """
        Calculate confusion matrix based on predicted and ground-truth detections.

        Each `Detections` object is converted to a tensor with
        `detections_to_tensor` and the result is delegated to `from_tensors`.

        Args:
            predictions: Detections objects predicted by the model, one per
                image.
            targets: Detections objects from ground-truth, one per image, in
                the same order as `predictions`.
            classes: Model class names.
            conf_threshold: Detection confidence threshold between `0` and `1`.
                Detections with lower confidence will be excluded.
            iou_threshold: Detection IoU threshold between `0` and `1`.
                Detections with lower IoU will be classified as `FP`.
            metric_target: The type of detection data to use.
                Supports `MetricTarget.BOXES` (default) and
                `MetricTarget.ORIENTED_BOUNDING_BOXES`. When using
                `MetricTarget.ORIENTED_BOUNDING_BOXES`, each `Detections`
                object must include OBB coordinates in
                `detections.data[ORIENTED_BOX_COORDINATES]` as a float32
                array of shape `(N, 8)` (flat) or `(N, 4, 2)` (as stored by
                `from_ultralytics`); both are normalised to `(N, 8)` internally.
                `MetricTarget.MASKS` is not supported.

        Returns:
            New instance of ConfusionMatrix.

        Raises:
            ValueError: If `predictions` and `targets` have different lengths.

        Examples:
            ```pycon
            >>> import numpy as np
            >>> import supervision as sv
            >>> targets = [
            ...     sv.Detections(
            ...         xyxy=np.array([[0, 0, 10, 10], [50, 50, 60, 60]]),
            ...         class_id=np.array([0, 0])
            ...     )
            ... ]
            >>> predictions = [
            ...     sv.Detections(
            ...         xyxy=np.array([[0, 0, 10, 10], [100, 100, 110, 110]]),
            ...         class_id=np.array([0, 0]),
            ...         confidence=np.array([0.9, 0.8])
            ...     )
            ... ]
            >>> confusion_matrix = sv.ConfusionMatrix.from_detections(
            ...     predictions=predictions,
            ...     targets=targets,
            ...     classes=['person']
            ... )
            >>> confusion_matrix.matrix
            array([[1., 1.],
                   [1., 0.]])

            ```
        """
        # `zip` stops at the shorter input, so without this check surplus
        # images would be dropped silently and the tensor lists handed to
        # `from_tensors` would always look consistent.
        if len(predictions) != len(targets):
            raise ValueError(
                f"Number of predictions ({len(predictions)}) and "
                f"targets ({len(targets)}) must be equal."
            )

        prediction_tensors = []
        target_tensors = []
        for prediction, target in zip(predictions, targets):
            prediction_tensors.append(
                detections_to_tensor(
                    prediction, with_confidence=True, metric_target=metric_target
                )
            )
            target_tensors.append(
                detections_to_tensor(
                    target, with_confidence=False, metric_target=metric_target
                )
            )
        return cls.from_tensors(
            predictions=prediction_tensors,
            targets=target_tensors,
            classes=classes,
            conf_threshold=conf_threshold,
            iou_threshold=iou_threshold,
            metric_target=metric_target,
        )

    @classmethod
    def from_tensors(
        cls,
        predictions: list[npt.NDArray[np.float32]],
        targets: list[npt.NDArray[np.float32]],
        classes: list[str],
        conf_threshold: float = 0.3,
        iou_threshold: float = 0.5,
        metric_target: MetricTarget = MetricTarget.BOXES,
    ) -> ConfusionMatrix:
        """
        Build a confusion matrix from per-image prediction and target tensors.

        The per-image matrices returned by `evaluate_detection_batch` are
        summed into a single `(len(classes) + 1, len(classes) + 1)` matrix.

        Args:
            predictions: One array per image, of `shape = (M, 6)` or
                `shape = (M, 10)` depending on `metric_target`.
                For `MetricTarget.BOXES` each row is
                `(x_min, y_min, x_max, y_max, class, conf)`.
                For `MetricTarget.ORIENTED_BOUNDING_BOXES` each row is
                `(x1, y1, x2, y2, x3, y3, x4, y4, class, conf)`.
            targets: One array per image, of `shape = (N, 5)` or
                `shape = (N, 9)` depending on `metric_target`.
                For `MetricTarget.BOXES` each row is
                `(x_min, y_min, x_max, y_max, class)`.
                For `MetricTarget.ORIENTED_BOUNDING_BOXES` each row is
                `(x1, y1, x2, y2, x3, y3, x4, y4, class)`.
            classes: Model class names.
            conf_threshold: Detection confidence threshold between `0` and `1`.
                Detections with lower confidence will be excluded.
            iou_threshold: Detection IoU threshold between `0` and `1`.
                Detections with lower IoU will be classified as `FP`.
            metric_target: The type of detection data to use. Selects the
                expected tensor layout described above.
                `MetricTarget.MASKS` is not supported.

        Returns:
            New instance of ConfusionMatrix.

        Examples:
            ```pycon
            >>> import supervision as sv
            >>> import numpy as np
            >>> targets = [
            ...     np.array([
            ...         [0.0, 0.0, 3.0, 3.0, 0],
            ...         [2.0, 2.0, 5.0, 5.0, 0],
            ...         [6.0, 1.0, 8.0, 3.0, 1],
            ...     ])
            ... ]
            >>> predictions = [
            ...     np.array([
            ...         [0.0, 0.0, 3.0, 3.0, 0, 0.9],
            ...         [0.1, 0.1, 3.0, 3.0, 0, 0.9],
            ...         [6.0, 1.0, 8.0, 3.0, 1, 0.8],
            ...     ])
            ... ]
            >>> confusion_matrix = sv.ConfusionMatrix.from_tensors(
            ...     predictions=predictions,
            ...     targets=targets,
            ...     classes=['person', 'dog']
            ... )
            >>> confusion_matrix.matrix
            array([[1., 0., 1.],
                   [0., 1., 0.],
                   [1., 0., 0.]])

            ```
        """
        _assert_supported_target(metric_target)
        _validate_input_tensors(predictions, targets, metric_target=metric_target)

        class_count = len(classes)
        # One extra row/column on top of the real classes.
        accumulated = np.zeros((class_count + 1, class_count + 1))
        for image_targets, image_predictions in zip(targets, predictions):
            per_image = cls.evaluate_detection_batch(
                predictions=image_predictions,
                targets=image_targets,
                num_classes=class_count,
                conf_threshold=conf_threshold,
                iou_threshold=iou_threshold,
                metric_target=metric_target,
            )
            accumulated += per_image

        return cls(
            matrix=accumulated,
            classes=classes,
            conf_threshold=conf_threshold,
            iou_threshold=iou_threshold,
            metric_target=metric_target,
        )

    @staticmethod
    def evaluate_detection_batch(
        predictions: npt.NDArray[np.float32],
        targets: npt.NDArray[np.float32],
        num_classes: int,
        conf_threshold: float,
        iou_threshold: float,
        metric_target: MetricTarget = MetricTarget.BOXES,
    ) -> npt.NDArray[np.int32]:
        """
        Compute the confusion matrix of a single image.

        Matched pairs are counted at `[gt_class, det_class]`, unmatched
        ground truth at `[gt_class, num_classes]` and unmatched detections at
        `[num_classes, det_class]`.

        Args:
            predictions: Predictions for one image, of `shape = (M, 6)` or
                `shape = (M, 10)` depending on `metric_target`.
                For `MetricTarget.BOXES` each row is
                `(x_min, y_min, x_max, y_max, class, conf)`.
                For `MetricTarget.ORIENTED_BOUNDING_BOXES` each row is
                `(x1, y1, x2, y2, x3, y3, x4, y4, class, conf)`.
            targets: Ground-truth labels for one image, of `shape = (N, 5)`
                or `shape = (N, 9)` depending on `metric_target`.
                For `MetricTarget.BOXES` each row is
                `(x_min, y_min, x_max, y_max, class)`.
                For `MetricTarget.ORIENTED_BOUNDING_BOXES` each row is
                `(x1, y1, x2, y2, x3, y3, x4, y4, class)`.
            num_classes: Number of classes.
            conf_threshold: Detection confidence threshold between `0` and `1`.
                Detections with lower confidence will be excluded.
            iou_threshold: Detection IoU threshold between `0` and `1`.
                Only pairs whose IoU is strictly greater than this value can
                be matched.
            metric_target: The type of detection data to use. Selects the IoU
                function (`box_iou_batch` vs `oriented_box_iou_batch`) and
                the number of coordinate columns.
                `MetricTarget.MASKS` is not supported.

        Returns:
            Confusion matrix of shape `(num_classes + 1, num_classes + 1)`
            based on a single image.

        Raises:
            ValueError: If `predictions` or `targets` is not 2D or has an
                unexpected number of columns for `metric_target`.
        """
        _assert_supported_target(metric_target)

        # Column layout: coordinates, then class id, then (predictions only)
        # confidence.
        is_oriented = metric_target == MetricTarget.ORIENTED_BOUNDING_BOXES
        coords_dim = 8 if is_oriented else 4
        class_col = coords_dim
        conf_col = coords_dim + 1
        prediction_width = coords_dim + 2
        target_width = coords_dim + 1

        if predictions.ndim != 2 or predictions.shape[1] != prediction_width:
            raise ValueError(
                f"Predictions must have shape (M, {prediction_width}). "
                f"Got {predictions.shape} instead."
            )
        if targets.ndim != 2 or targets.shape[1] != target_width:
            raise ValueError(
                f"Targets must have shape (N, {target_width}). "
                f"Got {targets.shape} instead."
            )

        counts = np.zeros((num_classes + 1, num_classes + 1))
        # Index of the extra row/column used for unmatched boxes.
        background = num_classes

        # Drop predictions below the confidence threshold.
        kept = predictions[predictions[:, conf_col] >= conf_threshold]
        gt_classes = np.array(targets[:, class_col], dtype=np.int16)

        if len(kept) == 0:
            # Nothing survived the confidence filter: every GT is a FN.
            for gt_class in gt_classes:
                counts[gt_class, background] += 1
            return counts

        det_classes = np.array(kept[:, class_col], dtype=np.int16)

        if len(targets) == 0:
            # No ground truth: every remaining detection is a FP.
            for det_class in det_classes:
                counts[background, det_class] += 1
            return counts

        iou_fn = oriented_box_iou_batch if is_oriented else box_iou_batch
        iou_batch = iou_fn(
            boxes_true=targets[:, :coords_dim],
            boxes_detection=kept[:, :coords_dim],
        )

        # Candidate pairs: IoU above the threshold, regardless of class.
        gt_indices, det_indices = np.nonzero(iou_batch > iou_threshold)

        gt_taken = np.zeros(len(gt_classes), dtype=bool)
        det_taken = np.zeros(len(det_classes), dtype=bool)

        if gt_indices.size > 0:
            ious = iou_batch[gt_indices, det_indices]
            same_class = gt_classes[gt_indices] == det_classes[det_indices]

            # `np.lexsort` uses the last key as the primary key, ascending.
            # `~same_class` puts class-agreeing pairs first; `-ious` orders
            # each group by descending IoU.
            for position in np.lexsort((-ious, ~same_class)):
                gt_idx = gt_indices[position]
                det_idx = det_indices[position]
                # Greedy assignment: each GT and each detection is used at
                # most once.
                if gt_taken[gt_idx] or det_taken[det_idx]:
                    continue
                # Covers correct classification as well as misclassification.
                counts[gt_classes[gt_idx], det_classes[det_idx]] += 1
                gt_taken[gt_idx] = True
                det_taken[det_idx] = True

        # Ground truth left without a partner counts as FN.
        for gt_class in gt_classes[~gt_taken]:
            counts[gt_class, background] += 1

        # Detections left without a partner count as FP.
        for det_class in det_classes[~det_taken]:
            counts[background, det_class] += 1

        return counts

    @staticmethod
    def _drop_extra_matches(
        matches: npt.NDArray[np.float32],
    ) -> npt.NDArray[np.float32]:
        """
        Deduplicate matches. If there are multiple matches for the same true or
        predicted box, only the one with the highest IoU is kept.
        """
        if matches.shape[0] > 0:
            matches = matches[matches[:, 2].argsort()[::-1]]
            matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
            matches = matches[matches[:, 2].argsort()[::-1]]
            matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
        result: npt.NDArray[np.float32] = matches
        return result

    @classmethod
    def benchmark(
        cls,
        dataset: DetectionDataset,
        callback: Callable[[npt.NDArray[np.uint8]], Detections],
        conf_threshold: float = 0.3,
        iou_threshold: float = 0.5,
        metric_target: MetricTarget = MetricTarget.BOXES,
    ) -> ConfusionMatrix:
        """
        Run a model over a dataset and build the resulting confusion matrix.

        `callback` is invoked once per dataset image, in iteration order, and
        its output is compared with the annotation of that image.

        Args:
            dataset: Object detection dataset used for evaluation.
            callback: Function that takes an image as input and returns a
                Detections object.
            conf_threshold: Detection confidence threshold between `0` and `1`.
                Detections with lower confidence will be excluded.
            iou_threshold: Detection IoU threshold between `0` and `1`.
                Detections with lower IoU will be classified as `FP`.
            metric_target: The type of detection data to use.
                Supports `MetricTarget.BOXES` and
                `MetricTarget.ORIENTED_BOUNDING_BOXES`. Passed through to
                `from_detections`. `MetricTarget.MASKS` is not supported.

        Returns:
            New instance of ConfusionMatrix.

        Example:
            ```python
            import numpy as np
            import supervision as sv
            from ultralytics import YOLO

            dataset = sv.DetectionDataset.from_yolo(...)

            model = YOLO(...)
            def callback(image: np.ndarray) -> sv.Detections:
                result = model(image)[0]
                return sv.Detections.from_ultralytics(result)

            confusion_matrix = sv.ConfusionMatrix.benchmark(
                dataset = dataset,
                callback = callback
            )

            print(confusion_matrix.matrix)
            # np.array([
            #     [0., 0., 0., 0.],
            #     [0., 1., 0., 1.],
            #     [0., 1., 1., 0.],
            #     [1., 1., 0., 0.]
            # ])
            ```
        """
        # Pair each model output with the ground truth of the same image.
        evaluated = [
            (callback(image), annotation) for _, image, annotation in dataset
        ]
        return cls.from_detections(
            predictions=[model_output for model_output, _ in evaluated],
            targets=[ground_truth for _, ground_truth in evaluated],
            classes=dataset.classes,
            conf_threshold=conf_threshold,
            iou_threshold=iou_threshold,
            metric_target=metric_target,
        )

    def plot(
        self,
        save_path: str | None = None,
        title: str | None = None,
        classes: list[str] | None = None,
        normalize: bool = False,
        fig_size: tuple[int, int] = (12, 10),
    ) -> matplotlib.figure.Figure:
        """
        Create a confusion matrix plot and optionally save it to disk.

        The figure is always returned; it is written to `save_path` only when
        one is given. This method does not display the figure itself.

        Args:
            save_path: Path to save the plot. If not provided, nothing is
                written to disk.
            title: Title of the plot.
            classes: Class names used as tick labels. If not provided,
                `self.classes` is used. Labels are only drawn when there are
                between 1 and 98 names.
            normalize: If True, divide every column of the matrix by its sum.
            fig_size: Size of the plot.

        Returns:
            Confusion matrix plot.
        """
        # Work on a float copy: the cells hidden below are set to NaN, which
        # an integer-typed matrix cannot store. `astype` copies, so
        # `self.matrix` is left untouched.
        array = self.matrix.astype(float)

        if normalize:
            eps = 1e-8  # avoids division by zero for empty columns
            array = array / (array.sum(0).reshape(1, -1) + eps)

        # Hide (near-)empty cells so they are neither coloured nor annotated.
        array[array < 0.005] = np.nan

        fig, ax = plt.subplots(figsize=fig_size, tight_layout=True, facecolor="white")

        class_names = classes if classes is not None else self.classes
        use_labels_for_ticks = class_names is not None and (0 < len(class_names) < 99)
        if use_labels_for_ticks:
            x_tick_labels = [*class_names, "FN"]
            y_tick_labels = [*class_names, "FP"]
            num_ticks = len(x_tick_labels)
        else:
            x_tick_labels = None
            y_tick_labels = None
            num_ticks = len(array)
        im = ax.imshow(array, cmap="Blues")

        # Largest visible value; computed once and reused for the colour
        # range and for choosing the text colour of each cell.
        max_value = np.nanmax(array)

        cbar = ax.figure.colorbar(im, ax=ax)
        cbar.mappable.set_clim(vmin=0, vmax=max_value)

        # Without labels only every second tick is drawn.
        tick_interval = 2 if x_tick_labels is None else 1
        ax.set_xticks(np.arange(0, num_ticks, tick_interval), labels=x_tick_labels)
        ax.set_yticks(np.arange(0, num_ticks, tick_interval), labels=y_tick_labels)

        plt.setp(ax.get_xticklabels(), rotation=90, ha="right", rotation_mode="default")

        labelsize = 10 if num_ticks < 50 else 8
        ax.tick_params(axis="both", which="both", labelsize=labelsize)

        # Cell values are only written out for reasonably small matrices.
        if num_ticks < 30:
            half_max = 0.5 * max_value
            for i in range(array.shape[0]):
                for j in range(array.shape[1]):
                    n_preds = array[i, j]
                    if not np.isnan(n_preds):
                        ax.text(
                            j,
                            i,
                            f"{n_preds:.2f}" if normalize else f"{n_preds:.0f}",
                            ha="center",
                            va="center",
                            # Dark text on light cells, light text on dark.
                            color="black" if n_preds < half_max else "white",
                        )

        if title:
            ax.set_title(title, fontsize=20)

        ax.set_xlabel("Predicted")
        ax.set_ylabel("True")
        ax.set_facecolor("white")
        if save_path:
            fig.savefig(
                save_path, dpi=250, facecolor=fig.get_facecolor(), transparent=True
            )
        return fig

Functions

benchmark(dataset: DetectionDataset, callback: Callable[[npt.NDArray[np.uint8]], Detections], conf_threshold: float = 0.3, iou_threshold: float = 0.5, metric_target: MetricTarget = MetricTarget.BOXES) -> ConfusionMatrix classmethod

Calculate confusion matrix from dataset and callback function.

Parameters:

Name Type Description Default
dataset
DetectionDataset

Object detection dataset used for evaluation.

required
callback
Callable[[NDArray[uint8]], Detections]

Function that takes an image as input and returns a Detections object.

required
conf_threshold
float

Detection confidence threshold between 0 and 1. Detections with lower confidence will be excluded.

0.3
iou_threshold
float

Detection IoU threshold between 0 and 1. Detections with lower IoU will be classified as FP.

0.5
metric_target
MetricTarget

The type of detection data to use. Supports MetricTarget.BOXES and MetricTarget.ORIENTED_BOUNDING_BOXES. Passed through to from_detections. MetricTarget.MASKS is not supported.

BOXES

Returns:

Type Description
ConfusionMatrix

New instance of ConfusionMatrix.

Example
import supervision as sv
from ultralytics import YOLO

dataset = sv.DetectionDataset.from_yolo(...)

model = YOLO(...)
def callback(image: np.ndarray) -> sv.Detections:
    result = model(image)[0]
    return sv.Detections.from_ultralytics(result)

confusion_matrix = sv.ConfusionMatrix.benchmark(
    dataset = dataset,
    callback = callback
)

print(confusion_matrix.matrix)
# np.array([
#     [0., 0., 0., 0.],
#     [0., 1., 0., 1.],
#     [0., 1., 1., 0.],
#     [1., 1., 0., 0.]
# ])
Source code in src/supervision/metrics/detection.py
@classmethod
def benchmark(
    cls,
    dataset: DetectionDataset,
    callback: Callable[[npt.NDArray[np.uint8]], Detections],
    conf_threshold: float = 0.3,
    iou_threshold: float = 0.5,
    metric_target: MetricTarget = MetricTarget.BOXES,
) -> ConfusionMatrix:
    """
    Run a model over a dataset and build the resulting confusion matrix.

    `callback` is invoked once per dataset image, in iteration order, and
    its output is compared with the annotation of that image.

    Args:
        dataset: Object detection dataset used for evaluation.
        callback: Function that takes an image as input and returns a
            Detections object.
        conf_threshold: Detection confidence threshold between `0` and `1`.
            Detections with lower confidence will be excluded.
        iou_threshold: Detection IoU threshold between `0` and `1`.
            Detections with lower IoU will be classified as `FP`.
        metric_target: The type of detection data to use.
            Supports `MetricTarget.BOXES` and
            `MetricTarget.ORIENTED_BOUNDING_BOXES`. Passed through to
            `from_detections`. `MetricTarget.MASKS` is not supported.

    Returns:
        New instance of ConfusionMatrix.

    Example:
        ```python
        import numpy as np
        import supervision as sv
        from ultralytics import YOLO

        dataset = sv.DetectionDataset.from_yolo(...)

        model = YOLO(...)
        def callback(image: np.ndarray) -> sv.Detections:
            result = model(image)[0]
            return sv.Detections.from_ultralytics(result)

        confusion_matrix = sv.ConfusionMatrix.benchmark(
            dataset = dataset,
            callback = callback
        )

        print(confusion_matrix.matrix)
        # np.array([
        #     [0., 0., 0., 0.],
        #     [0., 1., 0., 1.],
        #     [0., 1., 1., 0.],
        #     [1., 1., 0., 0.]
        # ])
        ```
    """
    # Pair each model output with the ground truth of the same image.
    evaluated = [
        (callback(image), annotation) for _, image, annotation in dataset
    ]
    return cls.from_detections(
        predictions=[model_output for model_output, _ in evaluated],
        targets=[ground_truth for _, ground_truth in evaluated],
        classes=dataset.classes,
        conf_threshold=conf_threshold,
        iou_threshold=iou_threshold,
        metric_target=metric_target,
    )

evaluate_detection_batch(predictions: npt.NDArray[np.float32], targets: npt.NDArray[np.float32], num_classes: int, conf_threshold: float, iou_threshold: float, metric_target: MetricTarget = MetricTarget.BOXES) -> npt.NDArray[np.int32] staticmethod

Calculate confusion matrix for a batch of detections for a single image.

Parameters:

Name Type Description Default
predictions
NDArray[float32]

Batch prediction. Describes a single image and has shape = (M, 6) or shape = (M, 10) depending on metric_target. If MetricTarget.BOXES, each row is in (x_min, y_min, x_max, y_max, class, conf) format. If MetricTarget.ORIENTED_BOUNDING_BOXES, each row is in (x1, y1, x2, y2, x3, y3, x4, y4, class, conf) format.

required
targets
NDArray[float32]

Batch target labels. Describes a single image and has shape = (N, 5) or shape = (N, 9) depending on metric_target. If MetricTarget.BOXES, each row is in (x_min, y_min, x_max, y_max, class) format. If MetricTarget.ORIENTED_BOUNDING_BOXES, each row is in (x1, y1, x2, y2, x3, y3, x4, y4, class) format.

required
num_classes
int

Number of classes.

required
conf_threshold
float

Detection confidence threshold between 0 and 1. Detections with lower confidence will be excluded.

required
iou_threshold
float

Detection iou threshold between 0 and 1. Detections with lower iou will be classified as FP.

required
metric_target
MetricTarget

The type of detection data to use. Determines IoU function (box_iou_batch vs oriented_box_iou_batch) and coordinate column count. MetricTarget.MASKS is not supported.

BOXES

Returns:

Type Description
NDArray[int32]

Confusion matrix based on a single image.

Source code in src/supervision/metrics/detection.py
@staticmethod
def evaluate_detection_batch(
    predictions: npt.NDArray[np.float32],
    targets: npt.NDArray[np.float32],
    num_classes: int,
    conf_threshold: float,
    iou_threshold: float,
    metric_target: MetricTarget = MetricTarget.BOXES,
) -> npt.NDArray[np.int32]:
    """
    Build the confusion matrix contributed by a single image.

    Args:
        predictions: Detections for one image. Shape `(M, 6)` for
            `MetricTarget.BOXES`, rows laid out as
            `(x_min, y_min, x_max, y_max, class, conf)`; shape `(M, 10)`
            for `MetricTarget.ORIENTED_BOUNDING_BOXES`, rows laid out as
            `(x1, y1, x2, y2, x3, y3, x4, y4, class, conf)`.
        targets: Ground truth for the same image. Shape `(N, 5)` for
            `MetricTarget.BOXES`, rows laid out as
            `(x_min, y_min, x_max, y_max, class)`; shape `(N, 9)` for
            `MetricTarget.ORIENTED_BOUNDING_BOXES`, rows laid out as
            `(x1, y1, x2, y2, x3, y3, x4, y4, class)`.
        num_classes: Number of classes. Index `num_classes` is the extra
            row/column used for unmatched detections / ground truth.
        conf_threshold: Predictions whose confidence is below this value
            are dropped before matching.
        iou_threshold: A ground-truth/detection pair is a match candidate
            only when its IoU is strictly greater than this value.
        metric_target: Selects the IoU function (`box_iou_batch` or
            `oriented_box_iou_batch`) and the number of coordinate
            columns. `MetricTarget.MASKS` is not supported.

    Returns:
        Matrix of shape `(num_classes + 1, num_classes + 1)` indexed as
        `[ground_truth_class, predicted_class]`.

    Raises:
        ValueError: If `predictions` or `targets` is not a 2-D array with
            the column count required by `metric_target`.
    """
    _assert_supported_target(metric_target)

    # Column layout: coordinates first, then class id, then (predictions
    # only) the confidence score.
    is_oriented = metric_target == MetricTarget.ORIENTED_BOUNDING_BOXES
    coords_dim = 8 if is_oriented else 4
    class_col = coords_dim
    conf_col = coords_dim + 1
    prediction_width = coords_dim + 2
    target_width = coords_dim + 1

    if predictions.ndim != 2 or predictions.shape[1] != prediction_width:
        raise ValueError(
            f"Predictions must have shape (M, {prediction_width}). "
            f"Got {predictions.shape} instead."
        )
    if targets.ndim != 2 or targets.shape[1] != target_width:
        raise ValueError(
            f"Targets must have shape (N, {target_width}). "
            f"Got {targets.shape} instead."
        )

    background = num_classes
    confusion = np.zeros((num_classes + 1, num_classes + 1))

    # Keep only the predictions that reach the confidence threshold.
    kept = predictions[predictions[:, conf_col] >= conf_threshold]
    gt_classes = np.array(targets[:, class_col], dtype=np.int16)

    if len(kept) == 0:
        # Nothing was detected: every ground-truth object is a miss (FN).
        for gt_class in gt_classes:
            confusion[gt_class, background] += 1
        return confusion

    det_classes = np.array(kept[:, class_col], dtype=np.int16)

    if len(targets) == 0:
        # Nothing to detect: every surviving prediction is spurious (FP).
        for det_class in det_classes:
            confusion[background, det_class] += 1
        return confusion

    compute_iou = oriented_box_iou_batch if is_oriented else box_iou_batch
    iou_batch = compute_iou(
        boxes_true=targets[:, :coords_dim],
        boxes_detection=kept[:, :coords_dim],
    )

    # Candidate pairs are all (gt, detection) combinations whose IoU
    # exceeds the threshold, irrespective of class.
    gt_hits, det_hits = np.nonzero(iou_batch > iou_threshold)

    gt_used = np.zeros(len(gt_classes), dtype=bool)
    det_used = np.zeros(len(det_classes), dtype=bool)

    if gt_hits.size:
        pair_ious = iou_batch[gt_hits, det_hits]
        same_class = gt_classes[gt_hits] == det_classes[det_hits]

        # np.lexsort treats the LAST key as primary and sorts ascending:
        # ~same_class puts class-agreeing pairs first, -pair_ious orders
        # each group by descending IoU.
        ranking = np.lexsort((-pair_ious, ~same_class))

        # Greedy one-to-one assignment in ranking order.
        for pair in ranking:
            gt_idx = gt_hits[pair]
            det_idx = det_hits[pair]
            if gt_used[gt_idx] or det_used[det_idx]:
                continue
            # Diagonal cell for a correct class, off-diagonal for a
            # spatial match with the wrong class.
            confusion[gt_classes[gt_idx], det_classes[det_idx]] += 1
            gt_used[gt_idx] = True
            det_used[det_idx] = True

    # Ground truth left without a partner counts as FN.
    for gt_class in gt_classes[~gt_used]:
        confusion[gt_class, background] += 1

    # Detections left without a partner count as FP.
    for det_class in det_classes[~det_used]:
        confusion[background, det_class] += 1

    return confusion

from_detections(predictions: list[Detections], targets: list[Detections], classes: list[str], conf_threshold: float = 0.3, iou_threshold: float = 0.5, metric_target: MetricTarget = MetricTarget.BOXES) -> ConfusionMatrix classmethod

Calculate confusion matrix based on predicted and ground-truth detections.

Parameters:

Name Type Description Default
targets
list[Detections]

Detections objects from ground-truth.

required
predictions
list[Detections]

Detections objects predicted by the model.

required
classes
list[str]

Model class names.

required
conf_threshold
float

Detection confidence threshold between 0 and 1. Detections with lower confidence will be excluded.

0.3
iou_threshold
float

Detection IoU threshold between 0 and 1. Detections with lower IoU will be classified as FP.

0.5
metric_target
MetricTarget

The type of detection data to use. Supports MetricTarget.BOXES (default) and MetricTarget.ORIENTED_BOUNDING_BOXES. When using MetricTarget.ORIENTED_BOUNDING_BOXES, each Detections object must include OBB coordinates in detections.data[ORIENTED_BOX_COORDINATES] as a float32 array of shape (N, 8) (flat) or (N, 4, 2) (as stored by from_ultralytics); both are normalised to (N, 8) internally. MetricTarget.MASKS is not supported.

BOXES

Returns:

Type Description
ConfusionMatrix

New instance of ConfusionMatrix.

Examples:

>>> import numpy as np
>>> import supervision as sv
>>> targets = [
...     sv.Detections(
...         xyxy=np.array([[0, 0, 10, 10], [50, 50, 60, 60]]),
...         class_id=np.array([0, 0])
...     )
... ]
>>> predictions = [
...     sv.Detections(
...         xyxy=np.array([[0, 0, 10, 10], [100, 100, 110, 110]]),
...         class_id=np.array([0, 0]),
...         confidence=np.array([0.9, 0.8])
...     )
... ]
>>> confusion_matrix = sv.ConfusionMatrix.from_detections(
...     predictions=predictions,
...     targets=targets,
...     classes=['person']
... )
>>> confusion_matrix.matrix
array([[1., 1.],
       [1., 0.]])
Source code in src/supervision/metrics/detection.py
@classmethod
def from_detections(
    cls,
    predictions: list[Detections],
    targets: list[Detections],
    classes: list[str],
    conf_threshold: float = 0.3,
    iou_threshold: float = 0.5,
    metric_target: MetricTarget = MetricTarget.BOXES,
) -> ConfusionMatrix:
    """
    Calculate confusion matrix based on predicted and ground-truth detections.

    Args:
        predictions: Detections objects predicted by the model, one per image.
        targets: Detections objects from ground-truth, one per image and in
            the same order as `predictions`.
        classes: Model class names.
        conf_threshold: Detection confidence threshold between `0` and `1`.
            Detections with lower confidence will be excluded.
        iou_threshold: Detection IoU threshold between `0` and `1`.
            Detections with lower IoU will be classified as `FP`.
        metric_target: The type of detection data to use.
            Supports `MetricTarget.BOXES` (default) and
            `MetricTarget.ORIENTED_BOUNDING_BOXES`. When using
            `MetricTarget.ORIENTED_BOUNDING_BOXES`, each `Detections`
            object must include OBB coordinates in
            `detections.data[ORIENTED_BOX_COORDINATES]` as a float32
            array of shape `(N, 8)` (flat) or `(N, 4, 2)` (as stored by
            `from_ultralytics`); both are normalised to `(N, 8)` internally.
            `MetricTarget.MASKS` is not supported.

    Returns:
        New instance of ConfusionMatrix.

    Raises:
        ValueError: If `predictions` and `targets` contain a different
            number of items.

    Examples:
        ```pycon
        >>> import numpy as np
        >>> import supervision as sv
        >>> targets = [
        ...     sv.Detections(
        ...         xyxy=np.array([[0, 0, 10, 10], [50, 50, 60, 60]]),
        ...         class_id=np.array([0, 0])
        ...     )
        ... ]
        >>> predictions = [
        ...     sv.Detections(
        ...         xyxy=np.array([[0, 0, 10, 10], [100, 100, 110, 110]]),
        ...         class_id=np.array([0, 0]),
        ...         confidence=np.array([0.9, 0.8])
        ...     )
        ... ]
        >>> confusion_matrix = sv.ConfusionMatrix.from_detections(
        ...     predictions=predictions,
        ...     targets=targets,
        ...     classes=['person']
        ... )
        >>> confusion_matrix.matrix
        array([[1., 1.],
               [1., 0.]])

        ```
    """
    # zip() stops at the shorter input, so a length mismatch would silently
    # drop images from the evaluation; reject it up front instead.
    if len(predictions) != len(targets):
        raise ValueError(
            f"Number of predictions ({len(predictions)}) and "
            f"targets ({len(targets)}) must be equal."
        )

    # Predictions carry a confidence column; ground truth does not.
    prediction_tensors = [
        detections_to_tensor(
            prediction, with_confidence=True, metric_target=metric_target
        )
        for prediction in predictions
    ]
    target_tensors = [
        detections_to_tensor(
            target, with_confidence=False, metric_target=metric_target
        )
        for target in targets
    ]
    return cls.from_tensors(
        predictions=prediction_tensors,
        targets=target_tensors,
        classes=classes,
        conf_threshold=conf_threshold,
        iou_threshold=iou_threshold,
        metric_target=metric_target,
    )

from_tensors(predictions: list[npt.NDArray[np.float32]], targets: list[npt.NDArray[np.float32]], classes: list[str], conf_threshold: float = 0.3, iou_threshold: float = 0.5, metric_target: MetricTarget = MetricTarget.BOXES) -> ConfusionMatrix classmethod

Calculate confusion matrix based on predicted and ground-truth detections.

Parameters:

Name Type Description Default
predictions
list[NDArray[float32]]

Each element of the list describes a single image and has shape = (M, 6) or shape = (M, 10) depending on metric_target. If MetricTarget.BOXES, each row is in (x_min, y_min, x_max, y_max, class, conf) format. If MetricTarget.ORIENTED_BOUNDING_BOXES, each row is in (x1, y1, x2, y2, x3, y3, x4, y4, class, conf) format.

required
targets
list[NDArray[float32]]

Each element of the list describes a single image and has shape = (N, 5) or shape = (N, 9) depending on metric_target. If MetricTarget.BOXES, each row is in (x_min, y_min, x_max, y_max, class) format. If MetricTarget.ORIENTED_BOUNDING_BOXES, each row is in (x1, y1, x2, y2, x3, y3, x4, y4, class) format.

required
classes
list[str]

Model class names.

required
conf_threshold
float

Detection confidence threshold between 0 and 1. Detections with lower confidence will be excluded.

0.3
iou_threshold
float

Detection IoU threshold between 0 and 1. Detections with lower IoU will be classified as FP.

0.5
metric_target
MetricTarget

The type of detection data to use. Determines expected tensor shapes (see Args above for column layouts). MetricTarget.MASKS is not supported.

BOXES

Returns:

Type Description
ConfusionMatrix

New instance of ConfusionMatrix.

Examples:

>>> import supervision as sv
>>> import numpy as np
>>> targets = [
...     np.array([
...         [0.0, 0.0, 3.0, 3.0, 0],
...         [2.0, 2.0, 5.0, 5.0, 0],
...         [6.0, 1.0, 8.0, 3.0, 1],
...     ])
... ]
>>> predictions = [
...     np.array([
...         [0.0, 0.0, 3.0, 3.0, 0, 0.9],
...         [0.1, 0.1, 3.0, 3.0, 0, 0.9],
...         [6.0, 1.0, 8.0, 3.0, 1, 0.8],
...     ])
... ]
>>> confusion_matrix = sv.ConfusionMatrix.from_tensors(
...     predictions=predictions,
...     targets=targets,
...     classes=['person', 'dog']
... )
>>> confusion_matrix.matrix
array([[1., 0., 1.],
       [0., 1., 0.],
       [1., 0., 0.]])
Source code in src/supervision/metrics/detection.py
@classmethod
def from_tensors(
    cls,
    predictions: list[npt.NDArray[np.float32]],
    targets: list[npt.NDArray[np.float32]],
    classes: list[str],
    conf_threshold: float = 0.3,
    iou_threshold: float = 0.5,
    metric_target: MetricTarget = MetricTarget.BOXES,
) -> ConfusionMatrix:
    """
    Accumulate a confusion matrix over per-image prediction and target arrays.

    Args:
        predictions: One array per image. Shape `(M, 6)` for
            `MetricTarget.BOXES` with rows
            `(x_min, y_min, x_max, y_max, class, conf)`, or `(M, 10)` for
            `MetricTarget.ORIENTED_BOUNDING_BOXES` with rows
            `(x1, y1, x2, y2, x3, y3, x4, y4, class, conf)`.
        targets: One array per image. Shape `(N, 5)` for
            `MetricTarget.BOXES` with rows
            `(x_min, y_min, x_max, y_max, class)`, or `(N, 9)` for
            `MetricTarget.ORIENTED_BOUNDING_BOXES` with rows
            `(x1, y1, x2, y2, x3, y3, x4, y4, class)`.
        classes: Model class names.
        conf_threshold: Detection confidence threshold between `0` and `1`.
            Detections with lower confidence will be excluded.
        iou_threshold: Detection IoU threshold between `0` and `1`.
            Detections with lower IoU will be classified as `FP`.
        metric_target: The type of detection data to use; it fixes the
            expected array shapes described above.
            `MetricTarget.MASKS` is not supported.

    Returns:
        New instance of ConfusionMatrix.

    Examples:
        ```pycon
        >>> import supervision as sv
        >>> import numpy as np
        >>> targets = [
        ...     np.array([
        ...         [0.0, 0.0, 3.0, 3.0, 0],
        ...         [2.0, 2.0, 5.0, 5.0, 0],
        ...         [6.0, 1.0, 8.0, 3.0, 1],
        ...     ])
        ... ]
        >>> predictions = [
        ...     np.array([
        ...         [0.0, 0.0, 3.0, 3.0, 0, 0.9],
        ...         [0.1, 0.1, 3.0, 3.0, 0, 0.9],
        ...         [6.0, 1.0, 8.0, 3.0, 1, 0.8],
        ...     ])
        ... ]
        >>> confusion_matrix = sv.ConfusionMatrix.from_tensors(
        ...     predictions=predictions,
        ...     targets=targets,
        ...     classes=['person', 'dog']
        ... )
        >>> confusion_matrix.matrix
        array([[1., 0., 1.],
               [0., 1., 0.],
               [1., 0., 0.]])

        ```
    """
    _assert_supported_target(metric_target)
    _validate_input_tensors(predictions, targets, metric_target=metric_target)

    class_count = len(classes)
    # One extra row/column beyond the real classes, matching the shape
    # returned by evaluate_detection_batch.
    accumulated = np.zeros((class_count + 1, class_count + 1))

    for image_targets, image_predictions in zip(targets, predictions):
        per_image = cls.evaluate_detection_batch(
            predictions=image_predictions,
            targets=image_targets,
            num_classes=class_count,
            conf_threshold=conf_threshold,
            iou_threshold=iou_threshold,
            metric_target=metric_target,
        )
        accumulated += per_image

    return cls(
        matrix=accumulated,
        classes=classes,
        conf_threshold=conf_threshold,
        iou_threshold=iou_threshold,
        metric_target=metric_target,
    )

plot(save_path: str | None = None, title: str | None = None, classes: list[str] | None = None, normalize: bool = False, fig_size: tuple[int, int] = (12, 10)) -> matplotlib.figure.Figure

Create confusion matrix plot and save it at selected location.

Parameters:

Name Type Description Default
save_path
str | None

Path to save the plot. If not provided, the plot is not written to disk; the figure is only returned.

None
title
str | None

Title of the plot.

None
classes
list[str] | None

List of classes to be displayed on the plot. If not provided, all classes will be displayed.

None
normalize
bool

If True, normalize the confusion matrix.

False
fig_size
tuple[int, int]

Size of the plot.

(12, 10)

Returns:

Type Description
Figure

Confusion matrix plot.

Source code in src/supervision/metrics/detection.py
def plot(
    self,
    save_path: str | None = None,
    title: str | None = None,
    classes: list[str] | None = None,
    normalize: bool = False,
    fig_size: tuple[int, int] = (12, 10),
) -> matplotlib.figure.Figure:
    """
    Create confusion matrix plot and optionally save it at selected location.

    Args:
        save_path: Path to save the plot. If not provided, the figure is
            not written to disk and is only returned.
        title: Title of the plot.
        classes: List of classes to be displayed on the plot.
            If not provided, `self.classes` is used. Class names are used
            as tick labels only when there are between 1 and 98 of them.
        normalize: If True, divide every column of the matrix by its sum.
        fig_size: Size of the plot.

    Returns:
        Confusion matrix plot.
    """

    # Work on a float copy: the blanking step below writes NaN, which an
    # integer-typed matrix cannot hold.
    array = self.matrix.astype(np.float64)

    if normalize:
        eps = 1e-8  # guards against division by zero for empty columns
        array = array / (array.sum(0).reshape(1, -1) + eps)

    # Blank out (near-)zero cells so they are neither coloured nor annotated.
    array[array < 0.005] = np.nan

    fig, ax = plt.subplots(figsize=fig_size, tight_layout=True, facecolor="white")

    class_names = classes if classes is not None else self.classes
    use_labels_for_ticks = class_names is not None and (0 < len(class_names) < 99)
    if use_labels_for_ticks:
        x_tick_labels = [*class_names, "FN"]
        y_tick_labels = [*class_names, "FP"]
        num_ticks = len(x_tick_labels)
    else:
        x_tick_labels = None
        y_tick_labels = None
        num_ticks = len(array)
    im = ax.imshow(array, cmap="Blues")

    # Largest visible cell; reused for the colour scale and text contrast.
    max_value = np.nanmax(array)

    cbar = ax.figure.colorbar(im, ax=ax)
    cbar.mappable.set_clim(vmin=0, vmax=max_value)

    # Without labels only every second tick is drawn.
    tick_interval = 2 if x_tick_labels is None else 1
    ax.set_xticks(np.arange(0, num_ticks, tick_interval), labels=x_tick_labels)
    ax.set_yticks(np.arange(0, num_ticks, tick_interval), labels=y_tick_labels)

    plt.setp(ax.get_xticklabels(), rotation=90, ha="right", rotation_mode="default")

    labelsize = 10 if num_ticks < 50 else 8
    ax.tick_params(axis="both", which="both", labelsize=labelsize)

    # Per-cell annotations are only drawn for small matrices.
    if num_ticks < 30:
        for i in range(array.shape[0]):
            for j in range(array.shape[1]):
                n_preds = array[i, j]
                if not np.isnan(n_preds):
                    ax.text(
                        j,
                        i,
                        f"{n_preds:.2f}" if normalize else f"{n_preds:.0f}",
                        ha="center",
                        va="center",
                        # Dark text on light cells, light text on dark cells.
                        color="black" if n_preds < 0.5 * max_value else "white",
                    )

    if title:
        ax.set_title(title, fontsize=20)

    ax.set_xlabel("Predicted")
    ax.set_ylabel("True")
    ax.set_facecolor("white")
    if save_path:
        fig.savefig(
            save_path, dpi=250, facecolor=fig.get_facecolor(), transparent=True
        )
    return fig

supervision.metrics.detection.MeanAveragePrecision dataclass

Deprecated

MeanAveragePrecision is deprecated and will be removed in supervision-0.31.0.

The deprecated implementation provides results that are inconsistent with pycocotools. Please use supervision.metrics.mean_average_precision.MeanAveragePrecision instead, which matches the results of pycocotools and is now the recommended approach.

Mean Average Precision for object detection tasks.

Attributes:

Name Type Description
map50_95 float

Mean Average Precision (mAP) calculated over IoU thresholds ranging from 0.50 to 0.95 with a step size of 0.05.

map50 float

Mean Average Precision (mAP) calculated specifically at an IoU threshold of 0.50.

map75 float

Mean Average Precision (mAP) calculated specifically at an IoU threshold of 0.75.

per_class_ap50_95 NDArray[float64]

Average Precision (AP) values calculated over IoU thresholds ranging from 0.50 to 0.95 with a step size of 0.05, provided for each individual class.

Source code in src/supervision/metrics/detection.py
 750
 751
 752
 753
 754
 755
 756
 757
 758
 759
 760
 761
 762
 763
 764
 765
 766
 767
 768
 769
 770
 771
 772
 773
 774
 775
 776
 777
 778
 779
 780
 781
 782
 783
 784
 785
 786
 787
 788
 789
 790
 791
 792
 793
 794
 795
 796
 797
 798
 799
 800
 801
 802
 803
 804
 805
 806
 807
 808
 809
 810
 811
 812
 813
 814
 815
 816
 817
 818
 819
 820
 821
 822
 823
 824
 825
 826
 827
 828
 829
 830
 831
 832
 833
 834
 835
 836
 837
 838
 839
 840
 841
 842
 843
 844
 845
 846
 847
 848
 849
 850
 851
 852
 853
 854
 855
 856
 857
 858
 859
 860
 861
 862
 863
 864
 865
 866
 867
 868
 869
 870
 871
 872
 873
 874
 875
 876
 877
 878
 879
 880
 881
 882
 883
 884
 885
 886
 887
 888
 889
 890
 891
 892
 893
 894
 895
 896
 897
 898
 899
 900
 901
 902
 903
 904
 905
 906
 907
 908
 909
 910
 911
 912
 913
 914
 915
 916
 917
 918
 919
 920
 921
 922
 923
 924
 925
 926
 927
 928
 929
 930
 931
 932
 933
 934
 935
 936
 937
 938
 939
 940
 941
 942
 943
 944
 945
 946
 947
 948
 949
 950
 951
 952
 953
 954
 955
 956
 957
 958
 959
 960
 961
 962
 963
 964
 965
 966
 967
 968
 969
 970
 971
 972
 973
 974
 975
 976
 977
 978
 979
 980
 981
 982
 983
 984
 985
 986
 987
 988
 989
 990
 991
 992
 993
 994
 995
 996
 997
 998
 999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
@deprecated_class(
    target=TargetMode.NOTIFY,
    deprecated_in="0.27.0",
    remove_in="0.31.0",
)
@dataclass(frozen=True)
class MeanAveragePrecision:
    """
    !!! deprecated "Deprecated"
        `MeanAveragePrecision` is **deprecated** and will be removed in
        `supervision-0.31.0`.

        The deprecated implementation provides results that are inconsistent with
        `pycocotools`. Please use
        `supervision.metrics.mean_average_precision.MeanAveragePrecision` instead,
        which matches the results of `pycocotools` and is now the recommended approach.

    Mean Average Precision for object detection tasks.

    Attributes:
        map50_95: Mean Average Precision (mAP) calculated over IoU thresholds
            ranging from `0.50` to `0.95` with a step size of `0.05`.
        map50: Mean Average Precision (mAP) calculated specifically at
            an IoU threshold of `0.50`.
        map75: Mean Average Precision (mAP) calculated specifically at
            an IoU threshold of `0.75`.
        per_class_ap50_95: Average Precision (AP) values calculated over
            IoU thresholds ranging from `0.50` to `0.95` with a step size of `0.05`,
            provided for each individual class.
    """

    map50_95: float
    map50: float
    map75: float
    per_class_ap50_95: npt.NDArray[np.float64]

    @classmethod
    def from_detections(
        cls,
        predictions: list[Detections],
        targets: list[Detections],
    ) -> MeanAveragePrecision:
        """
        Calculate mean average precision based on predicted and ground-truth detections.

        Args:
            predictions: Detections objects predicted by the model, one per image.
            targets: Detections objects from ground-truth, one per image and
                in the same order as `predictions`.

        Returns:
            New instance of MeanAveragePrecision.

        Raises:
            ValueError: If `predictions` and `targets` contain a different
                number of items.

        Examples:
            ```pycon
            >>> import numpy as np
            >>> import supervision as sv
            >>> targets = [
            ...     sv.Detections(
            ...         xyxy=np.array([[0, 0, 10, 10]]),
            ...         class_id=np.array([0])
            ...     )
            ... ]
            >>> predictions = [
            ...     sv.Detections(
            ...         xyxy=np.array([[0, 0, 10, 10]]),
            ...         class_id=np.array([0]),
            ...         confidence=np.array([0.9])
            ...     )
            ... ]
            >>> mAP = sv.MeanAveragePrecision.from_detections(
            ...     predictions=predictions,
            ...     targets=targets,
            ... )
            >>> round(float(mAP.map50), 2)
            0.99

            ```
        """
        # zip() stops at the shorter input, so a length mismatch would
        # silently drop images from the evaluation; reject it up front.
        if len(predictions) != len(targets):
            raise ValueError(
                f"Number of predictions ({len(predictions)}) and "
                f"targets ({len(targets)}) must be equal."
            )

        # Predictions carry a confidence column; ground truth does not.
        prediction_tensors = [
            detections_to_tensor(prediction, with_confidence=True)
            for prediction in predictions
        ]
        target_tensors = [
            detections_to_tensor(target, with_confidence=False)
            for target in targets
        ]
        return cls.from_tensors(
            predictions=prediction_tensors,
            targets=target_tensors,
        )

    @classmethod
    def benchmark(
        cls,
        dataset: DetectionDataset,
        callback: Callable[[npt.NDArray[np.uint8]], Detections],
    ) -> MeanAveragePrecision:
        """
        Run a model over a dataset and compute its mean average precision.

        Args:
            dataset: Object detection dataset used for evaluation. Iterating
                it must yield 3-tuples whose second item is the image and
                whose third item is the ground-truth annotation.
            callback: Function that takes an image as input and returns
                a Detections object.
        Returns:
            New instance of MeanAveragePrecision.

        Example:
            ```python
            import supervision as sv
            from ultralytics import YOLO

            dataset = sv.DetectionDataset.from_yolo(...)

            model = YOLO(...)
            def callback(image: np.ndarray) -> sv.Detections:
                result = model(image)[0]
                return sv.Detections.from_ultralytics(result)

            mean_average_precision = sv.MeanAveragePrecision.benchmark(
                dataset = dataset,
                callback = callback
            )

            print(mean_average_precision.map50_95)
            # 0.433
            ```
        """
        model_outputs = []
        ground_truth = []
        # The first tuple item is not needed for evaluation.
        for _unused, frame, labels in dataset:
            model_outputs.append(callback(frame))
            ground_truth.append(labels)
        return cls.from_detections(
            predictions=model_outputs,
            targets=ground_truth,
        )

    @classmethod
    def from_tensors(
        cls,
        predictions: list[npt.NDArray[np.float32]],
        targets: list[npt.NDArray[np.float32]],
    ) -> MeanAveragePrecision:
        """
        Compute Mean Average Precision from per-image prediction and target
            arrays, evaluated at ten IoU thresholds from 0.5 to 0.95.

        Args:
            predictions: Each element of the list describes
                a single image and has `shape = (M, 6)` where `M` is
                the number of detected objects. Each row is expected to be
                in `(x_min, y_min, x_max, y_max, class, conf)` format.
            targets: Each element of the list describes a single
                image and has `shape = (N, 5)` where `N` is the
                number of ground-truth objects. Each row is expected to be in
                `(x_min, y_min, x_max, y_max, class)` format.
        Returns:
            New instance of MeanAveragePrecision.

        Examples:
            ```pycon
            >>> import supervision as sv
            >>> import numpy as np
            >>> targets = [
            ...     np.array([
            ...         [0.0, 0.0, 3.0, 3.0, 0],
            ...         [2.0, 2.0, 5.0, 5.0, 0],
            ...         [6.0, 1.0, 8.0, 3.0, 1],
            ...     ])
            ... ]
            >>> predictions = [
            ...     np.array([
            ...         [0.0, 0.0, 3.0, 3.0, 0, 0.9],
            ...         [0.1, 0.1, 3.0, 3.0, 0, 0.9],
            ...         [6.0, 1.0, 8.0, 3.0, 1, 0.8],
            ...     ])
            ... ]
            >>> mAP = sv.MeanAveragePrecision.from_tensors(
            ...     predictions=predictions,
            ...     targets=targets,
            ... )
            >>> round(float(mAP.map50), 2)
            0.81

            ```
        """
        _validate_input_tensors(predictions, targets)
        iou_levels = np.linspace(0.5, 0.95, 10)

        # One tuple per contributing image:
        # (match flags per IoU level, confidences, predicted classes, true classes)
        per_image_stats = []

        for gt_rows, pred_rows in zip(targets, predictions):
            if pred_rows.shape[0] == 0:
                if gt_rows.shape[0]:
                    # Ground truth without any prediction: record the true
                    # classes alongside empty prediction-side entries.
                    per_image_stats.append(
                        (
                            np.zeros((0, iou_levels.size), dtype=bool),
                            np.zeros(0),
                            np.zeros(0),
                            gt_rows[:, 4],
                        )
                    )
                continue

            if not gt_rows.shape[0]:
                # Predictions on an image without ground truth are not recorded.
                continue

            match_flags = cls._match_detection_batch(pred_rows, gt_rows, iou_levels)
            per_image_stats.append(
                (
                    match_flags,
                    pred_rows[:, 5],
                    pred_rows[:, 4],
                    gt_rows[:, 4],
                )
            )

        if not per_image_stats:
            # Nothing to evaluate.
            return cls(
                map50_95=0,
                map50=0,
                map75=0,
                per_class_ap50_95=np.array([]),
            )

        merged = [np.concatenate(column, 0) for column in zip(*per_image_stats)]
        per_class_ap = cls._average_precisions_per_class(*merged)

        # Columns follow iou_levels: index 0 is IoU 0.50, index 5 is IoU 0.75.
        return cls(
            map50_95=per_class_ap.mean(),
            map50=per_class_ap[:, 0].mean(),
            map75=per_class_ap[:, 5].mean(),
            per_class_ap50_95=per_class_ap,
        )

    @staticmethod
    def compute_average_precision(
        recall: npt.NDArray[np.float64],
        precision: npt.NDArray[np.float64],
    ) -> float:
        """
        Compute the average precision using 101-point interpolation (COCO), given
            the recall and precision curves.

        Args:
            recall: The recall curve.
            precision: The precision curve.

        Returns:
            Average precision.
        """
        extended_recall = np.concatenate(([0.0], recall, [1.0]))
        extended_precision = np.concatenate(([1.0], precision, [0.0]))
        max_accumulated_precision = np.flip(
            np.maximum.accumulate(np.flip(extended_precision))
        )
        interpolated_recall_levels = np.linspace(0, 1, 101)
        interpolated_precision = np.interp(
            interpolated_recall_levels, extended_recall, max_accumulated_precision
        )

        # Check if we are running on NumPy 2.0+ or older
        if hasattr(np, "trapezoid"):
            average_precision = np.trapezoid(
                interpolated_precision, interpolated_recall_levels
            )
        else:
            average_precision = getattr(np, "trapz")(
                interpolated_precision, interpolated_recall_levels
            )

        return float(average_precision)

    @staticmethod
    def _match_detection_batch(
        predictions: npt.NDArray[np.float32],
        targets: npt.NDArray[np.float32],
        iou_thresholds: npt.NDArray[np.float32],
    ) -> npt.NDArray[np.bool_]:
        """
        Match predictions with target labels based on IoU levels.

        A prediction can only be matched to a target of the same class whose
        IoU with it reaches the threshold being evaluated.

        Args:
            predictions: Batch prediction. Describes a single image and
                has `shape = (M, 6)` where `M` is the number of detected objects.
                Each row is expected to be in
                `(x_min, y_min, x_max, y_max, class, conf)` format.
            targets: Batch target labels. Describes a single image and
                has `shape = (N, 5)` where `N` is the number of ground-truth objects.
                Each row is expected to be in
                `(x_min, y_min, x_max, y_max, class)` format.
            iou_thresholds: Array contains different IoU thresholds.

        Returns:
            Boolean array of shape `(M, len(iou_thresholds))`. Entry `[j, i]`
            is `True` when prediction `j` was kept as a match at
            `iou_thresholds[i]`.
        """
        num_predictions, num_iou_levels = predictions.shape[0], iou_thresholds.shape[0]
        correct = np.zeros((num_predictions, num_iou_levels), dtype=bool)
        # Pairwise IoU between targets and predictions; assumed to have one row
        # per target and one column per prediction — TODO confirm against
        # box_iou_batch.
        iou = box_iou_batch(targets[:, :4], predictions[:, :4])
        # Broadcast (N, 1) against (M,) to get an (N, M) class-agreement mask.
        correct_class = targets[:, 4:5] == predictions[:, 4]

        for i, iou_level in enumerate(iou_thresholds):
            # Tuple of (target indices, prediction indices) of candidate pairs.
            matched_indices = np.where((iou >= iou_level) & correct_class)

            if matched_indices[0].shape[0]:
                # Rows of `matches` are [target index, prediction index, IoU].
                combined_indices = np.stack(matched_indices, axis=1)
                iou_values = iou[matched_indices][:, None]
                matches = np.hstack([combined_indices, iou_values])

                if matched_indices[0].shape[0] > 1:
                    # Order candidates by IoU, highest first.
                    matches = matches[matches[:, 2].argsort()[::-1]]
                    # Keep one row per prediction (first occurrence in the
                    # IoU-sorted order).
                    matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
                    # Keep one row per target.
                    # NOTE(review): after the previous step the rows appear to
                    # be ordered by prediction index rather than IoU, so this
                    # keeps the first such row per target — confirm intended.
                    matches = matches[np.unique(matches[:, 0], return_index=True)[1]]

                # Flag the surviving predictions as correct at this IoU level.
                correct[matches[:, 1].astype(int), i] = True
        result: npt.NDArray[np.bool_] = correct
        return result

    @staticmethod
    def _average_precisions_per_class(
        matches: npt.NDArray[np.bool_],
        prediction_confidence: npt.NDArray[np.float32],
        prediction_class_ids: npt.NDArray[np.int32],
        true_class_ids: npt.NDArray[np.int32],
        eps: float = 1e-16,
    ) -> npt.NDArray[np.float64]:
        """
        Compute the average precision of every ground-truth class at every
        IoU level.
        Source: https://github.com/rafaelpadilla/Object-Detection-Metrics.

        Args:
            matches: True positives; one row per prediction and one column
                per IoU level.
            prediction_confidence: Confidence of each prediction, used to
                rank the predictions.
            prediction_class_ids: Predicted object classes.
            true_class_ids: True object classes.
            eps: Small value added to the recall denominator to prevent
                division by zero.

        Returns:
            Array with one row per unique class in `true_class_ids` and one
            column per IoU level. Rows of classes that received no
            predictions are left at zero.
        """
        # Rank predictions from most to least confident.
        ranking = np.argsort(-prediction_confidence)
        ranked_matches = matches[ranking]
        ranked_class_ids = prediction_class_ids[ranking]

        target_classes, targets_per_class = np.unique(
            true_class_ids, return_counts=True
        )
        num_iou_levels = ranked_matches.shape[1]

        ap_table: npt.NDArray[np.float64] = np.zeros(
            (target_classes.shape[0], num_iou_levels), dtype=np.float64
        )

        for row, (class_id, num_targets) in enumerate(
            zip(target_classes, targets_per_class)
        ):
            class_mask = ranked_class_ids == class_id

            # Nothing to score for this class; its row stays at zero.
            if class_mask.sum() == 0 or num_targets == 0:
                continue

            class_matches = ranked_matches[class_mask]
            # Running counts down the confidence ranking, per IoU level.
            cumulative_fp = (1 - class_matches).cumsum(0)
            cumulative_tp = class_matches.cumsum(0)
            recall = cumulative_tp / (num_targets + eps)
            precision = cumulative_tp / (cumulative_tp + cumulative_fp)

            for level in range(num_iou_levels):
                ap_table[row, level] = MeanAveragePrecision.compute_average_precision(
                    recall[:, level], precision[:, level]
                )

        return ap_table

Functions

benchmark(dataset: DetectionDataset, callback: Callable[[npt.NDArray[np.uint8]], Detections]) -> MeanAveragePrecision classmethod

Calculate mean average precision from dataset and callback function.

Parameters:

Name Type Description Default
dataset
DetectionDataset

Object detection dataset used for evaluation.

required
callback
Callable[[NDArray[uint8]], Detections]

Function that takes an image as input and returns Detections object.

required

Returns: New instance of MeanAveragePrecision.

Example
import supervision as sv
from ultralytics import YOLO

dataset = sv.DetectionDataset.from_yolo(...)

model = YOLO(...)
def callback(image: np.ndarray) -> sv.Detections:
    result = model(image)[0]
    return sv.Detections.from_ultralytics(result)

mean_average_precision = sv.MeanAveragePrecision.benchmark(
    dataset = dataset,
    callback = callback
)

print(mean_average_precision.map50_95)
# 0.433
Source code in src/supervision/metrics/detection.py
@classmethod
def benchmark(
    cls,
    dataset: DetectionDataset,
    callback: Callable[[npt.NDArray[np.uint8]], Detections],
) -> MeanAveragePrecision:
    """
    Run a model over a dataset and compute its mean average precision.

    Every image of `dataset` is passed to `callback`; the returned detections
    are paired with the image's annotations and scored through
    `from_detections`.

    Args:
        dataset: Object detection dataset used for evaluation.
        callback: Function that takes
            an image as input and returns Detections object.
    Returns:
        New instance of MeanAveragePrecision.

    Example:
        ```python
        import numpy as np
        import supervision as sv
        from ultralytics import YOLO

        dataset = sv.DetectionDataset.from_yolo(...)

        model = YOLO(...)
        def callback(image: np.ndarray) -> sv.Detections:
            result = model(image)[0]
            return sv.Detections.from_ultralytics(result)

        mean_average_precision = sv.MeanAveragePrecision.benchmark(
            dataset = dataset,
            callback = callback
        )

        print(mean_average_precision.map50_95)
        # 0.433
        ```
    """
    # One (prediction, ground truth) pair per dataset entry, in dataset order.
    evaluated = [(callback(image), annotation) for _, image, annotation in dataset]
    return cls.from_detections(
        predictions=[predicted for predicted, _ in evaluated],
        targets=[expected for _, expected in evaluated],
    )

compute_average_precision(recall: npt.NDArray[np.float64], precision: npt.NDArray[np.float64]) -> float staticmethod

Compute the average precision using 101-point interpolation (COCO), given the recall and precision curves.

Parameters:

Name Type Description Default
recall
NDArray[float64]

The recall curve.

required
precision
NDArray[float64]

The precision curve.

required

Returns:

Type Description
float

Average precision.

Source code in src/supervision/metrics/detection.py
@staticmethod
def compute_average_precision(
    recall: npt.NDArray[np.float64],
    precision: npt.NDArray[np.float64],
) -> float:
    """
    Compute the average precision using 101-point interpolation (COCO), given
        the recall and precision curves.

    Args:
        recall: The recall curve.
        precision: The precision curve.

    Returns:
        Average precision.
    """
    extended_recall = np.concatenate(([0.0], recall, [1.0]))
    extended_precision = np.concatenate(([1.0], precision, [0.0]))
    max_accumulated_precision = np.flip(
        np.maximum.accumulate(np.flip(extended_precision))
    )
    interpolated_recall_levels = np.linspace(0, 1, 101)
    interpolated_precision = np.interp(
        interpolated_recall_levels, extended_recall, max_accumulated_precision
    )

    # Check if we are running on NumPy 2.0+ or older
    if hasattr(np, "trapezoid"):
        average_precision = np.trapezoid(
            interpolated_precision, interpolated_recall_levels
        )
    else:
        average_precision = getattr(np, "trapz")(
            interpolated_precision, interpolated_recall_levels
        )

    return float(average_precision)

from_detections(predictions: list[Detections], targets: list[Detections]) -> MeanAveragePrecision classmethod

Calculate mean average precision based on predicted and ground-truth detections.

Parameters:

Name Type Description Default
targets
list[Detections]

Detections objects from ground-truth.

required
predictions
list[Detections]

Detections objects predicted by the model.

required

Returns: New instance of MeanAveragePrecision.

Examples:

>>> import numpy as np
>>> import supervision as sv
>>> targets = [
...     sv.Detections(
...         xyxy=np.array([[0, 0, 10, 10]]),
...         class_id=np.array([0])
...     )
... ]
>>> predictions = [
...     sv.Detections(
...         xyxy=np.array([[0, 0, 10, 10]]),
...         class_id=np.array([0]),
...         confidence=np.array([0.9])
...     )
... ]
>>> mAP = sv.MeanAveragePrecision.from_detections(
...     predictions=predictions,
...     targets=targets,
... )
>>> round(float(mAP.map50), 2)
0.99
Source code in src/supervision/metrics/detection.py
@classmethod
def from_detections(
    cls,
    predictions: list[Detections],
    targets: list[Detections],
) -> MeanAveragePrecision:
    """
    Calculate mean average precision based on predicted and ground-truth detections.

    Each prediction/target pair is converted to its tensor form and the
    result is delegated to `from_tensors`.

    Args:
        targets: Detections objects from ground-truth.
        predictions: Detections objects predicted by the model.
    Returns:
        New instance of MeanAveragePrecision.

    Examples:
        ```pycon
        >>> import numpy as np
        >>> import supervision as sv
        >>> targets = [
        ...     sv.Detections(
        ...         xyxy=np.array([[0, 0, 10, 10]]),
        ...         class_id=np.array([0])
        ...     )
        ... ]
        >>> predictions = [
        ...     sv.Detections(
        ...         xyxy=np.array([[0, 0, 10, 10]]),
        ...         class_id=np.array([0]),
        ...         confidence=np.array([0.9])
        ...     )
        ... ]
        >>> mAP = sv.MeanAveragePrecision.from_detections(
        ...     predictions=predictions,
        ...     targets=targets,
        ... )
        >>> round(float(mAP.map50), 2)
        0.99

        ```
    """
    # Convert image by image: predictions keep their confidence column,
    # ground-truth targets do not.
    tensor_pairs = [
        (
            detections_to_tensor(prediction, with_confidence=True),
            detections_to_tensor(target, with_confidence=False),
        )
        for prediction, target in zip(predictions, targets)
    ]
    return cls.from_tensors(
        predictions=[predicted for predicted, _ in tensor_pairs],
        targets=[expected for _, expected in tensor_pairs],
    )

from_tensors(predictions: list[npt.NDArray[np.float32]], targets: list[npt.NDArray[np.float32]]) -> MeanAveragePrecision classmethod

Calculate Mean Average Precision based on predicted and ground-truth detections at different IoU thresholds.

Parameters:

Name Type Description Default
predictions
list[NDArray[float32]]

Each element of the list describes a single image and has shape = (M, 6) where M is the number of detected objects. Each row is expected to be in (x_min, y_min, x_max, y_max, class, conf) format.

required
targets
list[NDArray[float32]]

Each element of the list describes a single image and has shape = (N, 5) where N is the number of ground-truth objects. Each row is expected to be in (x_min, y_min, x_max, y_max, class) format.

required

Returns: New instance of MeanAveragePrecision.

Examples:

>>> import supervision as sv
>>> import numpy as np
>>> targets = [
...     np.array([
...         [0.0, 0.0, 3.0, 3.0, 0],
...         [2.0, 2.0, 5.0, 5.0, 0],
...         [6.0, 1.0, 8.0, 3.0, 1],
...     ])
... ]
>>> predictions = [
...     np.array([
...         [0.0, 0.0, 3.0, 3.0, 0, 0.9],
...         [0.1, 0.1, 3.0, 3.0, 0, 0.9],
...         [6.0, 1.0, 8.0, 3.0, 1, 0.8],
...     ])
... ]
>>> mAP = sv.MeanAveragePrecision.from_tensors(
...     predictions=predictions,
...     targets=targets,
... )
>>> round(float(mAP.map50), 2)
0.81
Source code in src/supervision/metrics/detection.py
@classmethod
def from_tensors(
    cls,
    predictions: list[npt.NDArray[np.float32]],
    targets: list[npt.NDArray[np.float32]],
) -> MeanAveragePrecision:
    """
    Calculate Mean Average Precision based on predicted and ground-truth
        detections, evaluated at ten IoU thresholds from 0.5 to 0.95.

    Args:
        predictions: Each element of the list describes
            a single image and has `shape = (M, 6)` where `M` is
            the number of detected objects. Each row is expected to be
            in `(x_min, y_min, x_max, y_max, class, conf)` format.
        targets: Each element of the list describes a single
            image and has `shape = (N, 5)` where `N` is the
            number of ground-truth objects. Each row is expected to be in
            `(x_min, y_min, x_max, y_max, class)` format.
    Returns:
        New instance of MeanAveragePrecision.

    Examples:
        ```pycon
        >>> import supervision as sv
        >>> import numpy as np
        >>> targets = [
        ...     np.array([
        ...         [0.0, 0.0, 3.0, 3.0, 0],
        ...         [2.0, 2.0, 5.0, 5.0, 0],
        ...         [6.0, 1.0, 8.0, 3.0, 1],
        ...     ])
        ... ]
        >>> predictions = [
        ...     np.array([
        ...         [0.0, 0.0, 3.0, 3.0, 0, 0.9],
        ...         [0.1, 0.1, 3.0, 3.0, 0, 0.9],
        ...         [6.0, 1.0, 8.0, 3.0, 1, 0.8],
        ...     ])
        ... ]
        >>> mAP = sv.MeanAveragePrecision.from_tensors(
        ...     predictions=predictions,
        ...     targets=targets,
        ... )
        >>> round(float(mAP.map50), 2)
        0.81

        ```
    """
    _validate_input_tensors(predictions, targets)
    iou_levels = np.linspace(0.5, 0.95, 10)

    # One tuple per scored image:
    # (matches, prediction confidences, prediction classes, target classes).
    per_image_stats = []

    for target_rows, prediction_rows in zip(targets, predictions):
        num_predictions = prediction_rows.shape[0]
        num_targets = target_rows.shape[0]

        # NOTE(review): images without targets contribute nothing, so any
        # predictions made on them never count as false positives —
        # confirm this is intended.
        if not num_targets:
            continue

        if num_predictions == 0:
            # Targets with no predictions: record only the target classes.
            per_image_stats.append(
                (
                    np.zeros((0, iou_levels.size), dtype=bool),
                    *np.zeros((2, 0)),
                    target_rows[:, 4],
                )
            )
            continue

        per_image_stats.append(
            (
                cls._match_detection_batch(prediction_rows, target_rows, iou_levels),
                prediction_rows[:, 5],
                prediction_rows[:, 4],
                target_rows[:, 4],
            )
        )

    # Nothing was scored: report zeros and an empty per-class table.
    if not per_image_stats:
        return cls(
            map50_95=0,
            map50=0,
            map75=0,
            per_class_ap50_95=np.array([]),
        )

    # Stack each statistic across images, then score every class.
    merged_stats = [np.concatenate(column, 0) for column in zip(*per_image_stats)]
    per_class_ap = cls._average_precisions_per_class(*merged_stats)

    return cls(
        map50_95=per_class_ap.mean(),
        # Columns 0 and 5 of the threshold grid are IoU 0.50 and IoU 0.75.
        map50=per_class_ap[:, 0].mean(),
        map75=per_class_ap[:, 5].mean(),
        per_class_ap50_95=per_class_ap,
    )

Comments