
Mean Average Precision

Bases: Metric

Mean Average Precision (mAP) is a metric used to evaluate object detection models. It is computed as the mean of the per-class average precision, i.e. the area under the precision-recall curve, averaged over a range of IoU thresholds.
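
As implemented here, for the IoU thresholds $T = \{0.50, 0.55, \ldots, 0.95\}$ and the set of matched classes $C$, the score reported as mAP@50:95 is the mean of the per-class, per-threshold average precision:

$$
\text{mAP}_{50:95} = \frac{1}{|T|\,|C|} \sum_{t \in T} \sum_{c \in C} \text{AP}_{c,t}
$$

where $\text{AP}_{c,t}$ is the 101-point interpolated (COCO-style) average precision of class $c$ at IoU threshold $t$. mAP@50 and mAP@75 are the same class-averaged score restricted to $t = 0.50$ and $t = 0.75$.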

Example
import supervision as sv
from supervision.metrics import MeanAveragePrecision

predictions = sv.Detections(...)
targets = sv.Detections(...)

map_metric = MeanAveragePrecision()
map_result = map_metric.update(predictions, targets).compute()

print(map_result.map50_95)
# 0.4674

print(map_result)
# MeanAveragePrecisionResult:
# Metric target: MetricTarget.BOXES
# Class agnostic: False
# mAP @ 50:95: 0.4674
# mAP @ 50:    0.5048
# mAP @ 75:    0.4796
# mAP scores: [0.50485  0.50377  0.50377  ...]
# IoU thresh: [0.5  0.55  0.6  ...]
# AP per class:
# 0: [0.67699  0.67699  0.67699  ...]
# ...
# Small objects: ...
# Medium objects: ...
# Large objects: ...

map_result.plot()

![example_plot](https://media.roboflow.com/supervision-docs/metrics/mAP_plot_example.png)
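
A common pattern is to update the metric once per image while iterating over a dataset, then call compute() at the end. The sketch below assumes a recent supervision version where iterating an sv.DetectionDataset yields (path, image, annotations) tuples, and a hypothetical run_model callable that returns sv.Detections for an image:

import supervision as sv
from supervision.metrics import MeanAveragePrecision

# Hypothetical: any callable that maps an image to sv.Detections.
def run_model(image) -> sv.Detections:
    ...

# Hypothetical paths; adjust to your dataset layout.
dataset = sv.DetectionDataset.from_yolo(
    images_directory_path="images",
    annotations_directory_path="labels",
    data_yaml_path="data.yaml",
)

map_metric = MeanAveragePrecision()
for _, image, target in dataset:
    predictions = run_model(image)
    map_metric.update(predictions, target)

map_result = map_metric.compute()
print(map_result.map50_95)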

Source code in supervision/metrics/mean_average_precision.py
class MeanAveragePrecision(Metric):
    """
    Mean Average Precision (mAP) is a metric used to evaluate object detection models.
    It is the average of the precision-recall curves at different IoU thresholds.

    Example:
        ```python
        import supervision as sv
        from supervision.metrics import MeanAveragePrecision

        predictions = sv.Detections(...)
        targets = sv.Detections(...)

        map_metric = MeanAveragePrecision()
        map_result = map_metric.update(predictions, targets).compute()

        print(map_result.map50_95)
        # 0.4674

        print(map_result)
        # MeanAveragePrecisionResult:
        # Metric target: MetricTarget.BOXES
        # Class agnostic: False
        # mAP @ 50:95: 0.4674
        # mAP @ 50:    0.5048
        # mAP @ 75:    0.4796
        # mAP scores: [0.50485  0.50377  0.50377  ...]
        # IoU thresh: [0.5  0.55  0.6  ...]
        # AP per class:
        # 0: [0.67699  0.67699  0.67699  ...]
        # ...
        # Small objects: ...
        # Medium objects: ...
        # Large objects: ...

        map_result.plot()
        ```

    ![example_plot](\
        https://media.roboflow.com/supervision-docs/metrics/mAP_plot_example.png\
        ){ align=center width="800" }
    """

    def __init__(
        self,
        metric_target: MetricTarget = MetricTarget.BOXES,
        class_agnostic: bool = False,
    ):
        """
        Initialize the Mean Average Precision metric.

        Args:
            metric_target (MetricTarget): The type of detection data to use.
            class_agnostic (bool): Whether to treat all data as a single class.
        """
        self._metric_target = metric_target
        self._class_agnostic = class_agnostic

        self._predictions_list: List[Detections] = []
        self._targets_list: List[Detections] = []

    def reset(self) -> None:
        """
        Reset the metric to its initial state, clearing all stored data.
        """
        self._predictions_list = []
        self._targets_list = []

    def update(
        self,
        predictions: Union[Detections, List[Detections]],
        targets: Union[Detections, List[Detections]],
    ) -> MeanAveragePrecision:
        """
        Add new predictions and targets to the metric, but do not compute the result.

        Args:
            predictions (Union[Detections, List[Detections]]): The predicted detections.
            targets (Union[Detections, List[Detections]]): The ground-truth detections.

        Returns:
            (MeanAveragePrecision): The updated metric instance.
        """
        if not isinstance(predictions, list):
            predictions = [predictions]
        if not isinstance(targets, list):
            targets = [targets]

        if len(predictions) != len(targets):
            raise ValueError(
                f"The number of predictions ({len(predictions)}) and"
                f" targets ({len(targets)}) during the update must be the same."
            )

        if self._class_agnostic:
            predictions = deepcopy(predictions)
            targets = deepcopy(targets)

            for prediction in predictions:
                prediction.class_id[:] = -1
            for target in targets:
                target.class_id[:] = -1

        self._predictions_list.extend(predictions)
        self._targets_list.extend(targets)

        return self

    def compute(
        self,
    ) -> MeanAveragePrecisionResult:
        """
        Calculate Mean Average Precision based on predicted and ground-truth
        detections at different thresholds.

        Returns:
            (MeanAveragePrecisionResult): The Mean Average Precision result.
        """
        result = self._compute(self._predictions_list, self._targets_list)

        small_predictions = []
        small_targets = []
        for predictions, targets in zip(self._predictions_list, self._targets_list):
            small_predictions.append(
                self._filter_detections_by_size(predictions, ObjectSizeCategory.SMALL)
            )
            small_targets.append(
                self._filter_detections_by_size(targets, ObjectSizeCategory.SMALL)
            )
        result.small_objects = self._compute(small_predictions, small_targets)

        medium_predictions = []
        medium_targets = []
        for predictions, targets in zip(self._predictions_list, self._targets_list):
            medium_predictions.append(
                self._filter_detections_by_size(predictions, ObjectSizeCategory.MEDIUM)
            )
            medium_targets.append(
                self._filter_detections_by_size(targets, ObjectSizeCategory.MEDIUM)
            )
        result.medium_objects = self._compute(medium_predictions, medium_targets)

        large_predictions = []
        large_targets = []
        for predictions, targets in zip(self._predictions_list, self._targets_list):
            large_predictions.append(
                self._filter_detections_by_size(predictions, ObjectSizeCategory.LARGE)
            )
            large_targets.append(
                self._filter_detections_by_size(targets, ObjectSizeCategory.LARGE)
            )
        result.large_objects = self._compute(large_predictions, large_targets)

        return result

    def _compute(
        self,
        predictions_list: List[Detections],
        targets_list: List[Detections],
    ) -> MeanAveragePrecisionResult:
        iou_thresholds = np.linspace(0.5, 0.95, 10)
        stats = []

        for predictions, targets in zip(predictions_list, targets_list):
            prediction_contents = self._detections_content(predictions)
            target_contents = self._detections_content(targets)

            if len(targets) > 0:
                if len(predictions) == 0:
                    stats.append(
                        (
                            np.zeros((0, iou_thresholds.size), dtype=bool),
                            np.zeros((0,), dtype=np.float32),
                            np.zeros((0,), dtype=int),
                            targets.class_id,
                        )
                    )

                else:
                    if self._metric_target == MetricTarget.BOXES:
                        iou = box_iou_batch(target_contents, prediction_contents)
                    elif self._metric_target == MetricTarget.MASKS:
                        iou = mask_iou_batch(target_contents, prediction_contents)
                    elif self._metric_target == MetricTarget.ORIENTED_BOUNDING_BOXES:
                        iou = oriented_box_iou_batch(
                            target_contents, prediction_contents
                        )
                    else:
                        raise ValueError(
                            "Unsupported metric target for IoU calculation"
                        )

                    matches = self._match_detection_batch(
                        predictions.class_id, targets.class_id, iou, iou_thresholds
                    )

                    stats.append(
                        (
                            matches,
                            predictions.confidence,
                            predictions.class_id,
                            targets.class_id,
                        )
                    )

        # Compute average precisions if any matches exist
        if stats:
            concatenated_stats = [np.concatenate(items, 0) for items in zip(*stats)]
            average_precisions, unique_classes = self._average_precisions_per_class(
                *concatenated_stats
            )
            mAP_scores = np.mean(average_precisions, axis=0)
        else:
            mAP_scores = np.zeros((10,), dtype=np.float32)
            unique_classes = np.empty((0,), dtype=int)
            average_precisions = np.empty((0, len(iou_thresholds)), dtype=np.float32)

        return MeanAveragePrecisionResult(
            metric_target=self._metric_target,
            is_class_agnostic=self._class_agnostic,
            mAP_scores=mAP_scores,
            iou_thresholds=iou_thresholds,
            matched_classes=unique_classes,
            ap_per_class=average_precisions,
        )

    @staticmethod
    def _compute_average_precision(recall: np.ndarray, precision: np.ndarray) -> float:
        """
        Compute the average precision using 101-point interpolation (COCO), given
            the recall and precision curves.

        Args:
            recall (np.ndarray): The recall curve.
            precision (np.ndarray): The precision curve.

        Returns:
            (float): Average precision.
        """
        if len(recall) == 0 and len(precision) == 0:
            return 0.0

        recall_levels = np.linspace(0, 1, 101)
        precision_levels = np.zeros_like(recall_levels)
        for r, p in zip(recall[::-1], precision[::-1]):
            precision_levels[recall_levels <= r] = p

        average_precision = (1 / 101 * precision_levels).sum()
        return average_precision

    @staticmethod
    def _match_detection_batch(
        predictions_classes: np.ndarray,
        target_classes: np.ndarray,
        iou: np.ndarray,
        iou_thresholds: np.ndarray,
    ) -> np.ndarray:
        num_predictions, num_iou_levels = (
            predictions_classes.shape[0],
            iou_thresholds.shape[0],
        )
        correct = np.zeros((num_predictions, num_iou_levels), dtype=bool)
        correct_class = target_classes[:, None] == predictions_classes

        for i, iou_level in enumerate(iou_thresholds):
            matched_indices = np.where((iou >= iou_level) & correct_class)

            if matched_indices[0].shape[0]:
                combined_indices = np.stack(matched_indices, axis=1)
                iou_values = iou[matched_indices][:, None]
                matches = np.hstack([combined_indices, iou_values])

                if matched_indices[0].shape[0] > 1:
                    matches = matches[matches[:, 2].argsort()[::-1]]
                    matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
                    matches = matches[np.unique(matches[:, 0], return_index=True)[1]]

                correct[matches[:, 1].astype(int), i] = True

        return correct

    @staticmethod
    def _average_precisions_per_class(
        matches: np.ndarray,
        prediction_confidence: np.ndarray,
        prediction_class_ids: np.ndarray,
        true_class_ids: np.ndarray,
    ) -> Tuple[np.ndarray, np.ndarray]:
        """
        Compute the average precision, given the recall and precision curves.
        Source: https://github.com/rafaelpadilla/Object-Detection-Metrics.

        Args:
            matches (np.ndarray): True positives.
            prediction_confidence (np.ndarray): Objectness value from 0-1.
            prediction_class_ids (np.ndarray): Predicted object classes.
            true_class_ids (np.ndarray): True object classes.
            eps (float, optional): Small value to prevent division by zero.

        Returns:
            (Tuple[np.ndarray, np.ndarray]): Average precision for different
                IoU levels, and an array of class IDs that were matched.
        """
        eps = 1e-16

        sorted_indices = np.argsort(-prediction_confidence)
        matches = matches[sorted_indices]
        prediction_class_ids = prediction_class_ids[sorted_indices]

        unique_classes, class_counts = np.unique(true_class_ids, return_counts=True)
        num_classes = unique_classes.shape[0]

        average_precisions = np.zeros((num_classes, matches.shape[1]))

        for class_idx, class_id in enumerate(unique_classes):
            is_class = prediction_class_ids == class_id
            total_true = class_counts[class_idx]
            total_prediction = is_class.sum()

            if total_prediction == 0 or total_true == 0:
                continue

            false_positives = (1 - matches[is_class]).cumsum(0)
            true_positives = matches[is_class].cumsum(0)
            false_negatives = total_true - true_positives

            recall = true_positives / (true_positives + false_negatives + eps)
            precision = true_positives / (true_positives + false_positives)

            for iou_level_idx in range(matches.shape[1]):
                average_precisions[class_idx, iou_level_idx] = (
                    MeanAveragePrecision._compute_average_precision(
                        recall[:, iou_level_idx], precision[:, iou_level_idx]
                    )
                )

        return average_precisions, unique_classes

    def _detections_content(self, detections: Detections) -> np.ndarray:
        """Return boxes, masks or oriented bounding boxes from detections."""
        if self._metric_target == MetricTarget.BOXES:
            return detections.xyxy
        if self._metric_target == MetricTarget.MASKS:
            return (
                detections.mask
                if detections.mask is not None
                else self._make_empty_content()
            )
        if self._metric_target == MetricTarget.ORIENTED_BOUNDING_BOXES:
            obb = detections.data.get(ORIENTED_BOX_COORDINATES)
            if obb is not None and len(obb) > 0:
                return np.array(obb, dtype=np.float32)
            return self._make_empty_content()
        raise ValueError(f"Invalid metric target: {self._metric_target}")

    def _make_empty_content(self) -> np.ndarray:
        if self._metric_target == MetricTarget.BOXES:
            return np.empty((0, 4), dtype=np.float32)
        if self._metric_target == MetricTarget.MASKS:
            return np.empty((0, 0, 0), dtype=bool)
        if self._metric_target == MetricTarget.ORIENTED_BOUNDING_BOXES:
            return np.empty((0, 4, 2), dtype=np.float32)
        raise ValueError(f"Invalid metric target: {self._metric_target}")

    def _filter_detections_by_size(
        self, detections: Detections, size_category: ObjectSizeCategory
    ) -> Detections:
        """Return a copy of detections with contents filtered by object size."""
        new_detections = deepcopy(detections)
        if detections.is_empty() or size_category == ObjectSizeCategory.ANY:
            return new_detections

        sizes = get_detection_size_category(new_detections, self._metric_target)
        size_mask = sizes == size_category.value

        new_detections.xyxy = new_detections.xyxy[size_mask]
        if new_detections.mask is not None:
            new_detections.mask = new_detections.mask[size_mask]
        if new_detections.class_id is not None:
            new_detections.class_id = new_detections.class_id[size_mask]
        if new_detections.confidence is not None:
            new_detections.confidence = new_detections.confidence[size_mask]
        if new_detections.tracker_id is not None:
            new_detections.tracker_id = new_detections.tracker_id[size_mask]
        if new_detections.data is not None:
            for key, value in new_detections.data.items():
                new_detections.data[key] = np.array(value)[size_mask]

        return new_detections

Functions

__init__(metric_target=MetricTarget.BOXES, class_agnostic=False)

Initialize the Mean Average Precision metric.

Parameters:

| Name | Type | Description | Default |
|------|------|-------------|---------|
| `metric_target` | `MetricTarget` | The type of detection data to use. | `BOXES` |
| `class_agnostic` | `bool` | Whether to treat all data as a single class. | `False` |

Source code in supervision/metrics/mean_average_precision.py
def __init__(
    self,
    metric_target: MetricTarget = MetricTarget.BOXES,
    class_agnostic: bool = False,
):
    """
    Initialize the Mean Average Precision metric.

    Args:
        metric_target (MetricTarget): The type of detection data to use.
        class_agnostic (bool): Whether to treat all data as a single class.
    """
    self._metric_target = metric_target
    self._class_agnostic = class_agnostic

    self._predictions_list: List[Detections] = []
    self._targets_list: List[Detections] = []
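
The defaults evaluate axis-aligned boxes per class. A short sketch of the non-default options, assuming MetricTarget can be imported from supervision.metrics; note that mask evaluation only produces matches when Detections.mask is populated:

from supervision.metrics import MeanAveragePrecision, MetricTarget

# Evaluate segmentation masks instead of boxes, ignoring class labels.
map_metric = MeanAveragePrecision(
    metric_target=MetricTarget.MASKS,
    class_agnostic=True,
)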

compute()

Calculate Mean Average Precision based on predicted and ground-truth detections at different thresholds.

Returns:

| Type | Description |
|------|-------------|
| `MeanAveragePrecisionResult` | The Mean Average Precision result. |

Source code in supervision/metrics/mean_average_precision.py
def compute(
    self,
) -> MeanAveragePrecisionResult:
    """
    Calculate Mean Average Precision based on predicted and ground-truth
    detections at different thresholds.

    Returns:
        (MeanAveragePrecisionResult): The Mean Average Precision result.
    """
    result = self._compute(self._predictions_list, self._targets_list)

    small_predictions = []
    small_targets = []
    for predictions, targets in zip(self._predictions_list, self._targets_list):
        small_predictions.append(
            self._filter_detections_by_size(predictions, ObjectSizeCategory.SMALL)
        )
        small_targets.append(
            self._filter_detections_by_size(targets, ObjectSizeCategory.SMALL)
        )
    result.small_objects = self._compute(small_predictions, small_targets)

    medium_predictions = []
    medium_targets = []
    for predictions, targets in zip(self._predictions_list, self._targets_list):
        medium_predictions.append(
            self._filter_detections_by_size(predictions, ObjectSizeCategory.MEDIUM)
        )
        medium_targets.append(
            self._filter_detections_by_size(targets, ObjectSizeCategory.MEDIUM)
        )
    result.medium_objects = self._compute(medium_predictions, medium_targets)

    large_predictions = []
    large_targets = []
    for predictions, targets in zip(self._predictions_list, self._targets_list):
        large_predictions.append(
            self._filter_detections_by_size(predictions, ObjectSizeCategory.LARGE)
        )
        large_targets.append(
            self._filter_detections_by_size(targets, ObjectSizeCategory.LARGE)
        )
    result.large_objects = self._compute(large_predictions, large_targets)

    return result
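
After compute(), the size-specific results are regular MeanAveragePrecisionResult objects, so they can be inspected the same way as the overall result. A small sketch, assuming map_metric has already been updated with data:

map_result = map_metric.compute()

print(map_result.map50_95)                # overall mAP@50:95
print(map_result.small_objects.map50_95)  # mAP@50:95 for objects with area < 32²
print(map_result.medium_objects.map50)    # mAP@50 for 32² ≤ area < 96²
print(map_result.large_objects.map75)     # mAP@75 for area ≥ 96²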

reset()

Reset the metric to its initial state, clearing all stored data.

Source code in supervision/metrics/mean_average_precision.py
def reset(self) -> None:
    """
    Reset the metric to its initial state, clearing all stored data.
    """
    self._predictions_list = []
    self._targets_list = []
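
reset() is useful when the same metric instance is reused across evaluation runs, for example one run per model checkpoint. A minimal sketch with hypothetical per-image prediction lists:

for predictions, targets in zip(model_a_predictions, ground_truths):
    map_metric.update(predictions, targets)
result_a = map_metric.compute()

map_metric.reset()  # clear stored detections before the next run

for predictions, targets in zip(model_b_predictions, ground_truths):
    map_metric.update(predictions, targets)
result_b = map_metric.compute()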

update(predictions, targets)

Add new predictions and targets to the metric, but do not compute the result.

Parameters:

| Name | Type | Description | Default |
|------|------|-------------|---------|
| `predictions` | `Union[Detections, List[Detections]]` | The predicted detections. | *required* |
| `targets` | `Union[Detections, List[Detections]]` | The ground-truth detections. | *required* |

Returns:

| Type | Description |
|------|-------------|
| `MeanAveragePrecision` | The updated metric instance. |

Source code in supervision/metrics/mean_average_precision.py
def update(
    self,
    predictions: Union[Detections, List[Detections]],
    targets: Union[Detections, List[Detections]],
) -> MeanAveragePrecision:
    """
    Add new predictions and targets to the metric, but do not compute the result.

    Args:
        predictions (Union[Detections, List[Detections]]): The predicted detections.
        targets (Union[Detections, List[Detections]]): The ground-truth detections.

    Returns:
        (MeanAveragePrecision): The updated metric instance.
    """
    if not isinstance(predictions, list):
        predictions = [predictions]
    if not isinstance(targets, list):
        targets = [targets]

    if len(predictions) != len(targets):
        raise ValueError(
            f"The number of predictions ({len(predictions)}) and"
            f" targets ({len(targets)}) during the update must be the same."
        )

    if self._class_agnostic:
        predictions = deepcopy(predictions)
        targets = deepcopy(targets)

        for prediction in predictions:
            prediction.class_id[:] = -1
        for target in targets:
            target.class_id[:] = -1

    self._predictions_list.extend(predictions)
    self._targets_list.extend(targets)

    return self
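
update() also accepts equal-length lists of Detections, one entry per image, and raises a ValueError when the lengths differ. A short sketch with hypothetical per-image detections:

# Both lists must have the same length; element i of each refers to image i.
predictions_per_image = [predictions_0, predictions_1, predictions_2]
targets_per_image = [targets_0, targets_1, targets_2]

map_metric.update(predictions_per_image, targets_per_image)

# Calls can be chained, since update() returns the metric instance.
map_result = map_metric.update(more_predictions, more_targets).compute()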

MeanAveragePrecisionResult

The result of the Mean Average Precision calculation.

Defaults to 0 when no detections or targets are present.

Attributes:

| Name | Type | Description |
|------|------|-------------|
| `metric_target` | `MetricTarget` | The type of data used for the metric: boxes, masks, or oriented bounding boxes. |
| `is_class_agnostic` | `bool` | Whether the results were computed in class-agnostic mode (all class IDs treated as `-1`). |
| `map50_95` | `float` | The mAP score averaged over IoU thresholds from `0.5` to `0.95`. |
| `map50` | `float` | The mAP score at an IoU threshold of `0.5`. |
| `map75` | `float` | The mAP score at an IoU threshold of `0.75`. |
| `mAP_scores` | `ndarray` | The mAP scores at each IoU threshold. Shape: `(num_iou_thresholds,)` |
| `ap_per_class` | `ndarray` | The average precision scores per class and IoU threshold. Shape: `(num_target_classes, num_iou_thresholds)` |
| `iou_thresholds` | `ndarray` | The IoU thresholds used in the calculations. |
| `matched_classes` | `ndarray` | The class IDs of all matched classes. Corresponds to the rows of `ap_per_class`. |
| `small_objects` | `Optional[MeanAveragePrecisionResult]` | The mAP results for small objects (area < 32²). |
| `medium_objects` | `Optional[MeanAveragePrecisionResult]` | The mAP results for medium objects (32² ≤ area < 96²). |
| `large_objects` | `Optional[MeanAveragePrecisionResult]` | The mAP results for large objects (area ≥ 96²). |
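
matched_classes and ap_per_class are aligned row-for-row, which makes it straightforward to report per-class AP. A small sketch, assuming a hypothetical class_names dict mapping class IDs to labels:

for class_id, ap_row in zip(map_result.matched_classes, map_result.ap_per_class):
    name = class_names.get(int(class_id), str(class_id))
    # ap_row holds one AP value per IoU threshold (0.50, 0.55, ..., 0.95).
    print(f"{name}: AP@50={ap_row[0]:.3f}, AP@75={ap_row[5]:.3f}, AP@50:95={ap_row.mean():.3f}")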

Source code in supervision/metrics/mean_average_precision.py
@dataclass
class MeanAveragePrecisionResult:
    """
    The result of the Mean Average Precision calculation.

    Defaults to `0` when no detections or targets are present.

    Attributes:
        metric_target (MetricTarget): the type of data used for the metric -
            boxes, masks or oriented bounding boxes.
        class_agnostic (bool): When computing class-agnostic results, class ID
            is set to `-1`.
        mAP_map50_95 (float): the mAP score at IoU thresholds from `0.5` to `0.95`.
        mAP_map50 (float): the mAP score at IoU threshold of `0.5`.
        mAP_map75 (float): the mAP score at IoU threshold of `0.75`.
        mAP_scores (np.ndarray): the mAP scores at each IoU threshold.
            Shape: `(num_iou_thresholds,)`
        ap_per_class (np.ndarray): the average precision scores per
            class and IoU threshold. Shape: `(num_target_classes, num_iou_thresholds)`
        iou_thresholds (np.ndarray): the IoU thresholds used in the calculations.
        matched_classes (np.ndarray): the class IDs of all matched classes.
            Corresponds to the rows of `ap_per_class`.
        small_objects (Optional[MeanAveragePrecisionResult]): the mAP results
            for small objects (area < 32²).
        medium_objects (Optional[MeanAveragePrecisionResult]): the mAP results
            for medium objects (32² ≤ area < 96²).
        large_objects (Optional[MeanAveragePrecisionResult]): the mAP results
            for large objects (area ≥ 96²).
    """

    metric_target: MetricTarget
    is_class_agnostic: bool

    @property
    def map50_95(self) -> float:
        return self.mAP_scores.mean()

    @property
    def map50(self) -> float:
        return self.mAP_scores[0]

    @property
    def map75(self) -> float:
        return self.mAP_scores[5]

    mAP_scores: np.ndarray
    ap_per_class: np.ndarray
    iou_thresholds: np.ndarray
    matched_classes: np.ndarray
    small_objects: Optional[MeanAveragePrecisionResult] = None
    medium_objects: Optional[MeanAveragePrecisionResult] = None
    large_objects: Optional[MeanAveragePrecisionResult] = None

    def __str__(self) -> str:
        """
        Format as a pretty string.

        Example:
            ```python
            print(map_result)
            # MeanAveragePrecisionResult:
            # Metric target: MetricTarget.BOXES
            # Class agnostic: False
            # mAP @ 50:95: 0.4674
            # mAP @ 50:    0.5048
            # mAP @ 75:    0.4796
            # mAP scores: [0.50485  0.50377  0.50377  ...]
            # IoU thresh: [0.5  0.55  0.6  ...]
            # AP per class:
            # 0: [0.67699  0.67699  0.67699  ...]
            # ...
            # Small objects: ...
            # Medium objects: ...
            # Large objects: ...
            ```
        """

        out_str = (
            f"{self.__class__.__name__}:\n"
            f"Metric target: {self.metric_target}\n"
            f"Class agnostic: {self.is_class_agnostic}\n"
            f"mAP @ 50:95: {self.map50_95:.4f}\n"
            f"mAP @ 50:    {self.map50:.4f}\n"
            f"mAP @ 75:    {self.map75:.4f}\n"
            f"mAP scores: {self.mAP_scores}\n"
            f"IoU thresh: {self.iou_thresholds}\n"
            f"AP per class:\n"
        )
        if self.ap_per_class.size == 0:
            out_str += "  No results\n"
        for class_id, ap_of_class in zip(self.matched_classes, self.ap_per_class):
            out_str += f"  {class_id}: {ap_of_class}\n"

        indent = "  "
        if self.small_objects is not None:
            indented = indent + str(self.small_objects).replace("\n", f"\n{indent}")
            out_str += f"\nSmall objects:\n{indented}"
        if self.medium_objects is not None:
            indented = indent + str(self.medium_objects).replace("\n", f"\n{indent}")
            out_str += f"\nMedium objects:\n{indented}"
        if self.large_objects is not None:
            indented = indent + str(self.large_objects).replace("\n", f"\n{indent}")
            out_str += f"\nLarge objects:\n{indented}"

        return out_str

    def to_pandas(self) -> "pd.DataFrame":
        """
        Convert the result to a pandas DataFrame.

        Returns:
            (pd.DataFrame): The result as a DataFrame.
        """
        ensure_pandas_installed()
        import pandas as pd

        pandas_data = {
            "mAP@50:95": self.map50_95,
            "mAP@50": self.map50,
            "mAP@75": self.map75,
        }

        if self.small_objects is not None:
            small_objects_df = self.small_objects.to_pandas()
            for key, value in small_objects_df.items():
                pandas_data[f"small_objects_{key}"] = value
        if self.medium_objects is not None:
            medium_objects_df = self.medium_objects.to_pandas()
            for key, value in medium_objects_df.items():
                pandas_data[f"medium_objects_{key}"] = value
        if self.large_objects is not None:
            large_objects_df = self.large_objects.to_pandas()
            for key, value in large_objects_df.items():
                pandas_data[f"large_objects_{key}"] = value

        # Average precisions are currently not included in the DataFrame.

        return pd.DataFrame(
            pandas_data,
            index=[0],
        )

    def plot(self):
        """
        Plot the mAP results.

        ![example_plot](\
            https://media.roboflow.com/supervision-docs/metrics/mAP_plot_example.png\
            ){ align=center width="800" }
        """

        labels = ["mAP@50:95", "mAP@50", "mAP@75"]
        values = [self.map50_95, self.map50, self.map75]
        colors = [LEGACY_COLOR_PALETTE[0]] * 3

        if self.small_objects is not None:
            labels += ["Small: mAP@50:95", "Small: mAP@50", "Small: mAP@75"]
            values += [
                self.small_objects.map50_95,
                self.small_objects.map50,
                self.small_objects.map75,
            ]
            colors += [LEGACY_COLOR_PALETTE[3]] * 3

        if self.medium_objects is not None:
            labels += ["Medium: mAP@50:95", "Medium: mAP@50", "Medium: mAP@75"]
            values += [
                self.medium_objects.map50_95,
                self.medium_objects.map50,
                self.medium_objects.map75,
            ]
            colors += [LEGACY_COLOR_PALETTE[2]] * 3

        if self.large_objects is not None:
            labels += ["Large: mAP@50:95", "Large: mAP@50", "Large: mAP@75"]
            values += [
                self.large_objects.map50_95,
                self.large_objects.map50,
                self.large_objects.map75,
            ]
            colors += [LEGACY_COLOR_PALETTE[4]] * 3

        plt.rcParams["font.family"] = "monospace"

        _, ax = plt.subplots(figsize=(10, 6))
        ax.set_ylim(0, 1)
        ax.set_ylabel("Value", fontweight="bold")
        ax.set_title("Mean Average Precision", fontweight="bold")

        x_positions = range(len(labels))
        bars = ax.bar(x_positions, values, color=colors, align="center")

        ax.set_xticks(x_positions)
        ax.set_xticklabels(labels, rotation=45, ha="right")

        for bar in bars:
            y_value = bar.get_height()
            ax.text(
                bar.get_x() + bar.get_width() / 2,
                y_value + 0.02,
                f"{y_value:.2f}",
                ha="center",
                va="bottom",
            )

        plt.rcParams["font.family"] = "sans-serif"

        plt.tight_layout()
        plt.show()

Functions

__str__()

Format as a pretty string.

Example
print(map_result)
# MeanAveragePrecisionResult:
# Metric target: MetricTarget.BOXES
# Class agnostic: False
# mAP @ 50:95: 0.4674
# mAP @ 50:    0.5048
# mAP @ 75:    0.4796
# mAP scores: [0.50485  0.50377  0.50377  ...]
# IoU thresh: [0.5  0.55  0.6  ...]
# AP per class:
# 0: [0.67699  0.67699  0.67699  ...]
# ...
# Small objects: ...
# Medium objects: ...
# Large objects: ...
Source code in supervision/metrics/mean_average_precision.py
def __str__(self) -> str:
    """
    Format as a pretty string.

    Example:
        ```python
        print(map_result)
        # MeanAveragePrecisionResult:
        # Metric target: MetricTarget.BOXES
        # Class agnostic: False
        # mAP @ 50:95: 0.4674
        # mAP @ 50:    0.5048
        # mAP @ 75:    0.4796
        # mAP scores: [0.50485  0.50377  0.50377  ...]
        # IoU thresh: [0.5  0.55  0.6  ...]
        # AP per class:
        # 0: [0.67699  0.67699  0.67699  ...]
        # ...
        # Small objects: ...
        # Medium objects: ...
        # Large objects: ...
        ```
    """

    out_str = (
        f"{self.__class__.__name__}:\n"
        f"Metric target: {self.metric_target}\n"
        f"Class agnostic: {self.is_class_agnostic}\n"
        f"mAP @ 50:95: {self.map50_95:.4f}\n"
        f"mAP @ 50:    {self.map50:.4f}\n"
        f"mAP @ 75:    {self.map75:.4f}\n"
        f"mAP scores: {self.mAP_scores}\n"
        f"IoU thresh: {self.iou_thresholds}\n"
        f"AP per class:\n"
    )
    if self.ap_per_class.size == 0:
        out_str += "  No results\n"
    for class_id, ap_of_class in zip(self.matched_classes, self.ap_per_class):
        out_str += f"  {class_id}: {ap_of_class}\n"

    indent = "  "
    if self.small_objects is not None:
        indented = indent + str(self.small_objects).replace("\n", f"\n{indent}")
        out_str += f"\nSmall objects:\n{indented}"
    if self.medium_objects is not None:
        indented = indent + str(self.medium_objects).replace("\n", f"\n{indent}")
        out_str += f"\nMedium objects:\n{indented}"
    if self.large_objects is not None:
        indented = indent + str(self.large_objects).replace("\n", f"\n{indent}")
        out_str += f"\nLarge objects:\n{indented}"

    return out_str

plot()

Plot the mAP results.

![example_plot](https://media.roboflow.com/supervision-docs/metrics/mAP_plot_example.png)

Source code in supervision/metrics/mean_average_precision.py
def plot(self):
    """
    Plot the mAP results.

    ![example_plot](\
        https://media.roboflow.com/supervision-docs/metrics/mAP_plot_example.png\
        ){ align=center width="800" }
    """

    labels = ["mAP@50:95", "mAP@50", "mAP@75"]
    values = [self.map50_95, self.map50, self.map75]
    colors = [LEGACY_COLOR_PALETTE[0]] * 3

    if self.small_objects is not None:
        labels += ["Small: mAP@50:95", "Small: mAP@50", "Small: mAP@75"]
        values += [
            self.small_objects.map50_95,
            self.small_objects.map50,
            self.small_objects.map75,
        ]
        colors += [LEGACY_COLOR_PALETTE[3]] * 3

    if self.medium_objects is not None:
        labels += ["Medium: mAP@50:95", "Medium: mAP@50", "Medium: mAP@75"]
        values += [
            self.medium_objects.map50_95,
            self.medium_objects.map50,
            self.medium_objects.map75,
        ]
        colors += [LEGACY_COLOR_PALETTE[2]] * 3

    if self.large_objects is not None:
        labels += ["Large: mAP@50:95", "Large: mAP@50", "Large: mAP@75"]
        values += [
            self.large_objects.map50_95,
            self.large_objects.map50,
            self.large_objects.map75,
        ]
        colors += [LEGACY_COLOR_PALETTE[4]] * 3

    plt.rcParams["font.family"] = "monospace"

    _, ax = plt.subplots(figsize=(10, 6))
    ax.set_ylim(0, 1)
    ax.set_ylabel("Value", fontweight="bold")
    ax.set_title("Mean Average Precision", fontweight="bold")

    x_positions = range(len(labels))
    bars = ax.bar(x_positions, values, color=colors, align="center")

    ax.set_xticks(x_positions)
    ax.set_xticklabels(labels, rotation=45, ha="right")

    for bar in bars:
        y_value = bar.get_height()
        ax.text(
            bar.get_x() + bar.get_width() / 2,
            y_value + 0.02,
            f"{y_value:.2f}",
            ha="center",
            va="bottom",
        )

    plt.rcParams["font.family"] = "sans-serif"

    plt.tight_layout()
    plt.show()

to_pandas()

Convert the result to a pandas DataFrame.

Returns:

| Type | Description |
|------|-------------|
| `DataFrame` | The result as a DataFrame. |

Source code in supervision/metrics/mean_average_precision.py
def to_pandas(self) -> "pd.DataFrame":
    """
    Convert the result to a pandas DataFrame.

    Returns:
        (pd.DataFrame): The result as a DataFrame.
    """
    ensure_pandas_installed()
    import pandas as pd

    pandas_data = {
        "mAP@50:95": self.map50_95,
        "mAP@50": self.map50,
        "mAP@75": self.map75,
    }

    if self.small_objects is not None:
        small_objects_df = self.small_objects.to_pandas()
        for key, value in small_objects_df.items():
            pandas_data[f"small_objects_{key}"] = value
    if self.medium_objects is not None:
        medium_objects_df = self.medium_objects.to_pandas()
        for key, value in medium_objects_df.items():
            pandas_data[f"medium_objects_{key}"] = value
    if self.large_objects is not None:
        large_objects_df = self.large_objects.to_pandas()
        for key, value in large_objects_df.items():
            pandas_data[f"large_objects_{key}"] = value

    # Average precisions are currently not included in the DataFrame.

    return pd.DataFrame(
        pandas_data,
        index=[0],
    )
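
Since the result is a single-row DataFrame, it can be exported or combined with results from other runs using standard pandas calls. For example:

df = map_result.to_pandas()
df.to_csv("map_results.csv", index=False)
print(df.T)  # transpose the single row for easier reading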
