Mean Average Recall¶

MeanAverageRecall

`supervision.metrics.mean_average_recall.MeanAverageRecall` ¶

Bases: Metric

Mean Average Recall (mAR) measures how well the model detects and retrieves relevant objects by averaging recall over multiple IoU thresholds, classes and detection limits.

Intuitively, while Recall measures the ability to find all relevant objects, mAR narrows down how many detections are considered for each class. For example, mAR @ 100 considers the top 100 highest confidence detections for each class. mAR @ 1 considers only the highest confidence detection for each class.

Examples:

>>> import numpy as np
>>> import supervision as sv
>>> from supervision.metrics import MeanAverageRecall
>>> predictions = sv.Detections(
...     xyxy=np.array([[0, 0, 10, 10]]),
...     class_id=np.array([0]),
...     confidence=np.array([0.9])
... )
>>> targets = sv.Detections(
...     xyxy=np.array([[0, 0, 10, 10]]),
...     class_id=np.array([0])
... )
>>> mar_metric = MeanAverageRecall()
>>> mar_result = mar_metric.update(predictions, targets).compute()
>>> round(float(mar_result.mAR_at_100), 2)
1.0

example_plot

Source code in src/supervision/metrics/mean_average_recall.py

class MeanAverageRecall(Metric):
    """
    Mean Average Recall (mAR) measures how well the model detects
    and retrieves relevant objects by averaging recall over multiple
    IoU thresholds, classes and detection limits.

    Intuitively, while Recall measures the ability to find all relevant
    objects, mAR narrows down how many detections are considered for each
    class. For example, mAR @ 100 considers the top 100 highest confidence
    detections for each class. mAR @ 1 considers only the highest
    confidence detection for each class.

    Examples:
        ```pycon
        >>> import numpy as np
        >>> import supervision as sv
        >>> from supervision.metrics import MeanAverageRecall
        >>> predictions = sv.Detections(
        ...     xyxy=np.array([[0, 0, 10, 10]]),
        ...     class_id=np.array([0]),
        ...     confidence=np.array([0.9])
        ... )
        >>> targets = sv.Detections(
        ...     xyxy=np.array([[0, 0, 10, 10]]),
        ...     class_id=np.array([0])
        ... )
        >>> mar_metric = MeanAverageRecall()
        >>> mar_result = mar_metric.update(predictions, targets).compute()
        >>> round(float(mar_result.mAR_at_100), 2)
        1.0

        ```

    ![example_plot](
        https://media.roboflow.com/supervision-docs/metrics/mAR_plot_example.png
    ){ align=center width="800" }
    """

    def __init__(
        self,
        metric_target: MetricTarget = MetricTarget.BOXES,
    ):
        """
        Initialize the Mean Average Recall metric.

        Args:
            metric_target: The type of detection data to use.
        """
        self._metric_target = metric_target

        # Predictions and targets accumulated by `update`, paired by position.
        self._predictions_list: list[Detections] = []
        self._targets_list: list[Detections] = []

        # Detection-count limits (the K in mAR @ K) evaluated by `compute`.
        self.max_detections = np.array([1, 10, 100])

    def reset(self) -> None:
        """
        Reset the metric to its initial state, clearing all stored data.
        """
        self._predictions_list = []
        self._targets_list = []

    def update(
        self,
        predictions: Detections | list[Detections],
        targets: Detections | list[Detections],
    ) -> MeanAverageRecall:
        """
        Add new predictions and targets to the metric, but do not compute the result.

        Args:
            predictions: The predicted detections.
            targets: The target detections.

        Returns:
            The updated metric instance.

        Raises:
            ValueError: If the number of predictions and targets differ.
        """
        # A single Detections object is treated as a one-element batch.
        if not isinstance(predictions, list):
            predictions = [predictions]
        if not isinstance(targets, list):
            targets = [targets]

        if len(predictions) != len(targets):
            raise ValueError(
                f"The number of predictions ({len(predictions)}) and"
                f" targets ({len(targets)}) during the update must be the same."
            )

        self._predictions_list.extend(predictions)
        self._targets_list.extend(targets)

        return self

    def compute(self) -> MeanAverageRecallResult:
        """
        Calculate the Mean Average Recall metric based on the stored predictions
        and ground-truth, at different IoU thresholds and maximum detection counts.

        The metric is computed once on all stored data and then once more for
        each of the small, medium and large size categories; the size-specific
        results are attached to the overall result.

        Returns:
            The Mean Average Recall metric result.
        """
        result = self._compute(self._predictions_list, self._targets_list)

        small_predictions, small_targets = self._filter_predictions_and_targets_by_size(
            self._predictions_list, self._targets_list, ObjectSizeCategory.SMALL
        )
        result.small_objects = self._compute(small_predictions, small_targets)

        medium_predictions, medium_targets = (
            self._filter_predictions_and_targets_by_size(
                self._predictions_list, self._targets_list, ObjectSizeCategory.MEDIUM
            )
        )
        result.medium_objects = self._compute(medium_predictions, medium_targets)

        large_predictions, large_targets = self._filter_predictions_and_targets_by_size(
            self._predictions_list, self._targets_list, ObjectSizeCategory.LARGE
        )
        result.large_objects = self._compute(large_predictions, large_targets)

        return result

    def _compute(
        self, predictions_list: list[Detections], targets_list: list[Detections]
    ) -> MeanAverageRecallResult:
        """
        Compute the recall result for paired lists of predictions and targets.

        Pairs whose targets are empty contribute nothing. For every other pair
        a tuple of (matches, prediction rank, prediction class ids, target
        class ids) is collected; the tuples are concatenated across pairs and
        reduced by `_compute_average_recall_for_classes`.

        Args:
            predictions_list: Predicted detections, one entry per pair.
            targets_list: Target detections, paired by position with
                `predictions_list`.

        Returns:
            The result with `small_objects`, `medium_objects` and
            `large_objects` set to None. If no pair has targets, the recall
            scores are all zero and `matched_classes` is empty.

        Raises:
            ValueError: If the metric target is not supported.
        """
        iou_thresholds = np.linspace(0.5, 0.95, 10)
        stats: list[Any] = []

        for predictions, targets in zip(predictions_list, targets_list):
            prediction_contents = self._detections_content(predictions)
            target_contents = self._detections_content(targets)

            if len(targets) > 0:
                if len(predictions) == 0:
                    # Nothing was predicted: record only the target classes so
                    # that they are counted as missed.
                    stats.append(
                        (
                            np.zeros((0, iou_thresholds.size), dtype=bool),
                            np.zeros((0,), dtype=int),
                            np.zeros((0,), dtype=int),
                            targets.class_id,
                        )
                    )

                else:
                    # IoU is indexed [target, prediction].
                    if self._metric_target == MetricTarget.BOXES:
                        iou = box_iou_batch(target_contents, prediction_contents)
                    elif self._metric_target == MetricTarget.MASKS:
                        iou = mask_iou_batch(target_contents, prediction_contents)
                    elif self._metric_target == MetricTarget.ORIENTED_BOUNDING_BOXES:
                        iou = oriented_box_iou_batch(
                            target_contents, prediction_contents
                        )
                    else:
                        raise ValueError(
                            "Unsupported metric target for IoU calculation"
                        )

                    matches = self._match_detection_batch(
                        predictions.class_id
                        if predictions.class_id is not None
                        else np.array([]),
                        targets.class_id
                        if targets.class_id is not None
                        else np.array([]),
                        iou,
                        iou_thresholds,
                    )

                    # Order predictions by descending confidence. The
                    # confidence must be set: negating None raises TypeError.
                    sorted_indices = np.argsort(
                        -cast(npt.NDArray[np.float32], predictions.confidence)
                    )
                    stats.append(
                        (
                            matches[sorted_indices],
                            # Rank of each sorted prediction within this pair,
                            # later compared against the detection limits.
                            np.arange(len(predictions)),
                            cast(npt.NDArray[np.int32], predictions.class_id)[
                                sorted_indices
                            ],
                            cast(npt.NDArray[np.int32], targets.class_id),
                        )
                    )

        if not stats:
            return MeanAverageRecallResult(
                metric_target=self._metric_target,
                recall_scores=np.zeros(iou_thresholds.shape[0]),
                recall_per_class=np.zeros((0, iou_thresholds.shape[0])),
                max_detections=self.max_detections,
                iou_thresholds=iou_thresholds,
                matched_classes=np.array([], dtype=int),
                small_objects=None,
                medium_objects=None,
                large_objects=None,
            )

        # Merge the per-pair tuples field by field.
        concatenated_stats = [np.concatenate(items, 0) for items in zip(*stats)]
        recall_scores_per_k, recall_per_class, unique_classes = (
            self._compute_average_recall_for_classes(*concatenated_stats)
        )

        return MeanAverageRecallResult(
            metric_target=self._metric_target,
            recall_scores=recall_scores_per_k,
            recall_per_class=recall_per_class,
            max_detections=self.max_detections,
            iou_thresholds=iou_thresholds,
            matched_classes=unique_classes,
            small_objects=None,
            medium_objects=None,
            large_objects=None,
        )

    def _compute_average_recall_for_classes(
        self,
        matches: npt.NDArray[np.bool_],
        prediction_indices: npt.NDArray[np.int32],
        prediction_class_ids: npt.NDArray[np.int32],
        true_class_ids: npt.NDArray[np.int32],
    ) -> tuple[
        npt.NDArray[np.float64],
        npt.NDArray[np.float64],
        npt.NDArray[np.int32],
    ]:
        """
        Compute recall for every detection limit in `self.max_detections`.

        Args:
            matches: shape (P, Th), True where a prediction is a true
                positive at the given IoU threshold.
            prediction_indices: shape (P,), the rank of each prediction; only
                predictions whose rank is below the detection limit are kept.
            prediction_class_ids: shape (P,), the class id of each prediction.
            true_class_ids: shape (T,), the class id of each target.

        Returns:
            A tuple of: recall scores of shape (K,), one per detection limit,
            averaged over IoU thresholds and classes; per-class recall of
            shape (C, Th) for the last entry of `self.max_detections`; and
            the unique target class ids of shape (C,).
        """
        unique_classes, class_counts = np.unique(true_class_ids, return_counts=True)

        recalls_at_k = []
        for max_detections in self.max_detections:
            # Keep only the predictions ranked within the current limit.
            within_limit = prediction_indices < max_detections

            # Shape: PxTh,P,C,C -> CxThx3
            confusion_matrix = self._compute_confusion_matrix(
                matches[within_limit],
                prediction_class_ids[within_limit],
                unique_classes,
                class_counts,
            )

            # Shape: CxThx3 -> CxTh
            recalls_at_k.append(self._compute_recall(confusion_matrix))

        # The reported per-class recall is the one computed for the last limit.
        recall_per_class = recalls_at_k[-1]

        # Shape: KxCxTh -> KxC
        average_recall_per_class = np.mean(np.array(recalls_at_k), axis=2)

        # Shape: KxC -> K
        recall_scores = np.mean(average_recall_per_class, axis=1)

        return recall_scores, recall_per_class, unique_classes

    @staticmethod
    def _match_detection_batch(
        predictions_classes: npt.NDArray[np.int32],
        target_classes: npt.NDArray[np.int32],
        iou: npt.NDArray[np.float32],
        iou_thresholds: npt.NDArray[np.float32],
    ) -> npt.NDArray[np.bool_]:
        """
        Mark which predictions match a target of the same class.

        Args:
            predictions_classes: shape (P,), the class id of each prediction.
            target_classes: shape (T,), the class id of each target.
            iou: shape (T, P), the IoU between each target and prediction.
            iou_thresholds: shape (Th,), the IoU thresholds to evaluate.

        Returns:
            shape (P, Th), True where the prediction is matched to a target
                at the given IoU threshold.
        """
        num_predictions, num_iou_levels = (
            predictions_classes.shape[0],
            iou_thresholds.shape[0],
        )
        correct = np.zeros((num_predictions, num_iou_levels), dtype=bool)
        # Shape (T, P): True where target and prediction share a class.
        correct_class = target_classes[:, None] == predictions_classes

        for i, iou_level in enumerate(iou_thresholds):
            matched_indices = np.where((iou >= iou_level) & correct_class)

            if matched_indices[0].shape[0]:
                # Rows of (target index, prediction index, IoU).
                combined_indices = np.stack(matched_indices, axis=1)
                iou_values = iou[matched_indices][:, None]
                matches = np.hstack([combined_indices, iou_values])

                if matched_indices[0].shape[0] > 1:
                    # Sort by IoU (descending), then keep a single row per
                    # prediction and a single row per target.
                    matches = matches[matches[:, 2].argsort()[::-1]]
                    matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
                    matches = matches[np.unique(matches[:, 0], return_index=True)[1]]

                correct[matches[:, 1].astype(int), i] = True
        result_correct: npt.NDArray[np.bool_] = correct
        return result_correct

    @staticmethod
    def _compute_confusion_matrix(
        sorted_matches: npt.NDArray[np.bool_],
        sorted_prediction_class_ids: npt.NDArray[np.int32],
        unique_classes: npt.NDArray[np.int32],
        class_counts: npt.NDArray[np.int32],
    ) -> npt.NDArray[np.float64]:
        """
        Compute the confusion matrix for each class and IoU threshold.

        Assumes the matches and prediction_class_ids are sorted by confidence
        in descending order.

        Args:
            sorted_matches: shape (P, Th), that is True
                if the prediction is a true positive at the given IoU threshold.
            sorted_prediction_class_ids: shape (P,), containing
                the class id for each prediction.
            unique_classes: shape (C,), containing the unique
                class ids.
            class_counts: shape (C,), containing the number
                of true instances for each class.

        Returns:
            shape (C, Th, 3), containing the true positives, false
                positives, and false negatives for each class and IoU threshold.
        """
        num_thresholds = sorted_matches.shape[1]
        num_classes = unique_classes.shape[0]

        confusion_matrix: npt.NDArray[np.float64] = np.zeros(
            (num_classes, num_thresholds, 3), dtype=np.float64
        )
        for class_idx, class_id in enumerate(unique_classes):
            is_class = sorted_prediction_class_ids == class_id
            num_true = class_counts[class_idx]
            num_predictions = is_class.sum()

            if num_predictions == 0:
                # No predictions of this class: every true instance is missed.
                true_positives = np.zeros(num_thresholds)
                false_positives = np.zeros(num_thresholds)
                false_negatives = np.full(num_thresholds, num_true)
            elif num_true == 0:
                # No true instances: every prediction is a false positive.
                true_positives = np.zeros(num_thresholds)
                false_positives = np.full(num_thresholds, num_predictions)
                false_negatives = np.zeros(num_thresholds)
            else:
                limited_matches = sorted_matches[is_class]
                true_positives = limited_matches.sum(0)

                false_positives = (1 - limited_matches).sum(0)
                false_negatives = num_true - true_positives

            confusion_matrix[class_idx] = np.stack(
                [true_positives, false_positives, false_negatives], axis=1
            )

        result_confusion_matrix: npt.NDArray[np.float64] = confusion_matrix
        return result_confusion_matrix

    @staticmethod
    def _compute_recall(
        confusion_matrix: npt.NDArray[np.float64],
    ) -> npt.NDArray[np.float64]:
        """
        Broadcastable function, computing the recall from the confusion matrix.

        Args:
            confusion_matrix: shape (N, ..., 3), where the last dimension
                contains the true positives, false positives, and false negatives.

        Returns:
            shape (N, ...), containing the recall for each element. Elements
                with no true positives and no false negatives get recall 0.

        Raises:
            ValueError: If the last dimension of `confusion_matrix` is not 3.
        """
        if not confusion_matrix.shape[-1] == 3:
            raise ValueError(
                f"Confusion matrix must have shape (..., 3), got "
                f"{confusion_matrix.shape}"
            )
        true_positives = confusion_matrix[..., 0]
        false_negatives = confusion_matrix[..., 2]

        denominator = true_positives + false_negatives
        # Divide only where the denominator is non-zero; the remaining
        # elements keep the 0 they were initialised with. This avoids the
        # RuntimeWarning an unconditional division would emit.
        recall: npt.NDArray[np.float64] = np.divide(
            true_positives,
            denominator,
            out=np.zeros(denominator.shape, dtype=np.float64),
            where=denominator != 0,
        )
        return recall

    def _detections_content(self, detections: Detections) -> npt.NDArray[Any]:
        """
        Return boxes, masks or oriented bounding boxes from detections.

        The field is selected by the metric target. When masks or oriented
        boxes are requested but missing, an empty array is returned instead.

        Raises:
            ValueError: If the metric target is not supported.
        """
        if self._metric_target == MetricTarget.BOXES:
            result_boxes: npt.NDArray[np.float32] = detections.xyxy
            return result_boxes
        if self._metric_target == MetricTarget.MASKS:
            if detections.mask is not None:
                result_masks: npt.NDArray[np.bool_] = detections.mask
                return result_masks
            return self._make_empty_content()
        if self._metric_target == MetricTarget.ORIENTED_BOUNDING_BOXES:
            obb = detections.data.get(ORIENTED_BOX_COORDINATES)
            if obb is not None and len(obb) > 0:
                result_obb: npt.NDArray[np.float32] = np.array(obb, dtype=np.float32)
                return result_obb
            return self._make_empty_content()
        raise ValueError(f"Invalid metric target: {self._metric_target}")

    def _make_empty_content(self) -> npt.NDArray[Any]:
        """
        Return an empty array shaped for the current metric target.

        The shapes are (0, 4) for boxes, (0, 0, 0) for masks and (0, 4, 2)
        for oriented bounding boxes.

        Raises:
            ValueError: If the metric target is not supported.
        """
        if self._metric_target == MetricTarget.BOXES:
            empty_boxes: npt.NDArray[np.float32] = np.empty((0, 4), dtype=np.float32)
            return empty_boxes

        if self._metric_target == MetricTarget.MASKS:
            empty_masks: npt.NDArray[np.bool_] = np.empty((0, 0, 0), dtype=bool)
            return empty_masks

        if self._metric_target == MetricTarget.ORIENTED_BOUNDING_BOXES:
            empty_obb: npt.NDArray[np.float32] = np.empty((0, 4, 2), dtype=np.float32)
            return empty_obb

        raise ValueError(f"Invalid metric target: {self._metric_target}")

    def _filter_detections_by_size(
        self, detections: Detections, size_category: ObjectSizeCategory
    ) -> Detections:
        """
        Return a copy of detections with contents filtered by object size.

        The input is never modified. Empty detections and the ANY category
        are returned as an unfiltered copy.

        Args:
            detections: The detections to filter.
            size_category: The size category to keep.

        Returns:
            A deep copy holding only the detections of the requested size.
        """
        new_detections = deepcopy(detections)
        if detections.is_empty() or size_category == ObjectSizeCategory.ANY:
            return new_detections

        # Boolean mask selecting the detections whose size category matches.
        sizes = get_detection_size_category(new_detections, self._metric_target)
        size_mask = sizes == size_category.value

        # Apply the same mask to every per-detection field that is present.
        new_detections.xyxy = new_detections.xyxy[size_mask]
        if new_detections.mask is not None:
            new_detections.mask = new_detections.mask[size_mask]
        if new_detections.class_id is not None:
            new_detections.class_id = new_detections.class_id[size_mask]
        if new_detections.confidence is not None:
            new_detections.confidence = new_detections.confidence[size_mask]
        if new_detections.tracker_id is not None:
            new_detections.tracker_id = new_detections.tracker_id[size_mask]
        if new_detections.data is not None:
            for key, value in new_detections.data.items():
                new_detections.data[key] = np.array(value)[size_mask]

        return new_detections

    def _filter_predictions_and_targets_by_size(
        self,
        predictions_list: list[Detections],
        targets_list: list[Detections],
        size_category: ObjectSizeCategory,
    ) -> tuple[list[Detections], list[Detections]]:
        """
        Filter predictions and targets by object size category.

        Args:
            predictions_list: Predicted detections, one entry per pair.
            targets_list: Target detections, paired by position with
                `predictions_list`.
            size_category: The size category to keep.

        Returns:
            The filtered predictions and the filtered targets, as two new
            lists that keep the original pairing.
        """
        new_predictions_list = []
        new_targets_list = []
        for predictions, targets in zip(predictions_list, targets_list):
            new_predictions_list.append(
                self._filter_detections_by_size(predictions, size_category)
            )
            new_targets_list.append(
                self._filter_detections_by_size(targets, size_category)
            )
        return new_predictions_list, new_targets_list

Functions¶

`__init__(metric_target: MetricTarget = MetricTarget.BOXES)` ¶

Initialize the Mean Average Recall metric.

Parameters:

Name	Type	Description	Default
`metric_target` ¶	`MetricTarget`	The type of detection data to use.	`BOXES`

Source code in src/supervision/metrics/mean_average_recall.py

def __init__(
    self,
    metric_target: MetricTarget = MetricTarget.BOXES,
):
    """
    Set up an empty Mean Average Recall metric.

    Args:
        metric_target: Which kind of detection data the metric evaluates.
    """
    # Detection-count limits at which recall is reported.
    self.max_detections = np.array([1, 10, 100])

    self._metric_target = metric_target

    # Accumulated inputs; both lists start out empty.
    self._targets_list: list[Detections] = []
    self._predictions_list: list[Detections] = []

`compute() -> MeanAverageRecallResult` ¶

Calculate the Mean Average Recall metric based on the stored predictions and ground-truth, at different IoU thresholds and maximum detection counts.

Returns:

Type	Description
`MeanAverageRecallResult`	The Mean Average Recall metric result.

Source code in src/supervision/metrics/mean_average_recall.py

def compute(self) -> MeanAverageRecallResult:
    """
    Compute Mean Average Recall from everything stored so far.

    The metric is evaluated on all stored predictions and targets, then
    again on the small, medium and large subsets, and the three
    size-specific results are attached to the overall one.

    Returns:
        The Mean Average Recall metric result.
    """
    stored_predictions = self._predictions_list
    stored_targets = self._targets_list

    overall = self._compute(stored_predictions, stored_targets)

    # Small objects.
    small_pair = self._filter_predictions_and_targets_by_size(
        stored_predictions, stored_targets, ObjectSizeCategory.SMALL
    )
    overall.small_objects = self._compute(*small_pair)

    # Medium objects.
    medium_pair = self._filter_predictions_and_targets_by_size(
        stored_predictions, stored_targets, ObjectSizeCategory.MEDIUM
    )
    overall.medium_objects = self._compute(*medium_pair)

    # Large objects.
    large_pair = self._filter_predictions_and_targets_by_size(
        stored_predictions, stored_targets, ObjectSizeCategory.LARGE
    )
    overall.large_objects = self._compute(*large_pair)

    return overall

`reset() -> None` ¶

Reset the metric to its initial state, clearing all stored data.

Source code in src/supervision/metrics/mean_average_recall.py

def reset(self) -> None:
    """
    Drop all accumulated predictions and targets.

    Both stored lists are replaced with new empty lists.
    """
    self._predictions_list, self._targets_list = [], []

`update(predictions: Detections | list[Detections], targets: Detections | list[Detections]) -> MeanAverageRecall` ¶

Add new predictions and targets to the metric, but do not compute the result.

Parameters:

Name	Type	Description	Default
`predictions` ¶	`Detections \| list[Detections]`	The predicted detections.	required
`targets` ¶	`Detections \| list[Detections]`	The target detections.	required

Returns:

Type	Description
`MeanAverageRecall`	The updated metric instance.

Source code in src/supervision/metrics/mean_average_recall.py

def update(
    self,
    predictions: Detections | list[Detections],
    targets: Detections | list[Detections],
) -> MeanAverageRecall:
    """
    Store new predictions and targets without computing the metric.

    Args:
        predictions: The predicted detections, a single object or a list.
        targets: The target detections, a single object or a list.

    Returns:
        The updated metric instance.

    Raises:
        ValueError: If the number of predictions and targets differ.
    """
    # Anything that is not already a list is wrapped into a one-element list.
    prediction_batch = predictions if isinstance(predictions, list) else [predictions]
    target_batch = targets if isinstance(targets, list) else [targets]

    if len(prediction_batch) != len(target_batch):
        raise ValueError(
            f"The number of predictions ({len(prediction_batch)}) and"
            f" targets ({len(target_batch)}) during the update must be the same."
        )

    self._predictions_list.extend(prediction_batch)
    self._targets_list.extend(target_batch)
    return self