Skip to content

Boxes Utils

supervision.detection.utils.boxes.move_boxes(xyxy: npt.NDArray[np.float64], offset: npt.NDArray[np.int32]) -> npt.NDArray[np.float64]

Parameters:

Name Type Description Default

xyxy

NDArray[float64]

An array of shape (n, 4) containing the bounding boxes coordinates in format [x1, y1, x2, y2]

required

offset

NDArray[int32]

An array of shape (2,) containing the offset values in the format [dx, dy].

required

Returns:

Type Description
NDArray[float64]

Repositioned bounding boxes.

Examples:

>>> import numpy as np
>>> import supervision as sv
>>> xyxy = np.array([
...     [10, 10, 20, 20],
...     [30, 30, 40, 40]
... ])
>>> offset = np.array([5, 5])
>>> sv.move_boxes(xyxy=xyxy, offset=offset)
array([[15, 15, 25, 25],
       [35, 35, 45, 45]])
Source code in src/supervision/detection/utils/boxes.py
def move_boxes(
    xyxy: npt.NDArray[np.float64], offset: npt.NDArray[np.int32]
) -> npt.NDArray[np.float64]:
    """
    Translate bounding boxes by a fixed offset.

    The `[dx, dy]` offset is repeated so that it lines up with the
    `[x1, y1, x2, y2]` layout and is then added to every box.

    Parameters:
        xyxy: An array of shape `(n, 4)` containing the
            bounding boxes coordinates in format `[x1, y1, x2, y2]`
        offset: An array of shape `(2,)` containing the offset values in
            the format `[dx, dy]`.

    Returns:
        Repositioned bounding boxes.

    Examples:
        ```pycon
        >>> import numpy as np
        >>> import supervision as sv
        >>> xyxy = np.array([
        ...     [10, 10, 20, 20],
        ...     [30, 30, 40, 40]
        ... ])
        >>> offset = np.array([5, 5])
        >>> sv.move_boxes(xyxy=xyxy, offset=offset)
        array([[15, 15, 25, 25],
               [35, 35, 45, 45]])

        ```
    """
    # [dx, dy] -> [dx, dy, dx, dy], so both corners shift by the same amount.
    shift = np.tile(offset, 2)
    return xyxy + shift

supervision.detection.utils.boxes.scale_boxes(xyxy: npt.NDArray[np.float64], factor: float) -> npt.NDArray[np.float64]

Scale the dimensions of bounding boxes.

Parameters:

Name Type Description Default

xyxy

NDArray[float64]

An array of shape (n, 4) containing the bounding boxes coordinates in format [x1, y1, x2, y2]

required

factor

float

A float value representing the factor by which the box dimensions are scaled. A factor greater than 1 enlarges the boxes, while a factor less than 1 shrinks them.

required

Returns:

Type Description
NDArray[float64]

Scaled bounding boxes.

Examples:

>>> import numpy as np
>>> import supervision as sv
>>> xyxy = np.array([
...     [10, 10, 20, 20],
...     [30, 30, 40, 40]
... ])
>>> sv.scale_boxes(xyxy=xyxy, factor=1.5)
array([[ 7.5,  7.5, 22.5, 22.5],
       [27.5, 27.5, 42.5, 42.5]])
Source code in src/supervision/detection/utils/boxes.py
def scale_boxes(
    xyxy: npt.NDArray[np.float64], factor: float
) -> npt.NDArray[np.float64]:
    """
    Scale the dimensions of bounding boxes.

    Each box keeps its center; its width and height are multiplied by
    `factor`.

    Parameters:
        xyxy: An array of shape `(n, 4)` containing the
            bounding boxes coordinates in format `[x1, y1, x2, y2]`
        factor: A float value representing the factor by which the box
            dimensions are scaled. A factor greater than 1 enlarges the boxes, while a
            factor less than 1 shrinks them.

    Returns:
        Scaled bounding boxes.

    Examples:
        ```pycon
        >>> import numpy as np
        >>> import supervision as sv
        >>> xyxy = np.array([
        ...     [10, 10, 20, 20],
        ...     [30, 30, 40, 40]
        ... ])
        >>> sv.scale_boxes(xyxy=xyxy, factor=1.5)
        array([[ 7.5,  7.5, 22.5, 22.5],
               [27.5, 27.5, 42.5, 42.5]])

        ```
    """
    top_left = xyxy[:, :2]
    bottom_right = xyxy[:, 2:]

    midpoints = (top_left + bottom_right) / 2
    # Half of the scaled width/height, measured from the center outwards.
    half_extents = (bottom_right - top_left) * factor / 2

    return np.hstack((midpoints - half_extents, midpoints + half_extents))

supervision.detection.utils.boxes.clip_boxes(xyxy: npt.NDArray[np.number], resolution_wh: tuple[int, int]) -> npt.NDArray[np.number]

Clips bounding boxes coordinates to fit within the frame resolution.

Parameters:

Name Type Description Default

xyxy

NDArray[number]

A numpy array of shape (N, 4) where each row corresponds to a bounding box in the format (x_min, y_min, x_max, y_max).

required

resolution_wh

tuple[int, int]

A tuple of the form (width, height) representing the resolution of the frame.

required

Returns:

Type Description
NDArray[number]

A numpy array of shape (N, 4) where each row corresponds to a bounding box with coordinates clipped to fit within the frame resolution.

Examples:

>>> import numpy as np
>>> import supervision as sv
>>> xyxy = np.array([
...     [10, 20, 300, 200],
...     [15, 25, 350, 450],
...     [-10, -20, 30, 40]
... ])
>>> sv.clip_boxes(xyxy=xyxy, resolution_wh=(320, 240))
array([[ 10,  20, 300, 200],
       [ 15,  25, 320, 240],
       [  0,   0,  30,  40]])
Source code in src/supervision/detection/utils/boxes.py
def clip_boxes(
    xyxy: npt.NDArray[np.number],
    resolution_wh: tuple[int, int],
) -> npt.NDArray[np.number]:
    """
    Clips bounding boxes coordinates to fit within the frame resolution.

    The x coordinates are limited to `[0, width]` and the y coordinates to
    `[0, height]`. The input array is not modified; a clipped copy is returned.

    Args:
        xyxy: A numpy array of shape `(N, 4)` where each
            row corresponds to a bounding box in
            the format `(x_min, y_min, x_max, y_max)`.
        resolution_wh: A tuple of the form
            `(width, height)` representing the resolution of the frame.

    Returns:
        A numpy array of shape `(N, 4)` where each row
            corresponds to a bounding box with coordinates clipped to fit
            within the frame resolution.

    Examples:
        ```pycon
        >>> import numpy as np
        >>> import supervision as sv
        >>> xyxy = np.array([
        ...     [10, 20, 300, 200],
        ...     [15, 25, 350, 450],
        ...     [-10, -20, 30, 40]
        ... ])
        >>> sv.clip_boxes(xyxy=xyxy, resolution_wh=(320, 240))
        array([[ 10,  20, 300, 200],
               [ 15,  25, 320, 240],
               [  0,   0,  30,  40]])

        ```
    """
    frame_w, frame_h = resolution_wh
    clipped: npt.NDArray[np.number] = np.copy(xyxy)

    # Pair each group of coordinate columns with the upper bound of its axis.
    for columns, upper in (([0, 2], frame_w), ([1, 3], frame_h)):
        clipped[:, columns] = np.clip(clipped[:, columns], 0, upper)

    return clipped

supervision.detection.utils.boxes.pad_boxes(xyxy: npt.NDArray[np.number], px: int, py: int | None = None) -> npt.NDArray[np.number]

Pads bounding boxes coordinates with a constant padding.

Parameters:

Name Type Description Default

xyxy

NDArray[number]

A numpy array of shape (N, 4) where each row corresponds to a bounding box in the format (x_min, y_min, x_max, y_max).

required

px

int

The padding value to be added to both the left and right sides of each bounding box.

required

py

int | None

The padding value to be added to both the top and bottom sides of each bounding box. If not provided, px will be used for both dimensions.

None

Returns:

Type Description
NDArray[number]

A numpy array of shape (N, 4) where each row corresponds to a bounding box with coordinates padded according to the provided padding values.

Examples:

>>> import numpy as np
>>> import supervision as sv
>>> xyxy = np.array([
...     [10, 20, 30, 40],
...     [15, 25, 35, 45]
... ])
>>> sv.pad_boxes(xyxy=xyxy, px=5, py=10)
array([[ 5, 10, 35, 50],
       [10, 15, 40, 55]])
Source code in src/supervision/detection/utils/boxes.py
def pad_boxes(
    xyxy: npt.NDArray[np.number],
    px: int,
    py: int | None = None,
) -> npt.NDArray[np.number]:
    """
    Pads bounding boxes coordinates with a constant padding.

    Args:
        xyxy: A numpy array of shape `(N, 4)` where each
            row corresponds to a bounding box in the format
            `(x_min, y_min, x_max, y_max)`.
        px: The padding value to be added to both the left and right sides of
            each bounding box.
        py: The padding value to be added to both the top and bottom
            sides of each bounding box. If not provided, `px` will be used for both
            dimensions.

    Returns:
        A numpy array of shape `(N, 4)` where each row corresponds to a
            bounding box with coordinates padded according to the provided padding
            values.

    Examples:
        ```pycon
        >>> import numpy as np
        >>> import supervision as sv
        >>> xyxy = np.array([
        ...     [10, 20, 30, 40],
        ...     [15, 25, 35, 45]
        ... ])
        >>> sv.pad_boxes(xyxy=xyxy, px=5, py=10)
        array([[ 5, 10, 35, 50],
               [10, 15, 40, 55]])

        ```
    """
    if py is None:
        py = px

    result = xyxy.copy()
    result[:, [0, 1]] -= [px, py]
    result[:, [2, 3]] += [px, py]

    return result

supervision.detection.utils.boxes.denormalize_boxes(xyxy: npt.NDArray[np.number], resolution_wh: tuple[int, int], normalization_factor: float = 1.0) -> npt.NDArray[np.number]

Convert normalized bounding box coordinates to absolute pixel coordinates.

Multiplies each bounding box coordinate by image size and divides by normalization_factor, mapping values from normalized [0, normalization_factor] to absolute pixel values for a given resolution.

Parameters:

Name Type Description Default

xyxy

NDArray[number]

Normalized bounding boxes of shape (N, 4), where each row is (x_min, y_min, x_max, y_max), values in [0, normalization_factor].

required

resolution_wh

tuple[int, int]

Target image resolution as (width, height).

required

normalization_factor

float

Maximum value of input coordinate range. Defaults to 1.0.

1.0

Returns:

Type Description
NDArray[number]

Array of shape (N, 4) with absolute coordinates in (x_min, y_min, x_max, y_max) format.

Examples:

>>> import numpy as np
>>> import supervision as sv
>>> xyxy = np.array([
...     [0.1, 0.2, 0.5, 0.6],
...     [0.3, 0.4, 0.7, 0.8],
...     [0.2, 0.1, 0.6, 0.5]
... ])
>>> sv.denormalize_boxes(xyxy, (1280, 720))
array([[128., 144., 640., 432.],
       [384., 288., 896., 576.],
       [256.,  72., 768., 360.]])
>>> xyxy = np.array([
...     [256., 128., 768., 640.]
... ])
>>> sv.denormalize_boxes(xyxy, (1280, 720), normalization_factor=1024.0)
array([[320.,  90., 960., 450.]])
Source code in src/supervision/detection/utils/boxes.py
def denormalize_boxes(
    xyxy: npt.NDArray[np.number],
    resolution_wh: tuple[int, int],
    normalization_factor: float = 1.0,
) -> npt.NDArray[np.number]:
    """
    Convert normalized bounding box coordinates to absolute pixel coordinates.

    Every x coordinate is multiplied by the image width and every y coordinate
    by the image height; both are then divided by `normalization_factor`. This
    maps values from the normalized range `[0, normalization_factor]` to
    absolute pixel values for the given resolution.

    The scaled values are written into a copy of `xyxy`, so the input is left
    untouched and the result keeps the dtype of the input array.

    Args:
        xyxy: Normalized bounding boxes of shape `(N, 4)`,
            where each row is `(x_min, y_min, x_max, y_max)`, values in
            `[0, normalization_factor]`.
        resolution_wh: Target image resolution as `(width, height)`.
        normalization_factor: Maximum value of input coordinate range.
            Defaults to `1.0`.

    Returns:
        Array of shape `(N, 4)` with absolute coordinates in
            `(x_min, y_min, x_max, y_max)` format.

    Examples:
        ```pycon
        >>> import numpy as np
        >>> import supervision as sv
        >>> xyxy = np.array([
        ...     [0.1, 0.2, 0.5, 0.6],
        ...     [0.3, 0.4, 0.7, 0.8],
        ...     [0.2, 0.1, 0.6, 0.5]
        ... ])
        >>> sv.denormalize_boxes(xyxy, (1280, 720))
        array([[128., 144., 640., 432.],
               [384., 288., 896., 576.],
               [256.,  72., 768., 360.]])

        ```

        ```pycon
        >>> xyxy = np.array([
        ...     [256., 128., 768., 640.]
        ... ])
        >>> sv.denormalize_boxes(xyxy, (1280, 720), normalization_factor=1024.0)
        array([[320.,  90., 960., 450.]])

        ```
    """
    image_w, image_h = resolution_wh
    absolute = xyxy.copy()

    # x columns scale with the image width, y columns with the image height.
    for columns, extent in (([0, 2], image_w), ([1, 3], image_h)):
        absolute[:, columns] = (absolute[:, columns] * extent) / normalization_factor

    return absolute

Comments