Skip to content

Boxes Utils

supervision.detection.utils.boxes.move_boxes(xyxy, offset)

Parameters:

Name Type Description Default

xyxy

NDArray[float64]

An array of shape (n, 4) containing the bounding boxes coordinates in format [x1, y1, x2, y2]

required

offset

array

An array of shape (2,) containing the offset values in the format [dx, dy].

required

Returns:

Type Description
NDArray[float64]

npt.NDArray[np.float64]: Repositioned bounding boxes.

Examples:

import numpy as np
import supervision as sv

xyxy = np.array([
    [10, 10, 20, 20],
    [30, 30, 40, 40]
])
offset = np.array([5, 5])

sv.move_boxes(xyxy=xyxy, offset=offset)
# array([
#    [15, 15, 25, 25],
#    [35, 35, 45, 45]
# ])
Source code in supervision/detection/utils/boxes.py
def move_boxes(
    xyxy: npt.NDArray[np.float64], offset: npt.NDArray[np.int32]
) -> npt.NDArray[np.float64]:
    """
    Translate bounding boxes by a fixed offset.

    The same `[dx, dy]` shift is applied to both corners of every box, so box
    sizes are unchanged. The input array is not modified.

    Parameters:
        xyxy (npt.NDArray[np.float64]): An array of shape `(n, 4)` containing the
            bounding boxes coordinates in format `[x1, y1, x2, y2]`
        offset (np.array): An array of shape `(2,)` containing the offset values
            in format `[dx, dy]`.

    Returns:
        npt.NDArray[np.float64]: Repositioned bounding boxes.

    Examples:
        ```python
        import numpy as np
        import supervision as sv

        xyxy = np.array([
            [10, 10, 20, 20],
            [30, 30, 40, 40]
        ])
        offset = np.array([5, 5])

        sv.move_boxes(xyxy=xyxy, offset=offset)
        # array([
        #    [15, 15, 25, 25],
        #    [35, 35, 45, 45]
        # ])
        ```
    """
    # Repeat the offset so it lines up with [x1, y1, x2, y2]: [dx, dy, dx, dy].
    per_coordinate_shift = np.hstack((offset, offset))
    return xyxy + per_coordinate_shift

supervision.detection.utils.boxes.scale_boxes(xyxy, factor)

Scale the dimensions of bounding boxes.

Parameters:

Name Type Description Default

xyxy

NDArray[float64]

An array of shape (n, 4) containing the bounding boxes coordinates in format [x1, y1, x2, y2]

required

factor

float

A float value representing the factor by which the box dimensions are scaled. A factor greater than 1 enlarges the boxes, while a factor less than 1 shrinks them.

required

Returns:

Type Description
NDArray[float64]

npt.NDArray[np.float64]: Scaled bounding boxes.

Examples:

import numpy as np
import supervision as sv

xyxy = np.array([
    [10, 10, 20, 20],
    [30, 30, 40, 40]
])

sv.scale_boxes(xyxy=xyxy, factor=1.5)
# array([
#    [ 7.5,  7.5, 22.5, 22.5],
#    [27.5, 27.5, 42.5, 42.5]
# ])
Source code in supervision/detection/utils/boxes.py
def scale_boxes(
    xyxy: npt.NDArray[np.float64], factor: float
) -> npt.NDArray[np.float64]:
    """
    Scale the dimensions of bounding boxes.

    Each box is resized about its own center: the center stays where it is and
    the width and height are multiplied by `factor`.

    Parameters:
        xyxy (npt.NDArray[np.float64]): An array of shape `(n, 4)` containing the
            bounding boxes coordinates in format `[x1, y1, x2, y2]`
        factor (float): A float value representing the factor by which the box
            dimensions are scaled. A factor greater than 1 enlarges the boxes, while a
            factor less than 1 shrinks them.

    Returns:
        npt.NDArray[np.float64]: Scaled bounding boxes.

    Examples:
        ```python
        import numpy as np
        import supervision as sv

        xyxy = np.array([
            [10, 10, 20, 20],
            [30, 30, 40, 40]
        ])

        sv.scale_boxes(xyxy=xyxy, factor=1.5)
        # array([
        #    [ 7.5,  7.5, 22.5, 22.5],
        #    [27.5, 27.5, 42.5, 42.5]
        # ])
        ```
    """
    top_left = xyxy[:, :2]
    bottom_right = xyxy[:, 2:]

    midpoints = (top_left + bottom_right) / 2
    # Half of the scaled width/height, measured from the center to each edge.
    half_extents = (bottom_right - top_left) * factor / 2

    return np.concatenate(
        [midpoints - half_extents, midpoints + half_extents], axis=1
    )

supervision.detection.utils.boxes.clip_boxes(xyxy, resolution_wh)

Clips bounding boxes coordinates to fit within the frame resolution.

Parameters:

Name Type Description Default

xyxy

ndarray

A numpy array of shape (N, 4) where each row corresponds to a bounding box in the format (x_min, y_min, x_max, y_max).

required

resolution_wh

Tuple[int, int]

A tuple of the form (width, height) representing the resolution of the frame.

required

Returns:

Type Description
ndarray

np.ndarray: A numpy array of shape (N, 4) where each row corresponds to a bounding box with coordinates clipped to fit within the frame resolution.

Examples:

import numpy as np
import supervision as sv

xyxy = np.array([
    [10, 20, 300, 200],
    [15, 25, 350, 450],
    [-10, -20, 30, 40]
])

sv.clip_boxes(xyxy=xyxy, resolution_wh=(320, 240))
# array([
#     [ 10,  20, 300, 200],
#     [ 15,  25, 320, 240],
#     [  0,   0,  30,  40]
# ])
Source code in supervision/detection/utils/boxes.py
def clip_boxes(xyxy: np.ndarray, resolution_wh: tuple[int, int]) -> np.ndarray:
    """
    Clips bounding boxes coordinates to fit within the frame resolution.

    X coordinates are limited to `[0, width]` and Y coordinates to
    `[0, height]`. The input array is left untouched; a clipped copy is
    returned.

    Args:
        xyxy (np.ndarray): A numpy array of shape `(N, 4)` where each
            row corresponds to a bounding box in
            the format `(x_min, y_min, x_max, y_max)`.
        resolution_wh (Tuple[int, int]): A tuple of the form
            `(width, height)` representing the resolution of the frame.

    Returns:
        np.ndarray: A numpy array of shape `(N, 4)` where each row
            corresponds to a bounding box with coordinates clipped to fit
            within the frame resolution.

    Examples:
        ```python
        import numpy as np
        import supervision as sv

        xyxy = np.array([
            [10, 20, 300, 200],
            [15, 25, 350, 450],
            [-10, -20, 30, 40]
        ])

        sv.clip_boxes(xyxy=xyxy, resolution_wh=(320, 240))
        # array([
        #     [ 10,  20, 300, 200],
        #     [ 15,  25, 320, 240],
        #     [  0,   0,  30,  40]
        # ])
        ```
    """
    frame_w, frame_h = resolution_wh
    x_columns, y_columns = [0, 2], [1, 3]

    # Work on a copy so the caller's array is never modified.
    clipped = np.copy(xyxy)
    clipped[:, x_columns] = np.clip(clipped[:, x_columns], 0, frame_w)
    clipped[:, y_columns] = np.clip(clipped[:, y_columns], 0, frame_h)
    return clipped

supervision.detection.utils.boxes.pad_boxes(xyxy, px, py=None)

Pads bounding boxes coordinates with a constant padding.

Parameters:

Name Type Description Default

xyxy

ndarray

A numpy array of shape (N, 4) where each row corresponds to a bounding box in the format (x_min, y_min, x_max, y_max).

required

px

int

The padding value to be added to both the left and right sides of each bounding box.

required

py

Optional[int]

The padding value to be added to both the top and bottom sides of each bounding box. If not provided, px will be used for both dimensions.

None

Returns:

Type Description
ndarray

np.ndarray: A numpy array of shape (N, 4) where each row corresponds to a bounding box with coordinates padded according to the provided padding values.

Examples:

import numpy as np
import supervision as sv

xyxy = np.array([
    [10, 20, 30, 40],
    [15, 25, 35, 45]
])

sv.pad_boxes(xyxy=xyxy, px=5, py=10)
# array([
#     [ 5, 10, 35, 50],
#     [10, 15, 40, 55]
# ])
Source code in supervision/detection/utils/boxes.py
def pad_boxes(xyxy: np.ndarray, px: int, py: int | None = None) -> np.ndarray:
    """
    Pads bounding boxes coordinates with a constant padding.

    Args:
        xyxy (np.ndarray): A numpy array of shape `(N, 4)` where each
            row corresponds to a bounding box in the format
            `(x_min, y_min, x_max, y_max)`.
        px (int): The padding value to be added to both the left and right sides of
            each bounding box.
        py (Optional[int]): The padding value to be added to both the top and bottom
            sides of each bounding box. If not provided, `px` will be used for both
            dimensions.

    Returns:
        np.ndarray: A numpy array of shape `(N, 4)` where each row corresponds to a
            bounding box with coordinates padded according to the provided padding
            values.

    Examples:
        ```python
        import numpy as np
        import supervision as sv

        xyxy = np.array([
            [10, 20, 30, 40],
            [15, 25, 35, 45]
        ])

        sv.pad_boxes(xyxy=xyxy, px=5, py=10)
        # array([
        #     [ 5, 10, 35, 50],
        #     [10, 15, 40, 55]
        # ])
        ```
    """
    if py is None:
        py = px

    result = xyxy.copy()
    result[:, [0, 1]] -= [px, py]
    result[:, [2, 3]] += [px, py]

    return result

supervision.detection.utils.boxes.denormalize_boxes(xyxy, resolution_wh, normalization_factor=1.0)

Convert normalized bounding box coordinates to absolute pixel coordinates.

Multiplies each bounding box coordinate by image size and divides by normalization_factor, mapping values from normalized [0, normalization_factor] to absolute pixel values for a given resolution.

Parameters:

Name Type Description Default

xyxy

`numpy.ndarray`

Normalized bounding boxes of shape (N, 4), where each row is (x_min, y_min, x_max, y_max), values in [0, normalization_factor].

required

resolution_wh

`tuple[int, int]`

Target image resolution as (width, height).

required

normalization_factor

`float`

Maximum value of input coordinate range. Defaults to 1.0.

1.0

Returns:

Type Description
`numpy.ndarray`

Array of shape (N, 4) with absolute coordinates in (x_min, y_min, x_max, y_max) format.

Examples:

import numpy as np
import supervision as sv

xyxy = np.array([
    [0.1, 0.2, 0.5, 0.6],
    [0.3, 0.4, 0.7, 0.8],
    [0.2, 0.1, 0.6, 0.5]
])

sv.denormalize_boxes(xyxy, (1280, 720))
# array([
#     [128., 144., 640., 432.],
#     [384., 288., 896., 576.],
#     [256.,  72., 768., 360.]
# ])
import numpy as np
import supervision as sv

xyxy = np.array([
    [256., 128., 768., 640.]
])

sv.denormalize_boxes(xyxy, (1280, 720), normalization_factor=1024.0)
# array([
#     [320.,  90., 960., 450.]
# ])
Source code in supervision/detection/utils/boxes.py
def denormalize_boxes(
    xyxy: np.ndarray,
    resolution_wh: tuple[int, int],
    normalization_factor: float = 1.0,
) -> np.ndarray:
    """
    Convert normalized bounding box coordinates to absolute pixel coordinates.

    Multiplies each bounding box coordinate by image size and divides by
    `normalization_factor`, mapping values from normalized `[0, normalization_factor]`
    to absolute pixel values for a given resolution.

    The input array is not modified. Floating-point inputs keep their dtype;
    any other dtype (for example integer coordinates on a `[0, 1024]` grid) is
    promoted to `float64` so that fractional pixel values are not truncated.

    Args:
        xyxy (`numpy.ndarray`): Normalized bounding boxes of shape `(N, 4)`,
            where each row is `(x_min, y_min, x_max, y_max)`, values in
            `[0, normalization_factor]`.
        resolution_wh (`tuple[int, int]`): Target image resolution as `(width, height)`.
        normalization_factor (`float`): Maximum value of input coordinate range.
            Defaults to `1.0`.

    Returns:
        (`numpy.ndarray`): Array of shape `(N, 4)` with absolute coordinates in
            `(x_min, y_min, x_max, y_max)` format.

    Examples:
        ```python
        import numpy as np
        import supervision as sv

        xyxy = np.array([
            [0.1, 0.2, 0.5, 0.6],
            [0.3, 0.4, 0.7, 0.8],
            [0.2, 0.1, 0.6, 0.5]
        ])

        sv.denormalize_boxes(xyxy, (1280, 720))
        # array([
        #     [128., 144., 640., 432.],
        #     [384., 288., 896., 576.],
        #     [256.,  72., 768., 360.]
        # ])
        ```

        ```python
        import numpy as np
        import supervision as sv

        xyxy = np.array([
            [256., 128., 768., 640.]
        ])

        sv.denormalize_boxes(xyxy, (1280, 720), normalization_factor=1024.0)
        # array([
        #     [320.,  90., 960., 450.]
        # ])
        ```
    """
    width, height = resolution_wh

    # Writing scaled values back into an integer array would silently drop the
    # fractional part, so non-floating inputs are promoted to float64 first.
    if np.issubdtype(xyxy.dtype, np.floating):
        result = xyxy.copy()
    else:
        result = xyxy.astype(np.float64)

    result[:, [0, 2]] = (result[:, [0, 2]] * width) / normalization_factor
    result[:, [1, 3]] = (result[:, [1, 3]] * height) / normalization_factor

    return result

Comments