Skip to content

File Utils

supervision.utils.file.list_files_with_extensions(directory: str | Path, extensions: list[str] | None = None) -> list[Path]

List files in a directory with specified extensions or all files if no extensions are provided.

Parameters:

Name Type Description Default

directory

str | Path

The directory path as a string or Path object.

required

extensions

list[str] | None

A list of file extensions to filter. Extensions may be supplied with or without a leading dot (e.g. 'jpg' and '.jpg' are equivalent). Matching is case-insensitive. Multi-part extensions are supported (e.g. 'tar.gz'). Pass None (default) to list all files; pass an empty list to return no files.

None

Returns:

Type Description
list[Path]

A list of Path objects for the matching files.

Examples:

>>> import supervision as sv
>>> from pathlib import Path
>>> import tempfile
>>> # Keep a reference to the directory object
>>> tmp_dir_obj = tempfile.TemporaryDirectory()
>>> tmpdir = tmp_dir_obj.name
>>> # Create test files
>>> (Path(tmpdir) / "test1.txt").touch()
>>> (Path(tmpdir) / "test2.md").touch()
>>> (Path(tmpdir) / "test3.py").touch()
>>> # List all files in the directory
>>> files = sv.list_files_with_extensions(directory=tmpdir)
>>> len(files)
3
>>> # Leading dot accepted; matching is case-insensitive
>>> files = sv.list_files_with_extensions(
...     directory=tmpdir, extensions=['.txt', 'md'])
>>> len(files)
2
Source code in src/supervision/utils/file.py
def list_files_with_extensions(
    directory: str | Path, extensions: list[str] | None = None
) -> list[Path]:
    """
    List the files directly inside a directory, optionally filtered by extension.

    Only direct children of ``directory`` are considered (the search is not
    recursive), and only entries for which ``Path.is_file()`` is true are
    returned, so subdirectories are never part of the result, whether or not
    ``extensions`` is given.

    Args:
        directory: The directory path as a string or Path object.
        extensions: A list of file extensions to filter. Extensions may be
            supplied with or without a leading dot (e.g. ``'jpg'`` and
            ``'.jpg'`` are equivalent). Matching is case-insensitive.
            Multi-part extensions are supported (e.g. ``'tar.gz'``). Pass
            ``None`` (default) to list all files; pass an empty list to
            return no files. Entries that are empty after stripping the
            leading dots are ignored.

    Returns:
        A list of Path objects for the matching files. When ``extensions`` is
            given, results are grouped in the order the extensions were
            requested and each file appears at most once.

    Examples:
        ```pycon
        >>> import supervision as sv
        >>> from pathlib import Path
        >>> import tempfile
        >>> # Keep a reference to the directory object
        >>> tmp_dir_obj = tempfile.TemporaryDirectory()
        >>> tmpdir = tmp_dir_obj.name
        >>> # Create test files
        >>> (Path(tmpdir) / "test1.txt").touch()
        >>> (Path(tmpdir) / "test2.md").touch()
        >>> (Path(tmpdir) / "test3.py").touch()
        >>> # Subdirectories are never reported
        >>> (Path(tmpdir) / "nested").mkdir()
        >>> # List all files in the directory
        >>> files = sv.list_files_with_extensions(directory=tmpdir)
        >>> len(files)
        3
        >>> # Leading dot accepted; matching is case-insensitive
        >>> files = sv.list_files_with_extensions(
        ...     directory=tmpdir, extensions=['.TXT', 'md'])
        >>> len(files)
        2
        >>> tmp_dir_obj.cleanup()

        ```
    """
    directory = Path(directory)

    # Apply the same "files only" rule in both modes; previously the
    # unfiltered mode returned subdirectories as well.
    files = [path for path in directory.glob("*") if path.is_file()]

    if extensions is None:
        return files

    # Normalize the requested extensions once: lower-case, no leading dots,
    # blanks dropped. Request order is kept because it drives output order.
    wanted: list[str] = []
    for extension in extensions:
        normalized_extension = extension.lower().lstrip(".")
        if normalized_extension:
            wanted.append(normalized_extension)
    if not wanted:
        return []

    # For every file, precompute each trailing combination of its suffixes so
    # that "archive.tar.gz" matches both "gz" and "tar.gz".
    path_index: dict[Path, set[str]] = {}
    for path in files:
        suffixes = [suffix.lower().lstrip(".") for suffix in path.suffixes]
        path_index[path] = {
            ".".join(suffixes[start:]) for start in range(len(suffixes))
        }

    files_with_extensions: list[Path] = []
    seen_paths: set[Path] = set()
    for normalized_extension in wanted:
        for path, path_extensions in path_index.items():
            # A file matching several requested extensions is reported once,
            # under the first extension that matches it.
            if path not in seen_paths and normalized_extension in path_extensions:
                files_with_extensions.append(path)
                seen_paths.add(path)

    return files_with_extensions

Comments