Source code for pipeline.utils.loaderutils.pathandling
"""Utilies to handles datasets without loading them.
"""
from __future__ import annotations
import typing
import os
import numpy as np
def get_input_paths(
    input_dir: str,
    n_events: int | None = None,
    shuffle: bool = False,
    seed: int | None = None,
) -> typing.List[str]:
    """Get the paths of the datasets located in a given directory.

    Only regular files directly inside ``input_dir`` are returned; the file
    named ``done`` is skipped. Paths are sorted before any shuffling or
    truncation so that the result does not depend on the order in which the
    filesystem happens to list the directory.

    Args:
        input_dir: input directory
        n_events: maximum number of paths to return. ``None`` returns all of
            them.
        shuffle: whether to shuffle the input paths (applied before
            selecting the first ``n_events``)
        seed: seed for the shuffling. With a fixed seed and unchanged
            directory contents, the shuffled order is reproducible.

    Returns:
        List of paths to the files found in ``input_dir``
    """
    # ``os.scandir`` yields entries in arbitrary order and holds an open
    # directory handle: sort for determinism and close it via ``with``.
    with os.scandir(input_dir) as entries:
        all_input_paths = sorted(
            entry.path
            for entry in entries
            if entry.is_file() and entry.name != "done"
        )

    if shuffle:
        rng = np.random.default_rng(seed=seed)
        rng.shuffle(all_input_paths)  # in-place shuffle of the list

    if n_events is not None:
        all_input_paths = all_input_paths[:n_events]

    return all_input_paths