Source code for pipeline.utils.loaderutils.pathandling

"""Utilies to handles datasets without loading them.
"""
from __future__ import annotations
import typing
import os
import numpy as np


def get_input_paths(
    input_dir: str,
    n_events: int | None = None,
    shuffle: bool = False,
    seed: int | None = None,
) -> typing.List[str]:
    """Get the paths of the datasets located in a given directory.

    Only regular files directly inside ``input_dir`` are returned;
    sub-directories are skipped, as is any file named ``"done"``
    (presumably a completion marker written by an upstream step -- confirm
    against the producer).

    The paths are sorted before any shuffling or truncation, so that the
    result does not depend on the arbitrary order in which the operating
    system lists the directory. This makes a seeded shuffle, and the
    selection of the first ``n_events`` paths, reproducible.

    Args:
        input_dir: input directory
        n_events: maximum number of paths to return. ``None`` returns all of
            them.
        shuffle: whether to shuffle the input paths (applied before selecting
            the first ``n_events``)
        seed: seed for the shuffling. Only used if ``shuffle`` is ``True``.

    Returns:
        List of paths to the dataset files found in ``input_dir``.
    """
    # Use the context manager so the directory handle is released
    # deterministically instead of waiting for garbage collection.
    with os.scandir(input_dir) as entries:
        all_input_paths = sorted(
            entry.path
            for entry in entries
            if entry.is_file() and entry.name != "done"
        )

    if shuffle:
        rng = np.random.default_rng(seed=seed)
        rng.shuffle(all_input_paths)  # in-place shuffle of the list

    if n_events is not None:
        all_input_paths = all_input_paths[:n_events]

    return all_input_paths