Source code for pipeline.Processing.compute

"""A module that defines how to compute certain columns.
"""
import typing
import numpy as np
import numpy.typing as npt
import pandas as pd


def _compute_r(df: pd.DataFrame):
    """Return ``sqrt(x**2 + y**2)`` from the ``x`` and ``y`` columns."""
    x_squared = df["x"] ** 2
    y_squared = df["y"] ** 2
    return np.sqrt(x_squared + y_squared)


def _compute_phi(df: pd.DataFrame):
    """Return ``arctan2(y, x)`` from the ``y`` and ``x`` columns."""
    return np.arctan2(df["y"], df["x"])


def _compute_theta(df: pd.DataFrame):
    """Return ``arctan2(r, z)`` from the ``r`` and ``z`` columns."""
    return np.arctan2(df["r"], df["z"])


def _compute_eta(df: pd.DataFrame):
    """Return ``-log(tan(theta / 2))`` from the ``theta`` column."""
    half_theta = df["theta"] / 2.0
    return -np.log(np.tan(half_theta))


#: Maps the name of a computable column to the function that derives it.
#: Each function receives the dataframe of hits and returns the values of the
#: new column; it does not modify the dataframe itself.
column_to_computation_fct: typing.Dict[
    str, typing.Callable[[pd.DataFrame], npt.NDArray]
] = {
    "r": _compute_r,
    "phi": _compute_phi,
    "theta": _compute_theta,
    "eta": _compute_eta,
}

#: Maps a computable column to the list of other computed columns that must
#: exist before it can be derived. Columns read directly from the dataframe
#: (``x``, ``y`` and ``z``) are assumed to already belong to it, so they are
#: not listed in this dictionary. A column without an entry here has no
#: computed prerequisite.
column_to_required_columns: typing.Dict[str, typing.List[str]] = dict(
    theta=["r"],
    eta=["theta"],
)


def compute_column(hits: pd.DataFrame, column: str) -> None:
    """Derive one column and write it into the dataframe of hits, in place.

    Any prerequisite listed for ``column`` in ``column_to_required_columns``
    that is missing from ``hits`` is computed first (recursively). The target
    column itself is always (re)computed, even if it is already present.

    Args:
        hits: dataframe of hits; it is modified in place
        column: name of the column to compute

    Raises:
        ValueError: if ``column`` has no entry in ``column_to_computation_fct``
    """
    # Guard clause: reject anything we do not know how to derive.
    if column not in column_to_computation_fct:
        message = (
            f"The column `{column}` is not recognised as a column that can be "
            "computed. Columns that are computed are defined in "
            "`column_to_computation_fct` "
        )
        raise ValueError(message)

    # Presence is re-checked at every iteration on purpose: computing one
    # prerequisite may itself add a later prerequisite to the dataframe.
    for dependency in column_to_required_columns.get(column, []):
        if dependency in hits:
            continue
        compute_column(hits, dependency)

    compute = column_to_computation_fct[column]
    hits.loc[:, column] = compute(hits)


def compute_columns(hits: pd.DataFrame, columns: typing.List[str]) -> None:
    """Add the requested columns to the dataframe of hits, in place.

    Args:
        hits: dataframe of hits; it is modified in place
        columns: names of the columns to compute

    Notes:
        A column that is already in the dataframe is left untouched. This
        includes a column that was added as a prerequisite of an earlier
        entry of ``columns``.
    """
    for name in columns:
        # Membership is tested lazily, right before each computation.
        if name in hits:
            continue
        compute_column(hits, name)