# Source code for pipeline.Processing.compute

"""A module that defines how to compute certain columns.
"""
import typing
import numpy as np
import numpy.typing as npt
import pandas as pd


def _compute_r(df: pd.DataFrame):
    """Return ``sqrt(x ** 2 + y ** 2)`` built from the ``x`` and ``y`` columns."""
    return np.sqrt(df["x"] ** 2 + df["y"] ** 2)


def _compute_phi(df: pd.DataFrame):
    """Return ``arctan2(y, x)`` built from the ``y`` and ``x`` columns."""
    return np.arctan2(df["y"], df["x"])


def _compute_theta(df: pd.DataFrame):
    """Return ``arctan2(r, z)``; the ``r`` and ``z`` columns must already exist."""
    return np.arctan2(df["r"], df["z"])


def _compute_eta(df: pd.DataFrame):
    """Return ``-log(tan(theta / 2))``; the ``theta`` column must already exist."""
    return -np.log(np.tan(df["theta"] / 2.0))


#: Maps the name of a computable column to the function that derives it. Each
#: function receives the dataframe of hits and returns the values of that column.
#: NOTE(review): for a DataFrame input the NumPy ufuncs presumably hand back
#: pandas Series rather than bare ndarrays -- confirm the return annotation.
column_to_computation_fct: typing.Dict[
    str, typing.Callable[[pd.DataFrame], npt.NDArray]
] = {
    "r": _compute_r,
    "phi": _compute_phi,
    "theta": _compute_theta,
    "eta": _compute_eta,
}

#: Maps a computable column name to the list of columns that must be available
#: before it can be computed. ``x`` and ``y`` are assumed to already belong to the
#: dataframe, so they are not listed in this dictionary.
column_to_required_columns: typing.Dict[str, typing.List[str]] = dict(
    theta=["r"],
    eta=["theta"],
)


def compute_column(hits: pd.DataFrame, column: str) -> None:
    """Compute a column and store it in the dataframe of hits.

    Every column listed for ``column`` in ``column_to_required_columns`` that
    is missing from ``hits`` is computed first (recursively). ``column`` itself
    is then computed with its entry in ``column_to_computation_fct`` and written
    into ``hits`` in place, even if a column of that name already exists.

    Args:
        hits: dataframe of hits; modified in place
        column: column to compute

    Raises:
        ValueError: if ``column`` has no entry in ``column_to_computation_fct``
    """
    if column not in column_to_computation_fct:
        raise ValueError(
            f"The column `{column}` is not recognised as a column that can be "
            "computed. Columns that are computed are defined in "
            "`column_to_computation_fct` "
        )

    # Prerequisites first: the computation function reads them from ``hits``.
    for required_column in column_to_required_columns.get(column, []):
        if required_column not in hits:
            compute_column(hits, required_column)

    hits.loc[:, column] = column_to_computation_fct[column](hits)


def compute_columns(hits: pd.DataFrame, columns: typing.List[str]) -> None:
    """Compute required columns to the dataframe of hits.

    Each requested column that is not yet in ``hits`` is computed through
    ``compute_column``, which stores the result in ``hits`` in place.

    Args:
        hits: dataframe of hits; modified in place
        columns: columns to compute

    Notes:
        If the column is already in the dataframe, it will not be computed.
    """
    for column in columns:
        if column not in hits:
            compute_column(hits, column)