Source code for pipeline.Processing.planewise_edges

"""A module that defines a way of defines the edges by sorting the hits by plane
(instead of by distance from the origin vertex).

This way, we define the edge orientation using a left to right convention. However,
if a plane a multiple hits for the same particle, the edges can be not well defined.
"""
import typing
import numpy as np
import numpy.typing as npt
import pandas as pd
import numba as nb
from montetracko.array_utils.groupby import group_lengths
from utils.tools.tgroupby import get_group_indices_from_group_lengths


@nb.jit(nopython=True, cache=True)
def get_edges_from_sorted_impl(
    hit_ids: npt.NDArray,
    particle_ids: npt.NDArray,
    plane_ids: npt.NDArray,
) -> typing.List[npt.NDArray]:
    """Fill the array of plane-wise edges by grouping by particle ID already sorted
    by plane, and forming edge by linking "adjacent" planes.

    Args:
        edges: Pre-allocated empty array of edges to fill
        hit_ids: List of hit IDs, sorted by particle IDs and planes
        particle_group_indices: Start and end indices in ``hit_ids``
            that delimits hits that have same particle ID.
    """
    n_hits_per_particles = group_lengths(particle_ids)[0]
    particle_group_indices = get_group_indices_from_group_lengths(n_hits_per_particles)

    list_edges = [np.zeros(dtype=hit_ids.dtype, shape=(2, 1)) for _ in range(0)]

    for particle_start_idx, particle_end_idx in zip(
        particle_group_indices[:-1], particle_group_indices[1:]
    ):
        particle_hit_ids = hit_ids[particle_start_idx:particle_end_idx]
        n_planes_per_hits = group_lengths(
            plane_ids[particle_start_idx:particle_end_idx]
        )[0]
        plane_group_indices = get_group_indices_from_group_lengths(n_planes_per_hits)

        n_edges = np.sum(np.multiply(n_planes_per_hits[:-1], n_planes_per_hits[1:]))
        edges = np.full(shape=(2, n_edges), dtype=hit_ids.dtype, fill_value=-1)
        edge_idx = 0
        for plane_group in range(
            len(plane_group_indices) - 2
        ):  # up second to last plane
            plane_start_idx = plane_group_indices[plane_group]
            plane_end_idx = plane_group_indices[plane_group + 1]
            next_plane_end_idx = plane_group_indices[plane_group + 2]
            for hit_idx in range(plane_start_idx, plane_end_idx):
                n_edges_to_add = next_plane_end_idx - plane_end_idx
                edges[0, edge_idx : edge_idx + n_edges_to_add] = particle_hit_ids[
                    hit_idx
                ]
                edges[1, edge_idx : edge_idx + n_edges_to_add] = particle_hit_ids[
                    plane_end_idx:next_plane_end_idx
                ]
                edge_idx += n_edges_to_add

        # Sanity check
        assert edge_idx == n_edges
        list_edges.append(edges)

    return list_edges


[docs]def get_planewise_edges_impl( hit_ids: npt.NDArray, particle_ids: npt.NDArray, plane_ids: npt.NDArray, ) -> npt.NDArray: """Get the plane-wise edges Args: hit_ids: array of hit IDs, sorted by particle IDs particle_ids: Sorted array of particle IDs for every hit plane_ids: Sorted array of plane IDs for every hit Returns: Two-dimensional array where every column represent an edge. In this array, for every edge, a hit is referred to by its index in the dataframe of hits. """ # Create, fill and return array of edges list_edges = get_edges_from_sorted_impl( hit_ids=hit_ids, particle_ids=particle_ids, plane_ids=plane_ids, ) if list_edges: return np.hstack(list_edges) else: return np.zeros(shape=(2, 0), dtype=hit_ids.dtype)
[docs]def get_planewise_edges( hits: pd.DataFrame, drop_duplicates: bool = False ) -> npt.NDArray: """Get edges by sorting the hits by plane number for every particle in the event, and linking the adjacent hits by edges. Args: hits: dataframe of hits, with columns ``particle_id`` and ``plane`` drop_duplicates: whether to drop hits of a particle that belong to the same plane Returns: Two-dimensional array where every column represent an edge. In this array, for every edge, a hit is referred to by its index in the dataframe of hits. """ # Exclude noise signal_hits = hits[hits.particle_id != 0] # Remove hits on the same plane belonging to the same particle if drop_duplicates: signal_hits = signal_hits.drop_duplicates(subset=["particle_id", "plane"]) # Sort by particle ID and plane in order to group by particle ID and plane in Numba signal_hits = signal_hits.sort_values(["particle_id", "plane"]).reset_index( drop=False ) # produce `index`, the indices before sorting # Get edges return get_planewise_edges_impl( hit_ids=signal_hits["index"].to_numpy(), particle_ids=signal_hits["particle_id"].to_numpy(), plane_ids=signal_hits["plane"].to_numpy(), )
[docs]def get_planewise_custom_edges( hits: pd.DataFrame, grouped_planes: typing.Iterable[typing.Collection[int]] | None = None, ) -> npt.NDArray: if grouped_planes is None: grouped_planes = [ # xx connections (0, 3, 4, 7, 8, 11), # xu connections (0, 1), (4, 5), (8, 9), # vx connections (2, 3), (6, 7), (10, 11), ] return np.unique( np.concatenate( [ get_planewise_edges(hits=hits[hits["plane"].isin(planes)]) for planes in grouped_planes ], axis=1, ), axis=1, )