# Source code for pipeline.Processing.sortedwise_edges

"""A module that defines a way of building the edges by sorting the hits by z-abscissa
(instead of by distance from the origin vertex).

This way, we define the edge orientation using a left to right convention.
"""
import numpy as np
import pandas as pd
import numba as nb
from montetracko.array_utils.groupby import group_lengths
from utils.tools.tgroupby import get_group_indices_from_group_lengths


@nb.jit(nopython=True, cache=True)
def get_edges_from_sorted_impl(
    edges: np.ndarray,
    hit_ids: np.ndarray,
    particle_group_indices: np.ndarray,
) -> None:
    """Write the sorted-wise edges into ``edges``, in place.

    ``hit_ids`` is walked one particle at a time. Within a particle, every hit
    is linked to the hit that immediately follows it, so a particle with ``k``
    hits contributes ``k - 1`` consecutive columns to ``edges``.

    Args:
        edges: Pre-allocated array with two rows, filled column by column.
            Row 0 receives the source hit ID of each edge and row 1 the
            target hit ID.
        hit_ids: Hit IDs, sorted by particle IDs and z-coordinates.
        particle_group_indices: Boundaries of the particle groups in
            ``hit_ids``: two consecutive entries are the start (inclusive)
            and the end (exclusive) of the hits of one particle.
    """
    n_groups = particle_group_indices.shape[0] - 1
    write_pos = 0
    for group in range(n_groups):
        first = particle_group_indices[group]
        last = particle_group_indices[group + 1]

        # A group of (last - first) hits yields one edge fewer than its size
        write_end = write_pos + (last - first - 1)

        # Sources are all hits but the last one, targets all hits but the first
        edges[0, write_pos:write_end] = hit_ids[first : last - 1]
        edges[1, write_pos:write_end] = hit_ids[first + 1 : last]

        write_pos = write_end

    # Sanity check: every pre-allocated column must have been written
    assert write_pos == edges.shape[1]


def get_sortedwise_edges_impl(
    hit_ids: np.ndarray,
    particle_ids: np.ndarray,
) -> np.ndarray:
    """Build the array of sorted-wise edges from already-sorted hits.

    Both inputs are expected to be aligned (one entry per hit) and ordered by
    particle ID, then by ``z`` within a particle.

    Args:
        hit_ids: array of hit IDs, in the sorted order described above
        particle_ids: array of the particle ID of every hit, in the same order

    Returns:
        Two-dimensional array with two rows, where every column represents an
        edge. In this array, for every edge, a hit is referred to by the
        value it has in ``hit_ids``.
    """
    # First element returned by ``group_lengths`` is used as the number of
    # hits of each particle
    group_sizes = group_lengths(particle_ids)[0]
    group_bounds = get_group_indices_from_group_lengths(group_sizes)

    # One edge fewer than the number of hits, for every particle
    total_edges = (group_sizes - 1).sum()
    edges = np.zeros(shape=(2, total_edges), dtype=int)

    # The array is filled in place
    get_edges_from_sorted_impl(
        edges=edges,
        hit_ids=hit_ids,
        particle_group_indices=group_bounds,
    )
    return edges


def get_sortedwise_edges(
    hits: pd.DataFrame, drop_duplicates: bool = False
) -> np.ndarray:
    """Get edges by sorting the hits by ``z`` for every particle in the event,
    and linking the adjacent hits by edges.

    Args:
        hits: dataframe of hits, with columns ``particle_id`` and ``z``.
            Hits whose ``particle_id`` is 0 are treated as noise and ignored.
        drop_duplicates: whether to drop hits of a particle that belong to the
            same z (the first such hit in dataframe order is kept)

    Returns:
        Two-dimensional array where every column represent an edge.
        In this array, for every edge, a hit is referred to by its index
        in the dataframe of hits.
    """
    # Exclude noise
    signal_hits = hits[hits.particle_id != 0]

    # Remove hits on the same z belonging to the same particle
    if drop_duplicates:
        signal_hits = signal_hits.drop_duplicates(subset=["particle_id", "z"])

    # Sort by particle ID and z in order to group by particle ID in Numba.
    # Sorting keeps the original index labels attached to their rows.
    signal_hits = signal_hits.sort_values(["particle_id", "z"])

    # Read the labels straight from the index rather than through
    # ``reset_index()["index"]``: that column only exists when the index is
    # unnamed and no ``index`` column is already present in ``hits``.
    return get_sortedwise_edges_impl(
        hit_ids=signal_hits.index.to_numpy(),
        particle_ids=signal_hits["particle_id"].to_numpy(),
    )