Source code for pipeline.Processing.sortedwise_edges

"""A module that defines a way of building the edges by sorting the hits by z-abscissa
(instead of by distance from the origin vertex).

This way, we define the edge orientation using a left to right convention.
"""
import numpy as np
import pandas as pd
import numba as nb
from montetracko.array_utils.groupby import group_lengths
from utils.tools.tgroupby import get_group_indices_from_group_lengths


@nb.jit(nopython=True, cache=True)
def get_edges_from_sorted_impl(
    edges: np.ndarray,
    hit_ids: np.ndarray,
    particle_group_indices: np.ndarray,
) -> None:
    """Write the sorted-wise edges into ``edges``, in place.

    The hits are expected to be laid out particle by particle, each particle's
    hits being already ordered by z. Within every particle, each hit is linked
    to the one that immediately follows it.

    Args:
        edges: Pre-allocated array with two rows and one column per edge;
            row 0 receives the source hit IDs, row 1 the target hit IDs.
        hit_ids: Hit IDs, sorted by particle ID and then by z-coordinate.
        particle_group_indices: Boundaries in ``hit_ids`` of the consecutive
            groups of hits sharing a particle ID; group ``i`` spans
            ``particle_group_indices[i]`` (included) to
            ``particle_group_indices[i + 1]`` (excluded).
    """
    n_filled = 0
    n_groups = particle_group_indices.shape[0] - 1
    for group in range(n_groups):
        first = particle_group_indices[group]
        last = particle_group_indices[group + 1]

        # A group of k hits contributes k - 1 edges
        upper = n_filled + (last - first - 1)
        edges[0, n_filled:upper] = hit_ids[first : last - 1]
        edges[1, n_filled:upper] = hit_ids[first + 1 : last]
        n_filled = upper

    # Sanity check: every pre-allocated column must have been written
    assert n_filled == edges.shape[1]


def get_sortedwise_edges_impl(
    hit_ids: np.ndarray,
    particle_ids: np.ndarray,
) -> np.ndarray:
    """Get the sorted-wise edges.

    ``hit_ids`` and ``particle_ids`` must be aligned (same length, same order):
    entry ``i`` of ``particle_ids`` is the particle ID of hit ``hit_ids[i]``.
    Both are expected to be sorted by particle ID and, within a particle, by z.

    Args:
        hit_ids: array of hit IDs, sorted by particle ID and then by z
        particle_ids: array of particle IDs for every hit, in the same order
            as ``hit_ids``

    Returns:
        Two-dimensional integer array of shape ``(2, n_edges)`` where every
        column represents an edge, given as the pair of hit IDs it connects
        (row 0: first hit, row 1: next hit of the same particle).
    """
    # NOTE(review): `group_lengths` is assumed to return, as first element, the
    # number of consecutive hits in each run of identical particle IDs.
    n_hits_per_particles = group_lengths(particle_ids)[0]
    particle_group_indices = get_group_indices_from_group_lengths(n_hits_per_particles)

    # A particle with k hits produces k - 1 edges
    n_edges = (n_hits_per_particles - 1).sum()

    # Create, fill (in place) and return the array of edges
    edges = np.zeros(shape=(2, n_edges), dtype=int)
    get_edges_from_sorted_impl(
        edges=edges,
        hit_ids=hit_ids,
        particle_group_indices=particle_group_indices,
    )
    return edges


def get_sortedwise_edges(
    hits: pd.DataFrame, drop_duplicates: bool = False
) -> np.ndarray:
    """Get edges by sorting the hits by ``z`` for every particle in the event,
    and linking the adjacent hits by edges.

    Hits whose ``particle_id`` is 0 are treated as noise and never appear in
    an edge.

    Args:
        hits: dataframe of hits, with columns ``particle_id`` and ``z``
        drop_duplicates: whether to drop hits of a particle that belong to the same
            z (only the first such hit, in dataframe order, is kept)

    Returns:
        Two-dimensional array where every column represent an edge. In this array,
        for every edge, a hit is referred to by its index in the dataframe of hits.
    """
    # Exclude noise
    signal_hits = hits[hits.particle_id != 0]

    # Remove hits on the same z belonging to the same particle
    if drop_duplicates:
        signal_hits = signal_hits.drop_duplicates(subset=["particle_id", "z"])

    # Sort by particle ID and z in order to group by particle ID and z in Numba.
    # The index is left untouched by the sort, so it still carries the labels
    # the hits had in `hits`.
    signal_hits = signal_hits.sort_values(["particle_id", "z"])

    # Read the hit IDs straight from the index rather than through
    # `reset_index()["index"]`: the latter only works when the index is unnamed
    # and the dataframe has no column already called "index".
    return get_sortedwise_edges_impl(
        hit_ids=signal_hits.index.to_numpy(),
        particle_ids=signal_hits["particle_id"].to_numpy(),
    )