Source code for pipeline.Processing.sortedwise_edges
"""A module that defines a way of defines the edges by sorting the hits by z-abscissa
(instead of by distance from the origin vertex).
This way, we define the edge orientation using a left to right convention.
"""
import numpy as np
import pandas as pd
import numba as nb
from montetracko.array_utils.groupby import group_lengths
from utils.tools.tgroupby import get_group_indices_from_group_lengths
@nb.jit(nopython=True, cache=True)
def get_edges_from_sorted_impl(
    edges: np.ndarray,
    hit_ids: np.ndarray,
    particle_group_indices: np.ndarray,
) -> None:
    """Fill ``edges`` in place by chaining consecutive hits of each particle.

    The hits are expected to be grouped by particle and, within a particle,
    already ordered by z. For each group, every hit is linked to the hit that
    immediately follows it, so a group of ``k`` hits contributes ``k - 1``
    edges.

    Args:
        edges: Pre-allocated array with two rows; row 0 receives the source
            hit ID and row 1 the destination hit ID of every edge.
        hit_ids: Hit IDs, sorted by particle ID and then by z-coordinate.
        particle_group_indices: Boundaries of the particle groups in
            ``hit_ids``; group ``i`` spans
            ``hit_ids[particle_group_indices[i]:particle_group_indices[i + 1]]``.
    """
    n_groups = particle_group_indices.shape[0] - 1
    write_pos = 0
    for group in range(n_groups):
        first = particle_group_indices[group]
        last = particle_group_indices[group + 1]

        # A group of k hits yields k - 1 edges between neighbouring hits
        group_n_edges = last - first - 1
        write_end = write_pos + group_n_edges

        # Sources are all hits but the last one, targets all but the first one
        edges[0, write_pos:write_end] = hit_ids[first : last - 1]
        edges[1, write_pos:write_end] = hit_ids[first + 1 : last]

        write_pos = write_end

    # Sanity check: the pre-allocated array must have been filled exactly
    assert write_pos == edges.shape[1]
def get_sortedwise_edges_impl(
    hit_ids: np.ndarray,
    particle_ids: np.ndarray,
) -> np.ndarray:
    """Build the sorted-wise edges from pre-sorted hit and particle ID arrays.

    Args:
        hit_ids: array of hit IDs, ordered by particle ID and then by z
        particle_ids: particle ID of every hit, in the same order as
            ``hit_ids``

    Returns:
        Two-dimensional array with two rows where every column represents an
        edge. In this array, for every edge, a hit is referred to by the value
        it has in ``hit_ids``.
    """
    # First element returned by ``group_lengths`` is used as the number of
    # hits per particle (presumably run lengths of equal IDs -- to confirm
    # against the ``montetracko`` helper)
    group_sizes = group_lengths(particle_ids)[0]

    # Each particle with k hits contributes k - 1 edges
    edges = np.zeros((2, (group_sizes - 1).sum()), dtype=int)

    # Fill the pre-allocated array in place
    get_edges_from_sorted_impl(
        edges,
        hit_ids,
        get_group_indices_from_group_lengths(group_sizes),
    )
    return edges
def get_sortedwise_edges(
    hits: pd.DataFrame, drop_duplicates: bool = False
) -> np.ndarray:
    """Get edges by sorting the hits by ``z`` for every particle in the event,
    and linking the adjacent hits by edges.

    Hits whose ``particle_id`` is 0 are treated as noise and are ignored.

    Args:
        hits: dataframe of hits, with columns ``particle_id`` and ``z``
        drop_duplicates: whether to keep only one hit per particle for any
            given ``z`` (the first one in the order of ``hits``)

    Returns:
        Two-dimensional array where every column represents an edge. In this
        array, for every edge, a hit is referred to by its index in the
        dataframe of hits. The array has shape ``(2, 0)`` when there is no
        signal hit.
    """
    # Exclude noise
    signal_hits = hits[hits.particle_id != 0]

    # Remove hits on the same z belonging to the same particle
    if drop_duplicates:
        signal_hits = signal_hits.drop_duplicates(subset=["particle_id", "z"])

    # Nothing to link: return an empty edge array explicitly rather than
    # relying on how the grouping helpers behave on empty input
    if len(signal_hits) == 0:
        return np.zeros(shape=(2, 0), dtype=int)

    # Sort by particle ID and z in order to group by particle ID and z in Numba
    signal_hits = signal_hits.sort_values(["particle_id", "z"])

    # The index labels travel with the rows when sorting, so they can be read
    # directly. This avoids ``reset_index``, whose generated column is only
    # called ``index`` when the index is unnamed, and which fails if ``hits``
    # already has an ``index`` column.
    return get_sortedwise_edges_impl(
        hit_ids=signal_hits.index.to_numpy(),
        particle_ids=signal_hits["particle_id"].to_numpy(),
    )