Source code for scripts.plotfactory.plot_plane_edge_statistics

#!/usr/bin/env python3
"""A script that plot the histogram of plane differences between the hits
of the true edges.
"""
import typing
import os.path as op
from argparse import ArgumentParser

from tqdm.auto import tqdm
from joblib import Parallel, delayed
import numpy as np
import numpy.typing as npt
import pandas as pd
import matplotlib.pyplot as plt

from montetracko.lhcb.category import category_velo
from Processing.planewise_edges import get_planewise_edges
from utils.loaderutils.preprocessing import load_dataframes
from utils.commonutils.config import cdirs
from utils.plotutils.plotconfig import configure_matplotlib
from utils.plotutils.plotools import save_fig, add_text
from utils.scriptutils.parser import add_predefined_arguments

configure_matplotlib()


[docs]def get_df_hits_particles(test_dataset_name: str, detector: str) -> pd.DataFrame: df_hits_particles, df_particles = load_dataframes( indir=op.join(cdirs.reference_directory, test_dataset_name, "xdigi2csv"), particles_columns=["eta", "has_velo"], hits_particles_columns=["plane"], use_run_number=True, **cdirs.get_filenames_from_detector(detector=detector), ) # Remove noise hits df_hits_particles = df_hits_particles[df_hits_particles["particle_id"] != 0] # Add particle information df_hits_particles = df_hits_particles.merge( df_particles, on=["event_id", "particle_id"], how="left" ) # Only keep hits that belong to velo-reconstructible particles df_hits_particles = category_velo.filter(df_hits_particles) return df_hits_particles
if __name__ == "__main__": parser = ArgumentParser("Plot skip planes statistics.") add_predefined_arguments(parser, ["test_dataset_name", "output_path", "detector"]) parsed_args = parser.parse_args() test_dataset_name: str = parsed_args.test_dataset_name detector: str = parsed_args.detector output_path = ( op.join(cdirs.analysis_directory, f"hist_plane_diff_{test_dataset_name}") if parsed_args.output_path is None else parsed_args.output_path ) df_hits_particles = get_df_hits_particles(test_dataset_name, detector) df_hits = df_hits_particles[["hit_id", "plane"]].drop_duplicates() max_plane_diff = 4 bin_centers = np.arange(1, max_plane_diff + 1) bin_edges = np.arange(1, max_plane_diff + 2) - 0.5 def get_hist_plane_diff(df_hits_particles_event: pd.DataFrame): df_hits_particles_event["index"] = df_hits_particles_event["hit_id"] edge_indices = get_planewise_edges(hits=df_hits_particles_event) df_edges = pd.DataFrame( { "hit_id_left": edge_indices[0], "hit_id_right": edge_indices[1], } ).drop_duplicates() # Add plane information for side in ("left", "right"): df_edges = df_edges.merge( df_hits.rename( columns={column: f"{column}_{side}" for column in df_hits.columns} ), on=f"hit_id_{side}", how="left", ) assert not df_edges.duplicated( ["hit_id_left", "hit_id_right"], keep=False ).any() plane_diff = df_edges["plane_right"] - df_edges["plane_left"] plane_diff_hist, _ = np.histogram(plane_diff, bins=bin_edges, density=False) return plane_diff_hist list_plane_diff_hists: typing.List[npt.NDArray] = Parallel(n_jobs=32)( # type: ignore delayed(get_hist_plane_diff)(df_hits_particles_event) for _, df_hits_particles_event in tqdm(df_hits_particles.groupby("event_id")) ) plane_diff_hist: npt.NDArray = sum(list_plane_diff_hists) # type: ignore fig, ax = plt.subplots(figsize=(8, 6)) ax.set_xlabel("Plane difference") ax.set_yscale("log") ax.set_ylabel("Proportion of edges") ax.bar( x=bin_centers, height=plane_diff_hist / plane_diff_hist.sum(), width=1.0, color="k", ) ax.locator_params(axis="x", integer=True) ax.grid(color="grey", alpha=0.3) add_text(ax, ha="right", va="top") save_fig(fig, path=output_path)