#!/usr/bin/env python3
"""A script that plot the histogram of plane differences between the hits
of the true edges.
"""
import typing
import os.path as op
from argparse import ArgumentParser
from tqdm.auto import tqdm
from joblib import Parallel, delayed
import numpy as np
import numpy.typing as npt
import pandas as pd
import matplotlib.pyplot as plt
from montetracko.lhcb.category import category_velo
from Processing.planewise_edges import get_planewise_edges
from utils.loaderutils.preprocessing import load_dataframes
from utils.commonutils.config import cdirs
from utils.plotutils.plotconfig import configure_matplotlib
from utils.plotutils.plotools import save_fig, add_text
from utils.scriptutils.parser import add_predefined_arguments
configure_matplotlib()
[docs]def get_df_hits_particles(test_dataset_name: str, detector: str) -> pd.DataFrame:
df_hits_particles, df_particles = load_dataframes(
indir=op.join(cdirs.reference_directory, test_dataset_name, "xdigi2csv"),
particles_columns=["eta", "has_velo"],
hits_particles_columns=["plane"],
use_run_number=True,
**cdirs.get_filenames_from_detector(detector=detector),
)
# Remove noise hits
df_hits_particles = df_hits_particles[df_hits_particles["particle_id"] != 0]
# Add particle information
df_hits_particles = df_hits_particles.merge(
df_particles, on=["event_id", "particle_id"], how="left"
)
# Only keep hits that belong to velo-reconstructible particles
df_hits_particles = category_velo.filter(df_hits_particles)
return df_hits_particles
if __name__ == "__main__":
parser = ArgumentParser("Plot skip planes statistics.")
add_predefined_arguments(parser, ["test_dataset_name", "output_path", "detector"])
parsed_args = parser.parse_args()
test_dataset_name: str = parsed_args.test_dataset_name
detector: str = parsed_args.detector
output_path = (
op.join(cdirs.analysis_directory, f"hist_plane_diff_{test_dataset_name}")
if parsed_args.output_path is None
else parsed_args.output_path
)
df_hits_particles = get_df_hits_particles(test_dataset_name, detector)
df_hits = df_hits_particles[["hit_id", "plane"]].drop_duplicates()
max_plane_diff = 4
bin_centers = np.arange(1, max_plane_diff + 1)
bin_edges = np.arange(1, max_plane_diff + 2) - 0.5
def get_hist_plane_diff(df_hits_particles_event: pd.DataFrame):
df_hits_particles_event["index"] = df_hits_particles_event["hit_id"]
edge_indices = get_planewise_edges(hits=df_hits_particles_event)
df_edges = pd.DataFrame(
{
"hit_id_left": edge_indices[0],
"hit_id_right": edge_indices[1],
}
).drop_duplicates()
# Add plane information
for side in ("left", "right"):
df_edges = df_edges.merge(
df_hits.rename(
columns={column: f"{column}_{side}" for column in df_hits.columns}
),
on=f"hit_id_{side}",
how="left",
)
assert not df_edges.duplicated(
["hit_id_left", "hit_id_right"], keep=False
).any()
plane_diff = df_edges["plane_right"] - df_edges["plane_left"]
plane_diff_hist, _ = np.histogram(plane_diff, bins=bin_edges, density=False)
return plane_diff_hist
list_plane_diff_hists: typing.List[npt.NDArray] = Parallel(n_jobs=32)( # type: ignore
delayed(get_hist_plane_diff)(df_hits_particles_event)
for _, df_hits_particles_event in tqdm(df_hits_particles.groupby("event_id"))
)
plane_diff_hist: npt.NDArray = sum(list_plane_diff_hists) # type: ignore
fig, ax = plt.subplots(figsize=(8, 6))
ax.set_xlabel("Plane difference")
ax.set_yscale("log")
ax.set_ylabel("Proportion of edges")
ax.bar(
x=bin_centers,
height=plane_diff_hist / plane_diff_hist.sum(),
width=1.0,
color="k",
)
ax.locator_params(axis="x", integer=True)
ax.grid(color="grey", alpha=0.3)
add_text(ax, ha="right", va="top")
save_fig(fig, path=output_path)