Source code for pipeline.utils.commonutils.cfeatures

"""A module that defines common utilies for data-handling.
"""
from __future__ import annotations
import typing
import torch
from torch_geometric.data import Data

from utils.commonutils.config import load_config


def get_input_features(
    all_features: torch.Tensor,
    feature_indices: int | typing.List[int] | None,
) -> torch.Tensor:
    """Select the columns of ``all_features`` that are used as input features.

    Args:
        all_features: tensor of all the features, with the features laid out
            along the second dimension (it is indexed as
            ``all_features[:, column]``)
        feature_indices: if ``None``, ``all_features`` is returned unchanged.
            If it is an integer, it is the number of leading columns to keep.
            If it is a list of integers, it holds the indices of the columns
            of ``all_features`` to keep, in that order.

    Returns:
        Tensor of the selected features. For an integer or a list, the rows of
        ``all_features`` are kept and there is one column per selected
        feature.

    Raises:
        AssertionError: if an integer ``feature_indices`` is not strictly
            positive or exceeds the number of columns of ``all_features``,
            or if the selected features contain NaN values
        TypeError: if ``feature_indices`` is neither ``None``, an integer nor
            a list
    """
    if feature_indices is None:
        input_features = all_features
    elif isinstance(feature_indices, int):
        assert feature_indices > 0, (
            "If `feature_indices` is an integer, it should be strictly "
            "positive. "
        )
        assert feature_indices <= all_features.shape[1], (
            f"`feature_indices` was set to {feature_indices}, but this "
            "number is larger than the number of columns in `all_features`."
        )
        input_features = all_features[:, :feature_indices]
    elif isinstance(feature_indices, list):
        # Index the columns directly so that the result keeps one column per
        # requested feature. Concatenating the 1-D column slices along the
        # last dimension would instead flatten everything into one vector.
        input_features = all_features[:, feature_indices]
    else:
        raise TypeError(
            "The type of `feature_indices` is "
            f"{type(feature_indices).__name__}, which is not supported."
        )

    # TODO: remove after test
    # NaN is the only value that differs from itself, so this detects NaNs.
    assert not torch.any(input_features != input_features)
    return input_features
def get_number_input_features(feature_indices: int | typing.List[int]) -> int:
    """Return how many input features ``feature_indices`` selects.

    Args:
        feature_indices: either the number of features to include (integer),
            or the list of the indices of the features to include

    Returns:
        ``feature_indices`` itself when it is an integer, otherwise the length
        of the list

    Raises:
        TypeError: if ``feature_indices`` is neither an integer nor a list
    """
    # An integer already is the feature count.
    if isinstance(feature_indices, int):
        return feature_indices

    # A list carries one index per selected feature.
    if isinstance(feature_indices, list):
        return len(feature_indices)

    type_name = type(feature_indices).__name__
    raise TypeError(
        f"The type of `feature_indices` is {type_name}, which is not supported."
    )
def get_unnormalised_features(
    batch: Data, path_or_config: str | dict, feature_names: typing.List[str]
) -> typing.List[torch.Tensor]:
    """Undo the normalisation of selected feature columns of ``batch.x``.

    The configuration is obtained through ``load_config``. Its ``processing``
    section provides the ordered feature names (``features``) together with
    the matching ``feature_means`` and ``feature_scales``.

    Args:
        batch: PyTorch Geometric data object whose ``x`` attribute holds the
            array of features, one feature per column
        path_or_config: configuration dictionary, or path to the YAML file
            that contains the configuration
        feature_names: names of the features whose unnormalised values are
            requested

    Returns:
        One tensor per entry of ``feature_names``, in the same order, computed
        as ``column * scale + mean`` after moving the column to the CPU
    """
    processing = load_config(path_or_config)["processing"]
    known_names = processing["features"]
    means = processing["feature_means"]
    scales = processing["feature_scales"]

    def _unnormalise(name: str) -> torch.Tensor:
        # The position of the name in the configuration is also the column
        # of the feature in ``batch.x``.
        column = known_names.index(name)
        offset, factor = means[column], scales[column]
        return batch.x[:, column].cpu() * factor + offset

    return [_unnormalise(name) for name in feature_names]