Source code for pipeline.utils.commonutils.cfeatures
"""A module that defines common utilities for data handling.
"""
from __future__ import annotations
import typing
import torch
from torch_geometric.data import Data
from utils.commonutils.config import load_config
def get_input_features(
    all_features: torch.Tensor, feature_indices: int | typing.List[int] | None
) -> torch.Tensor:
    """Select the feature columns that are trained on from ``all_features``.

    Args:
        all_features: tensor of all the features. It is indexed as
            ``all_features[:, ...]``, so the features are expected along
            dimension 1.
        feature_indices: selection of the features to keep.
            If it is ``None``, ``all_features`` is returned as is.
            If it is an integer, it corresponds to the number of leading
            features (columns) to include.
            If it is a list of integers, it corresponds to the indices of the
            features (columns) to include, in the given order.

    Returns:
        Tensor of the selected features. For an integer or a list, the features
        are kept along dimension 1, so that its size matches the number of
        selected features.

    Raises:
        AssertionError: if ``feature_indices`` is an integer that is not
            strictly positive or that exceeds ``all_features.shape[1]``, or if
            the selected features contain NaN values.
        TypeError: if ``feature_indices`` is neither ``None``, an integer nor
            a list.
    """
    if feature_indices is None:
        input_features = all_features
    elif isinstance(feature_indices, int):
        assert feature_indices > 0, (
            "If `feature_indices` is an integer, it should be strictly " "positive. "
        )
        assert feature_indices <= all_features.shape[1], (
            f"`feature_indices` was set to {feature_indices}, but this "
            "number is larger than the number of columns in `all_features`."
        )
        input_features = all_features[:, :feature_indices]
    elif isinstance(feature_indices, list):
        # Index the columns directly. Concatenating the 1-D column slices
        # would flatten the result into a single vector instead of keeping
        # one column per selected feature.
        input_features = all_features[:, feature_indices]
    else:
        raise TypeError(
            "The type of `feature_indices` is "
            f"{type(feature_indices).__name__}, which is not supported."
        )

    # TODO: remove after test
    # Sanity check: the selected features must not contain any NaN.
    assert not torch.isnan(input_features).any()
    return input_features
def get_number_input_features(feature_indices: int | typing.List[int]) -> int:
    """Count the input features described by ``feature_indices``.

    Args:
        feature_indices: if it is an integer, it is itself the number of
            features and is returned unchanged.
            If it is a list of integers (the indices of the features to
            include), the number of features is the length of the list.

    Returns:
        Number of input features

    Raises:
        TypeError: if ``feature_indices`` is neither an integer nor a list.
    """
    # A list holds one entry per selected feature.
    if isinstance(feature_indices, list):
        return len(feature_indices)

    # An integer already is the feature count.
    if isinstance(feature_indices, int):
        return feature_indices

    raise TypeError(
        "The type of `feature_indices` is "
        f"{type(feature_indices).__name__}, which is not supported."
    )
def get_unnormalised_features(
    batch: Data, path_or_config: str | dict, feature_names: typing.List[str]
) -> typing.List[torch.Tensor]:
    """Undo the normalisation of selected feature columns of ``batch.x``.

    The ``processing`` section of the configuration provides the ordered list
    of feature names (``features``) together with the matching
    ``feature_means`` and ``feature_scales``. Each requested feature is looked
    up by name in that list; its column of ``batch.x`` is multiplied by the
    scale and shifted by the mean.

    Args:
        batch: PyTorch geometric data object, whose ``x`` attribute holds the
            array of the features
        path_or_config: configuration dictionary, or path to the YAML file
            that contains the configuration
        feature_names: names of the features to extract the unnormalised
            values of

    Returns:
        List of PyTorch tensors on the CPU, one per entry of
        ``feature_names`` and in the same order, holding the unnormalised
        values of the corresponding feature
    """
    processing_cfg = load_config(path_or_config)["processing"]
    known_names = processing_cfg["features"]
    means = processing_cfg["feature_means"]
    scales = processing_cfg["feature_scales"]

    def _unnormalise(name: str) -> torch.Tensor:
        # The position of the name in the configuration is also the column
        # index in ``batch.x`` and the index into the means and scales.
        column = known_names.index(name)
        offset = means[column]
        factor = scales[column]
        normalised = batch.x[:, column].cpu()
        return normalised * factor + offset

    return [_unnormalise(name) for name in feature_names]