Source code for lore_sa.encoder_decoder.enc_dec

from abc import abstractmethod
import numpy as np

# Public API of this module: only the EncDec base class is exported.
__all__ = ["EncDec"]
class EncDec():
    """
    Abstract base class for encoding and decoding features.

    The EncDec class provides an interface for transforming features between
    their original representation and an encoded representation suitable for
    machine learning models. This is particularly important for:

    - Categorical features: Converting to one-hot encoding or ordinal encoding
    - Numerical features: Scaling or normalization
    - Feature engineering: Creating derived features

    The encoder is used by LORE to:

    1. Encode instances before generating neighborhoods
    2. Decode synthetic instances back to original feature space
    3. Decode rules to make them interpretable

    Note:
        This class does not derive from ``abc.ABC``, so the
        ``@abstractmethod`` markers below are not enforced when the class is
        instantiated. They document which methods a concrete encoder is
        expected to override; every method of this base class returns
        ``None``.

    Attributes:
        dataset_descriptor (dict): Descriptor containing information about
            feature types, ranges, and categorical values
        encoded_features (dict): Mapping from encoded feature indices to
            feature names
        encoded_descriptor (dict): Descriptor for the encoded feature space

    Methods:
        encode: Transform features to encoded representation
        decode: Transform encoded features back to original representation
        encode_target_class: Encode target class labels
        decode_target_class: Decode target class labels
        get_encoded_features: Get mapping of encoded features
        get_encoded_intervals: Get index intervals for encoded features

    Example:
        >>> from lore_sa.encoder_decoder import ColumnTransformerEnc
        >>>
        >>> # Create encoder from dataset descriptor
        >>> encoder = ColumnTransformerEnc(dataset.descriptor)
        >>>
        >>> # Encode a sample
        >>> encoded = encoder.encode([sample])
        >>>
        >>> # Decode back to original space
        >>> decoded = encoder.decode(encoded)

    See Also:
        ColumnTransformerEnc: Concrete implementation using sklearn's
            ColumnTransformer
    """

    def __init__(self, dataset_descriptor):
        """
        Initialize the encoder/decoder.

        Args:
            dataset_descriptor (dict): Dictionary containing feature
                information including 'numeric', 'categorical', and
                'ordinal' feature descriptors
        """
        self.dataset_descriptor = dataset_descriptor
        # Filled in by concrete encoders; empty/unset in the base class.
        self.encoded_features = {}
        self.encoded_descriptor = None

    @abstractmethod
    def encode(self, x: np.ndarray):
        """
        Transform features from original to encoded representation.

        This method applies the encoding transformation to convert features
        from their original space (e.g., with categorical labels) to an
        encoded space suitable for machine learning (e.g., with one-hot
        encoded categorical features).

        Args:
            x (np.ndarray): Array of shape (n_samples, n_features) containing
                samples in the original feature space

        Returns:
            np.ndarray: In a concrete encoder, the encoded array of shape
            (n_samples, n_encoded_features), where n_encoded_features may be
            larger than n_features due to one-hot encoding. This base
            implementation returns None.

        Example:
            >>> # Original: [['red', 25], ['blue', 30]]
            >>> # Encoded: [[1, 0, 0, 25], [0, 1, 0, 30]]  # one-hot for color
            >>> encoded = encoder.encode(original_data)
        """
        return None

    @abstractmethod
    def get_encoded_features(self):
        """
        Get a mapping of encoded feature indices to feature names.

        Returns:
            dict: In a concrete encoder, a dictionary mapping encoded feature
            indices to descriptive names. For one-hot encoded features, names
            include the category value (e.g., 'color=red', 'color=blue').
            This base implementation returns None.

        Example:
            >>> features = encoder.get_encoded_features()
            >>> # {0: 'age', 1: 'color=red', 2: 'color=blue', 3: 'color=green'}
        """
        return None

    def get_encoded_intervals(self):
        """
        Get index intervals for each original feature in the encoded space.

        This method returns a list of (start, end) tuples indicating the
        range of encoded indices that correspond to each original feature.
        This is useful when an original categorical feature is one-hot
        encoded into multiple columns.

        Unlike the other interface methods, this one is not marked abstract.

        Returns:
            list: In a concrete encoder, a list of (start_idx, end_idx)
            tuples, one for each original feature. The example below reads as
            half-open intervals (end index excluded), so a single-column
            feature spans (i, i + 1) and a one-hot encoded feature spans
            several indices -- NOTE(review): confirm the convention against
            the concrete encoder. This base implementation returns None.

        Example:
            >>> intervals = encoder.get_encoded_intervals()
            >>> # [(0, 1), (1, 4), (4, 5)]
            >>> # age (1 col), color (3 cols), income (1 col)
        """
        return None

    @abstractmethod
    def decode(self, x: np.ndarray):
        """
        Transform features from encoded to original representation.

        This method reverses the encoding transformation, converting features
        from the encoded space back to their original representation. This is
        essential for making explanations interpretable to users.

        Args:
            x (np.ndarray): Array of shape (n_samples, n_encoded_features)
                containing samples in the encoded feature space

        Returns:
            np.ndarray: In a concrete encoder, the decoded array of shape
            (n_samples, n_features) in the original feature space. This base
            implementation returns None.

        Example:
            >>> # Encoded: [[1, 0, 0, 25], [0, 1, 0, 30]]
            >>> # Original: [['red', 25], ['blue', 30]]
            >>> decoded = encoder.decode(encoded_data)
        """
        return None

    @abstractmethod
    def decode_target_class(self, x: np.ndarray):
        """
        Decode target class labels.

        Args:
            x (np.ndarray): Encoded target class values -- presumably one
                entry per sample; verify against the concrete encoder.

        Returns:
            The decoded labels in a concrete encoder. This base
            implementation returns None.
        """
        return None

    @abstractmethod
    def encode_target_class(self, param):
        """
        Encode target class labels.

        Args:
            param: Target class values to encode -- presumably array-like,
                mirroring ``decode_target_class``; verify against the
                concrete encoder.

        Returns:
            The encoded labels in a concrete encoder. This base
            implementation returns None.
        """
        return None