Source code for humancompatible.detect.data_handler.features.Binary

from __future__ import annotations

from typing import Optional

import numpy as np
import pandas as pd

from ..types import CategValue, OneDimData

from .Feature import Feature, Monotonicity


class Binary(Feature):
    """Feature with exactly two categorical values, encoded as one 0/1 column.

    The first entry of ``value_names`` is treated as the *negative* value
    (encoded as 0) and the second as the *positive* value (encoded as 1).
    """

    def __init__(
        self,
        training_vals: OneDimData,
        value_names: Optional[list[CategValue]] = None,
        name: Optional[str] = None,
        monotone: Monotonicity = Monotonicity.NONE,
        modifiable: bool = True,
    ):
        """Build the feature from training data.

        Parameters
        ----------
        training_vals:
            Observed raw values of the feature.
        value_names:
            The two admissible values as ``[negative, positive]``. When
            omitted, they are taken from ``np.unique(training_vals)`` (so the
            negative value is the one that sorts first).
        name, monotone, modifiable:
            Forwarded unchanged to ``Feature.__init__``.

        Raises
        ------
        ValueError
            If ``training_vals`` contains a value outside ``value_names``, or
            if there are not exactly two admissible values.
        """
        super().__init__(training_vals, name, monotone, modifiable)
        if value_names is None:
            value_names = np.unique(training_vals)
        else:
            valid_vals = np.isin(training_vals, value_names)
            if not np.all(valid_vals):
                raise ValueError(
                    f"Incorrect value in a binary feature {self.name}. "
                    f"Values {np.unique(training_vals[~valid_vals])} "
                    f"are not one of {value_names}"
                )
        # Fail with a readable message instead of the generic
        # "too many / not enough values to unpack" from the assignment below.
        if len(value_names) != 2:
            raise ValueError(
                f"Binary feature {self.name} requires exactly 2 distinct values, "
                f"got {len(value_names)}: {value_names}"
            )
        self.__negative_val, self.__positive_val = value_names
        # Scaled (x1.48) NaN-ignoring standard deviation of the encoded
        # training values, stored as a one-element array.
        self._MAD = np.asarray(
            [1.48 * np.nanstd(self.encode(training_vals, one_hot=False))]
        )

    @Feature._check_dims_on_encode
    def encode(
        self, vals: OneDimData, normalize: bool = True, one_hot: bool = True
    ) -> np.ndarray[np.float64]:
        """
        Encode to a single-column 0/1 array for the negative/positive value,
        respectively.

        ``normalize`` and ``one_hot`` are accepted for interface compatibility
        and are not used by this method's body.

        Raises
        ------
        ValueError
            If ``vals`` contains anything other than the two known values.
        """
        positive = vals == self.__positive_val
        # Everything that is not the positive value must be the negative one.
        unknown = vals[~positive] != self.__negative_val
        if np.any(unknown):
            raise ValueError(
                f"Incorrect value in a binary feature {self.name}. "
                f"Values {vals[~positive][unknown]} are not one of "
                f"[{self.__negative_val},{self.__positive_val}]"
            )
        return self._to_numpy(positive).astype(np.float64)

    def decode(
        self,
        vals: np.ndarray[np.float64],
        denormalize: bool = True,
        return_series: bool = True,
        discretize: bool = False,
    ) -> OneDimData:
        """
        Take a flat 0/1 column (or array) and map back to the two original
        category values.

        ``denormalize`` and ``discretize`` are accepted for interface
        compatibility and are not used by this method's body.

        Returns
        -------
        A ``pd.Series`` named after the feature when ``return_series`` is
        true, otherwise a flat object-dtype ``np.ndarray``.

        Raises
        ------
        ValueError
            If any entry of ``vals`` is neither 0 nor 1.
        """
        is_binary = np.isin(vals, [0, 1])
        if not is_binary.all():
            raise ValueError(
                f"Incorrect value in an encoded feature {self.name}. "
                f"All values must be either 0 or 1. "
                f"Found values {np.unique(vals[~is_binary])}."
            )
        vals = vals.flatten()
        # TODO put the shape handlings outside, similar to encode
        res = np.empty(vals.shape, dtype=object)
        res[vals == 0] = self.__negative_val
        res[vals == 1] = self.__positive_val
        if return_series:
            return pd.Series(res, name=self.name)
        return res

    def encoding_width(self, one_hot: bool) -> int:
        """Return the number of encoded columns, which is always 1."""
        return 1

    def allowed_change(
        self, pre_val: CategValue, post_val: CategValue, encoded=True
    ) -> bool:
        """Tell whether changing the feature from ``pre_val`` to ``post_val`` is allowed.

        Parameters
        ----------
        pre_val, post_val:
            Value before and after the change. Interpreted as 0/1 codes when
            ``encoded`` is true, otherwise as raw category values that are
            encoded first.
        encoded:
            Whether the two values are already encoded.

        Returns
        -------
        For a non-modifiable feature, ``True`` only if the value is unchanged.
        For an increasing feature, ``True`` unless the change is 1 -> 0; for a
        decreasing feature, ``True`` unless the change is 0 -> 1. Otherwise
        always ``True``.
        """
        if not encoded:
            # encode() relies on boolean-mask indexing, so hand it an array
            # rather than a plain list; object dtype keeps the raw value as is.
            pre_val = self.encode(
                np.asarray([pre_val], dtype=object), one_hot=False
            )[0]
            post_val = self.encode(
                np.asarray([post_val], dtype=object), one_hot=False
            )[0]

        if not self.modifiable:
            return bool(pre_val == post_val)

        # From here on both values are 0/1 codes, so compare against the
        # codes themselves, not against the raw category values.
        if self.monotone == Monotonicity.INCREASING:
            return bool(pre_val == 0 or post_val == 1)
        if self.monotone == Monotonicity.DECREASING:
            return bool(pre_val == 1 or post_val == 0)
        return True

    @property
    def value_mapping(self) -> dict[CategValue, int]:
        """Mapping from each raw category value to its 0/1 code."""
        return {self.__positive_val: 1, self.__negative_val: 0}

    def __eq__(self, other):
        """Compare name, monotonicity, modifiability, value mapping and ``_MAD``.

        Returns ``False`` for anything that is not a ``Binary``.
        """
        if not isinstance(other, Binary):
            return False
        return bool(
            self.name == other.name
            and self.monotone == other.monotone
            and self.modifiable == other.modifiable
            and self.value_mapping == other.value_mapping
            # _MAD is an ndarray: `==` would yield an array, not a bool, and
            # a NaN entry would make an object unequal to itself.
            and np.array_equal(self._MAD, other._MAD, equal_nan=True)
        )