Source code for humancompatible.detect.methods.msd.mapping_msd

import numpy as np
import pandas as pd
from typing import List, Tuple, Any


def subgroup_map_from_conjuncts_binarized(
    conjuncts: List[int], X: np.ndarray[np.bool_]
) -> np.ndarray[np.bool_]:
    """
    Build a boolean subgroup mask as the conjunction (AND) of feature columns.

    Row ``i`` of the result is ``True`` only if ``X[i, conj]`` is truthy for
    every column index ``conj`` in `conjuncts`, i.e. the individual in row
    ``i`` satisfies all criteria of the subgroup.

    Args:
        conjuncts (List[int]): Column indices into `X`. Every listed feature
            must be true for a row to belong to the subgroup.
        X (np.ndarray[`np.bool_`]): 2D binarized data; rows are individuals,
            columns are features. Boolean arrays are used as-is. Other
            array-likes (for example a 0/1 integer matrix) are also accepted
            and each selected column is interpreted by truthiness
            (non-zero means ``True``).

    Returns:
        np.ndarray[`np.bool_`]: 1D boolean array with one entry per row of
        `X`. If `conjuncts` is empty, every entry is ``True`` (the AND of no
        conditions is true).

    Raises:
        IndexError: If any index in `conjuncts` is out of bounds for the
            columns of `X`.

    Examples:
        >>> import numpy as np
        >>> X_data = np.array([
        ...     [True, True, False, True],    # Row 0
        ...     [True, False, True, True],    # Row 1
        ...     [False, True, True, False],   # Row 2
        ...     [True, True, True, True],     # Row 3
        ... ])
        >>> # Feature 0 AND feature 1: only rows 0 and 3 qualify.
        >>> subgroup_map_from_conjuncts_binarized([0, 1], X_data)
        array([ True, False, False,  True])
        >>> # Single feature.
        >>> subgroup_map_from_conjuncts_binarized([2], X_data)
        array([False,  True,  True,  True])
        >>> # Feature 0 AND feature 2.
        >>> subgroup_map_from_conjuncts_binarized([0, 2], X_data)
        array([False,  True, False,  True])
        >>> # No conjuncts: every row is included.
        >>> subgroup_map_from_conjuncts_binarized([], X_data)
        array([ True,  True,  True,  True])
        >>> # 0/1 integer encodings give the same answer as booleans.
        >>> subgroup_map_from_conjuncts_binarized([0, 2], X_data.astype(int))
        array([False,  True, False,  True])
        >>> # An invalid column index raises IndexError.
        >>> try:
        ...     subgroup_map_from_conjuncts_binarized([0, 99], X_data)
        ... except IndexError as e:
        ...     print(e)
        index 99 is out of bounds for axis 1 with size 4
    """
    # No copy is made when X is already an ndarray.
    data = np.asarray(X)

    # Start from all True so that an empty `conjuncts` selects every row.
    mapping = np.ones((data.shape[0],), dtype=bool)

    for conj in conjuncts:
        # Indexing raises IndexError if `conj` is out of bounds.
        # The explicit bool conversion is required: an in-place `&=` between
        # a bool array and e.g. an int64 column is rejected by NumPy's
        # same-kind casting rule. `copy=False` keeps bool columns as views.
        mapping &= data[:, conj].astype(bool, copy=False)

    return mapping
def subgroup_map_from_conjuncts_dataframe(
    rule: List[Tuple[int, Any]], X: pd.DataFrame
) -> np.ndarray[np.bool_]:
    """
    Compute the row mask selected by an MSD rule on a pandas DataFrame.

    `rule` is a sequence of ``(index, Bin)`` pairs such as those produced by
    `detect_bias` or `detect_bias_two_samples`. The positional index of each
    pair is not used; the column is looked up through the Bin's
    ``.feature.name``, so the result does not depend on column order in `X`.

    Args:
        rule (List[Tuple[int, Any]]): The rule identifying the subgroup, as
            returned by `detect_bias(...)`.
        X (pd.DataFrame): The original (protected-only) DataFrame passed to
            `detect_bias`. It must contain every column named by the rule's
            Bins.

    Returns:
        np.ndarray[`np.bool_`]: A 1-D boolean array with ``len(X)`` entries,
        where ``True`` marks rows that satisfy every Bin of the rule. An
        empty rule selects all rows.

    Raises:
        KeyError: If `X` is missing a column required by the rule.
    """
    available_columns = X.columns
    membership = np.ones(len(X), dtype=bool)

    for _position, condition in rule:
        column = condition.feature.name
        if column not in available_columns:
            raise KeyError(f"Column '{column}' required by rule is missing.")
        # Each Bin is evaluated on the raw column values and AND-ed into the
        # running mask in place.
        membership &= condition.evaluate(X[column].to_numpy())

    return membership