Source code for humancompatible.detect.methods.msd.msd

import logging
from typing import List, Tuple

import numpy as np
import pandas as pd

from humancompatible.detect.binarizer.Binarizer import Bin
from humancompatible.detect.helpers.utils import evaluate_subgroup_discrepancy, signed_subgroup_discrepancy
from .mapping_msd import subgroup_map_from_conjuncts_dataframe

from .one_rule import OneRule

logger = logging.getLogger(__name__)


def get_conjuncts_MSD(
    X_bin: np.ndarray[np.bool_],
    y_bin: np.ndarray[np.bool_],
    time_limit: int = 600,
    n_min: int = 0,
    solver: str = "appsi_highs",
    check_optimality: bool = True,
    verbose: int = 1,
    **kwargs,
) -> List[int]:
    """
    Solve the One-Rule MILP and report which literals make up the
    Maximum-Subgroup-Discrepancy (MSD) rule.

    Args:
        X_bin (np.ndarray[bool]): Binary feature matrix
            (shape n_samples * n_features).
        y_bin (np.ndarray[bool]): Binary target vector (length n_samples).
        time_limit (int, default 600): Wall-clock budget handed to the
            solver, in seconds.
        n_min (int, default 0): Minimum support the subgroup must have.
        solver (str, default "appsi_highs"): Name of the MIP solver
            recognised by Pyomo (e.g. "gurobi", "cplex", "glpk", "xpress",
            "appsi_highs").
        check_optimality (bool, default True): When True, only a solution
            reported as optimal is returned; anything else raises a
            `ValueError`. When False, the best-known solution is returned.
        verbose (int, default 1): Verbosity level. 0 = silent,
            1 = logger output only, 2 = all detailed logs (including
            solver output).
        **kwargs: Accepted but not used by this function.

    Returns:
        List[int]: Feature-column indices whose conjunction defines the
        subgroup with maximal discrepancy.

    Raises:
        ValueError: Propagated from ``OneRule.find_rule`` when the solver
            stops with an unexpected termination condition.
        ValueError: If `check_optimality` is True and no optimal solution
            is found.

    Tip:
        Set `check_optimality` to False to return the best-known solution,
        or try to increase the time limit.
    """
    rule_finder = OneRule()
    literal_indices, proven_optimal = rule_finder.find_rule(
        X_bin,
        y_bin,
        n_min=n_min,
        time_limit=time_limit,
        solver_name=solver,
        verbose=verbose,
    )

    # Hand the result back unless the caller insisted on optimality and the
    # solver could not certify it.
    if not check_optimality or proven_optimal:
        return literal_indices

    raise ValueError("Subgroup discovery failed to find an optimal solution.")
def evaluate_MSD(
    X: pd.DataFrame,
    y: pd.Series | np.ndarray,
    rule: List[Tuple[int, Bin]],
    signed: bool = False,
    verbose: int = 1,
    **kwargs,
) -> float:
    """
    Evaluate the MSD value (delta or abs(delta)) of a rule that has already
    been computed.

    Args:
        X (pd.DataFrame): DataFrame with the protected columns referenced
            by `rule`.
        y (pd.Series | np.ndarray): Binary target vector aligned with `X`.
            It is converted to an array and flattened before use.
        rule (list[tuple[int, Bin]]): Conjunctive rule describing the
            subgroup. Each element is a pair `(feature_index, Bin)`.
            Produced by `most_biased_subgroup`.
        signed (bool, default False): If True, return the signed subgroup
            discrepancy; otherwise, return the absolute value.
        verbose (int, default 1): Verbosity level. 0 = silent,
            1 = logger output only, 2 = all detailed logs (including
            solver output).
        **kwargs: Accepted but not used by this function.

    Returns:
        float: The subgroup discrepancy value.
    """
    # Subgroup membership derived from the rule's conjuncts over X.
    membership = subgroup_map_from_conjuncts_dataframe(rule, X)

    if signed:
        discrepancy_fn = signed_subgroup_discrepancy
    else:
        discrepancy_fn = evaluate_subgroup_discrepancy

    # Flatten the target so Series, column vectors and 1-D arrays are all
    # passed to the helper as a 1-D array.
    targets = np.asarray(y).ravel()
    discrepancy = discrepancy_fn(membership, targets, verbose=verbose)
    return float(discrepancy)