Source code for humancompatible.detect.methods.l_inf.lp_tools

import numpy as np
from random import randrange
import scipy.optimize as optimize


def lin_prog_feas(
    hist1: np.ndarray,
    hist2: np.ndarray,
    delta: float,
    num_samples: float = 1.0,
) -> int:
    """Check whether histogram bins satisfy ``|hist1 - hist2| <= delta``.

    The check is phrased as a one-variable linear-programming feasibility
    problem and handed to ``scipy.optimize.linprog``. Either every bin is
    checked (``num_samples == 1``) or a random selection of bins is checked.

    Args:
        hist1 (np.ndarray): 1-D array (or (n,1)/(1,n) vector) of histogram
            bin densities for the full dataset.
        hist2 (np.ndarray): 1-D array (or (n,1)/(1,n) vector) of histogram
            bin densities for the subgroup. Must have as many bins as
            ``hist1``.
        delta (float): Threshold for the absolute difference
            ``|hist1 - hist2|``.
        num_samples (float): Fraction of total bins to sample. When it equals
            1 no sampling happens and all bins are compared. Otherwise
            ``int(num_samples * (len(hist1) - 1))`` bin indices are drawn
            independently (with replacement) with ``random.randrange``, so
            the result is reproducible via ``random.seed``.

    Returns:
        int: Status code from ``scipy.optimize.linprog``. A status of 0
        indicates the constraints are feasible (i.e.
        ``|hist1 - hist2| <= delta`` for all checked bins); other codes
        signal infeasibility or solver errors.

    Raises:
        ValueError: If either histogram is not a vector, or if the two
            histograms do not have the same number of bins.
    """
    h1_raw = np.asarray(hist1, dtype=float)
    h2_raw = np.asarray(hist2, dtype=float)

    def _is_vector(x: np.ndarray) -> bool:
        return x.ndim == 1 or (x.ndim == 2 and 1 in x.shape)

    if not _is_vector(h1_raw) or not _is_vector(h2_raw):
        raise ValueError(f"histograms must be 1-D or (n,1)/(1,n); got {h1_raw.shape} and {h2_raw.shape}")

    # Normalize to 1-D
    h1 = h1_raw.reshape(-1)
    h2 = h2_raw.reshape(-1)

    # Bins are compared position by position, so a length mismatch would
    # either compare unrelated bins or fail later with an obscure error.
    if h1.shape[0] != h2.shape[0]:
        raise ValueError(
            f"histograms must have the same number of bins; "
            f"got {h1.shape[0]} and {h2.shape[0]}"
        )

    n_bins = h1.shape[0]
    if num_samples != 1:
        n_draws = int(num_samples * (n_bins - 1))
        # randrange's upper bound is exclusive: randrange(n_bins) covers
        # every bin index 0..n_bins-1, including the last one.
        picks = np.array(
            [randrange(n_bins) for _ in range(n_draws)], dtype=int
        )
        sampled1 = h1[picks]
        sampled2 = h2[picks]
    else:
        # Case in which no sampling occurs, whole histograms are compared
        sampled1 = h1
        sampled2 = h2

    # We are not interested in the optimization itself, but in the
    # feasibility of the problem, therefore the coefficient in the
    # objective function is set to 0 and the only variable (x_0) is
    # fixed at 1
    c = 0
    x0_bounds = (1, 1)

    # Accommodate the + and - signs of the absolute value in
    # |r_a1 - r_a2| <= delta:
    #    r_a1 * x_0 <= delta + r_a2
    #   -r_a1 * x_0 <= delta - r_a2
    column1 = sampled1[:, np.newaxis]
    A_ub = np.vstack((column1, -column1))
    b_ub = np.concatenate((delta + sampled2, delta - sampled2))

    res = optimize.linprog(c, A_ub, b_ub, bounds=[x0_bounds])
    return res.status