Source code for revscoring.scoring.statistics.classification.threshold_optimization

import re


class ThresholdOptimization:
    """
    A structured statement about an optimization metric, e.g.
    'maximum recall @ precision >= 0.9'.

    An instance names a target statistic to maximize or minimize and a
    condition (``cond_stat >= cond_value`` or ``cond_stat <= cond_value``)
    that a threshold's statistics must satisfy to be considered.
    """

    # Groups: 1 = maximum|minimum, 2 = target_stat, 4 = cond_stat,
    # 6 = comparison operator, 7 = cond_value.  Groups 3, 5 and 8 are inner
    # helper groups and are ignored when parsing.
    STRING_PATTERN = re.compile(
        r"(maximum|minimum) "
        r"((!|[^\W\d])[\w]*) @ "  # target_stat
        r"((!|[^\W\d])[\w]*) "  # cond_stat
        r"(>=|<=) "  # greater
        r"([-+]?([0-9]*\.[0-9]+|[0-9]+))")  # cond_value

    def __init__(self, maximize, target_stat, cond_stat, greater, cond_value):
        """
        Construct a structured statement about an optimization metric.

        :Parameters:
            maximize : `bool`
                If True, maximize, else minimize
            target_stat : `str`
                The name of the target statistic that will be optimized
            cond_stat : `str`
                The name of the conditional statistic
            greater : `bool`
                The relationship between the conditional statistic and the
                conditional value.  If True, cond_stat >= cond_value, else
                cond_stat <= cond_value
            cond_value : `float`
                The conditional value
        """
        self.maximize = maximize
        self.target_stat = target_stat
        self.cond_stat = cond_stat
        self.greater = greater
        self.cond_value = cond_value

    def __str__(self):
        """
        Format the optimization in the syntax accepted by :func:`parse`.
        """
        return "{0} {1} @ {2} {3} {4}" \
               .format("maximum" if self.maximize else "minimum",
                       self.target_stat, self.cond_stat,
                       ">=" if self.greater else "<=",
                       self.cond_value)

    def __repr__(self):
        """
        Return a ``ClassName.p('<pattern>')`` expression for this instance.
        """
        return "{0}.p({1!r})".format(self.__class__.__name__, str(self))

    # Kept so that existing callers of the plain `.repr()` method still work.
    repr = __repr__

    def optimize_from(self, threshold_statistics):
        """
        Scan a sequence of ``(threshold, statistics)`` pairs (e.g.
        :class:`~revscoring.scoring.statistics.classification.ScaledThresholdStatistics`)
        for the best threshold that matches the conditional criteria.

        :Parameters:
            threshold_statistics : `iterable`
                Pairs of ``(threshold, statistics)`` where `statistics` can
                be indexed by statistic name

        :Returns:
            The threshold selected by :func:`get_optimal`, or None if no
            threshold satisfies the condition.
        """  # noqa
        optimal = self.get_optimal(threshold_statistics)
        if optimal is None:
            return None
        # get_optimal() returns (threshold, statistics); only the threshold
        # is reported here.
        return optimal[0]

    def get_optimal(self, threshold_statistics):
        """
        Scan a sequence of ``(threshold, statistics)`` pairs (e.g.
        :class:`~revscoring.scoring.statistics.classification.ScaledThresholdStatistics`)
        for the best threshold that matches the conditional criteria.

        Pairs whose conditional or target statistic is None are skipped.
        Ties on the target statistic are broken by the threshold itself
        (the larger threshold when maximizing, the smaller when minimizing).

        :Parameters:
            threshold_statistics : `iterable`
                Pairs of ``(threshold, statistics)`` where `statistics` can
                be indexed by statistic name

        :Returns:
            A ``(threshold, statistics)`` pair for the optimal threshold, or
            None if no threshold satisfies the condition.
        """  # noqa
        candidates = []
        for threshold, tstats in threshold_statistics:
            cond = tstats[self.cond_stat]
            if cond is None:
                continue
            target = tstats[self.target_stat]
            if target is None:
                continue
            if self.greater:
                satisfied = cond >= self.cond_value
            else:
                satisfied = cond <= self.cond_value
            if satisfied:
                candidates.append((target, threshold, tstats))

        if not candidates:
            return None

        choose = max if self.maximize else min
        # Order on (target, threshold) only.  Letting the comparison fall
        # through to the statistics mapping raises TypeError for dicts when
        # both the target value and the threshold are tied.
        _, threshold, tstats = choose(candidates, key=lambda c: c[:2])
        return threshold, tstats

    @classmethod
    def parse(cls, pattern):
        """
        Parse a formatted string representing a threshold optimization.
        E.g. 'maximum recall @ precision >= 0.9' or
        'minimum match_rate @ recall >= 0.9'.

        Surrounding whitespace is ignored and the pattern is lower-cased
        before matching, so statistic names are always parsed in lower case.

        :Parameters:
            pattern : `str`
                The optimization pattern to parse

        :Raises:
            `ValueError` if the whole pattern does not match the expected
            format.
        """
        # fullmatch() rather than match(): trailing text such as the ".5" in
        # "0.9.5" or the "e-3" in "1e-3" must be rejected, not silently
        # dropped.
        match = cls.STRING_PATTERN.fullmatch(pattern.strip().lower())
        if match is None:
            raise ValueError('{0!r} does not match optimization pattern: '
                             .format(pattern) +
                             '"(maximum|minimum) <target> @ ' +
                             '<cond> (>=|<=) [float]"')

        maximize, target, _, cond, _, greater, cond_value, _ = match.groups()
        return cls(maximize == "maximum", target, cond, greater == ">=",
                   float(cond_value))

    @classmethod
    def from_string(cls, p):
        """
        Alias of :func:`parse`.
        """
        return cls.parse(p)

    @classmethod
    def p(cls, p):
        """
        Short alias of :func:`parse`; this is the form emitted by `repr()`.
        """
        return cls.parse(p)