Source code for revscoring.scoring.statistics.classification.scaled_prediction_statistics

import logging
from collections import OrderedDict

from tabulate import tabulate

from ... import util
from .scaled_classification_matrix import ScaledClassificationMatrix

logger = logging.getLogger(__name__)


class ScaledPredictionStatistics(ScaledClassificationMatrix):
    """
    Binary prediction statistics computed from the counts exposed by
    :class:`ScaledClassificationMatrix`.

    Each statistic is available both as a method (e.g. ``stats.recall()``)
    and via item access using the names in :attr:`FIELDS`
    (e.g. ``stats['recall']``).  A leading ``!`` in a field name selects the
    "inverse" statistic, which is implemented by the method of the same name
    with a leading underscore (``'!recall'`` -> ``_recall()``).

    Every statistic returns `None` instead of raising when its denominator
    is zero.
    """

    # Names accepted by __getitem__, in the order they are reported.
    FIELDS = ['match_rate', 'filter_rate',
              'precision', '!precision',
              'recall', '!recall',
              'accuracy', 'fpr',
              'f1', '!f1']

    def format_json(self, path_tree, ndigits=3, **kwargs):
        """
        Build a JSON-serializable mapping of field name to rounded value.

        :Parameters:
            path_tree : `dict`
                Its keys select the fields to include.  When it has no keys,
                all of :attr:`FIELDS` are included.
            ndigits : `int`
                Passed to `util.round` for every value.
            kwargs
                Accepted and ignored.

        :Returns:
            An `OrderedDict` of field name -> rounded statistic.
        """
        fields = path_tree.keys() or self.keys()
        return OrderedDict(
            (field, util.round(self[field], ndigits)) for field in fields)

    def format_str(self, path_tree, ndigits=3, **kwargs):
        """
        Render the selected statistics as a single-row text table.

        :Parameters:
            path_tree : `dict`
                Its keys select the fields to include.  When it has no keys,
                all of :attr:`FIELDS` are included.
            ndigits : `int`
                Passed to `util.round` for every value.
            kwargs
                Accepted and ignored.

        :Returns:
            The `str` produced by `tabulate`, with field names as headers.
        """
        # Resolve the field selection once so the headers and the row are
        # guaranteed to line up.
        fields = list(path_tree.keys() or self.keys())
        row = [util.round(self[field], ndigits) for field in fields]
        return tabulate([row], headers=fields)

    def __getitem__(self, field):
        """
        Compute the statistic named by `field`.

        :Raises:
            `KeyError` if `field` is not one of :attr:`FIELDS`.
        """
        if field not in self.FIELDS:
            raise KeyError(field)
        # '!recall' is implemented by '_recall', etc.
        method_name = field.replace("!", "_")
        return getattr(self, method_name)()

    def keys(self):
        """
        Return the list of available field names (:attr:`FIELDS`).
        """
        return self.FIELDS

    def __iter__(self):
        """
        Iterate over the available field names.
        """
        return iter(self.keys())

    def match_rate(self):
        """
        The proportion of observations that are matched in prediction.

        match-rate = positives / n
        """
        # Compare by value: an identity check against the literal 0 would
        # let a float 0.0 through to the division below.
        return (self.positives / self.n) if self.n != 0 else None

    def filter_rate(self):
        """
        The proportion of observations that are not matched.

        filter-rate = 1 - match-rate
        """
        match_rate = self.match_rate()
        return (1 - match_rate) if match_rate is not None else None

    def accuracy(self):
        """
        The proportion of predictions that were right.

        accuracy = correct / n
        """
        return (self.correct / self.n) if self.n != 0 else None

    def recall(self):
        """
        The proportion of the target class that the classifier matches.
        AKA "true-positive rate" and "sensitivity".

        recall = true-positives / target-class
        """
        return (self.tp / self.trues) if self.trues != 0 else None

    def _recall(self):
        """
        The inverse recall.  The proportion of non-target class items that
        are not matched.

        !recall = true-negatives / !target-class
        """
        return (self.tn / self.falses) if self.falses != 0 else None

    def fpr(self):
        """
        False-positive rate.  The proportion of non-target class items that
        are matched.

        fpr = false-positives / !target-class
        """
        return (self.fp / self.falses) if self.falses != 0 else None

    def precision(self):
        """
        The proportion of matched observations that are correctly matched.
        AKA "positive predictive value".

        precision = true-positives / true-predictions
        """
        return (self.tp / self.positives) if self.positives != 0 else None

    def _precision(self):
        """
        The proportion of non-matched observations that are correctly not
        matched.  AKA "negative predictive value".

        !precision = true-negatives / false-predictions
        """
        return (self.tn / self.negatives) if self.negatives != 0 else None

    def f1(self):
        """
        The harmonic mean of precision and recall.

        f1 = 2 * (precision * recall) / (precision + recall)

        Returns `None` when either input is undefined or when both are zero.
        """
        precision = self.precision()
        recall = self.recall()
        if precision is None or recall is None:
            return None
        if not precision + recall > 0:
            return None
        return 2 * ((precision * recall) / (precision + recall))

    def _f1(self):
        """
        The inverse f1.  The harmonic mean of !precision and !recall, i.e.
        the same statistic applied to non-matched observations.

        !f1 = 2 * (!precision * !recall) / (!precision + !recall)

        Returns `None` when either input is undefined or when both are zero.
        """
        precision = self._precision()
        recall = self._recall()
        if precision is None or recall is None:
            return None
        if not precision + recall > 0:
            return None
        return 2 * ((precision * recall) / (precision + recall))