# Source code for revscoring.scoring.statistics.classification.scaled_prediction_statistics
import logging
from collections import OrderedDict
from tabulate import tabulate
from ... import util
from .scaled_classification_matrix import ScaledClassificationMatrix
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
class ScaledPredictionStatistics(ScaledClassificationMatrix):
    """
    Prediction statistics derived from a scaled classification matrix.

    Every statistic is computed on demand from the counts read off ``self``
    (``n``, ``positives``, ``negatives``, ``trues``, ``falses``, ``correct``,
    ``tp``, ``tn``, ``fp``) -- presumably supplied by
    ``ScaledClassificationMatrix``; confirm against that class.  A statistic
    whose denominator is zero is reported as ``None`` rather than raising.

    Statistics are addressable by name through ``self[field]``.  A leading
    ``!`` in a field name selects the inverse statistic, which is
    implemented by the method of the same name with ``!`` replaced by ``_``
    (e.g. ``'!recall'`` -> ``_recall()``).
    """

    # Names accepted by ``__getitem__`` and the order in which they are
    # reported by ``keys()`` / the formatters.
    FIELDS = ['match_rate', 'filter_rate',
              'precision', '!precision',
              'recall', '!recall',
              'accuracy', 'fpr',
              'f1', '!f1']

    def format_json(self, path_tree, ndigits=3, **kwargs):
        """
        Build an ordered mapping of field name to rounded statistic.

        :Parameters:
            path_tree : mapping
                Its keys select the fields to report.  When it has no keys,
                every field in ``keys()`` is reported.
            ndigits : `int`
                Passed to ``util.round`` for each value.
            kwargs
                Accepted and ignored.
        """
        fields = path_tree.keys() or self.keys()
        return OrderedDict(
            (field, util.round(self[field], ndigits)) for field in fields)

    def format_str(self, path_tree, ndigits=3, **kwargs):
        """
        Render the selected statistics as a one-row ``tabulate`` table.

        :Parameters:
            path_tree : mapping
                Its keys select the fields to report.  When it has no keys,
                every field in ``keys()`` is reported.
            ndigits : `int`
                Passed to ``util.round`` for each value.
            kwargs
                Accepted and ignored.
        """
        # Resolve the field list once so the row and the headers agree.
        fields = path_tree.keys() or self.keys()
        table_data = [[util.round(self[field], ndigits) for field in fields]]
        return tabulate(table_data, headers=fields)

    def __getitem__(self, field):
        """
        Compute the statistic named `field`.

        Raises ``KeyError`` when `field` is not listed in ``FIELDS``.
        """
        if field not in self.FIELDS:
            raise KeyError(field)
        # '!stat' is implemented by the method '_stat'.
        method_name = field.replace("!", "_")
        return getattr(self, method_name)()

    def keys(self):
        """Return the list of available field names (``FIELDS``)."""
        return self.FIELDS

    def __iter__(self):
        """Iterate over the available field names."""
        return iter(self.keys())

    def match_rate(self):
        """
        The proportion of observations that are matched in prediction.

        match-rate = positives / n

        Returns ``None`` when ``n`` is zero.
        """
        # Compare by value: an identity test against the literal 0 is
        # implementation-dependent and lets a float 0.0 through to the
        # division.
        return (self.positives / self.n) if self.n != 0 else None

    def filter_rate(self):
        """
        The proportion of observations that are not matched.

        filter-rate = 1 - match-rate

        Returns ``None`` when the match-rate is undefined.
        """
        match_rate = self.match_rate()
        return (1 - match_rate) if match_rate is not None else None

    def accuracy(self):
        """
        The proportion of predictions that were right.

        accuracy = correct / n

        Returns ``None`` when ``n`` is zero.
        """
        return (self.correct / self.n) if self.n != 0 else None

    def recall(self):
        """
        The proportion of the target class that the classifier matches.
        AKA "true-positive rate" and "sensitivity".

        recall = true-positives / target-class

        Returns ``None`` when there are no target-class items.
        """
        return (self.tp / self.trues) if self.trues != 0 else None

    def _recall(self):
        """
        The inverse recall.  The proportion of non-target class items that
        are not matched.

        !recall = true-negatives / !target-class

        Returns ``None`` when there are no non-target-class items.
        """
        return (self.tn / self.falses) if self.falses != 0 else None

    def fpr(self):
        """
        False-positive rate.  The proportion of non-target class items that
        are matched.

        fpr = false-positives / !target-class

        Returns ``None`` when there are no non-target-class items.
        """
        return (self.fp / self.falses) if self.falses != 0 else None

    def precision(self):
        """
        The proportion of matched observations that are correctly matched.
        AKA "positive predictive value".

        precision = true-positives / true-predictions

        Returns ``None`` when nothing was matched.
        """
        return (self.tp / self.positives) if self.positives != 0 else None

    def _precision(self):
        """
        The proportion of non-matched observations that are correctly not
        matched.  AKA "negative predictive value".

        !precision = true-negatives / false-predictions

        Returns ``None`` when nothing was left unmatched.
        """
        return (self.tn / self.negatives) if self.negatives != 0 else None

    def f1(self):
        """
        A statistic that balances precision with recall (their harmonic
        mean).

        f1 = 2 * (precision * recall) / (precision + recall)

        Returns ``None`` when either input is undefined or when their sum is
        not greater than zero.
        """
        precision = self.precision()
        recall = self.recall()
        if precision is None or recall is None:
            return None
        total = precision + recall
        if total > 0:
            return 2 * ((precision * recall) / total)
        return None

    def _f1(self):
        """
        The inverse f1.  The same statistic as ``f1`` applied to
        non-matched observations.

        !f1 = 2 * (!precision * !recall) / (!precision + !recall)

        Returns ``None`` when either input is undefined or when their sum is
        not greater than zero.
        """
        precision = self._precision()
        recall = self._recall()
        if precision is None or recall is None:
            return None
        total = precision + recall
        if total > 0:
            return 2 * ((precision * recall) / total)
        return None