# Source code for revscoring.languages.features.regex_matches.features

from ....datasources.meta import dicts, filters
from ....dependencies import DependentSet
from ....features.meta import aggregators


class Revision(DependentSet):
    """
    Feature set describing regex matches within a single revision.

    :Parameters:
        name : `str`
            Base name of this feature set; nested sets are named by
            appending ``".parent"`` and ``".diff"`` to it.
        regexes
            Passed through unchanged to the nested ``parent`` and ``diff``
            feature sets; not otherwise used here.
        revision_datasources
            Object exposing a ``matches`` datasource and, optionally,
            ``parent`` and ``diff`` attributes holding nested datasources.
    """

    def __init__(self, name, regexes, revision_datasources):
        super().__init__(name)

        self.datasources = revision_datasources

        match_source = self.datasources.matches
        self.matches = aggregators.len(match_source)
        "`int` : A count of the number of matches found in the text"

        # The nested feature sets are only built when the supplied
        # datasources actually carry the corresponding attribute.
        if hasattr(revision_datasources, 'parent'):
            parent_name = name + ".parent"
            parent_datasources = revision_datasources.parent
            self.parent = Revision(parent_name, regexes, parent_datasources)
            """
            :class:`~revscoring.languages.features.regex_matches.Revision` :
            The parent revision
            """

        if hasattr(revision_datasources, 'diff'):
            diff_name = name + ".diff"
            diff_datasources = revision_datasources.diff
            self.diff = Diff(diff_name, regexes, diff_datasources)
            """
            :class:`~revscoring.languages.features.regex_matches.Diff` :
            The difference made by this revision
            """
class Diff(DependentSet):
    """
    Feature set describing how regex matches changed in an edit.

    :Parameters:
        name : `str`
            Base name of this feature set; used as the prefix for the
            explicitly named sum features (e.g. ``name + ".match_delta_sum"``).
        regexes
            Not referenced in this class body; accepted so that the
            constructor signature parallels ``Revision``.
        diff_datasources
            Object exposing the ``matches_added``, ``matches_removed``,
            ``match_delta`` and ``match_prop_delta`` datasources.
    """

    def __init__(self, name, regexes, diff_datasources):
        super().__init__(name)

        self.datasources = diff_datasources

        self.matches_added = aggregators.len(self.datasources.matches_added)
        "`int` : The number of matches added in the edit"

        self.matches_removed = aggregators.len(
            self.datasources.matches_removed
        )
        "`int` : The number of matches removed in the edit"

        # Absolute frequency deltas: aggregated with returns=int.
        match_delta_values = dicts.values(self.datasources.match_delta)

        self.match_delta_sum = aggregators.sum(
            match_delta_values,
            name=name + ".match_delta_sum",
            returns=int
        )
        "`int` : The sum of frequency delta for matched strings"

        self.match_delta_increase = aggregators.sum(
            filters.positive(match_delta_values),
            name=name + ".match_delta_increase",
            returns=int
        )
        "`int` : The sum of frequency delta increases for matched strings"

        self.match_delta_decrease = aggregators.sum(
            filters.negative(match_delta_values),
            name=name + ".match_delta_decrease",
            returns=int
        )
        "`int` : The sum of frequency delta decreases for matched strings"

        # Proportional frequency deltas: aggregated with returns=float, so
        # the features below are documented as `float` rather than `int`.
        match_prop_delta_values = dicts.values(
            self.datasources.match_prop_delta
        )

        self.match_prop_delta_sum = aggregators.sum(
            match_prop_delta_values,
            name=name + ".match_prop_delta_sum",
            returns=float
        )
        """
        `float` : The sum of proportional frequency delta for matched
        strings
        """

        self.match_prop_delta_increase = aggregators.sum(
            filters.positive(match_prop_delta_values),
            name=name + ".match_prop_delta_increase",
            returns=float
        )
        """
        `float` : The sum of proportional frequency delta increases for
        matched strings
        """

        self.match_prop_delta_decrease = aggregators.sum(
            filters.negative(match_prop_delta_values),
            name=name + ".match_prop_delta_decrease",
            returns=float
        )
        """
        `float` : The sum of proportional frequency delta decreases for
        matched strings
        """