Source code for revscoring.languages.features.stemmed.features

from ....datasources.meta import dicts, filters, mappers
from ....dependencies import DependentSet
from ....features.meta import aggregators


class Revision(DependentSet):
    """
    Features computed from the stemmed words of a revision.

    :Parameters:
        name : `str`
            Prefix prepended to the name of every feature in this set.
        revision_datasources
            Object exposing the ``stem_frequency`` and ``stems``
            datasources.  If it also has a ``parent`` and/or ``diff``
            attribute, the matching nested feature sets are built too.
    """

    def __init__(self, name, revision_datasources):
        super().__init__(name)
        self.datasources = revision_datasources
        sources = self.datasources

        # Intermediate datasources are held in locals so that no attributes
        # other than the documented ones are set on the instance.
        distinct_stems = dicts.keys(sources.stem_frequency)
        self.unique_stems = aggregators.len(
            distinct_stems,
            name=name + ".stems"
        )
        """
        `int` : The number of distinct stemmed words.
        """

        stem_lengths = mappers.map(len, sources.stems)
        self.stem_chars = aggregators.sum(
            stem_lengths,
            name=name + ".stems_length",
            returns=int
        )
        """
        `int` : The total number of characters across all stemmed words.
        """

        # The nested sets are optional: each is built only when the
        # datasources object provides the corresponding attribute.
        if hasattr(sources, 'parent'):
            self.parent = Revision(name + ".parent", sources.parent)
            """
            :class:`~revscoring.languages.features.stemmed.Revision` : The
            parent revision
            """

        if hasattr(sources, 'diff'):
            self.diff = Diff(name + ".diff", sources.diff)
            """
            :class:`~revscoring.languages.features.stemmed.Diff` : The
            diff between the parent and current revision.
            """
class Diff(DependentSet):
    """
    Features computed from the change in stemmed-word frequencies.

    :Parameters:
        name : `str`
            Prefix prepended to the name of every feature in this set.
        diff_datasources
            Object exposing the ``stem_delta`` and ``stem_prop_delta``
            datasources.
    """

    def __init__(self, name, diff_datasources):
        super().__init__(name)
        self.datasources = diff_datasources
        sources = self.datasources

        # --- Raw frequency deltas -------------------------------------
        stem_delta = sources.stem_delta

        all_deltas = dicts.values(stem_delta)
        self.stem_delta_sum = aggregators.sum(
            all_deltas,
            name=name + ".stem_delta_sum"
        )
        "`int` : The sum of all frequency deltas for stemmed words"

        rising_deltas = filters.positive(dicts.values(stem_delta))
        self.stem_delta_increase = aggregators.sum(
            rising_deltas,
            name=name + ".stem_delta_increase"
        )
        "`int` : The sum of the positive frequency deltas for stemmed words"

        falling_deltas = filters.negative(dicts.values(stem_delta))
        self.stem_delta_decrease = aggregators.sum(
            falling_deltas,
            name=name + ".stem_delta_decrease"
        )
        "`int` : The sum of the negative frequency deltas for stemmed words"

        # --- Proportional frequency deltas ----------------------------
        # NOTE(review): these three are documented as `int`, but
        # proportional deltas are presumably fractional -- confirm the
        # actual return type before relying on the type tags below.
        stem_prop_delta = sources.stem_prop_delta

        all_prop_deltas = dicts.values(stem_prop_delta)
        self.stem_prop_delta_sum = aggregators.sum(
            all_prop_deltas,
            name=name + ".stem_prop_delta_sum"
        )
        "`int` : The sum of all proportional frequency deltas for stemmed words"

        rising_prop_deltas = filters.positive(dicts.values(stem_prop_delta))
        self.stem_prop_delta_increase = aggregators.sum(
            rising_prop_deltas,
            name=name + ".stem_prop_delta_increase"
        )
        """
        `int` : The sum of the positive proportional frequency deltas for
        stemmed words
        """

        falling_prop_deltas = filters.negative(dicts.values(stem_prop_delta))
        self.stem_prop_delta_decrease = aggregators.sum(
            falling_prop_deltas,
            name=name + ".stem_prop_delta_decrease"
        )
        """
        `int` : The sum of the negative proportional frequency deltas for
        stemmed words
        """