Source code for revscoring.languages.features.dictionary.features
from ....datasources.meta import dicts, filters
from ....dependencies import DependentSet
from ....features.meta import aggregators
[docs]class Revision(DependentSet):
def __init__(self, name, revision_datasources):
super().__init__(name)
self.datasources = revision_datasources
self.dict_words = aggregators.len(self.datasources.dict_words)
"`int` : A count of the number of dictionary words in the revision"
self.non_dict_words = \
aggregators.len(self.datasources.non_dict_words)
"`int` : A count of the number of non-dictionary words in the revision"
if hasattr(self.datasources, 'parent'):
self.parent = Revision(name + ".parent", self.datasources.parent)
"""
:class:`~revscoring.languages.features.dictionary.Revision` : The
parent revision
"""
if hasattr(self.datasources, 'diff'):
self.diff = Diff(name + ".diff", self.datasources.diff)
"""
:class:`~revscoring.languages.features.dictionary.Diff` : The
diff between the parent and current revision.
"""
[docs]class Diff(DependentSet):
def __init__(self, name, diff_datasources):
super().__init__(name)
self.datasources = diff_datasources
# Simple counts (based on wikitext.edit.diff)
self.dict_words_added = \
aggregators.len(self.datasources.dict_words_added)
"`int` : A count of the number of dictionary words added"
self.dict_words_removed = \
aggregators.len(self.datasources.dict_words_removed)
"`int` : A count of the number of dictionary words removed"
self.non_dict_words_added = \
aggregators.len(self.datasources.non_dict_words_added)
"`int` : A count of the number of non-dictionary words added"
self.non_dict_words_removed = \
aggregators.len(self.datasources.non_dict_words_removed)
"`int` : A count of the number of non-dictionary words removed"
# Word frequency deltas
dict_word_delta_values = dicts.values(self.datasources.dict_word_delta)
self.dict_word_delta_sum = aggregators.sum(
dict_word_delta_values,
name=name + ".dict_word_delta_sum",
returns=int
)
"`int` : The sum of word frequency deltas for dictionary words"
self.dict_word_delta_increase = aggregators.sum(
filters.positive(dict_word_delta_values),
name=name + ".dict_word_delta_increase",
returns=int
)
"""
`int` : The sum of word frequency delta increases for dictionary words
"""
self.dict_word_delta_decrease = aggregators.sum(
filters.negative(dict_word_delta_values),
name=name + ".dict_word_delta_decrease",
returns=int
)
"""
`int` : The sum of word frequency delta decreases for dictionary
words
"""
non_dict_word_delta_values = \
dicts.values(self.datasources.non_dict_word_delta)
self.non_dict_word_delta_sum = aggregators.sum(
non_dict_word_delta_values,
name=name + ".non_dict_word_delta_sum",
returns=int
)
"`int` : The sum of word frequency deltas for non-dictionary words"
self.non_dict_word_delta_increase = aggregators.sum(
filters.positive(non_dict_word_delta_values),
name=name + ".non_dict_word_delta_increase",
returns=int
)
"""
`int` : The sum of word frequency delta increases for non-dictionary
words
"""
self.non_dict_word_delta_decrease = aggregators.sum(
filters.negative(non_dict_word_delta_values),
name=name + ".non_dict_word_delta_decrease",
returns=int
)
"""
`int` : The sum of word frequency delta decreases for non-dictionary
words
"""
# Proportional word frequency deltas
dict_word_prop_delta_values = \
dicts.values(self.datasources.dict_word_prop_delta)
self.dict_word_prop_delta_sum = aggregators.sum(
dict_word_prop_delta_values,
name=name + ".dict_word_prop_delta_sum"
)
"""
`float` : The sum of word frequency proportional delta for
dictionary words
"""
self.dict_word_prop_delta_increase = aggregators.sum(
filters.positive(dict_word_prop_delta_values),
name=name + ".dict_word_prop_delta_increase"
)
"""
`float` : The sum of word frequency proportional delta increases for
dictionary words
"""
self.dict_word_prop_delta_decrease = aggregators.sum(
filters.negative(dict_word_prop_delta_values),
name=name + ".dict_word_prop_delta_decrease"
)
"""
`float` : The sum of word frequency proportional delta decreases for
dictionary words
"""
non_dict_word_prop_delta_values = \
dicts.values(self.datasources.non_dict_word_prop_delta)
self.non_dict_word_prop_delta_sum = aggregators.sum(
non_dict_word_prop_delta_values,
name=name + ".non_dict_word_prop_delta_sum"
)
"""
`float` : The sum of word frequency proportional delta for
non-dictionary words
"""
self.non_dict_word_prop_delta_increase = aggregators.sum(
filters.positive(non_dict_word_prop_delta_values),
name=name + ".non_dict_word_prop_delta_increase"
)
"""
`float` : The sum of word frequency proportional delta increase for
non-dictionary words
"""
self.non_dict_word_prop_delta_decrease = aggregators.sum(
filters.negative(non_dict_word_prop_delta_values),
name=name + ".non_dict_word_prop_delta_decrease"
)
"""
`float` : The sum of word frequency proportional delta decrease for
non-dictionary words
"""