# Source code for revscoring.languages.features.stopwords.stopwords
from ....dependencies import DependentSet
from ....features import wikitext
from . import datasources, features
class Stopwords(DependentSet):
    """
    A named collection of stopword-based features built around a set of
    stopwords.

    :Parameters:
        name : `str`
            A name for the collection
        stopword_set : `set` ( `str` )
            A set of stopwords
    """

    def __init__(self, name, stopword_set):
        super().__init__(name)

        # Callable predicate handed to the datasources; it lower-cases each
        # word before testing membership in `stopword_set`.
        word_is_stopword = WordIsInStopwordSet(stopword_set)

        self.revision = features.Revision(
            name + ".revision",
            datasources.Revision(name + ".revision", word_is_stopword,
                                 wikitext.revision.datasources)
        )
        """
        :class:`~revscoring.languages.features.stopwords.Revision` :
        The base revision feature set.
        """
class WordIsInStopwordSet:
    """
    Callable that reports whether a word, once lower-cased, is a member of
    a stopword collection.

    :Parameters:
        stopword_set : `set` ( `str` )
            The stopwords to test against.  Words are lower-cased before
            the lookup, so only lower-case entries can ever match.
        cleanup
            Accepted but not used or stored by this class.
    """

    def __init__(self, stopword_set, cleanup=None):
        self.stopword_set = stopword_set

    def __call__(self, word):
        """Return ``True`` if ``word.lower()`` is in the stopword set."""
        return word.lower() in self.stopword_set