Source code for revscoring.languages.features.regex_matches.regex_matches
from . import datasources, features
from ....dependencies import DependentSet
from ....features import wikitext
[docs]class RegexMatches(DependentSet):
"""
:Parameters:
name : `str`
A name for the collection
regexes : `list` ( `str` )
A list of regex patterns to match.
exclusions : `list` ( `str` )
A list of terms to explicitly not match
wrapping : `tuple` ( `str`, `str` )
Insert these characters around matches in the regular expression
"""
def __init__(self, name, regexes, exclusions=None,
wrapping=(r'\b', r'\b'), text_preprocess=None):
super().__init__(name)
self._regexes = regexes
self._exclusions = exclusions
self._wrapping = wrapping
self.revision = features.Revision(
name + ".revision", regexes,
datasources.Revision(
name + ".revision", regexes,
wikitext.revision.datasources,
exclusions=exclusions,
wrapping=wrapping,
text_preprocess=text_preprocess
)
)
"""
:class:`~revscoring.languages.features.regex_matches.Revision` :
The base revision feature set.
"""
[docs] def excluding(self, exclusions, name=None):
"""
Returns a new :class:`~revscoring.languages.features.RegexMatches`
that includes a set of exclusions.
:Parameters:
exclusions : `list` ( `str` )
A list of terms to explicitly not match
name : `str`
A new name for the collection. If unspecified, the old name
will be used
"""
return self.__class__(
name or self._name + ".excluding({0!r})".format(exclusions),
self._regexes,
exclusions=(self._exclusions or []) + exclusions,
wrapping=self._wrapping)