Source code for revscoring.datasources.meta.mappers
"""
These meta-datasources operate on :class:`revscoring.Datasource` instances
that return a `list` and apply a specific function to each item.
.. autoclass:: revscoring.datasources.meta.mappers.map
.. autoclass:: revscoring.datasources.meta.mappers.lower_case
.. autoclass:: revscoring.datasources.meta.mappers.derepeat
.. autoclass:: revscoring.datasources.meta.mappers.abs
"""
from itertools import groupby
from ..datasource import Datasource
# Keep a handle on the builtin `abs` under another name: this module later
# defines a class that is also called `abs`, which hides the builtin at module
# scope, and that class passes `absolute_value` as its mapping function.
absolute_value = abs
class map(Datasource):
    """
    Builds a :class:`revscoring.Datasource` whose value is the list obtained
    by calling a function on every item produced by another datasource.

    :Parameters:
        apply : `func`
            The function called once for each item of `items_datasource`
        items_datasource : :class:`revscoring.Datasource`
            The datasource that supplies the list of items to transform
        name : `str`
            A name for the datasource.  It is passed through
            `_format_name` together with `apply` and `items_datasource`.
    """

    def __init__(self, apply, items_datasource, name=None):
        # Set before the base initializer is called; `process` reads it back.
        self.apply = apply
        formatted_name = self._format_name(name, [apply, items_datasource])
        super().__init__(
            formatted_name, self.process, depends_on=[items_datasource]
        )

    def process(self, items):
        """
        Return a new `list` holding `self.apply(item)` for each element of
        `items`, in iteration order.
        """
        results = []
        for item in items:
            results.append(self.apply(item))
        return results
class lower_case(map):
    """
    Builds a :class:`revscoring.Datasource` that lower-cases every `str` in
    the list produced by another datasource.

    :Parameters:
        strs_datasource : :class:`revscoring.Datasource`
            The datasource that supplies a list of `str`
        name : `str`
            A name for the datasource.
    """

    def __init__(self, strs_datasource, name=None):
        formatted_name = self._format_name(name, [strs_datasource])
        super().__init__(self.lower, strs_datasource, name=formatted_name)

    def lower(self, s):
        """
        Return `s` lower-cased, with every 'İ' turned into a plain 'i'.
        """
        # Handle 'İ' (capital I with dot above) by hand first: `str.lower()`
        # would otherwise turn it into 'i' followed by a combining dot.
        without_dotted_capital = s.replace(u'İ', 'i')
        return without_dotted_capital.lower()
class derepeat(map):
    """
    Builds a :class:`revscoring.Datasource` that collapses each run of
    identical adjacent characters to a single character in every `str` of
    the list produced by another datasource (e.g. "foo" --> "fo").

    :Parameters:
        strs_datasource : :class:`revscoring.Datasource`
            The datasource that supplies a list of `str`
        name : `str`
            A name for the datasource.
    """

    def __init__(self, strs_datasource, name=None):
        formatted_name = self._format_name(name, [strs_datasource])
        super().__init__(self.no_repeat, strs_datasource, name=formatted_name)

    def no_repeat(self, s):
        """
        Return `s` with each run of equal consecutive characters reduced to
        one occurrence.  Characters that repeat non-adjacently are kept
        (e.g. "aabbaa" --> "aba").
        """
        # `groupby` yields one (key, run) pair per run of equal neighbours;
        # only the key is needed.
        run_heads = [key for key, _run in groupby(s)]
        return ''.join(run_heads)
class de1337(map):
    """
    Builds a :class:`revscoring.Datasource` that replaces the digits and
    symbols listed in `MAP` with the letters they are often used to
    represent, in every `str` of the list produced by another datasource
    (e.g. "he7d3r" --> "hetder").  The replacement is applied to every
    matching character, wherever it appears in the string.

    :Parameters:
        strs_datasource : :class:`revscoring.Datasource`
            The datasource that supplies a list of `str`
        name : `str`
            A name for the datasource.
    """

    # Single-character substitutions; characters not listed are left alone.
    MAP = {
        '1': "l",
        '3': "e",
        '4': "a",
        '5': "s",
        '6': "g",
        '7': "t",
        '0': "o",
        "#": "h",
        "(": "c",
    }

    def __init__(self, strs_datasource, name=None):
        formatted_name = self._format_name(name, [strs_datasource])
        super().__init__(self.no_1337, strs_datasource, name=formatted_name)

    def no_1337(self, s):
        """
        Return `s` with each character that has an entry in `MAP` replaced
        by its mapped letter; every other character is kept unchanged.
        """
        translated = []
        for c in s:
            translated.append(self.MAP.get(c, c))
        return ''.join(translated)
class abs(map):
    """
    Builds a :class:`revscoring.Datasource` that turns the list of numeric
    values produced by another datasource into the list of their absolute
    values.

    :Parameters:
        numbers_datasource : :class:`revscoring.Datasource`
            The datasource that supplies a list of numeric values
        name : `str`
            A name for the datasource.
    """

    def __init__(self, numbers_datasource, name=None):
        # `absolute_value` is the module-level alias of the builtin `abs`,
        # which the name of this class hides.
        # NOTE(review): unlike the sibling mappers, `name` is not passed
        # through `_format_name` here, so a `None` name is resolved by
        # `map.__init__` instead -- confirm this is intended.
        super().__init__(
            apply=absolute_value,
            items_datasource=numbers_datasource,
            name=name,
        )