Source code for revscoring.datasources.meta.mappers

"""
These meta-datasources operate on :class:`revscoring.Datasource` instances
that return a `list`, and apply a specific function to each item.

.. autoclass:: revscoring.datasources.meta.mappers.map

.. autoclass:: revscoring.datasources.meta.mappers.lower_case

.. autoclass:: revscoring.datasources.meta.mappers.derepeat

.. autoclass:: revscoring.datasources.meta.mappers.abs
"""
from itertools import groupby

from ..datasource import Datasource

# Keep a reference to the builtin ``abs`` under a different name: the ``abs``
# class defined later in this module rebinds the module-level name ``abs``,
# and that class needs the builtin as the function it applies to each item.
absolute_value = abs


class map(Datasource):
    """
    Returns a :class:`revscoring.Datasource` whose value is the result of
    calling a function on every item generated by another datasource.

    :Parameters:
        apply : `func`
            The function called on each item generated by `items_datasource`
        items_datasource : :class:`revscoring.Datasource`
            A datasource that generates a list of some item
        name : `str`
            A name for the datasource.
    """

    def __init__(self, apply, items_datasource, name=None):
        # Store the callable first; ``process`` reads it back at solve time.
        self.apply = apply
        super().__init__(
            self._format_name(name, [apply, items_datasource]),
            self.process,
            depends_on=[items_datasource],
        )

    def process(self, items):
        """
        Call ``self.apply`` on each element of `items`, in order, and return
        the results as a new `list`.
        """
        # NOTE: the builtin ``map`` is shadowed by this class inside this
        # module, so the iteration is spelled out explicitly.
        results = []
        for item in items:
            results.append(self.apply(item))
        return results
class lower_case(map):
    """
    Returns a :class:`revscoring.Datasource` that converts every `str` in a
    list produced by another datasource to lower case.

    :Parameters:
        strs_datasource : :class:`revscoring.Datasource`
            A datasource that generates a list of `str`
        name : `str`
            A name for the datasource.
    """

    def __init__(self, strs_datasource, name=None):
        resolved_name = self._format_name(name, [strs_datasource])
        super().__init__(self.lower, strs_datasource, name=resolved_name)

    def lower(self, s):
        """
        Return `s` lower-cased, with 'İ' (capital I with dot above) mapped
        to a plain 'i'.
        """
        # The dotted capital is swapped for a plain "i" *before* lower-casing.
        # Python's own str.lower() would turn it into "i" followed by a
        # combining dot (U+0307), which is presumably not wanted here.
        without_dotted_capital = s.replace(u'İ', 'i')
        return without_dotted_capital.lower()
class derepeat(map):
    """
    Returns a :class:`revscoring.Datasource` that collapses every run of
    consecutive identical characters to a single character in each `str`
    of a list produced by another datasource (e.g. "foo" --> "fo").

    :Parameters:
        strs_datasource : :class:`revscoring.Datasource`
            A datasource that generates a list of `str`
        name : `str`
            A name for the datasource.
    """

    def __init__(self, strs_datasource, name=None):
        resolved_name = self._format_name(name, [strs_datasource])
        super().__init__(self.no_repeat, strs_datasource, name=resolved_name)

    def no_repeat(self, s):
        """
        Return `s` with each run of adjacent equal characters reduced to one
        occurrence of that character.
        """
        # groupby() yields one (key, run) pair per run of adjacent equal
        # characters; only the key is kept, the run itself is discarded.
        run_heads = [char for char, _run in groupby(s)]
        return ''.join(run_heads)
class de1337(map):
    """
    Returns a :class:`revscoring.Datasource` that replaces the digits and
    symbols listed in `MAP` with the letters they are often used to
    represent (e.g. "he7d3r" --> "hetder"). Every occurrence of a mapped
    character is replaced, wherever it appears in the string.

    :Parameters:
        strs_datasource : :class:`revscoring.Datasource`
            A datasource that generates a list of `str`
        name : `str`
            A name for the datasource.
    """

    # Single-character substitutions applied by ``no_1337``.
    MAP = {
        '1': "l",
        '3': "e",
        '4': "a",
        '5': "s",
        '6': "g",
        '7': "t",
        '0': "o",
        "#": "h",
        "(": "c",
    }

    def __init__(self, strs_datasource, name=None):
        resolved_name = self._format_name(name, [strs_datasource])
        super().__init__(self.no_1337, strs_datasource, name=resolved_name)

    def no_1337(self, s):
        """
        Return `s` with every character that is a key of `MAP` replaced by
        its mapped letter; all other characters are kept as they are.
        """
        pieces = []
        for char in s:
            # Fall back to the character itself when it has no substitution.
            pieces.append(self.MAP.get(char, char))
        return ''.join(pieces)
class abs(map):
    """
    Returns a :class:`revscoring.Datasource` that turns a list of numeric
    values produced by another datasource into the list of their absolute
    values.

    :Parameters:
        numbers_datasource : :class:`revscoring.Datasource`
            A datasource that generates a list of numeric values
        name : `str`
            A name for the datasource.
    """

    def __init__(self, numbers_datasource, name=None):
        # ``absolute_value`` is the module-level alias of the builtin abs();
        # inside this module the bare name ``abs`` refers to this class.
        super().__init__(
            apply=absolute_value,
            items_datasource=numbers_datasource,
            name=name,
        )