# Source code for revscoring.extractors.extractor
"""
.. autoclass:: revscoring.Extractor
.. autoclass:: revscoring.extractors.OfflineExtractor
"""
import logging
import yamlconf
from ..datasources import revision_oriented
from ..dependencies import Context
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
class Extractor(Context):
    """
    Implements a context for extracting dependents for a revision or a set of
    revisions.

    This base class only fixes the interface: :meth:`extract` must be
    overridden, and :meth:`from_config` builds an extractor from a
    configuration mapping.
    """

    def extract(self, rev_ids, dependents, context=None, caches=None,
                cache=None, profile=None):
        """
        Extract the values of `dependents` for one revision or many.

        The base implementation does nothing but raise; concrete extractors
        override it.

        :param rev_ids: a single revision id or an iterable of revision ids
        :param dependents: the dependents to solve for each revision
        :param context: optional context forwarded to the solver
        :param caches: optional caches, presumably keyed by revision id
            (that is how the offline extractor in this module reads them)
        :param cache: optional cache shared by all revisions
        :param profile: optional profiling argument forwarded to the solver
        :raises NotImplementedError: always
        """
        raise NotImplementedError()

    @classmethod
    def from_config(cls, config, name, section_key="extractors"):
        """
        Build an extractor from the section ``config[section_key][name]``.

        * If the section has a ``'module'`` key, the object it names is
          imported with ``yamlconf.import_module`` and returned as is.
        * Otherwise, if it has a ``'class'`` key, that class is imported and
          its own ``from_config`` is called with the same `config`, `name`
          and `section_key`.
        * If neither key is present, ``None`` is returned.

        :param config: nested configuration mapping
        :param name: name of the extractor entry inside the section
        :param section_key: top-level key of `config` holding extractor
            entries
        :raises KeyError: if `section_key` or `name` is missing from `config`
        """
        section = config[section_key][name]

        if 'module' in section:
            return yamlconf.import_module(section['module'])

        if 'class' in section:
            Class = yamlconf.import_module(section['class'])
            # Forward section_key: without it the delegated lookup silently
            # falls back to the default "extractors" section even when the
            # caller asked for a different one.
            return Class.from_config(config, name, section_key=section_key)

        # Neither 'module' nor 'class' configured: nothing to construct.
        return None
class OfflineExtractor(Extractor):
    """
    Implements a context for extracting features for a revision or a set of
    revisions that is 100% offline and will fetch no data.
    """

    def __init__(self):
        """Initialize the parent context and warn that this is offline."""
        super().__init__()
        logger.warning("Loading OfflineExtractor. You probably want an " +
                       "APIExtractor unless this is the test server.")

    def extract(self, rev_ids, dependents, context=None, caches=None,
                cache=None, profile=None):
        """
        Extract `dependents` for a single revision or for many.

        :param rev_ids: one revision id, or any object with ``__iter__``
            yielding revision ids
        :param dependents: the dependents to solve
        :param context: optional context forwarded to ``self.solve``
        :param caches: optional mapping of revision id to a per-revision
            cache; in the single-revision case it is used as the cache itself
            when `cache` is falsy
        :param cache: optional cache used for revisions that have no entry
            in `caches`
        :param profile: optional profiling argument forwarded to
            ``self.solve``
        :returns: for an iterable of ids, a generator of ``(None, result)``
            pairs in input order; for a single id, whatever ``self.solve``
            returns
        """
        caches = caches or {}

        if hasattr(rev_ids, "__iter__"):
            return self._extract_many(rev_ids, dependents, context=context,
                                      caches=caches, cache=cache,
                                      profile=profile)

        rev_id = rev_ids
        cache = cache or caches
        return self._extract(rev_id, dependents, context=context,
                             cache=cache, profile=profile)

    def _extract(self, rev_id, dependents, context=None, cache=None,
                 profile=None):
        """
        Solve `dependents` for one revision with its id injected in the cache.

        The cache handed to ``self.solve`` is a shallow copy of `cache`, so
        the caller's mapping is not modified here and a cache shared between
        several revisions does not carry one revision's id over to the next.
        """
        solve_cache = dict(cache) if cache is not None else {}
        solve_cache[revision_oriented.revision.id] = rev_id
        return self.solve(dependents, context=context, cache=solve_cache,
                          profile=profile)

    def _extract_many(self, rev_ids, features, context=None, caches=None,
                      cache=None, profile=None):
        """
        Lazily extract `features` for each id in `rev_ids`.

        Yields one ``(None, result)`` pair per revision. The first element is
        always ``None`` here (presumably an error slot shared with other
        extractors -- confirm against callers). Each revision uses its entry
        in `caches` if present, otherwise the shared `cache`.
        """
        # Tolerate a direct call without per-revision caches.
        caches = caches or {}

        for rev_id in rev_ids:
            yield None, self._extract(rev_id, features, context=context,
                                      cache=caches.get(rev_id, cache),
                                      profile=profile)

    @classmethod
    def from_config(cls, config, name, section_key="extractors"):
        """
        Build an OfflineExtractor; the configuration arguments are ignored.
        """
        return cls()