import logging
from datetime import datetime
import mwtypes
from pytz import utc
from revscoring.datasources import revision_oriented
from revscoring.dependencies import DependentSet
from ..feature import Feature
# Fallback registration timestamp.  It is substituted for a missing (falsy)
# registration date in _process_seconds_since_registration.
MW_REGISTRATION_EPOCH = mwtypes.Timestamp("2006-01-01T00:00:00Z")
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
class Revision(DependentSet):
    """
    Temporal features of a revision.

    ``day_of_week`` and ``hour_of_day`` are always defined.  The nested
    ``parent``, ``page`` and ``user`` feature sets are attached only when
    `revision_datasources` exposes the datasources they are built from.
    """

    def __init__(self, name, revision_datasources):
        super().__init__(name)
        self.datasources = revision_datasources

        # Both calendar features read the same timestamp datasource.
        saved_at = revision_datasources.timestamp

        self.day_of_week = Feature(
            name + ".day_of_week", _process_day_of_week,
            returns=int, depends_on=[saved_at])
        "`int` : the day of week on which the edit was saved (in UTC)"

        self.hour_of_day = Feature(
            name + ".hour_of_day", _process_hour_of_day,
            returns=int, depends_on=[saved_at])
        "`int` : the hour of day in which the edit was saved (in UTC)"

        if hasattr(revision_datasources, "parent"):
            self.parent = ParentRevision(name + ".parent",
                                         revision_datasources)
            """
            :class:`~revscoring.features.temporal.ParentRevision` : The
            parent (aka "previous") revision of the page.
            """

        has_page_creation = (
            hasattr(revision_datasources, "page") and
            hasattr(revision_datasources.page, "creation"))
        if has_page_creation:
            self.page = Page(name + ".page", revision_datasources)
            """
            :class:`~revscoring.features.temporal.Page` : The page in which
            the revision was saved.
            """

        has_user_info = (
            hasattr(revision_datasources, "user") and
            hasattr(revision_datasources.user, "info"))
        if has_user_info:
            self.user = User(name + ".user", revision_datasources)
            """
            :class:`~revscoring.features.temporal.User` : The user who
            saved the revision.
            """
class ParentRevision(Revision):
    """
    Temporal features of a revision's parent revision.

    Inherits the features of :class:`Revision`, computed over
    ``revision_datasources.parent``, and adds ``seconds_since``.
    """

    def __init__(self, name, revision_datasources):
        parent_datasources = revision_datasources.parent
        super().__init__(name, parent_datasources)

        self.seconds_since = Feature(
            name + ".seconds_since", _process_seconds_since,
            returns=int,
            depends_on=[parent_datasources.timestamp,
                        revision_datasources.timestamp])
        "`int` : The number of seconds since the parent revision was saved."
class User(DependentSet):
    """
    Temporal features of the user who saved a revision.

    Each feature is attached only when the user datasources expose what it
    depends on (``info`` and ``last_revision`` respectively).
    """

    def __init__(self, name, revision_datasources):
        super().__init__(name)
        user_datasources = revision_datasources.user
        self.datasources = user_datasources

        if hasattr(user_datasources, "info"):
            self.seconds_since_registration = Feature(
                name + ".seconds_since_registration",
                _process_seconds_since_registration,
                returns=int,
                depends_on=[user_datasources.id,
                            user_datasources.info.registration,
                            revision_datasources.timestamp])
            """
            `int` : The number of seconds since the user registered their
            account -- or zero in the case of anons.  If the user has a
            registration date that is *after* the revision timestamp
            (should be impossible, but happens sometimes), the user is
            assumed to be 1 year old.
            """

        if hasattr(user_datasources, "last_revision"):
            self.last_revision = LastUserRevision(name + ".last_revision",
                                                  revision_datasources)
            """
            :class:`~revscoring.features.temporal.LastUserRevision` : The
            last revision saved by the user.
            """
class LastUserRevision(Revision):
    """
    Temporal features of the last revision saved by a revision's user.

    Inherits the features of :class:`Revision`, computed over
    ``revision_datasources.user.last_revision``, and adds ``seconds_since``.
    """

    def __init__(self, name, revision_datasources):
        last_revision_datasources = revision_datasources.user.last_revision
        super().__init__(name, last_revision_datasources)

        self.seconds_since = Feature(
            name + ".seconds_since", _process_seconds_since,
            returns=int,
            depends_on=[last_revision_datasources.timestamp,
                        revision_datasources.timestamp])
        "`int` : The number of seconds since the user last saved an edit"
class Page(DependentSet):
    """
    Temporal features of the page a revision belongs to.

    Holds a single nested feature set, ``creation``.
    """

    def __init__(self, name, revision_datasources):
        super().__init__(name)

        self.creation = PageCreation(name + ".creation",
                                     revision_datasources)
        """
        :class:`~revscoring.features.temporal.PageCreation` : The first
        revision of the page
        """
class PageCreation(DependentSet):
    """
    Temporal features of the revision that created a page.

    Reads ``revision_datasources.page.creation.timestamp`` together with
    the timestamp of the revision itself.
    """

    def __init__(self, name, revision_datasources):
        super().__init__(name)
        creation_datasources = revision_datasources.page.creation

        self.seconds_since = Feature(
            name + ".seconds_since", _process_seconds_since,
            returns=int,
            depends_on=[creation_datasources.timestamp,
                        revision_datasources.timestamp])
        "`int` : The number of seconds since the page was created"
def _process_day_of_week(timestamp):
if timestamp is None:
return 7 # The day after Sunday.
dt = datetime.fromtimestamp(timestamp.unix(), tz=utc)
return dt.weekday()
def _process_hour_of_day(timestamp):
if timestamp is None:
return 24 # The hour after midnight
dt = datetime.fromtimestamp(timestamp.unix(), tz=utc)
return dt.hour
def _process_seconds_since(old_timestamp, current_timestamp):
if old_timestamp is None:
return 0
else:
return current_timestamp - old_timestamp
def _process_seconds_since_registration(id, registration, timestamp):
if id == 0: # User is anon
return 0
else:
# Handles users who registered before registration dates were
# recorded
registration = registration or MW_REGISTRATION_EPOCH
if registration > timestamp:
# Something is weird. Probably an old user.
logger.info("Timestamp chronology issue {0} < {1}"
.format(timestamp, registration))
return 60 * 60 * 24 * 365 # one year
else:
return _process_seconds_since(registration, timestamp)
revision = Revision(
    name="temporal.revision",
    revision_datasources=revision_oriented.revision,
)
"""
Represents the base revision of interest.  Implements this basic structure:

* revision: :class:`~revscoring.features.temporal.Revision`
    * user: :class:`~revscoring.features.temporal.User`
        * last_revision: :class:`~revscoring.features.temporal.LastUserRevision`
    * page: :class:`~revscoring.features.temporal.Page`
        * creation: :class:`~revscoring.features.temporal.PageCreation`
    * parent: :class:`~revscoring.features.temporal.ParentRevision`
"""  # noqa