Source code for revscoring.datasources.revision_oriented

"""
Implements a set of datasources oriented around a single revision.  This is
useful for extracting features of edit and article quality.

.. autodata:: revscoring.datasources.revision_oriented.revision

Supporting classes
++++++++++++++++++

.. autoclass:: revscoring.datasources.revision_oriented.Revision
    :members:
    :member-order: bysource

.. autoclass:: revscoring.datasources.revision_oriented.Diff
    :members:
    :member-order: bysource

.. autoclass:: revscoring.datasources.revision_oriented.Page
    :members:
    :member-order: bysource

.. autoclass:: revscoring.datasources.revision_oriented.Namespace
    :members:
    :member-order: bysource

.. autoclass:: revscoring.datasources.revision_oriented.User
    :members:
    :member-order: bysource

.. autoclass:: revscoring.datasources.revision_oriented.UserInfo
    :members:
    :member-order: bysource

"""
import mwtypes

from ..dependencies import DependentSet
from ..dependencies.util import or_none
from .datasource import Datasource


class Revision(DependentSet):
    """
    Datasource tree describing one revision.

    The scalar datasources (``id``, ``timestamp``, ``comment``, ...) are
    always created.  The ``text`` datasource and the nested ``parent``,
    ``page``, ``user`` and ``diff`` sets are only attached when the matching
    ``include_*`` flags request them.

    :Parameters:
        name : `str`
            Dotted prefix used to name every datasource in this set
        include_parent : `bool`
            Attach ``parent`` (built without its own parent, user info,
            page or page suggestions)
        include_user : `bool`
            Attach ``user``
        include_user_info : `bool`
            Forwarded to the user set as ``include_info``
        include_user_last_revision : `bool`
            Forwarded to the user set as ``include_last_revision``
        include_page : `bool`
            Attach ``page``
        include_page_creation : `bool`
            Forwarded to the page set as ``include_creation``
        include_page_suggested : `bool`
            Forwarded to the page set as ``include_suggested``
        include_content : `bool`
            Attach ``text`` (also forwarded to ``parent``); together with
            `include_parent` it also attaches ``diff``
    """

    def __init__(self, name,
                 include_parent=True,
                 include_user=True,
                 include_user_info=True,
                 include_user_last_revision=False,
                 include_page=True,
                 include_page_creation=False,
                 include_page_suggested=False,
                 include_content=False):
        super().__init__(name)

        # Every child datasource/set is named "<name>.<suffix>".
        prefix = name + "."

        self.id = Datasource(prefix + "id")
        "`int` : Revision ID"

        self.timestamp_str = Datasource(prefix + "timestamp_str")
        "`str` : Timestamp the revision was saved in ISO format"

        # Parsed form of `timestamp_str`; or_none() wraps the converter.
        self.timestamp = Datasource(
            prefix + "timestamp",
            or_none(mwtypes.Timestamp),
            depends_on=[self.timestamp_str],
        )
        ":class:`mwtypes.Timestamp`: Timestamp the revision was saved"

        self.comment = Datasource(prefix + "comment")
        "`str` : The comment saved with the revision"

        # NOTE: the attribute is `byte_len` but the datasource is named
        # "<name>.byte_length".
        self.byte_len = Datasource(prefix + "byte_length")
        "`int` : The length of the revision content in bytes"

        self.minor = Datasource(prefix + "minor")
        "`bool` : Was the revision flagged as minor?"

        self.content_model = Datasource(prefix + "content_model")
        "`str` : Describes the format of revision content"

        if include_content:
            self.text = Datasource(prefix + "text")
            "`str` : The decoded (Unicode) text of the revision content"

        if include_parent:
            # The parent is deliberately shallow: no grand-parent, no user
            # info, no page; only the content flag is carried over.
            self.parent = Revision(
                prefix + "parent",
                include_content=include_content,
                include_parent=False,
                include_user_info=False,
                include_page=False,
                include_page_suggested=False,
            )
            """
            :class:`~revscoring.datasources.revision_oriented.Revision` : The
            parent (aka "previous") revision of the page.
            """

        if include_page:
            self.page = Page(
                prefix + "page",
                include_creation=include_page_creation,
                include_suggested=include_page_suggested,
            )
            """
            :class:`~revscoring.datasources.revision_oriented.Page` : The
            page in which the revision was saved.
            """

        if include_user:
            self.user = User(
                prefix + "user",
                include_info=include_user_info,
                include_last_revision=include_user_last_revision,
            )
            """
            :class:`~revscoring.datasources.revision_oriented.User` : The
            user who saved the revision.
            """

        # A diff is only attached when both content and parent are requested.
        if include_content and include_parent:
            self.diff = Diff(prefix + "diff")
            """
            :class:`~revscoring.datasources.revision_oriented.Diff` : The
            difference between this revision and the parent revision.
            """
class User(DependentSet):
    """
    Datasources identifying a user by id and name/IP.

    :Parameters:
        name : `str`
            Dotted prefix used to name every datasource in this set
        include_info : `bool`
            Attach the ``info`` set
        include_last_revision : `bool`
            Attach the ``last_revision`` set (built without parent, user,
            content or page suggestions)
    """

    def __init__(self, name, include_info=True, include_last_revision=False):
        super().__init__(name)

        # Every child datasource/set is named "<name>.<suffix>".
        prefix = name + "."

        self.id = Datasource(prefix + "id")
        "`int` : The id of the user who saved the edit.  0 for IPs."

        self.text = Datasource(prefix + "text")
        "`str` : The user's name or IP address"

        if include_info:
            self.info = UserInfo(prefix + "info")
            """
            :class:`~revscoring.datasources.revision_oriented.UserInfo` :
            Information about the user.
            """

        if include_last_revision:
            # include_user=False keeps this revision from nesting another
            # User set beneath it.
            self.last_revision = Revision(
                prefix + "last_revision",
                include_user=False,
                include_parent=False,
                include_content=False,
                include_page_suggested=False,
            )
            """
            :class:`~revscoring.datasources.revision_oriented.Revision` : The
            last revision the user saved before the revision of reference.
            """
class UserInfo(DependentSet):
    """
    Represents a user's information

    :Parameters:
        name : `str`
            Dotted prefix used to name every datasource in this set
    """

    def __init__(self, name):
        super().__init__(name)

        self.editcount = Datasource(name + ".editcount")
        "`int` : A count of edits the user has ever saved"

        # Each attribute docstring must directly follow its own assignment,
        # otherwise the documentation tooling attaches it to the wrong
        # attribute (or to none at all).
        self.registration_str = Datasource(name + ".registration_str")
        "`str` : The date the user registered in ISO format"

        # Parsed form of `registration_str`; or_none() wraps the converter.
        self.registration = Datasource(
            name + ".registration", or_none(mwtypes.Timestamp),
            depends_on=[self.registration_str])
        ":class:`mwtypes.Timestamp` : The date the user registered or None"

        self.groups = Datasource(name + ".groups")
        "`list` ( `str` ) : The groups the user is a member of"

        self.emailable = Datasource(name + ".emailable")
        "`bool` : `True` if the users is emailable, `False` otherwise"

        self.gender = Datasource(name + ".gender")
        "`str` : A string representing the user's ``gender`` preference."
class Page(DependentSet):
    """
    Represents a revision's page

    :Parameters:
        name : `str`
            Dotted prefix used to name every datasource in this set
        include_creation : `bool`
            Attach the ``creation`` revision set (built without parent,
            page, content, user last revision or page suggestions)
        include_suggested : `bool`
            Attach the ``suggested`` set
    """

    def __init__(self, name, include_creation=False, include_suggested=False):
        super().__init__(name)

        self.id = Datasource(name + ".id")
        "`int` : The page's ID"

        self.title = Datasource(name + ".title")
        "`str` : The page's title (namespace stripped)"

        self.namespace = Namespace(name + ".namespace")
        """
        :class:`~revscoring.datasources.revision_oriented.Namespace` : The
        namespace information.
        """

        if include_creation:
            # include_page=False keeps the creation revision from nesting
            # another Page set beneath it.
            self.creation = Revision(
                name + ".creation",
                include_parent=False,
                include_page=False,
                include_content=False,
                include_user_last_revision=False,
                include_page_suggested=False
            )
            """
            :class:`~revscoring.datasources.revision_oriented.Revision` : The
            first revision to the page.
            """

        if include_suggested:
            # NOTE: the attribute is `suggested` but the set is named
            # "<name>.suggestions".
            self.suggested = Suggested(name + ".suggestions")
            """
            :class:`~revscoring.datasources.revision_oriented.Suggested` : The
            set of suggestions for a page.
            """
class Suggested(DependentSet):
    """
    Set of intelligent suggestions about the structure of a page.

    :Parameters:
        name : `str`
            Dotted prefix used to name every datasource in this set
    """

    def __init__(self, name):
        super().__init__(name)

        properties_name = name + ".properties"
        self.properties = Datasource(properties_name)
        "`list` : The set of property suggestions for this page"
class Namespace(DependentSet):
    """
    Datasources describing the namespace of a page.

    :Parameters:
        name : `str`
            Dotted prefix used to name every datasource in this set
    """

    def __init__(self, name):
        super().__init__(name)

        # Every child datasource is named "<name>.<suffix>".
        prefix = name + "."

        self.id = Datasource(prefix + "id")
        "`int` : The namespace's ID"

        self.name = Datasource(prefix + "name")
        "`str` : The name of the namespace"
class Diff(DependentSet):
    """
    The difference between two sequential revisions.

    This set defines no datasources of its own; the constructor only
    registers the set under `name`.

    :Parameters:
        name : `str`
            Name of this set
    """

    def __init__(self, name):
        # Nothing beyond the base-class setup is attached here.
        super().__init__(name)
revision = Revision(
    "revision",
    include_page_creation=True,
    include_content=True,
    include_user_last_revision=True,
    include_page_suggested=True
)
"""
Represents the base revision of interest.  Implements this structure:

* revision: :class:`~revscoring.datasources.revision_oriented.Revision`
    * diff: :class:`~revscoring.datasources.revision_oriented.Diff`
    * user: :class:`~revscoring.datasources.revision_oriented.User`
        * info: :class:`~revscoring.datasources.revision_oriented.UserInfo`
        * last_revision: :class:`~revscoring.datasources.revision_oriented.Revision`
            * page: :class:`~revscoring.datasources.revision_oriented.Page`
                * namespace: :class:`~revscoring.datasources.revision_oriented.Namespace`
    * page: :class:`~revscoring.datasources.revision_oriented.Page`
        * namespace: :class:`~revscoring.datasources.revision_oriented.Namespace`
        * creation: :class:`~revscoring.datasources.revision_oriented.Revision`
            * user: :class:`~revscoring.datasources.revision_oriented.User`
                * info: :class:`~revscoring.datasources.revision_oriented.UserInfo`
        * suggested: :class:`~revscoring.datasources.revision_oriented.Suggested`
    * parent: :class:`~revscoring.datasources.revision_oriented.Revision`
        * user: :class:`~revscoring.datasources.revision_oriented.User`
"""  # noqa