Current File : /home/jvzmxxx/wiki1/extensions/EventLogging/server/eventlogging/schema.py
# -*- coding: utf-8 -*-
"""
  eventlogging.schema
  ~~~~~~~~~~~~~~~~~~~

  This module implements schema retrieval and validation. Schemas are
  referenced via SCIDs, which are tuples of (Schema name, Revision ID).
  Schemas are retrieved via HTTP and then cached in-memory. Validation
  uses :mod:`jsonschema`.

"""
from __future__ import unicode_literals

import re

import jsonschema

import socket
import time

from .compat import integer_types, json, http_get, string_types

import uuid

# Names exported by a star-import of this module.
__all__ = (
    'CAPSULE_SCID', 'create_event_error', 'get_schema',
    'SCHEMA_URL_FORMAT', 'validate'
)


# Regular expression which matches valid schema names: 1 to 63
# characters drawn from ASCII letters, digits, '_' and '-'.
# SCHEMA_RE anchors the pattern so the whole string must match.
SCHEMA_RE_PATTERN = r'[a-zA-Z0-9_-]{1,63}'
SCHEMA_RE = re.compile(r'^{0}$'.format(SCHEMA_RE_PATTERN))

# These REs will be used when constructing an EventError
# to extract the schema and revision out of a raw event
# string in the case it cannot be parsed as JSON. They match the
# percent-encoded forms of `"schema":"<name>"` and `"revision":<digits>`
# (%22 is a double quote, %3A is a colon).
RAW_SCHEMA_RE = re.compile(
    r'%22schema%22%3A%22({0})%22'.format(SCHEMA_RE_PATTERN)
)
RAW_REVISION_RE = re.compile(r'%22revision%22%3A(\d+)')

# URL of api.php on the schema wiki (same as
# '$wgEventLoggingSchemaApiUri').
SCHEMA_WIKI_API = 'https://meta.wikimedia.org/w/api.php'

# Template for schema article URLs. Interpolates SCIDs, i.e. it is
# %-formatted with a (schema name, revision ID) tuple.
SCHEMA_URL_FORMAT = (
    SCHEMA_WIKI_API + '?action=jsonschema&title=%s&revid=%s&formatversion=2'
)

# Schemas retrieved via HTTP are cached in this dictionary, keyed by SCID.
schema_cache = {}

# SCID of the metadata object which wraps each event.
CAPSULE_SCID = ('EventCapsule', 10981547)

# SCID stamped on the error events built by create_event_error().
# TODO(review): the original TODO here was left blank; its intent is
# unknown and should be confirmed with the author.
ERROR_SCID = ('EventError', 14035058)


def get_schema(scid, encapsulate=False):
    """Get schema from memory or HTTP.

    On a cache miss the schema is fetched with :func:`http_get_schema`
    and stored in ``schema_cache`` under its SCID.

    :param scid: (Schema name, Revision ID) tuple identifying the schema.
    :param encapsulate: If true, return the capsule schema (the one
      identified by ``CAPSULE_SCID``) with the requested schema installed
      as its ``event`` property, instead of the bare schema.
    :returns: The schema dictionary. Without ``encapsulate`` this is the
      cached object itself; with ``encapsulate`` it is a fresh dictionary
      built for this call.
    :raises: Whatever :func:`http_get_schema` raises when the schema is
      not cached and cannot be retrieved.
    """
    schema = schema_cache.get(scid)
    if schema is None:
        schema = http_get_schema(scid)
        schema_cache[scid] = schema
    # We depart from the JSON Schema specifications by disallowing
    # additional properties by default.
    # See `<https://bugzilla.wikimedia.org/show_bug.cgi?id=44454>`_.
    schema.setdefault('additionalProperties', False)
    if not encapsulate:
        return schema
    # Wrap using shallow copies of the capsule and of its `properties`
    # mapping. Writing `event` into the cached capsule would leak one
    # caller's schema into every other user of the cached object (and
    # would make the capsule self-referential if the capsule's own SCID
    # were encapsulated).
    capsule = dict(get_schema(CAPSULE_SCID))
    properties = dict(capsule['properties'])
    properties['event'] = schema
    capsule['properties'] = properties
    return capsule


def http_get_schema(scid):
    """Fetch the schema identified by an SCID from the schema wiki.

    The SCID is checked with :func:`validate_scid` first, then
    interpolated into ``SCHEMA_URL_FORMAT``. The response body is decoded
    as JSON and checked with ``jsonschema.Draft3Validator.check_schema``
    before being returned.

    :param scid: (Schema name, Revision ID) tuple.
    :returns: The decoded schema.
    :raises :exc:`jsonschema.SchemaError`: If the HTTP request fails or
      the response is not valid JSON.
    """
    validate_scid(scid)
    schema_url = SCHEMA_URL_FORMAT % scid
    try:
        response_body = http_get(schema_url)
        fetched = json.loads(response_body)
    except (ValueError, EnvironmentError) as error:
        # Transport and decoding failures are both reported as schema
        # errors.
        raise jsonschema.SchemaError('Schema fetch failure: %s' % error)
    jsonschema.Draft3Validator.check_schema(fetched)
    return fetched


def validate_scid(scid):
    """Check that an SCID is well-formed.

    The revision must be an integer no smaller than 1 and the schema
    name must be a string matching ``SCHEMA_RE``. The revision is checked
    first, so it is the one reported when both parts are invalid.

    :param scid: (Schema name, Revision ID) pair.
    :raises :exc:`jsonschema.ValidationError`: If SCID is invalid.
    """
    name, rev = scid
    # The range test only runs for integers, so non-numeric revisions
    # are never compared.
    revision_ok = isinstance(rev, integer_types) and not rev < 1
    if not revision_ok:
        raise jsonschema.ValidationError('Invalid revision ID: %s' % rev)
    # Likewise the regex is only applied to actual strings.
    name_ok = isinstance(name, string_types) and SCHEMA_RE.match(name)
    if not name_ok:
        raise jsonschema.ValidationError('Invalid schema name: %s' % name)

def validate(capsule):
    """Validate an encapsulated event against its declared schema.

    The event's ``schema`` and ``revision`` entries form the SCID used to
    look up the schema via :func:`get_schema` (in encapsulated form); the
    whole capsule is then validated with ``jsonschema.Draft3Validator``.

    :param capsule: Mapping holding the encapsulated event.
    :raises :exc:`jsonschema.ValidationError`: If event is invalid.
    """
    try:
        name = capsule['schema']
        revision = capsule['revision']
    except KeyError as missing:
        # A capsule lacking `schema` or `revision` is reported as a
        # :exc:`ValidationError` rather than a KeyError, so callers only
        # have one exception type to handle.
        raise jsonschema.ValidationError('Missing key: %s' % missing)
    wrapped_schema = get_schema((name, revision), encapsulate=True)
    validator = jsonschema.Draft3Validator(wrapped_schema)
    validator.validate(capsule)


def create_event_error(
    raw_event,
    error_message,
    error_code,
    parsed_event=None
):
    """
    Creates an EventError around this raw_event string.
    If parsed_event is provided, the raw event's schema and revision
    will be included in the EventError as event.schema and event.revision.
    Otherwise these will be attempted to be extracted from the raw_event via
    a regex.  If this still fails, these will be set to 'unknown' and -1.
    A parsed revision that cannot be converted to an integer is also
    reported as -1.

    :param raw_event: The raw event string that could not be processed.
    :param error_message: Description stored as ``event.message``.
    :param error_code: Value stored as ``event.code``.
    :param parsed_event: Optional mapping with the already-parsed event.
    :returns: A dictionary whose ``schema`` and ``revision`` come from
      ``ERROR_SCID`` and whose ``event`` member describes the failure.
    """
    errored_schema = 'unknown'
    errored_revision = -1

    # If we've got a parsed event, then we can just get the schema
    # and revision out of the object.
    if parsed_event:
        errored_schema = parsed_event.get('schema', 'unknown')
        try:
            errored_revision = int(parsed_event.get('revision', -1))
        except (TypeError, ValueError, OverflowError):
            # The event may be here precisely because its revision is
            # malformed (e.g. "abc" or null). Building the error report
            # must not fail for that reason, so keep the -1 placeholder.
            errored_revision = -1

    # otherwise attempt to get them out of the raw_event with a regex
    else:
        schema_match = RAW_SCHEMA_RE.search(raw_event)
        if schema_match:
            errored_schema = schema_match.group(1)

        revision_match = RAW_REVISION_RE.search(raw_event)
        if revision_match:
            errored_revision = int(revision_match.group(1))

    return {
        'schema': ERROR_SCID[0],
        'revision': ERROR_SCID[1],
        'wiki': '',
        # 32 lowercase hex digits, zero-padded.
        'uuid': '%032x' % uuid.uuid1().int,
        'recvFrom': socket.getfqdn(),
        # Seconds since the epoch, rounded to the nearest integer.
        'timestamp': int(round(time.time())),
        'event': {
            'rawEvent': raw_event,
            'message': error_message,
            'code': error_code,
            'schema': errored_schema,
            'revision': errored_revision
        }
    }