Module clean

Produces a clean test database.

The clean slate database contains no contributions, assessments, or reviews.

It does have all value tables, but with simplified contents.

Expand source code
"""Produces a clean test database.

The clean slate database does not have contributions, assessments, reviews.

It does have all value tables, but with simplified contents.
"""

import sys
import collections

# from datetime import datetime as dt
from pymongo import MongoClient
from bson.objectid import ObjectId
from hashlib import md5

import magic  # noqa
from config import Config as C, Names as N
from control.utils import now


# Short aliases for the configuration sections used below.
CB = C.base
CC = C.clean
CT = C.tables

CREATOR = CB.creator
# Name of the database that clean() drops and repopulates.
DATABASE = CB.database["test"]

# Seed data for the tables built by clean(), all taken from the `clean`
# section of the configuration.
COUNTRY = CC.country
GROUP = CC.group
USER = CC.user
VALUES = CC.values
DECISION = CC.decision
# Per table: the name of the field whose value identifies a row.
KEY_FIELD = CC.keyField
PROCEDURE = CC.procedure

# Tables that get a `dateCollected` stamp in the `collect` collection
# after the import.
VALUE_TABLES = CT.valueTables


def info(x):
    """Write `x`, followed by a newline, to standard output."""
    line = f"{x}\n"
    sys.stdout.write(line)


def warning(x):
    """Write `x`, followed by a newline, to standard error."""
    line = f"{x}\n"
    sys.stderr.write(line)


def toHexName(name):
    """Return the first 10 hex digits of the MD5 digest of `name`.

    `name` is a string; it is encoded as UTF-8 before hashing.
    """
    encoded = bytes(name, "utf-8")
    digest = md5(encoded).hexdigest()
    return digest[:10]


def toHexNumber(number):
    """Return `number` in lowercase hex, left-padded with zeros to 6 digits.

    Numbers that need more than 6 hex digits are not truncated.
    """
    return format(number, "0>6x")


def toHexMongo(name, number):
    """Compose a hex string from a name and a sequence number.

    The result consists of 8 zeros, the 10 hex digits that `toHexName`
    derives from `name`, and the hex form of `number` as produced by
    `toHexNumber` (6 digits as long as `number` is below 16**6).
    That adds up to 24 hex digits in the normal case.
    """
    nameHex = toHexName(name)
    numberHex = toHexNumber(number)
    return f"{0:0>8x}{nameHex}{numberHex}"


class IdIndex:
    """Two-way registry between names and the ids created from them."""

    def __init__(self):
        # name -> id and id -> name; both are filled by getId()
        self._idFromName = {}
        self._nameFromId = {}

    def getId(self, name):
        """Return the id belonging to `name`, creating it on first use.

        A new id is made by passing `name` to `ObjectId`; it is then
        registered in both directions so that later calls with the same
        name return the same id.
        """
        known = self._idFromName
        if name in known:
            return known[name]

        fresh = ObjectId(name)
        known[name] = fresh
        self._nameFromId[fresh] = name
        return fresh

    def getName(self, _id):
        """Return the name from which `_id` was created.

        Raises a KeyError for ids that were not issued by `getId`.
        """
        names = self._nameFromId
        return names[_id]


class MongoId(IdIndex):
    """Issues reproducible ids, numbered consecutively per table."""

    def __init__(self):
        super().__init__()
        # per table: how many ids have been issued so far
        self.cur = collections.Counter()

    def newId(self, table):
        """Return the next id for `table`.

        The counter for `table` is incremented, and the id is derived from
        the table name and the new counter value by means of `toHexMongo`.
        """
        counter = self.cur
        counter[table] += 1
        serial = counter[table]
        return self.getId(toHexMongo(table, serial))


def clean():
    """Assemble the clean slate data and load it into a freshly reset database.

    All tables are first built in memory from the configuration constants
    (`COUNTRY`, `GROUP`, `USER`, `VALUES`, `DECISION`, `PROCEDURE`); every
    record gets its `_id` from a `MongoId` generator.
    After that the database named `DATABASE` is dropped and refilled with
    the assembled tables.

    The builders are called in a fixed order, because later tables refer to
    ids issued for earlier ones: users point to groups and countries,
    back office records point to users and to each other.
    """

    ids = MongoId()
    allData = collections.defaultdict(list)
    countryMapping = {}
    userMapping = {}
    groupMapping = {}

    def countryTable():
        """Add one record per configured country, in order of iso code."""
        table = "country"
        for (iso, spec) in sorted(COUNTRY.items()):
            _id = ids.newId(table)
            countryMapping[iso] = _id
            allData[table].append(
                dict(
                    _id=_id,
                    iso=iso,
                    name=spec["name"],
                    isMember=spec["isMember"],
                    latitude=spec["latitude"],
                    longitude=spec["longitude"],
                )
            )

    def groupTable():
        """Add one record per configured permission group."""
        table = "permissionGroup"
        for (name, description) in GROUP:
            _id = ids.newId(table)
            groupMapping[name] = _id
            allData[table].append(dict(_id=_id, rep=name, description=description))

    def userTable():
        """Add the configured users, with group and country replaced by ids.

        Every user must have an `eppn` and a `group`; `country` is optional.
        """
        table = "user"
        for user in USER:
            _id = ids.newId(table)
            # work on a copy: the configuration data itself stays untouched
            u = dict(user.items())
            u["_id"] = _id
            userMapping[u["eppn"]] = _id
            u["group"] = groupMapping[u["group"]]
            if "country" in u:
                u["country"] = countryMapping[u["country"]]
            allData[table].append(u)

    def relTables():
        """Add the plain value tables: each value becomes a record with a `rep`."""
        for (table, values) in VALUES.items():
            for value in values:
                allData[table].append(dict(_id=ids.newId(table), rep=value))

    def yearTable():
        """Add the years 2010 up to and including 2029."""
        table = "year"
        allData[table] = [
            dict(_id=ids.newId(table), rep=year) for year in range(2010, 2030)
        ]

    def decisionTable():
        """Add the configured decisions, in the configured order."""
        table = "decision"
        allData[table] = [
            dict(_id=ids.newId(table), **DECISION["values"][decision])
            for decision in DECISION["order"]
        ]

    def backoffice():
        """Add the tables listed in `PROCEDURE`, turning references into ids.

        Rows refer to users and to rows of other tables by key value.
        These references are resolved through the ids recorded for rows that
        have been processed before, so a table must come after the tables it
        refers to; an unresolved reference raises a KeyError.
        """
        # per table: key value of a row -> id of that row
        relIndex = collections.defaultdict(dict)

        for tableInfo in PROCEDURE:
            table = tableInfo["name"]
            rows = tableInfo["rows"]
            keyField = KEY_FIELD[table]

            for row in rows:  # deterministic order
                _id = ids.newId(table)
                newRow = dict()
                newRow["_id"] = _id
                relIndex[table][row[keyField]] = _id
                for (field, value) in row.items():
                    if field in {"startDate", "endDate"}:
                        newRow[field] = value  # yaml has already converted the datetime
                    elif field == "creator":
                        newRow[field] = userMapping[value]
                    elif field == N.typeContribution and table in (
                        N.package,
                        N.criteria,
                    ):
                        # multiple references: a list of keys becomes a list of ids
                        newRow[field] = [relIndex[field][val] for val in value]
                    elif (table == N.criteria and field == N.package) or (
                        table == N.score and field == N.criteria
                    ):
                        # single reference: one key becomes one id
                        newRow[field] = relIndex[field][value]
                    else:
                        newRow[field] = value
                allData[table].append(newRow)

        # A key field named "key" only serves to resolve references:
        # it is removed from the records before they are stored.
        for tableInfo in PROCEDURE:
            table = tableInfo["name"]
            if KEY_FIELD[table] == "key":
                # .get() avoids creating an empty entry for a table without rows
                for row in allData.get(table, ()):
                    del row["key"]

    def importMongo():
        """Drop the database `DATABASE` and insert all assembled tables.

        Afterwards every table in `VALUE_TABLES` gets a fresh `dateCollected`
        stamp in the `collect` collection.
        The connection is closed when done, also if an error occurs.
        """
        client = MongoClient()
        try:
            sys.stdout.write(f"RESET the DATABASE {DATABASE} ... ")
            client.drop_database(DATABASE)
            mongo = client[DATABASE]
            for (table, rows) in allData.items():
                # insert_many refuses an empty list of documents
                if rows:
                    mongo[table].insert_many(rows)

            justNow = now()
            for table in VALUE_TABLES:
                mongo.collect.update_one(
                    {"table": table}, {"$set": {"dateCollected": justNow}}, upsert=True,
                )
            sys.stdout.write("DONE\n")
        finally:
            client.close()

    countryTable()
    groupTable()
    userTable()
    relTables()
    yearTable()
    decisionTable()
    backoffice()
    importMongo()


# Rebuild the test database only when run as a script, not on import.
if __name__ == "__main__":
    clean()

Functions

def clean()
Expand source code
def clean():
    """Assemble the clean slate data and load it into a freshly reset database.

    All tables are first built in memory from the configuration constants
    (`COUNTRY`, `GROUP`, `USER`, `VALUES`, `DECISION`, `PROCEDURE`); every
    record gets its `_id` from a `MongoId` generator.
    After that the database named `DATABASE` is dropped and refilled with
    the assembled tables.

    The builders are called in a fixed order, because later tables refer to
    ids issued for earlier ones: users point to groups and countries,
    back office records point to users and to each other.
    """

    ids = MongoId()
    allData = collections.defaultdict(list)
    countryMapping = {}
    userMapping = {}
    groupMapping = {}

    def countryTable():
        """Add one record per configured country, in order of iso code."""
        table = "country"
        for (iso, spec) in sorted(COUNTRY.items()):
            _id = ids.newId(table)
            countryMapping[iso] = _id
            allData[table].append(
                dict(
                    _id=_id,
                    iso=iso,
                    name=spec["name"],
                    isMember=spec["isMember"],
                    latitude=spec["latitude"],
                    longitude=spec["longitude"],
                )
            )

    def groupTable():
        """Add one record per configured permission group."""
        table = "permissionGroup"
        for (name, description) in GROUP:
            _id = ids.newId(table)
            groupMapping[name] = _id
            allData[table].append(dict(_id=_id, rep=name, description=description))

    def userTable():
        """Add the configured users, with group and country replaced by ids.

        Every user must have an `eppn` and a `group`; `country` is optional.
        """
        table = "user"
        for user in USER:
            _id = ids.newId(table)
            # work on a copy: the configuration data itself stays untouched
            u = dict(user.items())
            u["_id"] = _id
            userMapping[u["eppn"]] = _id
            u["group"] = groupMapping[u["group"]]
            if "country" in u:
                u["country"] = countryMapping[u["country"]]
            allData[table].append(u)

    def relTables():
        """Add the plain value tables: each value becomes a record with a `rep`."""
        for (table, values) in VALUES.items():
            for value in values:
                allData[table].append(dict(_id=ids.newId(table), rep=value))

    def yearTable():
        """Add the years 2010 up to and including 2029."""
        table = "year"
        allData[table] = [
            dict(_id=ids.newId(table), rep=year) for year in range(2010, 2030)
        ]

    def decisionTable():
        """Add the configured decisions, in the configured order."""
        table = "decision"
        allData[table] = [
            dict(_id=ids.newId(table), **DECISION["values"][decision])
            for decision in DECISION["order"]
        ]

    def backoffice():
        """Add the tables listed in `PROCEDURE`, turning references into ids.

        Rows refer to users and to rows of other tables by key value.
        These references are resolved through the ids recorded for rows that
        have been processed before, so a table must come after the tables it
        refers to; an unresolved reference raises a KeyError.
        """
        # per table: key value of a row -> id of that row
        relIndex = collections.defaultdict(dict)

        for tableInfo in PROCEDURE:
            table = tableInfo["name"]
            rows = tableInfo["rows"]
            keyField = KEY_FIELD[table]

            for row in rows:  # deterministic order
                _id = ids.newId(table)
                newRow = dict()
                newRow["_id"] = _id
                relIndex[table][row[keyField]] = _id
                for (field, value) in row.items():
                    if field in {"startDate", "endDate"}:
                        newRow[field] = value  # yaml has already converted the datetime
                    elif field == "creator":
                        newRow[field] = userMapping[value]
                    elif field == N.typeContribution and table in (
                        N.package,
                        N.criteria,
                    ):
                        # multiple references: a list of keys becomes a list of ids
                        newRow[field] = [relIndex[field][val] for val in value]
                    elif (table == N.criteria and field == N.package) or (
                        table == N.score and field == N.criteria
                    ):
                        # single reference: one key becomes one id
                        newRow[field] = relIndex[field][value]
                    else:
                        newRow[field] = value
                allData[table].append(newRow)

        # A key field named "key" only serves to resolve references:
        # it is removed from the records before they are stored.
        for tableInfo in PROCEDURE:
            table = tableInfo["name"]
            if KEY_FIELD[table] == "key":
                # .get() avoids creating an empty entry for a table without rows
                for row in allData.get(table, ()):
                    del row["key"]

    def importMongo():
        """Drop the database `DATABASE` and insert all assembled tables.

        Afterwards every table in `VALUE_TABLES` gets a fresh `dateCollected`
        stamp in the `collect` collection.
        The connection is closed when done, also if an error occurs.
        """
        client = MongoClient()
        try:
            sys.stdout.write(f"RESET the DATABASE {DATABASE} ... ")
            client.drop_database(DATABASE)
            mongo = client[DATABASE]
            for (table, rows) in allData.items():
                # insert_many refuses an empty list of documents
                if rows:
                    mongo[table].insert_many(rows)

            justNow = now()
            for table in VALUE_TABLES:
                mongo.collect.update_one(
                    {"table": table}, {"$set": {"dateCollected": justNow}}, upsert=True,
                )
            sys.stdout.write("DONE\n")
        finally:
            client.close()

    countryTable()
    groupTable()
    userTable()
    relTables()
    yearTable()
    decisionTable()
    backoffice()
    importMongo()
def info(x)
Expand source code
def info(x):
    """Write `x`, followed by a newline, to standard output."""
    line = f"{x}\n"
    sys.stdout.write(line)
def toHexMongo(name, number)
Expand source code
def toHexMongo(name, number):
    """Compose a hex string from a name and a sequence number.

    The result consists of 8 zeros, the 10 hex digits that `toHexName`
    derives from `name`, and the hex form of `number` as produced by
    `toHexNumber` (6 digits as long as `number` is below 16**6).
    That adds up to 24 hex digits in the normal case.
    """
    nameHex = toHexName(name)
    numberHex = toHexNumber(number)
    return f"{0:0>8x}{nameHex}{numberHex}"
def toHexName(name)
Expand source code
def toHexName(name):
    """Return the first 10 hex digits of the MD5 digest of `name`.

    `name` is a string; it is encoded as UTF-8 before hashing.
    """
    encoded = bytes(name, "utf-8")
    digest = md5(encoded).hexdigest()
    return digest[:10]
def toHexNumber(number)
Expand source code
def toHexNumber(number):
    """Return `number` in lowercase hex, left-padded with zeros to 6 digits.

    Numbers that need more than 6 hex digits are not truncated.
    """
    return format(number, "0>6x")
def warning(x)
Expand source code
def warning(x):
    """Write `x`, followed by a newline, to standard error."""
    line = f"{x}\n"
    sys.stderr.write(line)

Classes

class IdIndex
Expand source code
class IdIndex:
    """Two-way registry between names and the ids created from them."""

    def __init__(self):
        # name -> id and id -> name; both are filled by getId()
        self._idFromName = {}
        self._nameFromId = {}

    def getId(self, name):
        """Return the id belonging to `name`, creating it on first use.

        A new id is made by passing `name` to `ObjectId`; it is then
        registered in both directions so that later calls with the same
        name return the same id.
        """
        known = self._idFromName
        if name in known:
            return known[name]

        fresh = ObjectId(name)
        known[name] = fresh
        self._nameFromId[fresh] = name
        return fresh

    def getName(self, _id):
        """Return the name from which `_id` was created.

        Raises a KeyError for ids that were not issued by `getId`.
        """
        names = self._nameFromId
        return names[_id]

Subclasses

Methods

def getId(self, name)
Expand source code
def getId(self, name):
    """Return the id belonging to `name`, creating it on first use.

    A new id is made by passing `name` to `ObjectId`; it is then
    registered in both directions so that later calls with the same
    name return the same id.
    """
    known = self._idFromName
    if name in known:
        return known[name]

    fresh = ObjectId(name)
    known[name] = fresh
    self._nameFromId[fresh] = name
    return fresh
def getName(self, _id)
Expand source code
def getName(self, _id):
    """Return the name from which `_id` was created.

    Raises a KeyError for ids that are not registered.
    """
    names = self._nameFromId
    return names[_id]
class MongoId
Expand source code
class MongoId(IdIndex):
    """Issues reproducible ids, numbered consecutively per table."""

    def __init__(self):
        super().__init__()
        # per table: how many ids have been issued so far
        self.cur = collections.Counter()

    def newId(self, table):
        """Return the next id for `table`.

        The counter for `table` is incremented, and the id is derived from
        the table name and the new counter value by means of `toHexMongo`.
        """
        counter = self.cur
        counter[table] += 1
        serial = counter[table]
        return self.getId(toHexMongo(table, serial))

Ancestors

Methods

def newId(self, table)
Expand source code
def newId(self, table):
    """Return the next id for `table`.

    The counter for `table` is incremented, and the id is derived from
    the table name and the new counter value by means of `toHexMongo`.
    """
    counter = self.cur
    counter[table] += 1
    serial = counter[table]
    return self.getId(toHexMongo(table, serial))