Zdrojový kód pro aleph.datastructures.semanticinfo

#! /usr/bin/env python
# -*- coding: utf-8 -*-
#
# Interpreter version: python 2.7
#
"""
Definition of structures, which are used to hold informations about
catalogization process.
"""
# Imports =====================================================================
from collections import namedtuple


from remove_hairs import remove_hairs
from marcxml_parser import MARCXMLRecord


# Functions ===================================================================
def _parse_summaryRecordSysNumber(summaryRecordSysNumber):
    """
    Try to parse vague, not likely machine-readable description and return
    first token, which contains enough numbers in it.
    """
    def number_of_digits(token):
        digits = filter(lambda x: x.isdigit(), token)
        return len(digits)

    tokens = map(
        lambda x: remove_hairs(x, r" .,:;<>(){}[]\/"),
        summaryRecordSysNumber.split()
    )

    # pick only tokens that contains 3 digits
    contains_digits = filter(lambda x: number_of_digits(x) > 3, tokens)

    if not contains_digits:
        return ""

    return contains_digits[0]


# Structures ==================================================================
class SemanticInfo(namedtuple("SemanticInfo", ["hasAcquisitionFields",
                                               "acquisitionFields",
                                               "ISBNAgencyFields",
                                               "descriptiveCatFields",
                                               "descriptiveCatReviewFields",
                                               "subjectCatFields",
                                               "subjectCatReviewFields",
                                               "isClosed",
                                               "isSummaryRecord",
                                               "contentOfFMT",
                                               "parsedSummaryRecordSysNumber",
                                               "summaryRecordSysNumber"])):
    """
    This structure is used to represent information about export progress in
    Aleph.

    It contains information about state of the record, so it can be tracked
    from edeposit project.

    See :meth:`from_xml` for details of parsing of those attributes.

    Attributes:
        hasAcquisitionFields (bool): Was the record approved by acquisition?
        acquisitionFields (list): Acquisition fields, if the record was
            signed.
        ISBNAgencyFields (list): Was the record approved by ISBN agency?
            Contains list of signs, if the record was signed.
        descriptiveCatFields (list): Did the record get through name
            description (jmenný popis)? Contains list of signs, if the
            record was signed.
        descriptiveCatReviewFields (list): Did the record get through name
            revision (jmenná revize)? Contains list of signs, if the record
            was signed.
        subjectCatFields (list): Did the record get through subject
            description (věcný popis)? Contains list of signs, if the record
            was signed.
        subjectCatReviewFields (list): Did the record get through subject
            revision (věcná revize)? Contains list of signs, if the record
            was signed.
        isClosed (bool): Was the record closed? This sometimes happens when
            bad ISBN is given by creator of the record, but different is in
            the book.
        isSummaryRecord (bool): Is the content of FMT == "SE"?
        contentOfFMT (str, default ""): Content of FMT subrecord.
        parsedSummaryRecordSysNumber (str): Same as
            :attr:`summaryRecordSysNumber` but without natural language
            details.
        summaryRecordSysNumber (str): Identifier of the new record if
            :attr:`isClosed` is True. Format of the string is not specified
            and can be different for each record.
    """
    @staticmethod
    def from_xml(xml):
        """
        Pick information from :class:`.MARCXMLRecord` object and use it to
        build :class:`.SemanticInfo` structure.

        Args:
            xml (str/MARCXMLRecord): MarcXML which will be converted to
                SemanticInfo. In case of str, ``<record>`` tag is required.

        Returns:
            structure: :class:`.SemanticInfo`.
        """
        ISBNAgencyFields = []
        descriptiveCatFields = []
        descriptiveCatReviewFields = []
        subjectCatFields = []
        subjectCatReviewFields = []

        # anything that is not already parsed is converted to string and
        # handed to the parser
        parsed = xml
        if not isinstance(xml, MARCXMLRecord):
            parsed = MARCXMLRecord(str(xml))

        # handle FMT record
        contentOfFMT = ""
        isSummaryRecord = False
        if "FMT" in parsed.controlfields:
            contentOfFMT = parsed["FMT"]

            if contentOfFMT == "SE":
                isSummaryRecord = True

        # presence of HLD (in either kind of fields) is what sets the
        # acquisition flag
        hasAcquisitionFields = (
            "HLD" in parsed.datafields or "HLD" in parsed.controlfields
        )

        acquisitionFields = []
        if "STZ" in parsed.datafields:
            acquisitionFields.extend(parsed["STZa"])
            acquisitionFields.extend(parsed["STZb"])

        def sign_and_author(sign):
            """
            Sign is stored in ISTa, author's name is in ISTb.

            Sign is MarcSubrecord obj with pointers to other subrecords, so it
            is possible to pick references to author's name from signs.
            """
            return [sign.replace(" ", "")] + sign.other_subfields.get("b", [])

        # prefix of the sign decides which list the sign (and its author)
        # belongs to; only the first matching prefix is used
        fields_by_prefix = [
            ("jp2", descriptiveCatFields),
            ("jr2", descriptiveCatReviewFields),
            ("vp", subjectCatFields),
            ("vr", subjectCatReviewFields),
            ("ii2", ISBNAgencyFields),
        ]

        # look for catalogization fields
        for orig_sign in parsed["ISTa"]:
            sign = orig_sign.replace(" ", "")  # remove spaces

            for prefix, fields in fields_by_prefix:
                if sign.startswith(prefix):
                    fields.extend(sign_and_author(orig_sign))
                    break

        # look whether the record was 'closed' by catalogizators
        isClosed = any(status == "90" for status in parsed["BASa"])

        # if multiple PJM statuses are present, join them together
        status = "\n".join(parsed["PJMa"])

        # detect link to 'new' record, if the old one was 'closed'
        summaryRecordSysNumber = ""
        parsedSummaryRecordSysNumber = ""
        if status.strip():
            summaryRecordSysNumber = status
            parsedSummaryRecordSysNumber = _parse_summaryRecordSysNumber(
                summaryRecordSysNumber
            )

        return SemanticInfo(
            hasAcquisitionFields=hasAcquisitionFields,
            acquisitionFields=acquisitionFields,
            ISBNAgencyFields=ISBNAgencyFields,
            descriptiveCatFields=descriptiveCatFields,
            descriptiveCatReviewFields=descriptiveCatReviewFields,
            subjectCatFields=subjectCatFields,
            subjectCatReviewFields=subjectCatReviewFields,
            isClosed=isClosed,
            isSummaryRecord=isSummaryRecord,
            contentOfFMT=contentOfFMT,
            parsedSummaryRecordSysNumber=parsedSummaryRecordSysNumber,
            summaryRecordSysNumber=summaryRecordSysNumber,
        )