Zdrojový kód pro aleph.export

#! /usr/bin/env python
# -*- coding: utf-8 -*-
#
# Interpreter version: python 2.7
#
"""
This module is used to put data to Aleph. It is based on custom made webform,
which is currently used to report new books by publishers.

Most important function from this module is :func:`exportEPublication`,
which will do everything, that is needed to do, to export
:class:`.EPublication` structure to the Aleph.

Warning:
    This whole module is highly dependent on processes, which are defined as
    import processes at the Czech National Library.

Warning:
    If you want to use export ability in your library, you should rewrite this
    and take care, that you are sending data somewhere, where someone will
    process them. Otherwise, you can fill your library's database with crap.

Note:
    Source code of the webform is not available at this moment (it was created
    by third party), but it is possible, that it will be in future. This will
    highly depend on number of people, which will use this project.
"""
# Imports =====================================================================
import isbn_validator
from httpkie import Downloader

import settings
from datastructures import Author
from datastructures import FormatEnum
from datastructures import EPublication


# Variables ===================================================================
ANNOTATION_PREFIX = "Nakladatelská anotace: "


# Functions & objects =========================================================
[dokumentace]class ExportException(Exception): def __init__(self, message): Exception.__init__(self, message)
[dokumentace]class InvalidISBNException(ExportException): def __init__(self, message): super(InvalidISBNException, self).__init__(message)
[dokumentace]class ExportRejectedException(ExportException): def __init__(self, message): super(ExportRejectedException, self).__init__(message)
[dokumentace]class PostData(object): """ This class is used to transform data from :class:`.EPublication` to dictionary, which is sent as POST request to Aleph third-party webform_. .. _webform: http://aleph.nkp.cz/F/?func=file&file_name=service-isbn Note: Class is used instead of simple function, because there is 29 POST parameters with internal dependencies, which need to be processed and validated before they can be passed to webform. Args: epub (EPublication): structure, which will be converted (see :class:`.EPublication` for details). Attr: _POST (dict): dictionary with parsed data mapping (dict): dictionary with some of mapping, which are applied to :attr:`._POST` dict in post processing Warning: Don't manipulate :attr:`._POST` property directly, if you didn't really know the internal structure and how the :attr:`.mapping` is applied. """ def __init__(self, epub): self._POST = { "sid": settings.EDEPOSIT_EXPORT_SIGNATURE, "P0100LDR__": "-----nam-a22------a-4500", "P0200FMT__": "BK", "P0300BAS__a": "30", # Báze, pro eknihy 49 "P0501010__a": "", # ISBN (uppercase) "P0502010__b": "online", # vazba/forma "P0504010__d": "", # cena # "P1201901__b": "", # ean "P0601010__a": "", # ISBN souboru "P0602010__b": "", # same thing "P07012001_a": "", # název "P07022001_b": "", # vyplneno na zaklade vazby/formy "P07032001_e": "", # podnázev "P07042001_h": "", # Část (svazek, díl) "P07052001_i": "", # Název části "P1301ZAK__b": "", # autor "P1302ZAK__c": "", # autor2 "P1303ZAK__c": "", # autor3 # "P10012252_a": "", # edice # "P10022252_v": "", # Číslo svazku "P110185640u": "", # URL "P0503010__x": "", # Formát (poze pro epublikace) "P0901210__a": "", # Místo vydání "P0902210__c": "", # Nakladatel "P0903210__d": "", # Měsíc a rok vydání "P1401PJM__a": "", # Vydáno v koedici s "P0801205__a": "", # Pořadí vydání "P1501IST1_a": "ow", # Zpracovatel záznamu (hidden) "P1502IST1_b": "", # Zpracovatel záznamu (viditelna) "P1601ISB__a": "", # ISBN2 - validated (hidden) "P1801URL__u": "", # internal URL # "REPEAT": "Y", # predvyplnit zaznam "P1001330__a": "", # anotace } self.mapping = { "mapa": [ "-----nem-a22------a-4500", "MP", "30", "kartografický dokument", "ow" ], "CD-ROM": [ "-----nam-a22------a-4500", "BK", "30", "elektronický zdroj", "ow" ], "online": [ "-----nam-a22------a-4500", "BK", "49", "elektronický zdroj", "ox", ], # "else": [ # "-----nam-a22------a-4500", # "BK", # "30", # "", # "ow" # ], "else": [ "-----nam-a22------a-4500", "BK", "49", "elektronický zdroj", "ox", ] } self.mapping["DVD"] = self.mapping["CD-ROM"] self._import_epublication(epub) def _import_epublication(self, epub): """ Fill internal property ._POST dictionary with data from EPublication. """ # mrs. Svobodová requires that annotation exported by us have this # prefix prefixed_annotation = ANNOTATION_PREFIX + epub.anotace self._POST["P0501010__a"] = epub.ISBN self._POST["P07012001_a"] = epub.nazev self._POST["P07032001_e"] = epub.podnazev self._POST["P0502010__b"] = epub.vazba self._POST["P0504010__d"] = epub.cena self._POST["P07042001_h"] = epub.castDil self._POST["P07052001_i"] = epub.nazevCasti self._POST["P0902210__c"] = epub.nakladatelVydavatel self._POST["P0903210__d"] = epub.datumVydani self._POST["P0801205__a"] = epub.poradiVydani self._POST["P1502IST1_b"] = epub.zpracovatelZaznamu self._POST["P0503010__x"] = epub.format self._POST["P110185640u"] = epub.url or "" self._POST["P0901210__a"] = epub.mistoVydani self._POST["P0601010__a"] = epub.ISBNSouboruPublikaci self._POST["P1801URL__u"] = epub.internal_url self._POST["P1001330__a"] = prefixed_annotation if epub.anotace else "" if len(epub.autori) > 3: epub.autori[2] = ", ".join(epub.autori[2:]) epub.autori = epub.autori[:3] # check whether the autors have required type (string) for author in epub.autori: error_msg = "Bad type of author (%s) (str is required)." assert isinstance(author, basestring), (error_msg % type(author)) authors_fields = ("P1301ZAK__b", "P1302ZAK__c", "P1303ZAK__c") self._POST.update(dict(zip(authors_fields, epub.autori))) def _apply_mapping(self, mapping): """ Map some case specific data to the fields in internal dictionary. """ self._POST["P0100LDR__"] = mapping[0] self._POST["P0200FMT__"] = mapping[1] self._POST["P0300BAS__a"] = mapping[2] self._POST["P07022001_b"] = mapping[3] self._POST["P1501IST1_a"] = mapping[4] def _validate_isbn(self, raw_isbn, accept_blank=False): if raw_isbn and type(raw_isbn) in [tuple, list]: raw_isbn = raw_isbn[0] # blank list -> blank str raw_isbn = raw_isbn or "" if not raw_isbn and accept_blank: return raw_isbn if not isbn_validator.is_valid_isbn(raw_isbn): raise InvalidISBNException( raw_isbn + " has invalid ISBN checksum!" ) return raw_isbn.upper() def _postprocess(self): """ Move data between internal fields, validate them and make sure, that everything is as it should be. """ # validate series ISBN self._POST["P0601010__a"] = self._validate_isbn( self._POST["P0601010__a"], accept_blank=True ) if self._POST["P0601010__a"] != "": self._POST["P0601010__b"] = "soubor : " + self._POST["P0601010__a"] # validate ISBN of the book self._POST["P0501010__a"] = self._validate_isbn( self._POST["P0501010__a"], accept_blank=False ) self._POST["P1601ISB__a"] = self._POST["P0501010__a"] @staticmethod def _czech_isbn_check(isbn_field): isbn_field = isbn_field.replace("-", "").strip() return any([ isbn_field.startswith("80"), isbn_field.startswith("97880"), ]) def _check_required_fields(self): """ Make sure, that internal dictionary contains all fields, which are required by the webform. """ assert self._POST["P0501010__a"] != "", "ISBN is required!" # export script accepts only czech ISBNs for isbn_field_name in ("P0501010__a", "P1601ISB__a"): check = PostData._czech_isbn_check(self._POST[isbn_field_name]) assert check, "Only czech ISBN is accepted!" assert self._POST["P1601ISB__a"] != "", "Hidden ISBN field is required!" assert self._POST["P07012001_a"] != "", "Nazev is required!" assert self._POST["P0901210__a"] != "", "Místo vydání is required!" assert self._POST["P0903210__d"] != "", "Datum vydání is required!" assert self._POST["P0801205__a"] != "", "Pořadí vydání is required!" # Zpracovatel záznamu assert self._POST["P1501IST1_a"] != "", "Zpracovatel is required! (H)" assert self._POST["P1502IST1_b"] != "", "Zpracovatel is required! (V)" # vazba/forma assert self._POST["P0502010__b"] != "", "Vazba/forma is required!" # assert self._POST["P110185640u"] != "", "URL is required!" # Formát (pouze pro epublikace) if self._POST["P0502010__b"] == FormatEnum.ONLINE: assert self._POST["P0503010__x"] != "", "Format is required!" assert self._POST["P0902210__c"] != "", "Nakladatel is required!" def to_unicode(inp): try: return unicode(inp) except UnicodeDecodeError: return unicode(inp, "utf-8") # check lenght of annotation field - try to convert string to unicode, # to count characters, not combination bytes annotation_length = len(to_unicode(self._POST["P1001330__a"])) annotation_length -= len(to_unicode(ANNOTATION_PREFIX)) assert annotation_length <= 500, "Annotation is too long (> 500)."
[dokumentace] def get_POST_data(self): """ Returns: dict: POST data, which can be sent to webform using \ :py:mod:`urllib` or similar library """ self._postprocess() # some fields need to be remapped (depends on type of media) self._apply_mapping( self.mapping.get(self._POST["P0502010__b"], self.mapping["else"]) ) self._check_required_fields() return self._POST
def _sendPostDict(post_dict): """ Send `post_dict` to the :attr:`.ALEPH_EXPORT_URL`. Args: post_dict (dict): dictionary from :class:`PostData.get_POST_data()` Returns: str: Reponse from webform. """ downer = Downloader() downer.headers["Referer"] = settings.EDEPOSIT_EXPORT_REFERER data = downer.download(settings.ALEPH_EXPORT_URL, post=post_dict) rheaders = downer.response_headers error_msg = rheaders.get("aleph-info", "").lower().strip() if "aleph-info" in rheaders and error_msg.startswith("error"): raise ExportRejectedException( "Export request was rejected by import webform: %s" % rheaders["aleph-info"] ) return data def _removeSpecialCharacters(epub): """ Remove most of the unnecessary interpunction from epublication, which can break unimark if not used properly. """ special_chars = "/:,- " epub_dict = epub._asdict() for key in epub_dict.keys(): if isinstance(epub_dict[key], basestring): epub_dict[key] = epub_dict[key].strip(special_chars) elif type(epub_dict[key]) in [tuple, list]: out = [] for item in epub_dict[key]: if not isinstance(item, Author): out.append(item) continue new_item = item._asdict() for key in new_item.keys(): new_item[key] = new_item[key].strip(special_chars) out.append(Author(**new_item)) epub_dict[key] = out return EPublication(**epub_dict)
[dokumentace]def exportEPublication(epub): """ Send `epub` :class:`.EPublication` object to Aleph, where it will be processed by librarians. Args: epub (EPublication): structure for export Warning: The export function is expecting some of the EPublication properties to be filled with non-blank data. Specifically: - :attr:`.EPublication.ISBN` - :attr:`.EPublication.nazev` - :attr:`.EPublication.mistoVydani` - :attr:`.EPublication.datumVydani` - :attr:`.EPublication.poradiVydani` - :attr:`.EPublication.zpracovatelZaznamu` - :attr:`.EPublication.vazba` - :attr:`.EPublication.format` - :attr:`.EPublication.format` - :attr:`.EPublication.nakladatelVydavatel` """ epub = _removeSpecialCharacters(epub) post_dict = PostData(epub).get_POST_data() return _sendPostDict(post_dict)