Source code for press.parsers.common
import re
from functools import partial
from lxml import etree
from litezip.main import COLLECTION_NSMAP
# Names exported by ``from press.parsers.common import *``; the
# underscore-prefixed helpers in this module are intentionally left out.
__all__ = (
'make_elm_tree',
'make_cnx_xpath',
'parse_common_properties',
)
def _maybe(a_list):
"""Grab the first value if it exists."""
try:
return a_list[0]
except IndexError:
return None
def make_elm_tree(model):
    """Parse the file behind a litezip model into an element tree.

    :param model: a :class:`litezip.Collection` or
                  :class:`litezip.Module`; its ``file`` attribute must
                  provide an ``open()`` usable as a context manager
    :returns: the tree produced by :func:`lxml.etree.parse`
    """
    # The file handle is only needed while parsing; the context manager
    # closes it as soon as the tree has been built.
    with model.file.open() as source:
        return etree.parse(source)
def make_cnx_xpath(elm_tree):
    """Build an xpath callable pre-bound to the CNX namespaces.

    The returned callable is ``elm_tree.xpath`` with ``namespaces`` fixed
    to ``COLLECTION_NSMAP``, so callers only pass the expression.

    :param elm_tree: the xml element to begin the xpath from
    :type elm_tree: an element-like object from :mod:`lxml.etree`
    :returns: a :class:`functools.partial` wrapping ``elm_tree.xpath``
    """
    namespaced_xpath = partial(elm_tree.xpath, namespaces=COLLECTION_NSMAP)
    return namespaced_xpath
def _squash_to_text(elm, remove_namespaces=False):
if elm is None:
return None
value = [elm.text or '']
for child in elm.getchildren():
value.append(etree.tostring(child).decode('utf-8').strip())
if remove_namespaces:
value = [re.sub(' xmlns:?[^=]*="[^"]*"', '', v) for v in value]
value = ''.join(value)
return value
def parse_common_properties(elm_tree):
    """Given an element-like object (:mod:`lxml.etree`),
    look up the common metadata and return it as a dict of properties.

    :param elm_tree: the root xml element
    :type elm_tree: an element-like object from :mod:`lxml.etree`
    :returns: common metadata properties
    :rtype: dict
    :raises IndexError: when one of the required single-valued elements
                        (version, created, revised, title, license url,
                        language) is not found

    ``id`` and ``abstract`` are ``None`` when absent. ``keywords``,
    ``subjects`` and the role properties (``authors``, ``maintainers``,
    ``licensors``) are tuples, empty when nothing is found.

    .. note::

       Press does not parse or update user information (aka "actor" in
       the xml). User modifications should be done using the "legacy"
       software.

    """
    xpath = make_cnx_xpath(elm_tree)

    def role_members(expression):
        """Split the first matching role's whitespace-separated text."""
        # A role element that is missing or has no text yields no text
        # node; treat that as "nobody in this role" instead of failing
        # with a bare IndexError.
        text = _maybe(xpath(expression))
        if not text:
            return ()
        return tuple(text.split())

    props = {
        'id': _maybe(xpath('//md:content-id/text()')),
        'version': xpath('//md:version/text()')[0],
        'created': xpath('//md:created/text()')[0],
        'revised': xpath('//md:revised/text()')[0],
        'title': xpath('//md:title/text()')[0],
        'license_url': xpath('//md:license/@url')[0],
        'language': xpath('//md:language/text()')[0],
        'authors': role_members('//md:role[@type="author"]/text()'),
        'maintainers': role_members('//md:role[@type="maintainer"]/text()'),
        'licensors': role_members('//md:role[@type="licensor"]/text()'),
        'keywords': tuple(xpath('//md:keywordlist/md:keyword/text()')),
        'subjects': tuple(xpath('//md:subjectlist/md:subject/text()')),
        'abstract': _squash_to_text(_maybe(xpath('//md:abstract')),
                                    remove_namespaces=True),
    }
    return props