Source code for press.models.press_element

class PressElement:
    """Represents a collxml element parsed from a Collection XML file.

    Every element stores its ``tag``, a copy of its attributes (``attrs``),
    its leading ``text``, its trailing ``tail`` text, an ordered list of
    ``children`` and its ``parent`` (``None`` until it is attached with
    ``add_child``).

    Direct children can be looked up by tag name, either with
    ``element['name']`` or with attribute access (``element.name``).
    """

    class Empty:
        """Placeholder that ``find`` returns when nothing matches.

        ``find`` returns this class itself rather than an instance, so
        ``alltext`` is a static method: it can be called on the class or on
        an instance.
        """

        @staticmethod
        def alltext():
            """Return the empty string (there is no text to collect)."""
            return ''

    def __init__(self, tag, attrs=None, text='', tail=''):
        """Create a detached element.

        :param tag: the element's tag name
        :param attrs: optional mapping of attributes; it is copied
        :param text: text directly inside the element
        :param tail: text following the element's closing tag
        """
        self.tag = tag  # TODO: prevent whitespace in tag name / VALIDATE tag
        self.text = text
        self.tail = tail
        # Copy so that later changes to the caller's mapping do not leak in.
        self.attrs = attrs.copy() if attrs else {}
        self.children = []
        self.parent = None

    def _key(self):
        """Return the tuple that defines this element's identity.

        Used by both ``__hash__`` and ``__eq__`` so the two always agree.
        """
        module_id = self.attrs.get('document', '')
        return (self.tag, self.text, self.tail, module_id)

    def __hash__(self):
        """Make it hashable so that we can convert the tree to a set and
        then use set operations.
        """
        return hash(self._key())

    def __eq__(self, other):
        """Compare by tag, text, tail and the ``document`` attribute.

        The key tuples are compared directly (not their hashes), so a hash
        collision cannot make two different elements equal, and comparing
        with an unrelated or unhashable object simply yields ``False``.
        """
        if not isinstance(other, PressElement):
            return NotImplemented
        return self._key() == other._key()

    def __repr__(self):
        """Represent a tag as it would appear in the source collxml file,
        with the exception that this also includes the trailing text
        (``tail``) prepended with ``...`` (ellipsis).
        """
        text = self.text or ''
        tail = '...{}'.format(self.tail) if self.tail else ''
        attr_str = ''.join(' %s="%s"' % item for item in self.attrs.items())
        return '<%s%s>%s</%s>' % (self.tag, attr_str, text + tail, self.tag)

    def __str__(self):
        """Same text as ``__repr__``."""
        return repr(self)

    def __getitem__(self, name):
        """Easy way to get an element's (direct, not deeply nested) child
        by name.

        :returns: the child when exactly one direct child has that tag, a
                  list of the children when several do, or ``None`` when
                  there is none.
        """
        # FIXME: perhaps we can improve error messaging by returning
        #        something other than None
        matches = [child for child in self.children if child.tag == name]
        if not matches:
            return None
        if len(matches) == 1:
            return matches[0]
        return matches

    def __getattr__(self, name):
        """Fall back to child lookup for attributes that do not exist.

        :raises AttributeError: when no direct child has the tag ``name``
        """
        # ``__getattr__`` only runs after normal lookup has failed.  If the
        # instance has no ``children`` yet (e.g. ``copy``/``pickle`` probing
        # an object freshly made with ``__new__``), reading
        # ``self.children`` would re-enter this method and recurse forever.
        if 'children' not in self.__dict__:
            raise AttributeError(name)
        item = self[name]
        if item is None:
            # Do not touch other instance attributes here: a missing one
            # would route straight back into ``__getattr__``.
            raise AttributeError(
                "'%s' object has no attribute or child element '%s'"
                % (type(self).__name__, name))
        return item

    def __len__(self):
        """Make its length be the length of its children."""
        return len(self.children)

    # https://docs.python.org/3/reference/datamodel.html#object.__bool__
    def __bool__(self):
        """Always truthy, even with no children (``__len__`` would be 0)."""
        return True

    def __iter__(self):
        """Make it iterable, see also ``iter()``"""
        return iter(self.children)

    def iter(self, tag=None):
        """Yield this element and all its descendants, depth first.

        :param tag: only yield elements with this tag; ``None`` or ``'*'``
                    yields every element
        """
        if tag == '*':
            tag = None
        if tag is None or self.tag == tag:
            yield self

        for child in self:
            yield from child.iter(tag)

    def find(self, tag=None):
        """Return the first element yielded by ``iter(tag)``.

        The search includes this element itself.  When nothing matches, the
        ``Empty`` class is returned so that ``.alltext()`` can still be
        called on the result.
        """
        # Stop at the first hit instead of walking the whole tree.
        return next(self.iter(tag), self.Empty)

    def findall(self, tag=None):
        """Return a tuple of every element yielded by ``iter(tag)``."""
        return tuple(self.iter(tag))

    def find_by_path(self, path):
        """Resolve a ``/``-separated path of tag names.

        The first segment is searched for in this element and its
        descendants; every following segment is searched for among the
        descendants of the element matched by the previous segment.

        :returns: the element matched by the last segment, or ``None`` as
                  soon as any segment cannot be matched
        """
        targets = path.strip('/').split('/')
        context = None
        for target in targets:
            if context is None:
                candidates = self.iter(target)
            else:
                # Only look below the previous match, not at it again.
                candidates = (elem
                              for child in context
                              for elem in child.iter(target))
            # ``iter`` treats '*' as a wildcard; a path segment must match
            # the tag literally, hence the explicit comparison.
            context = next((elem for elem in candidates
                            if elem.tag == target), None)
            if context is None:
                return None
        return context

    def add_child(self, child):
        """Make it a tree: append ``child`` and set its ``parent``.

        Works like append for XML ElementTree-s.

        :returns: this element, so calls can be chained
        """
        child.parent = self
        self.children.append(child)
        return self

    def insert_text(self, content):
        """Return a NEW element carrying the stripped ``content``.

        When this element already has text and ``content`` is not blank,
        the content becomes the new element's ``tail``; otherwise it
        becomes its ``text``.  The new element has the same tag and
        attributes but no children and no parent; this element is not
        modified.
        """
        content = content.strip()
        if self.text and content:  # if it's already got text, it's a tail.
            return PressElement(self.tag, attrs=self.attrs, text=self.text,
                                tail=content)
        return PressElement(self.tag, attrs=self.attrs, text=content)

    def _itertext(self):
        """Yield this element's text, then for each child (recursively) its
        inner text followed by its tail.  Empty strings are skipped.
        """
        if self.text:
            yield self.text
        for child in self:
            yield from child._itertext()
            if child.tail:
                yield child.tail

    def alltext(self):
        """Return all inner text plus this element's own tail, joined with
        single spaces and stripped of surrounding whitespace.
        """
        pieces = list(self._itertext())
        pieces.append(self.tail or '')
        return ' '.join(pieces).strip()

    def child_number(self, pos):
        """Return the ``pos``-th direct child, counting from 1.

        :returns: the child, or ``None`` when ``pos`` is out of range
        """
        # Without this guard ``pos == 0`` would index ``[-1]`` and silently
        # return the last child.
        if pos < 1:
            return None
        try:
            return self.children[pos - 1]
        except IndexError:
            return None

    def attr(self, attr_name):
        """Return the value of attribute ``attr_name``, or ``None``."""
        return self.attrs.get(attr_name)
Source code for press.models.press_element

CNX Press

Navigation

Related Topics