Source code for press.models.press_element

class PressElement:
    """Represent a collxml element parsed from a Collection XML file.

    An element carries a ``tag``, ``text`` and ``tail`` strings, a dict of
    ``attrs``, an ordered list of ``children`` and a ``parent`` reference
    (``None`` until the element is attached with :meth:`add_child`).

    Direct children can be looked up by tag name with ``element['name']``
    or ``element.name``.
    """

    class Empty:
        """Placeholder returned by :meth:`PressElement.find` on a miss.

        ``find`` returns this class itself (not an instance), which lets
        callers chain ``.alltext()`` without first checking for a match.
        """

        @staticmethod
        def alltext():
            """Return an empty string; there is no text to collect."""
            return ''

    def __init__(self, tag, attrs=None, text='', tail=''):
        """Create a detached element.

        :param tag: the element's tag name.
        :param attrs: optional mapping of attributes; it is copied.
        :param text: text directly inside the element.
        :param tail: text that follows the element's closing tag.
        """
        # TODO: prevent whitespace in tag name / VALIDATE tag
        self.tag = tag
        self.text = text
        self.tail = tail
        # Copy so later changes to the caller's mapping do not leak in.
        self.attrs = attrs.copy() if attrs else {}
        self.children = []
        self.parent = None

    # -- hashing / equality -------------------------------------------------
    # Elements are hashable so that a tree can be converted to a set and
    # compared with set operations.

    def _hash_key(self):
        """Return the tuple that defines this element's identity."""
        module_id = self.attrs.get('document', '')
        return (self.tag, self.text, self.tail, module_id)

    def __hash__(self):
        return hash(self._hash_key())

    def __eq__(self, other):
        """Compare by tag, text, tail and the ``document`` attribute.

        Returns ``NotImplemented`` for non-elements so that an element is
        never equal to an unrelated object that merely has the same hash.
        """
        if not isinstance(other, PressElement):
            return NotImplemented
        return self._hash_key() == other._hash_key()

    # -- representation -----------------------------------------------------

    def __repr__(self):
        """Render the tag roughly as it appears in the source collxml.

        The trailing text (``tail``), when present, is appended to the
        element text, prefixed with ``...`` (ellipsis). Children are not
        rendered.
        """
        text = self.text or ''
        tail = '...{}'.format(self.tail) if self.tail else ''
        attr_str = ''.join(' %s="%s"' % item for item in self.attrs.items())
        return '<%s%s>%s</%s>' % (self.tag, attr_str, text + tail, self.tag)

    # ``str()`` and ``repr()`` produce the same text.
    __str__ = __repr__

    # -- child access by name -----------------------------------------------

    def __getitem__(self, name):
        """Return the direct (not deeply nested) child/children named ``name``.

        :returns: ``None`` when no child has that tag, the child itself when
            exactly one does, or a list of the matching children (in
            document order) when several do.
        """
        # FIXME: perhaps we can improve error messaging by returning
        # something other than None
        matches = [child for child in self.children if child.tag == name]
        if not matches:
            return None
        if len(matches) == 1:
            return matches[0]
        return matches

    def __getattr__(self, name):
        """Attribute-style access to direct children (``element.title``).

        Only invoked when normal attribute lookup fails.

        :raises AttributeError: when no direct child has the tag ``name``.
        """
        # ``children`` is missing on instances created without ``__init__``
        # (``copy``/``pickle`` build them through ``__new__`` and then probe
        # for ``__setstate__``). Touching ``self.children`` here would
        # re-enter ``__getattr__`` and recurse until RecursionError.
        if 'children' not in self.__dict__:
            raise AttributeError(name)
        item = self[name]
        if item is None:
            raise AttributeError(name)
        return item

    def __len__(self):
        """Return the number of direct children."""
        return len(self.children)

    # https://docs.python.org/3/reference/datamodel.html#object.__bool__
    def __bool__(self):
        """Always truthy, so a childless element is not mistaken for "missing".

        Without this, truth testing would fall back to ``__len__``.
        """
        return True

    # -- iteration / searching ----------------------------------------------

    def __iter__(self):
        """Iterate over the direct children; see also :meth:`iter`."""
        return iter(self.children)

    def iter(self, tag=None):
        """Yield this element and all its descendants in document order.

        :param tag: only yield elements with this tag; ``None`` or ``'*'``
            yields every element.
        """
        if tag == '*':
            tag = None
        if tag is None or self.tag == tag:
            yield self
        for child in self:
            yield from child.iter(tag)

    def find(self, tag=None):
        """Return the first element (self included) whose tag is ``tag``.

        :returns: the matching element, or the :class:`Empty` class when
            nothing matches.
        """
        return next(self.iter(tag), self.Empty)

    def findall(self, tag=None):
        """Return a tuple of every element (self included) matching ``tag``."""
        return tuple(self.iter(tag))

    def find_by_path(self, path):
        """Return the element reached by following a ``/``-separated path.

        The first segment is matched against this element and its
        descendants; every following segment is matched against the
        descendants of the element found for the previous segment. Leading,
        trailing and repeated slashes are ignored.

        :returns: the element for the last segment, or ``None`` when any
            segment cannot be found (or the path is empty).
        """
        segments = [part for part in path.strip('/').split('/') if part]
        current = None
        for depth, segment in enumerate(segments):
            if depth == 0:
                candidates = self.iter(segment)
            else:
                candidates = (
                    hit for child in current for hit in child.iter(segment)
                )
            # ``iter`` treats '*' as a wildcard; path segments are matched
            # literally, hence the explicit tag comparison.
            current = next(
                (elem for elem in candidates if elem.tag == segment), None)
            if current is None:
                return None
        return current

    # -- tree building ------------------------------------------------------

    def add_child(self, child):
        """Append ``child`` (like ``append`` on an XML ElementTree element).

        Sets ``child.parent`` to this element.

        :returns: this element, so calls can be chained.
        """
        child.parent = self
        self.children.append(child)
        return self

    def insert_text(self, content):
        """Return a NEW element with ``content`` stored as text or tail.

        ``content`` is stripped first. When this element already has text
        and the stripped content is non-empty, the content becomes the new
        element's ``tail``; otherwise it becomes its ``text``. The returned
        element copies the tag and attributes only -- children and parent
        are not carried over, and this element is left unmodified.
        """
        content = content.strip()
        if self.text and content:
            # if it's already got text, it's a tail.
            return PressElement(self.tag, attrs=self.attrs,
                                text=self.text, tail=content)
        return PressElement(self.tag, attrs=self.attrs, text=content)

    # -- text extraction ----------------------------------------------------

    def _itertext(self):
        """Yield the non-empty text and tail strings in document order.

        This element's own ``tail`` is not included.
        """
        if self.text:
            yield self.text
        for child in self:
            yield from child._itertext()
            if child.tail:
                yield child.tail

    def alltext(self):
        """Return all nested text plus this element's tail, space-joined.

        Leading and trailing whitespace is stripped from the result.
        """
        pieces = list(self._itertext())
        pieces.append(self.tail or '')
        return ' '.join(pieces).strip()

    # -- misc accessors -----------------------------------------------------

    def child_number(self, pos):
        """Return the child at 1-based position ``pos``, or ``None``.

        ``pos - 1`` is used directly as a list index, so ``pos=0`` resolves
        to the last child and negative values count from the end.
        """
        try:
            return self.children[pos - 1]
        except IndexError:
            return None

    def attr(self, attr_name):
        """Return the value of attribute ``attr_name``, or ``None``."""
        return self.attrs.get(attr_name, None)