diff options
Diffstat (limited to 'lib/python2.7/site-packages/Twisted-12.2.0-py2.7-linux-x86_64.egg/twisted/web/microdom.py')
-rwxr-xr-x | lib/python2.7/site-packages/Twisted-12.2.0-py2.7-linux-x86_64.egg/twisted/web/microdom.py | 1028 |
1 files changed, 0 insertions, 1028 deletions
diff --git a/lib/python2.7/site-packages/Twisted-12.2.0-py2.7-linux-x86_64.egg/twisted/web/microdom.py b/lib/python2.7/site-packages/Twisted-12.2.0-py2.7-linux-x86_64.egg/twisted/web/microdom.py deleted file mode 100755 index ca356120..00000000 --- a/lib/python2.7/site-packages/Twisted-12.2.0-py2.7-linux-x86_64.egg/twisted/web/microdom.py +++ /dev/null @@ -1,1028 +0,0 @@ -# -*- test-case-name: twisted.web.test.test_xml -*- -# Copyright (c) Twisted Matrix Laboratories. -# See LICENSE for details. - -""" -Micro Document Object Model: a partial DOM implementation with SUX. - -This is an implementation of what we consider to be the useful subset of the -DOM. The chief advantage of this library is that, not being burdened with -standards compliance, it can remain very stable between versions. We can also -implement utility 'pythonic' ways to access and mutate the XML tree. - -Since this has not subjected to a serious trial by fire, it is not recommended -to use this outside of Twisted applications. However, it seems to work just -fine for the documentation generator, which parses a fairly representative -sample of XML. - -Microdom mainly focuses on working with HTML and XHTML. -""" - -# System Imports -import re -from cStringIO import StringIO - -# create NodeList class -from types import ListType as NodeList -from types import StringTypes, UnicodeType - -# Twisted Imports -from twisted.web.sux import XMLParser, ParseError -from twisted.python.util import InsensitiveDict - - -def getElementsByTagName(iNode, name): - """ - Return a list of all child elements of C{iNode} with a name matching - C{name}. - - Note that this implementation does not conform to the DOM Level 1 Core - specification because it may return C{iNode}. - - @param iNode: An element at which to begin searching. If C{iNode} has a - name matching C{name}, it will be included in the result. - - @param name: A C{str} giving the name of the elements to return. - - @return: A C{list} of direct or indirect child elements of C{iNode} with - the name C{name}. This may include C{iNode}. - """ - matches = [] - matches_append = matches.append # faster lookup. don't do this at home - slice = [iNode] - while len(slice)>0: - c = slice.pop(0) - if c.nodeName == name: - matches_append(c) - slice[:0] = c.childNodes - return matches - - - -def getElementsByTagNameNoCase(iNode, name): - name = name.lower() - matches = [] - matches_append = matches.append - slice=[iNode] - while len(slice)>0: - c = slice.pop(0) - if c.nodeName.lower() == name: - matches_append(c) - slice[:0] = c.childNodes - return matches - -# order is important -HTML_ESCAPE_CHARS = (('&', '&'), # don't add any entities before this one - ('<', '<'), - ('>', '>'), - ('"', '"')) -REV_HTML_ESCAPE_CHARS = list(HTML_ESCAPE_CHARS) -REV_HTML_ESCAPE_CHARS.reverse() - -XML_ESCAPE_CHARS = HTML_ESCAPE_CHARS + (("'", '''),) -REV_XML_ESCAPE_CHARS = list(XML_ESCAPE_CHARS) -REV_XML_ESCAPE_CHARS.reverse() - -def unescape(text, chars=REV_HTML_ESCAPE_CHARS): - "Perform the exact opposite of 'escape'." - for s, h in chars: - text = text.replace(h, s) - return text - -def escape(text, chars=HTML_ESCAPE_CHARS): - "Escape a few XML special chars with XML entities." - for s, h in chars: - text = text.replace(s, h) - return text - - -class MismatchedTags(Exception): - - def __init__(self, filename, expect, got, endLine, endCol, begLine, begCol): - (self.filename, self.expect, self.got, self.begLine, self.begCol, self.endLine, - self.endCol) = filename, expect, got, begLine, begCol, endLine, endCol - - def __str__(self): - return ("expected </%s>, got </%s> line: %s col: %s, began line: %s col: %s" - % (self.expect, self.got, self.endLine, self.endCol, self.begLine, - self.begCol)) - - -class Node(object): - nodeName = "Node" - - def __init__(self, parentNode=None): - self.parentNode = parentNode - self.childNodes = [] - - def isEqualToNode(self, other): - """ - Compare this node to C{other}. If the nodes have the same number of - children and corresponding children are equal to each other, return - C{True}, otherwise return C{False}. - - @type other: L{Node} - @rtype: C{bool} - """ - if len(self.childNodes) != len(other.childNodes): - return False - for a, b in zip(self.childNodes, other.childNodes): - if not a.isEqualToNode(b): - return False - return True - - def writexml(self, stream, indent='', addindent='', newl='', strip=0, - nsprefixes={}, namespace=''): - raise NotImplementedError() - - def toxml(self, indent='', addindent='', newl='', strip=0, nsprefixes={}, - namespace=''): - s = StringIO() - self.writexml(s, indent, addindent, newl, strip, nsprefixes, namespace) - rv = s.getvalue() - return rv - - def writeprettyxml(self, stream, indent='', addindent=' ', newl='\n', strip=0): - return self.writexml(stream, indent, addindent, newl, strip) - - def toprettyxml(self, indent='', addindent=' ', newl='\n', strip=0): - return self.toxml(indent, addindent, newl, strip) - - def cloneNode(self, deep=0, parent=None): - raise NotImplementedError() - - def hasChildNodes(self): - if self.childNodes: - return 1 - else: - return 0 - - - def appendChild(self, child): - """ - Make the given L{Node} the last child of this node. - - @param child: The L{Node} which will become a child of this node. - - @raise TypeError: If C{child} is not a C{Node} instance. - """ - if not isinstance(child, Node): - raise TypeError("expected Node instance") - self.childNodes.append(child) - child.parentNode = self - - - def insertBefore(self, new, ref): - """ - Make the given L{Node} C{new} a child of this node which comes before - the L{Node} C{ref}. - - @param new: A L{Node} which will become a child of this node. - - @param ref: A L{Node} which is already a child of this node which - C{new} will be inserted before. - - @raise TypeError: If C{new} or C{ref} is not a C{Node} instance. - - @return: C{new} - """ - if not isinstance(new, Node) or not isinstance(ref, Node): - raise TypeError("expected Node instance") - i = self.childNodes.index(ref) - new.parentNode = self - self.childNodes.insert(i, new) - return new - - - def removeChild(self, child): - """ - Remove the given L{Node} from this node's children. - - @param child: A L{Node} which is a child of this node which will no - longer be a child of this node after this method is called. - - @raise TypeError: If C{child} is not a C{Node} instance. - - @return: C{child} - """ - if not isinstance(child, Node): - raise TypeError("expected Node instance") - if child in self.childNodes: - self.childNodes.remove(child) - child.parentNode = None - return child - - def replaceChild(self, newChild, oldChild): - """ - Replace a L{Node} which is already a child of this node with a - different node. - - @param newChild: A L{Node} which will be made a child of this node. - - @param oldChild: A L{Node} which is a child of this node which will - give up its position to C{newChild}. - - @raise TypeError: If C{newChild} or C{oldChild} is not a C{Node} - instance. - - @raise ValueError: If C{oldChild} is not a child of this C{Node}. - """ - if not isinstance(newChild, Node) or not isinstance(oldChild, Node): - raise TypeError("expected Node instance") - if oldChild.parentNode is not self: - raise ValueError("oldChild is not a child of this node") - self.childNodes[self.childNodes.index(oldChild)] = newChild - oldChild.parentNode = None - newChild.parentNode = self - - - def lastChild(self): - return self.childNodes[-1] - - - def firstChild(self): - if len(self.childNodes): - return self.childNodes[0] - return None - - #def get_ownerDocument(self): - # """This doesn't really get the owner document; microdom nodes - # don't even have one necessarily. This gets the root node, - # which is usually what you really meant. - # *NOT DOM COMPLIANT.* - # """ - # node=self - # while (node.parentNode): node=node.parentNode - # return node - #ownerDocument=node.get_ownerDocument() - # leaving commented for discussion; see also domhelpers.getParents(node) - -class Document(Node): - - def __init__(self, documentElement=None): - Node.__init__(self) - if documentElement: - self.appendChild(documentElement) - - def cloneNode(self, deep=0, parent=None): - d = Document() - d.doctype = self.doctype - if deep: - newEl = self.documentElement.cloneNode(1, self) - else: - newEl = self.documentElement - d.appendChild(newEl) - return d - - doctype = None - - def isEqualToDocument(self, n): - return (self.doctype == n.doctype) and Node.isEqualToNode(self, n) - isEqualToNode = isEqualToDocument - - def get_documentElement(self): - return self.childNodes[0] - documentElement=property(get_documentElement) - - def appendChild(self, child): - """ - Make the given L{Node} the I{document element} of this L{Document}. - - @param child: The L{Node} to make into this L{Document}'s document - element. - - @raise ValueError: If this document already has a document element. - """ - if self.childNodes: - raise ValueError("Only one element per document.") - Node.appendChild(self, child) - - def writexml(self, stream, indent='', addindent='', newl='', strip=0, - nsprefixes={}, namespace=''): - stream.write('<?xml version="1.0"?>' + newl) - if self.doctype: - stream.write("<!DOCTYPE "+self.doctype+">" + newl) - self.documentElement.writexml(stream, indent, addindent, newl, strip, - nsprefixes, namespace) - - # of dubious utility (?) - def createElement(self, name, **kw): - return Element(name, **kw) - - def createTextNode(self, text): - return Text(text) - - def createComment(self, text): - return Comment(text) - - def getElementsByTagName(self, name): - if self.documentElement.caseInsensitive: - return getElementsByTagNameNoCase(self, name) - return getElementsByTagName(self, name) - - def getElementById(self, id): - childNodes = self.childNodes[:] - while childNodes: - node = childNodes.pop(0) - if node.childNodes: - childNodes.extend(node.childNodes) - if hasattr(node, 'getAttribute') and node.getAttribute("id") == id: - return node - - -class EntityReference(Node): - - def __init__(self, eref, parentNode=None): - Node.__init__(self, parentNode) - self.eref = eref - self.nodeValue = self.data = "&" + eref + ";" - - def isEqualToEntityReference(self, n): - if not isinstance(n, EntityReference): - return 0 - return (self.eref == n.eref) and (self.nodeValue == n.nodeValue) - isEqualToNode = isEqualToEntityReference - - def writexml(self, stream, indent='', addindent='', newl='', strip=0, - nsprefixes={}, namespace=''): - stream.write(self.nodeValue) - - def cloneNode(self, deep=0, parent=None): - return EntityReference(self.eref, parent) - - -class CharacterData(Node): - - def __init__(self, data, parentNode=None): - Node.__init__(self, parentNode) - self.value = self.data = self.nodeValue = data - - def isEqualToCharacterData(self, n): - return self.value == n.value - isEqualToNode = isEqualToCharacterData - - -class Comment(CharacterData): - """A comment node.""" - - def writexml(self, stream, indent='', addindent='', newl='', strip=0, - nsprefixes={}, namespace=''): - val=self.data - if isinstance(val, UnicodeType): - val=val.encode('utf8') - stream.write("<!--%s-->" % val) - - def cloneNode(self, deep=0, parent=None): - return Comment(self.nodeValue, parent) - - -class Text(CharacterData): - - def __init__(self, data, parentNode=None, raw=0): - CharacterData.__init__(self, data, parentNode) - self.raw = raw - - - def isEqualToNode(self, other): - """ - Compare this text to C{text}. If the underlying values and the C{raw} - flag are the same, return C{True}, otherwise return C{False}. - """ - return ( - CharacterData.isEqualToNode(self, other) and - self.raw == other.raw) - - - def cloneNode(self, deep=0, parent=None): - return Text(self.nodeValue, parent, self.raw) - - def writexml(self, stream, indent='', addindent='', newl='', strip=0, - nsprefixes={}, namespace=''): - if self.raw: - val = self.nodeValue - if not isinstance(val, StringTypes): - val = str(self.nodeValue) - else: - v = self.nodeValue - if not isinstance(v, StringTypes): - v = str(v) - if strip: - v = ' '.join(v.split()) - val = escape(v) - if isinstance(val, UnicodeType): - val = val.encode('utf8') - stream.write(val) - - def __repr__(self): - return "Text(%s" % repr(self.nodeValue) + ')' - - -class CDATASection(CharacterData): - def cloneNode(self, deep=0, parent=None): - return CDATASection(self.nodeValue, parent) - - def writexml(self, stream, indent='', addindent='', newl='', strip=0, - nsprefixes={}, namespace=''): - stream.write("<![CDATA[") - stream.write(self.nodeValue) - stream.write("]]>") - -def _genprefix(): - i = 0 - while True: - yield 'p' + str(i) - i = i + 1 -genprefix = _genprefix().next - -class _Attr(CharacterData): - "Support class for getAttributeNode." - -class Element(Node): - - preserveCase = 0 - caseInsensitive = 1 - nsprefixes = None - - def __init__(self, tagName, attributes=None, parentNode=None, - filename=None, markpos=None, - caseInsensitive=1, preserveCase=0, - namespace=None): - Node.__init__(self, parentNode) - self.preserveCase = preserveCase or not caseInsensitive - self.caseInsensitive = caseInsensitive - if not preserveCase: - tagName = tagName.lower() - if attributes is None: - self.attributes = {} - else: - self.attributes = attributes - for k, v in self.attributes.items(): - self.attributes[k] = unescape(v) - - if caseInsensitive: - self.attributes = InsensitiveDict(self.attributes, - preserve=preserveCase) - - self.endTagName = self.nodeName = self.tagName = tagName - self._filename = filename - self._markpos = markpos - self.namespace = namespace - - def addPrefixes(self, pfxs): - if self.nsprefixes is None: - self.nsprefixes = pfxs - else: - self.nsprefixes.update(pfxs) - - def endTag(self, endTagName): - if not self.preserveCase: - endTagName = endTagName.lower() - self.endTagName = endTagName - - def isEqualToElement(self, n): - if self.caseInsensitive: - return ((self.attributes == n.attributes) - and (self.nodeName.lower() == n.nodeName.lower())) - return (self.attributes == n.attributes) and (self.nodeName == n.nodeName) - - - def isEqualToNode(self, other): - """ - Compare this element to C{other}. If the C{nodeName}, C{namespace}, - C{attributes}, and C{childNodes} are all the same, return C{True}, - otherwise return C{False}. - """ - return ( - self.nodeName.lower() == other.nodeName.lower() and - self.namespace == other.namespace and - self.attributes == other.attributes and - Node.isEqualToNode(self, other)) - - - def cloneNode(self, deep=0, parent=None): - clone = Element( - self.tagName, parentNode=parent, namespace=self.namespace, - preserveCase=self.preserveCase, caseInsensitive=self.caseInsensitive) - clone.attributes.update(self.attributes) - if deep: - clone.childNodes = [child.cloneNode(1, clone) for child in self.childNodes] - else: - clone.childNodes = [] - return clone - - def getElementsByTagName(self, name): - if self.caseInsensitive: - return getElementsByTagNameNoCase(self, name) - return getElementsByTagName(self, name) - - def hasAttributes(self): - return 1 - - def getAttribute(self, name, default=None): - return self.attributes.get(name, default) - - def getAttributeNS(self, ns, name, default=None): - nsk = (ns, name) - if self.attributes.has_key(nsk): - return self.attributes[nsk] - if ns == self.namespace: - return self.attributes.get(name, default) - return default - - def getAttributeNode(self, name): - return _Attr(self.getAttribute(name), self) - - def setAttribute(self, name, attr): - self.attributes[name] = attr - - def removeAttribute(self, name): - if name in self.attributes: - del self.attributes[name] - - def hasAttribute(self, name): - return name in self.attributes - - - def writexml(self, stream, indent='', addindent='', newl='', strip=0, - nsprefixes={}, namespace=''): - """ - Serialize this L{Element} to the given stream. - - @param stream: A file-like object to which this L{Element} will be - written. - - @param nsprefixes: A C{dict} mapping namespace URIs as C{str} to - prefixes as C{str}. This defines the prefixes which are already in - scope in the document at the point at which this L{Element} exists. - This is essentially an implementation detail for namespace support. - Applications should not try to use it. - - @param namespace: The namespace URI as a C{str} which is the default at - the point in the document at which this L{Element} exists. This is - essentially an implementation detail for namespace support. - Applications should not try to use it. - """ - # write beginning - ALLOWSINGLETON = ('img', 'br', 'hr', 'base', 'meta', 'link', 'param', - 'area', 'input', 'col', 'basefont', 'isindex', - 'frame') - BLOCKELEMENTS = ('html', 'head', 'body', 'noscript', 'ins', 'del', - 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'script', - 'ul', 'ol', 'dl', 'pre', 'hr', 'blockquote', - 'address', 'p', 'div', 'fieldset', 'table', 'tr', - 'form', 'object', 'fieldset', 'applet', 'map') - FORMATNICELY = ('tr', 'ul', 'ol', 'head') - - # this should never be necessary unless people start - # changing .tagName on the fly(?) - if not self.preserveCase: - self.endTagName = self.tagName - w = stream.write - if self.nsprefixes: - newprefixes = self.nsprefixes.copy() - for ns in nsprefixes.keys(): - if ns in newprefixes: - del newprefixes[ns] - else: - newprefixes = {} - - begin = ['<'] - if self.tagName in BLOCKELEMENTS: - begin = [newl, indent] + begin - bext = begin.extend - writeattr = lambda _atr, _val: bext((' ', _atr, '="', escape(_val), '"')) - - # Make a local for tracking what end tag will be used. If namespace - # prefixes are involved, this will be changed to account for that - # before it's actually used. - endTagName = self.endTagName - - if namespace != self.namespace and self.namespace is not None: - # If the current default namespace is not the namespace of this tag - # (and this tag has a namespace at all) then we'll write out - # something related to namespaces. - if self.namespace in nsprefixes: - # This tag's namespace already has a prefix bound to it. Use - # that prefix. - prefix = nsprefixes[self.namespace] - bext(prefix + ':' + self.tagName) - # Also make sure we use it for the end tag. - endTagName = prefix + ':' + self.endTagName - else: - # This tag's namespace has no prefix bound to it. Change the - # default namespace to this tag's namespace so we don't need - # prefixes. Alternatively, we could add a new prefix binding. - # I'm not sure why the code was written one way rather than the - # other. -exarkun - bext(self.tagName) - writeattr("xmlns", self.namespace) - # The default namespace just changed. Make sure any children - # know about this. - namespace = self.namespace - else: - # This tag has no namespace or its namespace is already the default - # namespace. Nothing extra to do here. - bext(self.tagName) - - j = ''.join - for attr, val in self.attributes.iteritems(): - if isinstance(attr, tuple): - ns, key = attr - if nsprefixes.has_key(ns): - prefix = nsprefixes[ns] - else: - prefix = genprefix() - newprefixes[ns] = prefix - assert val is not None - writeattr(prefix+':'+key,val) - else: - assert val is not None - writeattr(attr, val) - if newprefixes: - for ns, prefix in newprefixes.iteritems(): - if prefix: - writeattr('xmlns:'+prefix, ns) - newprefixes.update(nsprefixes) - downprefixes = newprefixes - else: - downprefixes = nsprefixes - w(j(begin)) - if self.childNodes: - w(">") - newindent = indent + addindent - for child in self.childNodes: - if self.tagName in BLOCKELEMENTS and \ - self.tagName in FORMATNICELY: - w(j((newl, newindent))) - child.writexml(stream, newindent, addindent, newl, strip, - downprefixes, namespace) - if self.tagName in BLOCKELEMENTS: - w(j((newl, indent))) - w(j(('</', endTagName, '>'))) - elif self.tagName.lower() not in ALLOWSINGLETON: - w(j(('></', endTagName, '>'))) - else: - w(" />") - - - def __repr__(self): - rep = "Element(%s" % repr(self.nodeName) - if self.attributes: - rep += ", attributes=%r" % (self.attributes,) - if self._filename: - rep += ", filename=%r" % (self._filename,) - if self._markpos: - rep += ", markpos=%r" % (self._markpos,) - return rep + ')' - - def __str__(self): - rep = "<" + self.nodeName - if self._filename or self._markpos: - rep += " (" - if self._filename: - rep += repr(self._filename) - if self._markpos: - rep += " line %s column %s" % self._markpos - if self._filename or self._markpos: - rep += ")" - for item in self.attributes.items(): - rep += " %s=%r" % item - if self.hasChildNodes(): - rep += " >...</%s>" % self.nodeName - else: - rep += " />" - return rep - -def _unescapeDict(d): - dd = {} - for k, v in d.items(): - dd[k] = unescape(v) - return dd - -def _reverseDict(d): - dd = {} - for k, v in d.items(): - dd[v]=k - return dd - -class MicroDOMParser(XMLParser): - - # <dash> glyph: a quick scan thru the DTD says BODY, AREA, LINK, IMG, HR, - # P, DT, DD, LI, INPUT, OPTION, THEAD, TFOOT, TBODY, COLGROUP, COL, TR, TH, - # TD, HEAD, BASE, META, HTML all have optional closing tags - - soonClosers = 'area link br img hr input base meta'.split() - laterClosers = {'p': ['p', 'dt'], - 'dt': ['dt','dd'], - 'dd': ['dt', 'dd'], - 'li': ['li'], - 'tbody': ['thead', 'tfoot', 'tbody'], - 'thead': ['thead', 'tfoot', 'tbody'], - 'tfoot': ['thead', 'tfoot', 'tbody'], - 'colgroup': ['colgroup'], - 'col': ['col'], - 'tr': ['tr'], - 'td': ['td'], - 'th': ['th'], - 'head': ['body'], - 'title': ['head', 'body'], # this looks wrong... - 'option': ['option'], - } - - - def __init__(self, beExtremelyLenient=0, caseInsensitive=1, preserveCase=0, - soonClosers=soonClosers, laterClosers=laterClosers): - self.elementstack = [] - d = {'xmlns': 'xmlns', '': None} - dr = _reverseDict(d) - self.nsstack = [(d,None,dr)] - self.documents = [] - self._mddoctype = None - self.beExtremelyLenient = beExtremelyLenient - self.caseInsensitive = caseInsensitive - self.preserveCase = preserveCase or not caseInsensitive - self.soonClosers = soonClosers - self.laterClosers = laterClosers - # self.indentlevel = 0 - - def shouldPreserveSpace(self): - for edx in xrange(len(self.elementstack)): - el = self.elementstack[-edx] - if el.tagName == 'pre' or el.getAttribute("xml:space", '') == 'preserve': - return 1 - return 0 - - def _getparent(self): - if self.elementstack: - return self.elementstack[-1] - else: - return None - - COMMENT = re.compile(r"\s*/[/*]\s*") - - def _fixScriptElement(self, el): - # this deals with case where there is comment or CDATA inside - # <script> tag and we want to do the right thing with it - if not self.beExtremelyLenient or not len(el.childNodes) == 1: - return - c = el.firstChild() - if isinstance(c, Text): - # deal with nasty people who do stuff like: - # <script> // <!-- - # x = 1; - # // --></script> - # tidy does this, for example. - prefix = "" - oldvalue = c.value - match = self.COMMENT.match(oldvalue) - if match: - prefix = match.group() - oldvalue = oldvalue[len(prefix):] - - # now see if contents are actual node and comment or CDATA - try: - e = parseString("<a>%s</a>" % oldvalue).childNodes[0] - except (ParseError, MismatchedTags): - return - if len(e.childNodes) != 1: - return - e = e.firstChild() - if isinstance(e, (CDATASection, Comment)): - el.childNodes = [] - if prefix: - el.childNodes.append(Text(prefix)) - el.childNodes.append(e) - - def gotDoctype(self, doctype): - self._mddoctype = doctype - - def gotTagStart(self, name, attributes): - # print ' '*self.indentlevel, 'start tag',name - # self.indentlevel += 1 - parent = self._getparent() - if (self.beExtremelyLenient and isinstance(parent, Element)): - parentName = parent.tagName - myName = name - if self.caseInsensitive: - parentName = parentName.lower() - myName = myName.lower() - if myName in self.laterClosers.get(parentName, []): - self.gotTagEnd(parent.tagName) - parent = self._getparent() - attributes = _unescapeDict(attributes) - namespaces = self.nsstack[-1][0] - newspaces = {} - for k, v in attributes.items(): - if k.startswith('xmlns'): - spacenames = k.split(':',1) - if len(spacenames) == 2: - newspaces[spacenames[1]] = v - else: - newspaces[''] = v - del attributes[k] - if newspaces: - namespaces = namespaces.copy() - namespaces.update(newspaces) - for k, v in attributes.items(): - ksplit = k.split(':', 1) - if len(ksplit) == 2: - pfx, tv = ksplit - if pfx != 'xml' and pfx in namespaces: - attributes[namespaces[pfx], tv] = v - del attributes[k] - el = Element(name, attributes, parent, - self.filename, self.saveMark(), - caseInsensitive=self.caseInsensitive, - preserveCase=self.preserveCase, - namespace=namespaces.get('')) - revspaces = _reverseDict(newspaces) - el.addPrefixes(revspaces) - - if newspaces: - rscopy = self.nsstack[-1][2].copy() - rscopy.update(revspaces) - self.nsstack.append((namespaces, el, rscopy)) - self.elementstack.append(el) - if parent: - parent.appendChild(el) - if (self.beExtremelyLenient and el.tagName in self.soonClosers): - self.gotTagEnd(name) - - def _gotStandalone(self, factory, data): - parent = self._getparent() - te = factory(data, parent) - if parent: - parent.appendChild(te) - elif self.beExtremelyLenient: - self.documents.append(te) - - def gotText(self, data): - if data.strip() or self.shouldPreserveSpace(): - self._gotStandalone(Text, data) - - def gotComment(self, data): - self._gotStandalone(Comment, data) - - def gotEntityReference(self, entityRef): - self._gotStandalone(EntityReference, entityRef) - - def gotCData(self, cdata): - self._gotStandalone(CDATASection, cdata) - - def gotTagEnd(self, name): - # print ' '*self.indentlevel, 'end tag',name - # self.indentlevel -= 1 - if not self.elementstack: - if self.beExtremelyLenient: - return - raise MismatchedTags(*((self.filename, "NOTHING", name) - +self.saveMark()+(0,0))) - el = self.elementstack.pop() - pfxdix = self.nsstack[-1][2] - if self.nsstack[-1][1] is el: - nstuple = self.nsstack.pop() - else: - nstuple = None - if self.caseInsensitive: - tn = el.tagName.lower() - cname = name.lower() - else: - tn = el.tagName - cname = name - - nsplit = name.split(':',1) - if len(nsplit) == 2: - pfx, newname = nsplit - ns = pfxdix.get(pfx,None) - if ns is not None: - if el.namespace != ns: - if not self.beExtremelyLenient: - raise MismatchedTags(*((self.filename, el.tagName, name) - +self.saveMark()+el._markpos)) - if not (tn == cname): - if self.beExtremelyLenient: - if self.elementstack: - lastEl = self.elementstack[0] - for idx in xrange(len(self.elementstack)): - if self.elementstack[-(idx+1)].tagName == cname: - self.elementstack[-(idx+1)].endTag(name) - break - else: - # this was a garbage close tag; wait for a real one - self.elementstack.append(el) - if nstuple is not None: - self.nsstack.append(nstuple) - return - del self.elementstack[-(idx+1):] - if not self.elementstack: - self.documents.append(lastEl) - return - else: - raise MismatchedTags(*((self.filename, el.tagName, name) - +self.saveMark()+el._markpos)) - el.endTag(name) - if not self.elementstack: - self.documents.append(el) - if self.beExtremelyLenient and el.tagName == "script": - self._fixScriptElement(el) - - def connectionLost(self, reason): - XMLParser.connectionLost(self, reason) # This can cause more events! - if self.elementstack: - if self.beExtremelyLenient: - self.documents.append(self.elementstack[0]) - else: - raise MismatchedTags(*((self.filename, self.elementstack[-1], - "END_OF_FILE") - +self.saveMark() - +self.elementstack[-1]._markpos)) - - -def parse(readable, *args, **kwargs): - """Parse HTML or XML readable.""" - if not hasattr(readable, "read"): - readable = open(readable, "rb") - mdp = MicroDOMParser(*args, **kwargs) - mdp.filename = getattr(readable, "name", "<xmlfile />") - mdp.makeConnection(None) - if hasattr(readable,"getvalue"): - mdp.dataReceived(readable.getvalue()) - else: - r = readable.read(1024) - while r: - mdp.dataReceived(r) - r = readable.read(1024) - mdp.connectionLost(None) - - if not mdp.documents: - raise ParseError(mdp.filename, 0, 0, "No top-level Nodes in document") - - if mdp.beExtremelyLenient: - if len(mdp.documents) == 1: - d = mdp.documents[0] - if not isinstance(d, Element): - el = Element("html") - el.appendChild(d) - d = el - else: - d = Element("html") - for child in mdp.documents: - d.appendChild(child) - else: - d = mdp.documents[0] - doc = Document(d) - doc.doctype = mdp._mddoctype - return doc - -def parseString(st, *args, **kw): - if isinstance(st, UnicodeType): - # this isn't particularly ideal, but it does work. - return parse(StringIO(st.encode('UTF-16')), *args, **kw) - return parse(StringIO(st), *args, **kw) - - -def parseXML(readable): - """Parse an XML readable object.""" - return parse(readable, caseInsensitive=0, preserveCase=1) - - -def parseXMLString(st): - """Parse an XML readable object.""" - return parseString(st, caseInsensitive=0, preserveCase=1) - - -# Utility - -class lmx: - """Easy creation of XML.""" - - def __init__(self, node='div'): - if isinstance(node, StringTypes): - node = Element(node) - self.node = node - - def __getattr__(self, name): - if name[0] == '_': - raise AttributeError("no private attrs") - return lambda **kw: self.add(name,**kw) - - def __setitem__(self, key, val): - self.node.setAttribute(key, val) - - def __getitem__(self, key): - return self.node.getAttribute(key) - - def text(self, txt, raw=0): - nn = Text(txt, raw=raw) - self.node.appendChild(nn) - return self - - def add(self, tagName, **kw): - newNode = Element(tagName, caseInsensitive=0, preserveCase=0) - self.node.appendChild(newNode) - xf = lmx(newNode) - for k, v in kw.items(): - if k[0] == '_': - k = k[1:] - xf[k]=v - return xf |