» Core Development > Code coverage > Lib/xml/etree/ElementTree.py

Python code coverage for Lib/xml/etree/ElementTree.py

# | count | content
1n/a"""Lightweight XML support for Python.
2n/a
3n/a XML is an inherently hierarchical data format, and the most natural way to
4n/a represent it is with a tree. This module has two classes for this purpose:
5n/a
6n/a 1. ElementTree represents the whole XML document as a tree and
7n/a
8n/a 2. Element represents a single node in this tree.
9n/a
10n/a Interactions with the whole document (reading and writing to/from files) are
11n/a usually done on the ElementTree level. Interactions with a single XML element
12n/a and its sub-elements are done on the Element level.
13n/a
14n/a Element is a flexible container object designed to store hierarchical data
15n/a structures in memory. It can be described as a cross between a list and a
16n/a dictionary. Each Element has a number of properties associated with it:
17n/a
18n/a 'tag' - a string containing the element's name.
19n/a
20n/a 'attributes' - a Python dictionary storing the element's attributes.
21n/a
22n/a 'text' - a string containing the element's text content.
23n/a
24n/a 'tail' - an optional string containing text after the element's end tag.
25n/a
26n/a And a number of child elements stored in a Python sequence.
27n/a
28n/a To create an element instance, use the Element constructor,
29n/a or the SubElement factory function.
30n/a
31n/a You can also use the ElementTree class to wrap an element structure
32n/a and convert it to and from XML.
33n/a
34n/a"""
35n/a
36n/a#---------------------------------------------------------------------
37n/a# Licensed to PSF under a Contributor Agreement.
38n/a# See http://www.python.org/psf/license for licensing details.
39n/a#
40n/a# ElementTree
41n/a# Copyright (c) 1999-2008 by Fredrik Lundh. All rights reserved.
42n/a#
43n/a# fredrik@pythonware.com
44n/a# http://www.pythonware.com
45n/a# --------------------------------------------------------------------
46n/a# The ElementTree toolkit is
47n/a#
48n/a# Copyright (c) 1999-2008 by Fredrik Lundh
49n/a#
50n/a# By obtaining, using, and/or copying this software and/or its
51n/a# associated documentation, you agree that you have read, understood,
52n/a# and will comply with the following terms and conditions:
53n/a#
54n/a# Permission to use, copy, modify, and distribute this software and
55n/a# its associated documentation for any purpose and without fee is
56n/a# hereby granted, provided that the above copyright notice appears in
57n/a# all copies, and that both that copyright notice and this permission
58n/a# notice appear in supporting documentation, and that the name of
59n/a# Secret Labs AB or the author not be used in advertising or publicity
60n/a# pertaining to distribution of the software without specific, written
61n/a# prior permission.
62n/a#
63n/a# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
64n/a# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
65n/a# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
66n/a# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
67n/a# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
68n/a# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
69n/a# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
70n/a# OF THIS SOFTWARE.
71n/a# --------------------------------------------------------------------
72n/a
73n/a__all__ = [
74n/a # public symbols
75n/a "Comment",
76n/a "dump",
77n/a "Element", "ElementTree",
78n/a "fromstring", "fromstringlist",
79n/a "iselement", "iterparse",
80n/a "parse", "ParseError",
81n/a "PI", "ProcessingInstruction",
82n/a "QName",
83n/a "SubElement",
84n/a "tostring", "tostringlist",
85n/a "TreeBuilder",
86n/a "VERSION",
87n/a "XML", "XMLID",
88n/a "XMLParser", "XMLPullParser",
89n/a "register_namespace",
90n/a ]
91n/a
92n/aVERSION = "1.3.0"
93n/a
94n/aimport sys
95n/aimport re
96n/aimport warnings
97n/aimport io
98n/aimport collections
99n/aimport contextlib
100n/a
101n/afrom . import ElementPath
102n/a
103n/a
class ParseError(SyntaxError):
    """Raised when an XML document cannot be parsed.

    Besides the regular exception value, a ParseError carries two extra
    attributes:
        'code' - the specific exception code
        'position' - the line and column of the error

    """
114n/a
115n/a# --------------------------------------------------------------------
116n/a
117n/a
def iselement(element):
    """Return True if *element* looks like an Element.

    The check is duck-typed: any object exposing a 'tag' attribute qualifies.
    """
    looks_like_element = hasattr(element, "tag")
    return looks_like_element
121n/a
122n/a
class Element:
    """An XML element.

    This class is the reference implementation of the Element interface.

    An element's length is its number of subelements. That means if you
    want to check if an element is truly empty, you should check BOTH
    its length AND its text attribute.

    The element tag, attribute names, and attribute values can be either
    bytes or strings.

    *tag* is the element name. *attrib* is an optional dictionary containing
    element attributes. *extra* are additional element attributes given as
    keyword arguments.

    Example form:
        <tag attrib>text<child/>...</tag>tail

    """

    # The element's name.
    tag = None

    # Dictionary of the element's attributes.
    attrib = None

    # Text before the first subelement. This is either a string or None.
    # If there is no text, it may be either None or the empty string,
    # depending on the parser.
    text = None

    # Text after this element's end tag, but before the next sibling
    # element's start tag. This is either a string or None. If there was no
    # text, it may be either None or an empty string, depending on the parser.
    tail = None

    def __init__(self, tag, attrib={}, **extra):
        if not isinstance(attrib, dict):
            raise TypeError("attrib must be dict, not %s" % (
                attrib.__class__.__name__,))
        # Work on a copy so that neither the shared default dict nor the
        # caller's dict is mutated by the update below.
        attrib = attrib.copy()
        attrib.update(extra)
        self.tag = tag
        self.attrib = attrib
        self._children = []

    def __repr__(self):
        return "<%s %r at %#x>" % (self.__class__.__name__, self.tag, id(self))

    def makeelement(self, tag, attrib):
        """Create a new element with the same type.

        *tag* is a string containing the element name.
        *attrib* is a dictionary containing the element attributes.

        Do not call this method, use the SubElement factory function instead.

        """
        return self.__class__(tag, attrib)

    def copy(self):
        """Return copy of current element.

        This creates a shallow copy. Subelements will be shared with the
        original tree.

        """
        elem = self.makeelement(self.tag, self.attrib)
        elem.text = self.text
        elem.tail = self.tail
        elem[:] = self
        return elem

    def __len__(self):
        return len(self._children)

    def __bool__(self):
        warnings.warn(
            "The behavior of this method will change in future versions. "
            "Use specific 'len(elem)' or 'elem is not None' test instead.",
            FutureWarning, stacklevel=2
            )
        return len(self._children) != 0  # emulate old behaviour, for now

    def __getitem__(self, index):
        return self._children[index]

    def __setitem__(self, index, element):
        # No type check is performed here; index may be an int or a slice.
        self._children[index] = element

    def __delitem__(self, index):
        del self._children[index]

    def append(self, subelement):
        """Add *subelement* to the end of this element.

        The new element will appear in document order after the last existing
        subelement (or directly after the text, if it's the first subelement),
        but before the end tag for this element.

        Raises TypeError if *subelement* is not an Element.

        """
        self._assert_is_element(subelement)
        self._children.append(subelement)

    def extend(self, elements):
        """Append subelements from an iterable.

        *elements* is any iterable yielding zero or more elements; one-shot
        iterators and generators are supported.

        Raises TypeError if any item is not an Element; in that case no item
        is added.

        """
        # Materialise first: the items are walked twice (validate, then add),
        # which would silently drop everything from a one-shot iterator and
        # would never terminate when an element is extended with itself.
        elements = list(elements)
        for element in elements:
            self._assert_is_element(element)
        self._children.extend(elements)

    def insert(self, index, subelement):
        """Insert *subelement* at position *index*.

        Raises TypeError if *subelement* is not an Element.

        """
        self._assert_is_element(subelement)
        self._children.insert(index, subelement)

    def _assert_is_element(self, e):
        # Need to refer to the actual Python implementation, not the
        # shadowing C implementation.
        if not isinstance(e, _Element_Py):
            raise TypeError('expected an Element, not %s' % type(e).__name__)

    def remove(self, subelement):
        """Remove matching subelement.

        Unlike the find methods, this method compares elements based on
        identity, NOT ON tag value or contents. To remove subelements by
        other means, the easiest way is to use a list comprehension to
        select what elements to keep, and then use slice assignment to update
        the parent element.

        ValueError is raised if a matching element could not be found.

        """
        self._children.remove(subelement)

    def getchildren(self):
        """(Deprecated) Return all subelements.

        Elements are returned in document order. The returned object is the
        element's own child list, not a copy.

        """
        warnings.warn(
            "This method will be removed in future versions. "
            "Use 'list(elem)' or iteration over elem instead.",
            DeprecationWarning, stacklevel=2
            )
        return self._children

    def find(self, path, namespaces=None):
        """Find first matching element by tag name or path.

        *path* is a string having either an element tag or an XPath,
        *namespaces* is an optional mapping from namespace prefix to full name.

        Return the first matching element, or None if no element was found.

        """
        return ElementPath.find(self, path, namespaces)

    def findtext(self, path, default=None, namespaces=None):
        """Find text for first matching element by tag name or path.

        *path* is a string having either an element tag or an XPath,
        *default* is the value to return if the element was not found,
        *namespaces* is an optional mapping from namespace prefix to full name.

        Return text content of first matching element, or default value if
        none was found. Note that if an element is found having no text
        content, the empty string is returned.

        """
        return ElementPath.findtext(self, path, default, namespaces)

    def findall(self, path, namespaces=None):
        """Find all matching subelements by tag name or path.

        *path* is a string having either an element tag or an XPath,
        *namespaces* is an optional mapping from namespace prefix to full name.

        Returns list containing all matching elements in document order.

        """
        return ElementPath.findall(self, path, namespaces)

    def iterfind(self, path, namespaces=None):
        """Find all matching subelements by tag name or path.

        *path* is a string having either an element tag or an XPath,
        *namespaces* is an optional mapping from namespace prefix to full name.

        Return an iterable yielding all matching elements in document order.

        """
        return ElementPath.iterfind(self, path, namespaces)

    def clear(self):
        """Reset element.

        This function removes all subelements, clears all attributes, and sets
        the text and tail attributes to None.

        """
        self.attrib.clear()
        self._children = []
        self.text = self.tail = None

    def get(self, key, default=None):
        """Get element attribute.

        Equivalent to attrib.get, but some implementations may handle this a
        bit more efficiently. *key* is what attribute to look for, and
        *default* is what to return if the attribute was not found.

        Returns the attribute value, or the default if the attribute was not
        found.

        """
        return self.attrib.get(key, default)

    def set(self, key, value):
        """Set element attribute.

        Equivalent to attrib[key] = value, but some implementations may handle
        this a bit more efficiently. *key* is what attribute to set, and
        *value* is the attribute value to set it to.

        """
        self.attrib[key] = value

    def keys(self):
        """Get the attribute names.

        Equivalent to attrib.keys(): the result is the view returned by the
        attribute dictionary, in that dictionary's order.

        """
        return self.attrib.keys()

    def items(self):
        """Get element attributes as (name, value) pairs.

        Equivalent to attrib.items(): the result is the view returned by the
        attribute dictionary, in that dictionary's order.

        """
        return self.attrib.items()

    def iter(self, tag=None):
        """Create tree iterator.

        The iterator loops over the element and all subelements in document
        order, returning all elements with a matching tag.

        If the tree structure is modified during iteration, new or removed
        elements may or may not be included. To get a stable set, use the
        list() function on the iterator, and loop over the resulting list.

        *tag* is what tags to look for (default is to return all elements);
        "*" is treated the same as None.

        Return an iterator containing all the matching elements.

        """
        if tag == "*":
            tag = None
        if tag is None or self.tag == tag:
            yield self
        for e in self._children:
            yield from e.iter(tag)

    # compatibility
    def getiterator(self, tag=None):
        # Change for a DeprecationWarning in 1.4
        warnings.warn(
            "This method will be removed in future versions. "
            "Use 'elem.iter()' or 'list(elem.iter())' instead.",
            PendingDeprecationWarning, stacklevel=2
        )
        return list(self.iter(tag))

    def itertext(self):
        """Create text iterator.

        The iterator loops over the element and all subelements in document
        order, returning all inner text. Elements whose tag is neither a
        string nor None contribute no text of their own (their tail is still
        yielded by the parent).

        """
        tag = self.tag
        if not isinstance(tag, str) and tag is not None:
            return
        t = self.text
        if t:
            yield t
        for e in self:
            yield from e.itertext()
            t = e.tail
            if t:
                yield t
440n/a
441n/a
def SubElement(parent, tag, attrib={}, **extra):
    """Create a new element and append it to *parent*.

    The element tag, attribute names, and attribute values can be either
    bytes or Unicode strings.

    *parent* is the parent element, *tag* is the subelement's name, *attrib*
    is an optional dictionary containing element attributes, and *extra*
    are additional attributes given as keyword arguments.

    Returns the newly created element.

    """
    # Merge into a private copy so the caller's dict (and the shared default)
    # stay untouched.
    merged = attrib.copy()
    merged.update(extra)
    child = parent.makeelement(tag, merged)
    parent.append(child)
    return child
459n/a
460n/a
def Comment(text=None):
    """Build a comment element.

    The returned element uses this function object itself as its tag, which
    is how the standard serializer recognises it and writes it out as an
    XML comment.

    *text* is a string containing the comment string.

    """
    comment = Element(Comment)
    comment.text = text
    return comment
473n/a
474n/a
def ProcessingInstruction(target, text=None):
    """Processing Instruction element factory.

    This function creates a special element which the standard serializer
    serializes as an XML processing instruction (``<?target text?>``).
    The returned element uses this function object itself as its tag.

    *target* is a string containing the processing instruction target,
    *text* is a string containing the processing instruction contents,
    if any.

    """
    element = Element(ProcessingInstruction)
    element.text = target
    if text:
        # Target and contents are stored together, separated by one space.
        element.text = element.text + " " + text
    return element
490n/a
491n/aPI = ProcessingInstruction
492n/a
493n/a
class QName:
    """Wrapper for a qualified name.

    Wrap a QName attribute value in this class to get proper namespace
    handling on output.

    *text_or_uri* is a string containing the QName value either in the form
    {uri}local, or if the tag argument is given, the URI part of a QName.

    *tag* is an optional argument which if given, will make the first
    argument (text_or_uri) be interpreted as a URI, and this argument (tag)
    be interpreted as a local name.

    Instances hash like, and compare with, their text; the other operand may
    be another QName or a plain value.

    """

    def __init__(self, text_or_uri, tag=None):
        self.text = "{%s}%s" % (text_or_uri, tag) if tag else text_or_uri

    def __str__(self):
        return self.text

    def __repr__(self):
        return '<%s %r>' % (self.__class__.__name__, self.text)

    def __hash__(self):
        return hash(self.text)

    # Each comparison unwraps a QName operand to its text and then defers to
    # the comparison of the underlying values.

    def __le__(self, other):
        rhs = other.text if isinstance(other, QName) else other
        return self.text <= rhs

    def __lt__(self, other):
        rhs = other.text if isinstance(other, QName) else other
        return self.text < rhs

    def __ge__(self, other):
        rhs = other.text if isinstance(other, QName) else other
        return self.text >= rhs

    def __gt__(self, other):
        rhs = other.text if isinstance(other, QName) else other
        return self.text > rhs

    def __eq__(self, other):
        rhs = other.text if isinstance(other, QName) else other
        return self.text == rhs
538n/a
539n/a# --------------------------------------------------------------------
540n/a
541n/a
class ElementTree:
    """An XML element hierarchy.

    This class also provides support for serialization to and from
    standard XML.

    *element* is an optional root element node,
    *file* is an optional file handle or file name of an XML file whose
    contents will be used to initialize the tree with.

    """
    def __init__(self, element=None, file=None):
        self._root = element  # first node
        if file:
            self.parse(file)

    def getroot(self):
        """Return root element of this tree."""
        return self._root

    def _setroot(self, element):
        """Replace root element of this tree.

        This will discard the current contents of the tree and replace it
        with the given element. Use with care!

        """
        self._root = element

    def parse(self, source, parser=None):
        """Load external XML document into element tree.

        *source* is a file name or file object, *parser* is an optional parser
        instance that defaults to XMLParser.

        ParseError is raised if the parser fails to parse the document.

        Returns the root element of the given source document.

        """
        close_source = False
        if not hasattr(source, "read"):
            # A file name was given: open it here and close it when done.
            source = open(source, "rb")
            close_source = True
        try:
            if parser is None:
                # If no parser was specified, create a default XMLParser
                parser = XMLParser()
                if hasattr(parser, '_parse_whole'):
                    # The default XMLParser, when it comes from an accelerator,
                    # can define an internal _parse_whole API for efficiency.
                    # It can be used to parse the whole source without feeding
                    # it with chunks.
                    self._root = parser._parse_whole(source)
                    return self._root
            while True:
                data = source.read(65536)
                if not data:
                    break
                parser.feed(data)
            self._root = parser.close()
            return self._root
        finally:
            if close_source:
                source.close()

    def iter(self, tag=None):
        """Create and return tree iterator for the root element.

        The iterator loops over all elements in this tree, in document order.

        *tag* is a string with the tag name to iterate over
        (default is to return all elements).

        """
        return self._root.iter(tag)

    # compatibility
    def getiterator(self, tag=None):
        # Change for a DeprecationWarning in 1.4
        warnings.warn(
            "This method will be removed in future versions. "
            "Use 'tree.iter()' or 'list(tree.iter())' instead.",
            PendingDeprecationWarning, stacklevel=2
        )
        return list(self.iter(tag))

    @staticmethod
    def _relative_path(path):
        """Return *path* rewritten as a search relative to the root element.

        A path starting with "/" gets "." prepended and a FutureWarning is
        issued; any other path is returned unchanged.

        """
        if path[:1] == "/":
            path = "." + path
            # stacklevel=3: skip this helper and the public find* method so
            # the warning is attributed to the code that called find*().
            warnings.warn(
                "This search is broken in 1.3 and earlier, and will be "
                "fixed in a future version. If you rely on the current "
                "behaviour, change it to %r" % path,
                FutureWarning, stacklevel=3
                )
        return path

    def find(self, path, namespaces=None):
        """Find first matching element by tag name or path.

        Same as getroot().find(path), which is Element.find()

        *path* is a string having either an element tag or an XPath,
        *namespaces* is an optional mapping from namespace prefix to full name.

        Return the first matching element, or None if no element was found.

        """
        return self._root.find(self._relative_path(path), namespaces)

    def findtext(self, path, default=None, namespaces=None):
        """Find text for first matching element by tag name or path.

        Same as getroot().findtext(path), which is Element.findtext()

        *path* is a string having either an element tag or an XPath,
        *default* is the value to return if the element was not found,
        *namespaces* is an optional mapping from namespace prefix to full name.

        Return the text of the first matching element, or *default* if no
        element was found.

        """
        return self._root.findtext(self._relative_path(path), default,
                                   namespaces)

    def findall(self, path, namespaces=None):
        """Find all matching subelements by tag name or path.

        Same as getroot().findall(path), which is Element.findall().

        *path* is a string having either an element tag or an XPath,
        *namespaces* is an optional mapping from namespace prefix to full name.

        Return list containing all matching elements in document order.

        """
        return self._root.findall(self._relative_path(path), namespaces)

    def iterfind(self, path, namespaces=None):
        """Find all matching subelements by tag name or path.

        Same as getroot().iterfind(path), which is element.iterfind()

        *path* is a string having either an element tag or an XPath,
        *namespaces* is an optional mapping from namespace prefix to full name.

        Return an iterable yielding all matching elements in document order.

        """
        return self._root.iterfind(self._relative_path(path), namespaces)

    def write(self, file_or_filename,
              encoding=None,
              xml_declaration=None,
              default_namespace=None,
              method=None, *,
              short_empty_elements=True):
        """Write element tree to a file as XML.

        Arguments:
          *file_or_filename* -- file name or a file object opened for writing

          *encoding* -- the output encoding (default: US-ASCII, or UTF-8
                        when *method* is "c14n")

          *xml_declaration* -- bool indicating if an XML declaration should be
                               added to the output. If None, an XML declaration
                               is added if encoding IS NOT either of:
                               US-ASCII, UTF-8, or Unicode

          *default_namespace* -- sets the default XML namespace (for "xmlns")

          *method* -- either "xml" (default), "html", "text", or "c14n"

          *short_empty_elements* -- controls the formatting of elements
                                    that contain no content. If True (default)
                                    they are emitted as a single self-closed
                                    tag, otherwise they are emitted as a pair
                                    of start/end tags

        Raises ValueError if *method* is not a known serialization method.

        """
        if not method:
            method = "xml"
        elif method not in _serialize:
            raise ValueError("unknown method %r" % method)
        if not encoding:
            if method == "c14n":
                encoding = "utf-8"
            else:
                encoding = "us-ascii"
        enc_lower = encoding.lower()
        with _get_writer(file_or_filename, enc_lower) as write:
            if method == "xml" and (xml_declaration or
                    (xml_declaration is None and
                     enc_lower not in ("utf-8", "us-ascii", "unicode"))):
                declared_encoding = encoding
                if enc_lower == "unicode":
                    # Retrieve the default encoding for the xml declaration
                    import locale
                    declared_encoding = locale.getpreferredencoding()
                write("<?xml version='1.0' encoding='%s'?>\n" % (
                    declared_encoding,))
            if method == "text":
                _serialize_text(write, self._root)
            else:
                qnames, namespaces = _namespaces(self._root, default_namespace)
                serialize = _serialize[method]
                serialize(write, self._root, qnames, namespaces,
                          short_empty_elements=short_empty_elements)

    def write_c14n(self, file):
        # lxml.etree compatibility.  use output method instead
        return self.write(file, method="c14n")
781n/a
782n/a# --------------------------------------------------------------------
783n/a# serialization support
784n/a
785n/a@contextlib.contextmanager
786n/adef _get_writer(file_or_filename, encoding):
787n/a # returns text write method and release all resources after using
788n/a try:
789n/a write = file_or_filename.write
790n/a except AttributeError:
791n/a # file_or_filename is a file name
792n/a if encoding == "unicode":
793n/a file = open(file_or_filename, "w")
794n/a else:
795n/a file = open(file_or_filename, "w", encoding=encoding,
796n/a errors="xmlcharrefreplace")
797n/a with file:
798n/a yield file.write
799n/a else:
800n/a # file_or_filename is a file-like object
801n/a # encoding determines if it is a text or binary writer
802n/a if encoding == "unicode":
803n/a # use a text writer as is
804n/a yield write
805n/a else:
806n/a # wrap a binary writer with TextIOWrapper
807n/a with contextlib.ExitStack() as stack:
808n/a if isinstance(file_or_filename, io.BufferedIOBase):
809n/a file = file_or_filename
810n/a elif isinstance(file_or_filename, io.RawIOBase):
811n/a file = io.BufferedWriter(file_or_filename)
812n/a # Keep the original file open when the BufferedWriter is
813n/a # destroyed
814n/a stack.callback(file.detach)
815n/a else:
816n/a # This is to handle passed objects that aren't in the
817n/a # IOBase hierarchy, but just have a write method
818n/a file = io.BufferedIOBase()
819n/a file.writable = lambda: True
820n/a file.write = write
821n/a try:
822n/a # TextIOWrapper uses this methods to determine
823n/a # if BOM (for UTF-16, etc) should be added
824n/a file.seekable = file_or_filename.seekable
825n/a file.tell = file_or_filename.tell
826n/a except AttributeError:
827n/a pass
828n/a file = io.TextIOWrapper(file,
829n/a encoding=encoding,
830n/a errors="xmlcharrefreplace",
831n/a newline="\n")
832n/a # Keep the original file open when the TextIOWrapper is
833n/a # destroyed
834n/a stack.callback(file.detach)
835n/a yield file.write
836n/a
def _namespaces(elem, default_namespace=None):
    """Identify the qualified names and namespaces used in a tree.

    Walks *elem* and everything below it, looking at tags, attribute names,
    and QName-valued attribute values and text.

    Returns a ``(qnames, namespaces)`` pair: *qnames* maps each name to its
    serialized ``prefix:local`` form, *namespaces* maps URIs to prefixes.

    Raises ValueError when *default_namespace* is given and a non-qualified
    name is encountered.
    """
    # name -> serialized form; None is pre-seeded and maps to None
    qnames = {None: None}

    # uri -> prefix; the default namespace gets the empty prefix
    namespaces = {}
    if default_namespace:
        namespaces[default_namespace] = ""

    def add_qname(qname):
        # Work out the serialized representation of *qname* and record it.
        try:
            if qname[:1] != "{":
                if default_namespace:
                    # FIXME: can this be handled in XML 1.0?
                    raise ValueError(
                        "cannot use non-qualified names with "
                        "default_namespace option"
                        )
                qnames[qname] = qname
                return
            uri, local = qname[1:].rsplit("}", 1)
            prefix = namespaces.get(uri)
            if prefix is None:
                prefix = _namespace_map.get(uri)
                if prefix is None:
                    prefix = "ns%d" % len(namespaces)
                if prefix != "xml":
                    namespaces[uri] = prefix
            if prefix:
                qnames[qname] = "%s:%s" % (prefix, local)
            else:
                qnames[qname] = local  # default element
        except TypeError:
            _raise_serialization_error(qname)

    # Fill both tables from every element in the tree.
    for node in elem.iter():
        tag = node.tag
        if isinstance(tag, QName):
            if tag.text not in qnames:
                add_qname(tag.text)
        elif isinstance(tag, str):
            if tag not in qnames:
                add_qname(tag)
        elif not (tag is None or tag is Comment or tag is PI):
            _raise_serialization_error(tag)
        for name, value in node.items():
            if isinstance(name, QName):
                name = name.text
            if name not in qnames:
                add_qname(name)
            if isinstance(value, QName) and value.text not in qnames:
                add_qname(value.text)
        content = node.text
        if isinstance(content, QName) and content.text not in qnames:
            add_qname(content.text)
    return qnames, namespaces
897n/a
def _serialize_xml(write, elem, qnames, namespaces,
                   short_empty_elements, **kwargs):
    """Write *elem* and its subtree as XML through the *write* callable.

    *qnames* maps tags and attribute names to their serialized form and
    *namespaces* maps URIs to prefixes. Recursive calls pass None for
    *namespaces*, so xmlns declarations are only written on the element
    this function was first called with. When *short_empty_elements* is
    true, an element with no text and no children is written as a
    self-closed tag.
    """
    raw_tag = elem.tag
    content = elem.text
    if raw_tag is Comment:
        write("<!--%s-->" % content)
    elif raw_tag is ProcessingInstruction:
        write("<?%s?>" % content)
    else:
        name = qnames[raw_tag]
        if name is None:
            # No tag of its own: only the text and the children are written.
            if content:
                write(_escape_cdata(content))
            for child in elem:
                _serialize_xml(write, child, qnames, None,
                               short_empty_elements=short_empty_elements)
        else:
            write("<" + name)
            attributes = list(elem.items())
            if namespaces:
                # Namespace declarations, sorted on prefix.
                declarations = sorted(namespaces.items(),
                                      key=lambda pair: pair[1])
                for uri, prefix in declarations:
                    suffix = ":" + prefix if prefix else prefix
                    write(" xmlns%s=\"%s\"" % (
                        suffix,
                        _escape_attrib(uri)
                        ))
            for key, value in sorted(attributes):  # lexical order
                if isinstance(key, QName):
                    key = key.text
                if isinstance(value, QName):
                    value = qnames[value.text]
                else:
                    value = _escape_attrib(value)
                write(" %s=\"%s\"" % (qnames[key], value))
            if content or len(elem) or not short_empty_elements:
                write(">")
                if content:
                    write(_escape_cdata(content))
                for child in elem:
                    _serialize_xml(write, child, qnames, None,
                                   short_empty_elements=short_empty_elements)
                write("</" + name + ">")
            else:
                write(" />")
    # The tail follows the element whatever kind of node it was.
    if elem.tail:
        write(_escape_cdata(elem.tail))
947n/a
# Lower-cased names of HTML elements that are written without an end tag.
# Kept as a set for fast membership tests; the historical fallback for
# interpreters lacking the set builtin is not needed on Python 3.
HTML_EMPTY = {"area", "base", "basefont", "br", "col", "frame", "hr",
              "img", "input", "isindex", "link", "meta", "param"}
955n/a
def _serialize_html(write, elem, qnames, namespaces, **kwargs):
    """Write *elem* and all of its descendants as HTML through *write*.

    *write* is a callable accepting one string, *qnames* maps tags and
    attribute names to their serialized form and *namespaces* maps namespace
    URIs to prefixes (children are serialized with None).  Elements whose
    lower-cased name is in HTML_EMPTY get no end tag, and the text of
    "script" and "style" elements is written unescaped.

    """
    node_tag = elem.tag
    content = elem.text
    if node_tag is Comment:
        write("<!--%s-->" % _escape_cdata(content))
    elif node_tag is ProcessingInstruction:
        write("<?%s?>" % _escape_cdata(content))
    else:
        name = qnames[node_tag]
        if name is None:
            # No serialized name: emit only the text and the children.
            if content:
                write(_escape_cdata(content))
            for child in elem:
                _serialize_html(write, child, qnames, None)
        else:
            write("<" + name)
            attributes = list(elem.items())
            if namespaces:
                declarations = sorted(namespaces.items(),
                                      key=lambda pair: pair[1])  # by prefix
                for uri, prefix in declarations:
                    suffix = ":" + prefix if prefix else prefix
                    write(" xmlns%s=\"%s\"" % (suffix, _escape_attrib(uri)))
            for key, value in sorted(attributes):  # lexical order
                if isinstance(key, QName):
                    key = key.text
                if isinstance(value, QName):
                    value = qnames[value.text]
                else:
                    value = _escape_attrib_html(value)
                # FIXME: handle boolean attributes
                write(" %s=\"%s\"" % (qnames[key], value))
            write(">")
            lowered = name.lower()
            if content:
                if lowered in ("script", "style"):
                    write(content)
                else:
                    write(_escape_cdata(content))
            for child in elem:
                _serialize_html(write, child, qnames, None)
            if lowered not in HTML_EMPTY:
                write("</" + name + ">")
    # The tail follows the element regardless of which branch produced it.
    if elem.tail:
        write(_escape_cdata(elem.tail))
1005n/a
def _serialize_text(write, elem):
    """Write the text parts yielded by elem.itertext(), then the tail."""
    for fragment in elem.itertext():
        write(fragment)
    trailing = elem.tail
    if trailing:
        write(trailing)
1011n/a
# Serializer lookup table, keyed by output method name.
_serialize = {
    "xml": _serialize_xml,
    "html": _serialize_html,
    "text": _serialize_text,
    # The optional "c14n" method is imported at the end of the module:
    #   "c14n": _serialize_c14n,
}
1019n/a
1020n/a
def register_namespace(prefix, uri):
    """Register a namespace prefix.

    The registry is global, and any existing mapping for either the
    given prefix or the namespace URI will be removed.

    *prefix* is the namespace prefix, *uri* is a namespace uri. Tags and
    attributes in this namespace will be serialized with prefix if possible.

    ValueError is raised if prefix is reserved or is invalid.

    """
    if re.match(r"ns\d+$", prefix):
        raise ValueError("Prefix format reserved for internal use")
    # Collect the conflicting entries first so that the registry is not
    # modified while it is being scanned.
    stale = [known_uri
             for known_uri, known_prefix in _namespace_map.items()
             if known_uri == uri or known_prefix == prefix]
    for known_uri in stale:
        del _namespace_map[known_uri]
    _namespace_map[uri] = prefix
1039n/a
# Global registry mapping namespace URIs to their preferred prefixes.
_namespace_map = {
    # "well-known" namespace prefixes
    "http://www.w3.org/XML/1998/namespace": "xml",
    "http://www.w3.org/1999/xhtml": "html",
    "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
    "http://schemas.xmlsoap.org/wsdl/": "wsdl",
    # xml schema
    "http://www.w3.org/2001/XMLSchema": "xs",
    "http://www.w3.org/2001/XMLSchema-instance": "xsi",
    # dublin core
    "http://purl.org/dc/elements/1.1/": "dc",
}
# Expose the registry on the function object, for tests and troubleshooting.
register_namespace._namespace_map = _namespace_map
1054n/a
def _raise_serialization_error(text):
    """Raise TypeError reporting that *text* (and its type) is unserializable."""
    type_name = type(text).__name__
    message = "cannot serialize %r (type %s)" % (text, type_name)
    raise TypeError(message)
1059n/a
def _escape_cdata(text):
    """Return *text* with "&", "<" and ">" replaced by entity references.

    Values that do not support the string operations used here are reported
    through _raise_serialization_error().

    """
    try:
        # Each replace() is guarded by a membership test: avoiding
        # do-nothing calls pays off for short strings, which are assumed
        # to be by far the most common case.  "&" must be handled first so
        # the ampersands of the generated entities are left alone.
        for raw, entity in (("&", "&amp;"), ("<", "&lt;"), (">", "&gt;")):
            if raw in text:
                text = text.replace(raw, entity)
        return text
    except (TypeError, AttributeError):
        _raise_serialization_error(text)
1075n/a
def _escape_attrib(text):
    """Return *text* escaped for use inside a double-quoted XML attribute.

    "&", "<", ">" and the double quote become entity references; carriage
    return, newline and tab become numeric character references so that a
    parser does not normalize them away when the attribute is read back.

    Values that do not support the string operations used here are reported
    through _raise_serialization_error().

    """
    try:
        if "&" in text:
            text = text.replace("&", "&amp;")
        if "<" in text:
            text = text.replace("<", "&lt;")
        if ">" in text:
            text = text.replace(">", "&gt;")
        if "\"" in text:
            text = text.replace("\"", "&quot;")
        # Section 2.11 of the XML specification (CR and CR LF are replaced
        # by a single LF) only concerns literal line ends in the document.
        # Here the characters are written as character references, so
        # every one of them is kept as is; folding CR into LF first would
        # silently lose carriage returns on a write/parse round trip.
        # http://www.w3.org/TR/REC-xml/#sec-line-ends
        # (see issues 17582 and 39011)
        if "\r" in text:
            text = text.replace("\r", "&#13;")
        if "\n" in text:
            text = text.replace("\n", "&#10;")
        if "\t" in text:
            text = text.replace("\t", "&#09;")
        return text
    except (TypeError, AttributeError):
        _raise_serialization_error(text)
1103n/a
def _escape_attrib_html(text):
    """Return *text* escaped for use inside a double-quoted HTML attribute.

    Only "&", ">" and the double quote are replaced.  Values that do not
    support the string operations used here are reported through
    _raise_serialization_error().

    """
    try:
        # "&" goes first so the ampersands of the generated entities are
        # left alone.
        for raw, entity in (("&", "&amp;"), (">", "&gt;"), ("\"", "&quot;")):
            if raw in text:
                text = text.replace(raw, entity)
        return text
    except (TypeError, AttributeError):
        _raise_serialization_error(text)
1116n/a
1117n/a# --------------------------------------------------------------------
1118n/a
def tostring(element, encoding=None, method=None, *,
             short_empty_elements=True):
    """Generate string representation of XML element.

    All subelements are included.  If encoding is "unicode", a string
    is returned. Otherwise a bytestring is returned.

    *element* is an Element instance, *encoding* is an optional output
    encoding defaulting to US-ASCII, *method* is an optional output which can
    be one of "xml" (default), "html", "text" or "c14n".

    Returns an (optionally) encoded string containing the XML data.

    """
    # A text buffer is needed only for the special "unicode" encoding;
    # every other encoding produces bytes.
    if encoding == 'unicode':
        buffer = io.StringIO()
    else:
        buffer = io.BytesIO()
    tree = ElementTree(element)
    tree.write(buffer, encoding, method=method,
               short_empty_elements=short_empty_elements)
    return buffer.getvalue()
1137n/a
class _ListDataStream(io.BufferedIOBase):
    """An auxiliary stream accumulating into a list reference."""

    def __init__(self, lst):
        # The list is shared with the caller, not copied: every chunk
        # written to the stream shows up in it.
        self.lst = lst

    def writable(self):
        return True

    def seekable(self):
        return True

    def write(self, b):
        """Append chunk *b* to the list and return its length.

        Returning the amount written follows the io.BufferedIOBase.write()
        contract instead of silently returning None.
        """
        self.lst.append(b)
        return len(b)

    def tell(self):
        # Position is counted in chunks written so far, not in bytes.
        return len(self.lst)
1154n/a
def tostringlist(element, encoding=None, method=None, *,
                 short_empty_elements=True):
    """Serialize *element* and return the list of chunks that were written.

    The arguments have the same meaning as for tostring(); the output is
    collected through a _ListDataStream instead of being joined.

    """
    chunks = []
    tree = ElementTree(element)
    tree.write(_ListDataStream(chunks), encoding, method=method,
               short_empty_elements=short_empty_elements)
    return chunks
1162n/a
1163n/a
def dump(elem):
    """Write element tree or element structure to sys.stdout.

    This function should be used for debugging only.

    *elem* is either an ElementTree, or a single Element.  The exact output
    format is implementation dependent.  In this version, it's written as an
    ordinary XML file.

    """
    # debugging
    tree = elem if isinstance(elem, ElementTree) else ElementTree(elem)
    tree.write(sys.stdout, encoding="unicode")
    # Finish the output with a newline unless the root's tail already
    # supplies one.
    trailing = tree.getroot().tail
    if not (trailing and trailing[-1] == "\n"):
        sys.stdout.write("\n")
1181n/a
1182n/a# --------------------------------------------------------------------
1183n/a# parsing
1184n/a
1185n/a
def parse(source, parser=None):
    """Parse XML document into element tree.

    *source* is a filename or file object containing XML data,
    *parser* is an optional parser instance defaulting to XMLParser.

    Return an ElementTree instance.

    """
    # Start from an empty tree and let it load itself from *source*.
    document = ElementTree()
    document.parse(source, parser)
    return document
1198n/a
1199n/a
def iterparse(source, events=None, parser=None):
    """Incrementally parse XML document into ElementTree.

    The returned iterator also reports what's going on to the user based on
    the *events* it is initialized with.  The supported events are the
    strings "start", "end", "start-ns" and "end-ns" (the "ns" events are used
    to get detailed namespace information).  If *events* is omitted, only
    "end" events are reported.

    *source* is a filename or file object containing XML data, *events* is
    a list of events to report back, *parser* is an optional parser instance.

    Returns an iterator providing (event, elem) pairs.  Its *root* attribute
    is None until the input has been exhausted, and the root element
    afterwards.

    """
    # The ABC lives in collections.abc; the collections.Iterator alias was
    # removed in Python 3.10.
    from collections.abc import Iterator

    # Use the internal, undocumented _parser argument for now; When the
    # parser argument of iterparse is removed, this can be killed.
    pullparser = XMLPullParser(events=events, _parser=parser)

    def iterator():
        # Nothing here runs before the first next() call, so *source*,
        # *close_source* and *it* are all bound by then.
        try:
            while True:
                yield from pullparser.read_events()
                # load event buffer
                data = source.read(16 * 1024)
                if not data:
                    break
                pullparser.feed(data)
            root = pullparser._close_and_return_root()
            yield from pullparser.read_events()
            it.root = root
        finally:
            # Only close files opened by iterparse itself.
            if close_source:
                source.close()

    class IterParseIterator(Iterator):
        __next__ = iterator().__next__
    it = IterParseIterator()
    it.root = None
    del iterator, IterParseIterator

    close_source = False
    if not hasattr(source, "read"):
        source = open(source, "rb")
        close_source = True

    return it
1246n/a
1247n/a
class XMLPullParser:
    """Parser wrapper that queues parsing events for later retrieval.

    Data is pushed in with feed(); the (event, elem) pairs produced so far
    are pulled out with read_events().  *events* is a sequence of event
    names to report; if omitted, only "end" events are reported.

    """

    def __init__(self, events=None, *, _parser=None):
        # The _parser argument is for internal use only and must not be relied
        # upon in user code. It will be removed in a future release.
        # See http://bugs.python.org/issue17741 for more details.

        if events is None:
            events = ("end",)
        self._events_queue = collections.deque()
        self._parser = _parser or XMLParser(target=TreeBuilder())
        # wire up the parser for event reporting
        self._parser._setevents(self._events_queue, events)

    def feed(self, data):
        """Feed encoded data to parser."""
        parser = self._parser
        if parser is None:
            raise ValueError("feed() called after end of stream")
        if not data:
            return
        try:
            parser.feed(data)
        except SyntaxError as exc:
            # Queue the error so read_events() raises it only after the
            # events queued before it have been delivered.
            self._events_queue.append(exc)

    def _close_and_return_root(self):
        # iterparse needs this to set its root attribute properly :(
        root = self._parser.close()
        self._parser = None
        return root

    def close(self):
        """Finish feeding data to parser.

        Unlike XMLParser, does not return the root element. Use
        read_events() to consume elements from XMLPullParser.
        """
        self._close_and_return_root()

    def read_events(self):
        """Return an iterator over currently available (event, elem) pairs.

        Events are consumed from the internal event queue as they are
        retrieved from the iterator.
        """
        queue = self._events_queue
        while queue:
            item = queue.popleft()
            if isinstance(item, Exception):
                raise item
            yield item
1299n/a
1300n/a
def XML(text, parser=None):
    """Parse XML document from string constant.

    This function can be used to embed "XML Literals" in Python code.

    *text* is a string containing XML data, *parser* is an
    optional parser instance, defaulting to the standard XMLParser.

    Returns an Element instance.

    """
    # Any falsy *parser* selects the default parser.
    active = parser if parser else XMLParser(target=TreeBuilder())
    active.feed(text)
    return active.close()
1316n/a
1317n/a
def XMLID(text, parser=None):
    """Parse XML document from string constant for its IDs.

    *text* is a string containing XML data, *parser* is an
    optional parser instance, defaulting to the standard XMLParser.

    Returns an (Element, dict) tuple, in which the
    dict maps element id:s to elements.

    """
    # Any falsy *parser* selects the default parser.
    active = parser if parser else XMLParser(target=TreeBuilder())
    active.feed(text)
    root = active.close()
    by_id = {}
    for node in root.iter():
        ident = node.get("id")
        # Elements with a missing or empty "id" are left out; when an id
        # occurs more than once, the last element seen wins.
        if ident:
            by_id[ident] = node
    return root, by_id
1338n/a
# fromstring() is an alias for XML(): parse an XML document from a string
# constant.
fromstring = XML
1341n/a
def fromstringlist(sequence, parser=None):
    """Parse XML document from sequence of string fragments.

    *sequence* is a list or other sequence of fragments, *parser* is an
    optional parser instance, defaulting to the standard XMLParser.

    Returns an Element instance.

    """
    # Any falsy *parser* selects the default parser.
    active = parser if parser else XMLParser(target=TreeBuilder())
    feed = active.feed
    for fragment in sequence:
        feed(fragment)
    return active.close()
1356n/a
1357n/a# --------------------------------------------------------------------
1358n/a
1359n/a
class TreeBuilder:
    """Generic element structure builder.

    This builder converts a sequence of start, data, and end method
    calls to a well-formed element structure.

    You can use this class to build an element structure using a custom XML
    parser, or a parser for some other XML-like format.

    *element_factory* is an optional element factory which is called
    to create new Element instances, as necessary.

    """
    def __init__(self, element_factory=None):
        self._factory = Element if element_factory is None else element_factory
        self._data = []    # character data collected since the last flush
        self._elem = []    # stack of elements that are still open
        self._last = None  # element most recently started or ended
        self._tail = None  # true if we're after an end tag

    def close(self):
        """Flush builder buffers and return toplevel document Element."""
        assert len(self._elem) == 0, "missing end tags"
        assert self._last is not None, "missing toplevel element"
        return self._last

    def _flush(self):
        # Attach the collected character data to the last element: as its
        # tail when an end tag was seen last, as its text otherwise.
        if not self._data:
            return
        owner = self._last
        if owner is not None:
            text = "".join(self._data)
            if self._tail:
                assert owner.tail is None, "internal error (tail)"
                owner.tail = text
            else:
                assert owner.text is None, "internal error (text)"
                owner.text = text
        self._data = []

    def data(self, data):
        """Add text to current element."""
        self._data.append(data)

    def start(self, tag, attrs):
        """Open new element and return it.

        *tag* is the element name, *attrs* is a dict containing element
        attributes.

        """
        self._flush()
        elem = self._factory(tag, attrs)
        self._last = elem
        open_elements = self._elem
        if open_elements:
            # Nest the new element inside the innermost open one.
            open_elements[-1].append(elem)
        open_elements.append(elem)
        self._tail = 0
        return elem

    def end(self, tag):
        """Close and return current Element.

        *tag* is the element name.

        """
        self._flush()
        closed = self._elem.pop()
        self._last = closed
        assert closed.tag == tag,\
               "end tag mismatch (expected %s, got %s)" % (
                   closed.tag, tag)
        self._tail = 1
        return closed
1432n/a
1433n/a
1434n/a# also see ElementTree and TreeBuilder
class XMLParser:
    """Element structure builder for XML source data based on the expat parser.

    *html* are predefined HTML entities (deprecated and not supported),
    *target* is an optional target object which defaults to an instance of the
    standard TreeBuilder class, *encoding* is an optional encoding string
    which if given, overrides the encoding specified in the XML file:
    http://www.iana.org/assignments/character-sets

    """

    def __init__(self, html=0, target=None, encoding=None):
        try:
            from xml.parsers import expat
        except ImportError:
            try:
                import pyexpat as expat
            except ImportError:
                raise ImportError(
                    "No module named expat; use SimpleXMLTreeBuilder instead"
                    )
        # "}" as namespace separator makes expat report qualified names as
        # "uri}local"; _fixname() later adds the leading "{".
        parser = expat.ParserCreate(encoding, "}")
        if target is None:
            target = TreeBuilder()
        # underscored names are provided for compatibility only
        self.parser = self._parser = parser
        self.target = self._target = target
        self._error = expat.error
        self._names = {} # name memo cache
        # main callbacks
        parser.DefaultHandlerExpand = self._default
        if hasattr(target, 'start'):
            parser.StartElementHandler = self._start
        if hasattr(target, 'end'):
            parser.EndElementHandler = self._end
        if hasattr(target, 'data'):
            parser.CharacterDataHandler = target.data
        # miscellaneous callbacks
        if hasattr(target, 'comment'):
            parser.CommentHandler = target.comment
        if hasattr(target, 'pi'):
            parser.ProcessingInstructionHandler = target.pi
        # Configure pyexpat: buffering, new-style attribute handling.
        parser.buffer_text = 1
        parser.ordered_attributes = 1
        parser.specified_attributes = 1
        self._doctype = None
        self.entity = {}
        try:
            self.version = "Expat %d.%d.%d" % expat.version_info
        except AttributeError:
            pass # unknown

    def _setevents(self, events_queue, events_to_report):
        # Internal API for XMLPullParser
        # events_to_report: a list of events to report during parsing (same as
        # the *events* of XMLPullParser's constructor.
        # events_queue: a list of actual parsing events that will be populated
        # by the underlying parser.
        #
        # The handlers bind their helpers as default arguments, which fixes
        # the values at definition time and makes them local names.
        parser = self._parser
        append = events_queue.append
        for event_name in events_to_report:
            if event_name == "start":
                parser.ordered_attributes = 1
                parser.specified_attributes = 1
                def handler(tag, attrib_in, event=event_name, append=append,
                            start=self._start):
                    append((event, start(tag, attrib_in)))
                parser.StartElementHandler = handler
            elif event_name == "end":
                def handler(tag, event=event_name, append=append,
                            end=self._end):
                    append((event, end(tag)))
                parser.EndElementHandler = handler
            elif event_name == "start-ns":
                def handler(prefix, uri, event=event_name, append=append):
                    append((event, (prefix or "", uri or "")))
                parser.StartNamespaceDeclHandler = handler
            elif event_name == "end-ns":
                def handler(prefix, event=event_name, append=append):
                    append((event, None))
                parser.EndNamespaceDeclHandler = handler
            else:
                raise ValueError("unknown event %r" % event_name)

    def _raiseerror(self, value):
        # Re-raise an expat error as ParseError, keeping code and position.
        err = ParseError(value)
        err.code = value.code
        err.position = value.lineno, value.offset
        raise err

    def _fixname(self, key):
        # expand qname, and convert name string to ascii, if possible
        try:
            name = self._names[key]
        except KeyError:
            name = key
            if "}" in name:
                name = "{" + name
            self._names[key] = name
        return name

    def _start(self, tag, attr_list):
        # Handler for expat's StartElementHandler. Since ordered_attributes
        # is set, the attributes are reported as a list of alternating
        # attribute name,value.
        fixname = self._fixname
        tag = fixname(tag)
        attrib = {}
        if attr_list:
            for i in range(0, len(attr_list), 2):
                attrib[fixname(attr_list[i])] = attr_list[i+1]
        return self.target.start(tag, attrib)

    def _end(self, tag):
        return self.target.end(self._fixname(tag))

    def _default(self, text):
        # Installed as expat's DefaultHandlerExpand: resolves entity
        # references through self.entity and collects the pieces of a
        # doctype declaration.
        prefix = text[:1]
        if prefix == "&":
            # deal with undefined entities
            try:
                data_handler = self.target.data
            except AttributeError:
                return
            # Only the lookup is guarded: a KeyError raised by the data
            # handler itself must not be reported as an undefined entity.
            try:
                replacement = self.entity[text[1:-1]]
            except KeyError:
                # self._error is the error class of whichever expat module
                # __init__ managed to import.
                err = self._error(
                    "undefined entity %s: line %d, column %d" %
                    (text, self.parser.ErrorLineNumber,
                     self.parser.ErrorColumnNumber)
                    )
                err.code = 11 # XML_ERROR_UNDEFINED_ENTITY
                err.lineno = self.parser.ErrorLineNumber
                err.offset = self.parser.ErrorColumnNumber
                raise err
            data_handler(replacement)
        elif prefix == "<" and text[:9] == "<!DOCTYPE":
            self._doctype = [] # inside a doctype declaration
        elif self._doctype is not None:
            # parse doctype contents
            if prefix == ">":
                self._doctype = None
                return
            text = text.strip()
            if not text:
                return
            self._doctype.append(text)
            n = len(self._doctype)
            if n > 2:
                kind = self._doctype[1]
                if kind == "PUBLIC" and n == 4:
                    name, kind, pubid, system = self._doctype
                    if pubid:
                        pubid = pubid[1:-1]  # strip the quotes
                elif kind == "SYSTEM" and n == 3:
                    name, kind, system = self._doctype
                    pubid = None
                else:
                    return
                if hasattr(self.target, "doctype"):
                    self.target.doctype(name, pubid, system[1:-1])
                elif self.doctype != self._XMLParser__doctype:
                    # warn about deprecated call
                    self._XMLParser__doctype(name, pubid, system[1:-1])
                    self.doctype(name, pubid, system[1:-1])
                self._doctype = None

    def doctype(self, name, pubid, system):
        """(Deprecated)  Handle doctype declaration

        *name* is the Doctype name, *pubid* is the public identifier,
        and *system* is the system identifier.

        """
        warnings.warn(
            "This method of XMLParser is deprecated.  Define doctype() "
            "method on the TreeBuilder target.",
            DeprecationWarning,
            )

    # sentinel, if doctype is redefined in a subclass
    __doctype = doctype

    def feed(self, data):
        """Feed encoded data to parser."""
        try:
            self.parser.Parse(data, 0)
        except self._error as v:
            self._raiseerror(v)

    def close(self):
        """Finish feeding data to parser and return element structure."""
        try:
            self.parser.Parse("", 1) # end of data
        except self._error as v:
            self._raiseerror(v)
        try:
            close_handler = self.target.close
        except AttributeError:
            pass
        else:
            return close_handler()
        finally:
            # get rid of circular references
            del self.parser, self._parser
            del self.target, self._target
1645n/a
1646n/a# Import the C accelerators
1647n/atry:
1648n/a # Element is going to be shadowed by the C implementation. We need to keep
1649n/a # the Python version of it accessible for some "creative" use by external
1650n/a # code (see tests)
1651n/a _Element_Py = Element
1652n/a
1653n/a # Element, SubElement, ParseError, TreeBuilder, XMLParser
1654n/a from _elementtree import *
1655n/aexcept ImportError:
1656n/a pass