» Core Development > Code coverage > Lib/xml/sax/expatreader.py

Python code coverage for Lib/xml/sax/expatreader.py

# | count | content
1n/a"""
2n/aSAX driver for the pyexpat C module. This driver works with
3n/apyexpat.__version__ == '2.22'.
4n/a"""
5n/a
6n/aversion = "0.20"
7n/a
8n/afrom xml.sax._exceptions import *
9n/afrom xml.sax.handler import feature_validation, feature_namespaces
10n/afrom xml.sax.handler import feature_namespace_prefixes
11n/afrom xml.sax.handler import feature_external_ges, feature_external_pes
12n/afrom xml.sax.handler import feature_string_interning
13n/afrom xml.sax.handler import property_xml_string, property_interning_dict
14n/a
# Jython never raises ImportError for xml.parsers.expat, so the platform
# has to be checked explicitly before the import is attempted.
import sys
if sys.platform.startswith("java"):
    raise SAXReaderNotAvailable("expat not available in Java", None)
del sys

# Report a missing (or unusable) expat as "reader not available" rather
# than letting a bare ImportError escape.
try:
    from xml.parsers import expat
except ImportError:
    raise SAXReaderNotAvailable("expat not supported", None)
if not hasattr(expat, "ParserCreate"):
    raise SAXReaderNotAvailable("expat not supported", None)
from xml.sax import xmlreader, saxutils, handler

# Short module-level aliases for the attribute containers handed to the
# content handler.
AttributesNSImpl = xmlreader.AttributesNSImpl
AttributesImpl = xmlreader.AttributesImpl
32n/a
33n/a# If we're using a sufficiently recent version of Python, we can use
34n/a# weak references to avoid cycles between the parser and content
35n/a# handler, otherwise we'll just have to pretend.
try:
    import _weakref
except ImportError:
    def _mkproxy(o):
        """Fallback without weak reference support: return *o* itself."""
        return o
else:
    # Weak references are available: bind the proxy factory directly and
    # drop the probe module so it does not linger in the namespace.
    from weakref import proxy as _mkproxy
    del _weakref
45n/a
class _ClosedParser:
    """Placeholder stored in place of the expat parser after close().

    ExpatParser.close() copies ErrorColumnNumber and ErrorLineNumber onto
    an instance of this class so those values stay readable afterwards.
    """
48n/a
49n/a# --- ExpatLocator
50n/a
class ExpatLocator(xmlreader.Locator):
    """Locator handed to the content handler by ExpatParser.

    Only a proxy (weak where supported) to the parser is kept, so that
    the parser and the content handler do not end up referencing each
    other in a cycle.
    """

    def __init__(self, parser):
        self._ref = _mkproxy(parser)

    def getColumnNumber(self):
        """Return expat's current column, or None when no parser exists."""
        owner = self._ref
        if owner._parser is not None:
            return owner._parser.ErrorColumnNumber
        return None

    def getLineNumber(self):
        """Return expat's current line, or 1 when no parser exists."""
        owner = self._ref
        if owner._parser is not None:
            return owner._parser.ErrorLineNumber
        return 1

    def getPublicId(self):
        """Return the public identifier of the source being parsed."""
        owner = self._ref
        if owner is not None:
            return owner._source.getPublicId()
        return None

    def getSystemId(self):
        """Return the system identifier of the source being parsed."""
        owner = self._ref
        if owner is not None:
            return owner._source.getSystemId()
        return None
83n/a
84n/a
85n/a# --- ExpatParser
86n/a
class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
    """SAX driver for the pyexpat C module."""

    def __init__(self, namespaceHandling=0, bufsize=2**16-20):
        """Create a parser.

        namespaceHandling -- initial value of the namespaces feature.
        bufsize -- passed on to IncrementalParser.
        """
        xmlreader.IncrementalParser.__init__(self, bufsize)
        self._source = xmlreader.InputSource()
        self._parser = None
        self._namespaces = namespaceHandling
        self._lex_handler_prop = None
        self._parsing = 0
        # Stack of (parser, source) pairs saved while an external entity
        # is being parsed.
        self._entity_stack = []
        # NOTE(review): external general entities are resolved by default.
        # Callers handling untrusted XML should turn this off with
        # setFeature(feature_external_ges, False).
        self._external_ges = 1
        self._interning = None

    # XMLReader methods

    def parse(self, source):
        "Parse an XML document from a URL or an InputSource."
        source = saxutils.prepare_input_source(source)

        self._source = source
        try:
            self.reset()
            self._cont_handler.setDocumentLocator(ExpatLocator(self))
            xmlreader.IncrementalParser.parse(self, source)
        except BaseException:
            # close() is never reached when parsing fails, so release the
            # source's streams here instead of leaking them.
            self._close_source()
            raise

    def prepareParser(self, source):
        """Give expat the source's system id as its base, if there is one."""
        if source.getSystemId() is not None:
            self._parser.SetBase(source.getSystemId())

    # Redefined setContentHandler to allow changing handlers during parsing

    def setContentHandler(self, handler):
        """Install *handler*; rebind expat's callbacks if already parsing."""
        xmlreader.IncrementalParser.setContentHandler(self, handler)
        if self._parsing:
            self._reset_cont_handler()

    def getFeature(self, name):
        """Return the state of feature *name*.

        Raises SAXNotRecognizedException for an unknown feature.
        """
        if name == feature_namespaces:
            return self._namespaces
        elif name == feature_string_interning:
            return self._interning is not None
        elif name in (feature_validation, feature_external_pes,
                      feature_namespace_prefixes):
            # Never supported by this driver (see setFeature).
            return 0
        elif name == feature_external_ges:
            return self._external_ges
        raise SAXNotRecognizedException("Feature '%s' not recognized" % name)

    def setFeature(self, name, state):
        """Set feature *name* to *state*.

        Raises SAXNotSupportedException while parsing, or when asked to
        enable a feature expat cannot provide, and
        SAXNotRecognizedException for an unknown feature.
        """
        if self._parsing:
            raise SAXNotSupportedException("Cannot set features while parsing")

        if name == feature_namespaces:
            self._namespaces = state
        elif name == feature_external_ges:
            self._external_ges = state
        elif name == feature_string_interning:
            if state:
                # Keep an existing interning dictionary if there is one.
                if self._interning is None:
                    self._interning = {}
            else:
                self._interning = None
        elif name == feature_validation:
            if state:
                raise SAXNotSupportedException(
                    "expat does not support validation")
        elif name == feature_external_pes:
            if state:
                raise SAXNotSupportedException(
                    "expat does not read external parameter entities")
        elif name == feature_namespace_prefixes:
            if state:
                raise SAXNotSupportedException(
                    "expat does not report namespace prefixes")
        else:
            raise SAXNotRecognizedException(
                "Feature '%s' not recognized" % name)

    def getProperty(self, name):
        """Return the value of property *name*.

        Raises SAXNotSupportedException when the XML string is requested
        while no parser exists, and SAXNotRecognizedException for an
        unknown property or when expat lacks GetInputContext.
        """
        if name == handler.property_lexical_handler:
            return self._lex_handler_prop
        elif name == property_interning_dict:
            return self._interning
        elif name == property_xml_string:
            if self._parser:
                if hasattr(self._parser, "GetInputContext"):
                    return self._parser.GetInputContext()
                else:
                    raise SAXNotRecognizedException(
                        "This version of expat does not support getting"
                        " the XML string")
            else:
                raise SAXNotSupportedException(
                    "XML string cannot be returned when not parsing")
        raise SAXNotRecognizedException("Property '%s' not recognized" % name)

    def setProperty(self, name, value):
        """Set property *name* to *value*.

        Raises SAXNotSupportedException for the read-only XML string
        property and SAXNotRecognizedException for an unknown property.
        """
        if name == handler.property_lexical_handler:
            self._lex_handler_prop = value
            if self._parsing:
                self._reset_lex_handler_prop()
        elif name == property_interning_dict:
            self._interning = value
        elif name == property_xml_string:
            raise SAXNotSupportedException("Property '%s' cannot be set" %
                                           name)
        else:
            raise SAXNotRecognizedException("Property '%s' not recognized" %
                                            name)

    # IncrementalParser methods

    def feed(self, data, isFinal=0):
        """Pass *data* to expat, starting a new document if necessary.

        expat errors are wrapped in a SAXParseException and handed to the
        error handler's fatalError().
        """
        if not self._parsing:
            self.reset()
            self._parsing = 1
            self._cont_handler.startDocument()

        try:
            # The isFinal parameter is internal to the expat reader.
            # If it is set to true, expat will check validity of the entire
            # document. When feeding chunks, they are not normally final -
            # except when invoked from close.
            self._parser.Parse(data, isFinal)
        except expat.error as e:
            exc = SAXParseException(expat.ErrorString(e.code), e, self)
            # FIXME: when to invoke error()?
            self._err_handler.fatalError(exc)

    def _close_source(self):
        """Close the character and byte streams of the current source."""
        source = self._source
        try:
            file = source.getCharacterStream()
            if file is not None:
                file.close()
        finally:
            # Attempt the byte stream even if closing the character
            # stream raised.
            file = source.getByteStream()
            if file is not None:
                file.close()

    def close(self):
        """Finish the document, release the parser and close the source."""
        if (self._entity_stack or self._parser is None or
            isinstance(self._parser, _ClosedParser)):
            # If we are completing an external entity, do nothing here
            return
        try:
            self.feed("", isFinal=1)
            self._cont_handler.endDocument()
            # break cycle created by expat handlers pointing to our methods
            self._parser = None
        finally:
            self._parsing = 0
            if self._parser is not None:
                # Keep ErrorColumnNumber and ErrorLineNumber after closing.
                parser = _ClosedParser()
                parser.ErrorColumnNumber = self._parser.ErrorColumnNumber
                parser.ErrorLineNumber = self._parser.ErrorLineNumber
                self._parser = parser
            self._close_source()

    def _reset_cont_handler(self):
        """Point expat's PI and character callbacks at the content handler."""
        self._parser.ProcessingInstructionHandler = \
                                    self._cont_handler.processingInstruction
        self._parser.CharacterDataHandler = self._cont_handler.characters

    def _reset_lex_handler_prop(self):
        """Install, or clear, expat's callbacks for the lexical handler."""
        lex = self._lex_handler_prop
        parser = self._parser
        if lex is None:
            parser.CommentHandler = None
            parser.StartCdataSectionHandler = None
            parser.EndCdataSectionHandler = None
            parser.StartDoctypeDeclHandler = None
            parser.EndDoctypeDeclHandler = None
        else:
            parser.CommentHandler = lex.comment
            parser.StartCdataSectionHandler = lex.startCDATA
            parser.EndCdataSectionHandler = lex.endCDATA
            # Routed through start_doctype_decl, which reorders the
            # arguments for startDTD.
            parser.StartDoctypeDeclHandler = self.start_doctype_decl
            parser.EndDoctypeDeclHandler = lex.endDTD

    def reset(self):
        """Create a fresh expat parser and wire up all of its callbacks."""
        if self._namespaces:
            # " " is the separator expat puts between the parts of a
            # namespace-qualified name; the *_ns handlers split on it.
            self._parser = expat.ParserCreate(self._source.getEncoding(), " ",
                                              intern=self._interning)
            self._parser.namespace_prefixes = 1
            self._parser.StartElementHandler = self.start_element_ns
            self._parser.EndElementHandler = self.end_element_ns
        else:
            self._parser = expat.ParserCreate(self._source.getEncoding(),
                                              intern=self._interning)
            self._parser.StartElementHandler = self.start_element
            self._parser.EndElementHandler = self.end_element

        self._reset_cont_handler()
        self._parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl
        self._parser.NotationDeclHandler = self.notation_decl
        self._parser.StartNamespaceDeclHandler = self.start_namespace_decl
        self._parser.EndNamespaceDeclHandler = self.end_namespace_decl

        self._decl_handler_prop = None
        if self._lex_handler_prop:
            self._reset_lex_handler_prop()
#         self._parser.DefaultHandler =
#         self._parser.DefaultHandlerExpand =
#         self._parser.NotStandaloneHandler =
        self._parser.ExternalEntityRefHandler = self.external_entity_ref
        try:
            self._parser.SkippedEntityHandler = self.skipped_entity_handler
        except AttributeError:
            # This pyexpat does not support SkippedEntity
            pass
        self._parser.SetParamEntityParsing(
            expat.XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)

        self._parsing = 0
        self._entity_stack = []

    # Locator methods

    def getColumnNumber(self):
        """Return expat's current column, or None when no parser exists."""
        if self._parser is None:
            return None
        return self._parser.ErrorColumnNumber

    def getLineNumber(self):
        """Return expat's current line, or 1 when no parser exists."""
        if self._parser is None:
            return 1
        return self._parser.ErrorLineNumber

    def getPublicId(self):
        """Return the public identifier of the current source."""
        return self._source.getPublicId()

    def getSystemId(self):
        """Return the system identifier of the current source."""
        return self._source.getSystemId()

    # event handlers
    def start_element(self, name, attrs):
        self._cont_handler.startElement(name, AttributesImpl(attrs))

    def end_element(self, name):
        self._cont_handler.endElement(name)

    def start_element_ns(self, name, attrs):
        """Translate an expat start tag into a startElementNS event."""
        # One part: bare name. Two parts: uri and local name. Three
        # parts: the third is the prefix (see the qname built below).
        pair = name.split()
        if len(pair) == 1:
            # no namespace
            pair = (None, name)
        elif len(pair) == 3:
            pair = pair[0], pair[1]
        else:
            # default namespace
            pair = tuple(pair)

        newattrs = {}
        qnames = {}
        for (aname, value) in attrs.items():
            parts = aname.split()
            length = len(parts)
            if length == 1:
                # no namespace
                qname = aname
                apair = (None, aname)
            elif length == 3:
                qname = "%s:%s" % (parts[2], parts[1])
                apair = parts[0], parts[1]
            else:
                # default namespace
                qname = parts[1]
                apair = tuple(parts)

            newattrs[apair] = value
            qnames[apair] = qname

        self._cont_handler.startElementNS(pair, None,
                                          AttributesNSImpl(newattrs, qnames))

    def end_element_ns(self, name):
        """Translate an expat end tag into an endElementNS event."""
        pair = name.split()
        if len(pair) == 1:
            pair = (None, name)
        elif len(pair) == 3:
            pair = pair[0], pair[1]
        else:
            pair = tuple(pair)

        self._cont_handler.endElementNS(pair, None)

    # this is not used (call directly to ContentHandler)
    def processing_instruction(self, target, data):
        self._cont_handler.processingInstruction(target, data)

    # this is not used (call directly to ContentHandler)
    def character_data(self, data):
        self._cont_handler.characters(data)

    def start_namespace_decl(self, prefix, uri):
        self._cont_handler.startPrefixMapping(prefix, uri)

    def end_namespace_decl(self, prefix):
        self._cont_handler.endPrefixMapping(prefix)

    def start_doctype_decl(self, name, sysid, pubid, has_internal_subset):
        # expat passes sysid before pubid; startDTD takes pubid first.
        self._lex_handler_prop.startDTD(name, pubid, sysid)

    def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name):
        self._dtd_handler.unparsedEntityDecl(name, pubid, sysid, notation_name)

    def notation_decl(self, name, base, sysid, pubid):
        self._dtd_handler.notationDecl(name, pubid, sysid)

    def external_entity_ref(self, context, base, sysid, pubid):
        """Parse an external entity with a sub-parser.

        Returns 1 on success (or when external general entities are
        disabled) and 0 when parsing the entity failed.
        """
        if not self._external_ges:
            return 1

        source = self._ent_handler.resolveEntity(pubid, sysid)
        source = saxutils.prepare_input_source(source,
                                               self._source.getSystemId() or
                                               "")

        # Save the current parser and source while the entity is parsed.
        self._entity_stack.append((self._parser, self._source))
        self._parser = self._parser.ExternalEntityParserCreate(context)
        self._source = source

        try:
            xmlreader.IncrementalParser.parse(self, source)
        except:
            # NOTE(review): this swallows every exception and leaves the
            # saved parser/source on the stack; kept as-is for
            # compatibility.
            return 0  # FIXME: save error info here?

        (self._parser, self._source) = self._entity_stack[-1]
        del self._entity_stack[-1]
        return 1

    def skipped_entity_handler(self, name, is_pe):
        if is_pe:
            # The SAX spec requires to report skipped PEs with a '%'
            name = '%'+name
        self._cont_handler.skippedEntity(name)
422n/a
423n/a# ---
424n/a
def create_parser(*args, **kwargs):
    """Build an ExpatParser, forwarding every argument to its constructor."""
    parser = ExpatParser(*args, **kwargs)
    return parser
427n/a
428n/a# ---
429n/a
if __name__ == "__main__":
    # Demo: fetch a sample document and echo it through XMLGenerator.
    import xml.sax.saxutils
    p = create_parser()
    generator = xml.sax.saxutils.XMLGenerator()
    p.setContentHandler(generator)
    p.setErrorHandler(xml.sax.ErrorHandler())
    p.parse("http://www.ibiblio.org/xml/examples/shakespeare/hamlet.xml")