»Core Development>Code coverage>Lib/xml/sax/xmlreader.py

Python code coverage for Lib/xml/sax/xmlreader.py

# count content
1n/a"""An XML Reader is the SAX 2 name for an XML parser. XML Parsers
2n/ashould be based on this code. """
3n/a
4n/afrom . import handler
5n/a
6n/afrom ._exceptions import SAXNotSupportedException, SAXNotRecognizedException
7n/a
8n/a
9n/a# ===== XMLREADER =====
10n/a
class XMLReader:
    """Callback-based interface for reading an XML document.

    An XML parser's SAX2 driver must implement this interface.  Through
    it an application can set and query parser features and properties,
    register event handlers for document processing, and start a parse.

    Every SAX interface is assumed to be synchronous: a parse method
    must not return before parsing is complete, and a reader must wait
    for an event-handler callback to return before it reports the next
    event.
    """

    def __init__(self):
        # Start with freshly constructed handler objects from the handler
        # module; the set* methods below replace them.
        self._cont_handler = handler.ContentHandler()
        self._dtd_handler = handler.DTDHandler()
        self._ent_handler = handler.EntityResolver()
        self._err_handler = handler.ErrorHandler()

    def parse(self, source):
        """Parse an XML document from a system identifier or an InputSource.

        This implementation always raises NotImplementedError.
        """
        raise NotImplementedError("This method must be implemented!")

    def getContentHandler(self):
        """Return the ContentHandler currently in use."""
        return self._cont_handler

    def setContentHandler(self, handler):
        """Register a new object to receive document content events."""
        self._cont_handler = handler

    def getDTDHandler(self):
        """Return the DTD handler currently in use."""
        return self._dtd_handler

    def setDTDHandler(self, handler):
        """Register an object to receive basic DTD-related events."""
        self._dtd_handler = handler

    def getEntityResolver(self):
        """Return the EntityResolver currently in use."""
        return self._ent_handler

    def setEntityResolver(self, resolver):
        """Register an object to resolve external entities."""
        self._ent_handler = resolver

    def getErrorHandler(self):
        """Return the ErrorHandler currently in use."""
        return self._err_handler

    def setErrorHandler(self, handler):
        """Register an object to receive error-message events."""
        self._err_handler = handler

    def setLocale(self, locale):
        """Let an application choose the locale for errors and warnings.

        SAX parsers need not provide localization for errors and
        warnings, but a parser that cannot support the requested locale
        must raise a SAX exception.  Applications may ask for a locale
        change in the middle of a parse.

        This implementation always raises SAXNotSupportedException.
        """
        raise SAXNotSupportedException("Locale support not implemented")

    def getFeature(self, name):
        """Look up and return the state of a SAX2 feature.

        This implementation always raises SAXNotRecognizedException.
        """
        message = "Feature '%s' not recognized" % name
        raise SAXNotRecognizedException(message)

    def setFeature(self, name, state):
        """Set the state of a SAX2 feature.

        This implementation always raises SAXNotRecognizedException.
        """
        message = "Feature '%s' not recognized" % name
        raise SAXNotRecognizedException(message)

    def getProperty(self, name):
        """Look up and return the value of a SAX2 property.

        This implementation always raises SAXNotRecognizedException.
        """
        message = "Property '%s' not recognized" % name
        raise SAXNotRecognizedException(message)

    def setProperty(self, name, value):
        """Set the value of a SAX2 property.

        This implementation always raises SAXNotRecognizedException.
        """
        message = "Property '%s' not recognized" % name
        raise SAXNotRecognizedException(message)
90n/a
class IncrementalParser(XMLReader):
    """XMLReader extended with three methods for incremental parsing.

    Supporting this interface is optional, because not every
    underlying XML parser is able to parse incrementally.

    A newly instantiated parser is immediately ready to accept data
    through the feed method.  Once parsing has been finished with a
    call to close, the reset method must be called before the parser
    can accept new data, whether from feed or via the parse method.

    These methods must _not_ be called during parsing, that is, after
    parse has been called and before it returns.

    As a convenience to SAX 2.0 driver writers, the class also provides
    a default parse method (from the XMLReader interface) built on the
    feed, close and reset methods of this interface.
    """

    def __init__(self, bufsize=2**16):
        XMLReader.__init__(self)
        # Chunk size handed to read() by the default parse() loop.
        self._bufsize = bufsize

    def parse(self, source):
        """Parse source by reading it in chunks and feeding each one.

        The source is first passed through
        saxutils.prepare_input_source and then to prepareParser.  Its
        character stream is used when one is set, otherwise its byte
        stream.  Chunks of self._bufsize are read and passed to feed
        until a read returns a falsy value, after which close is called.
        """
        from . import saxutils
        source = saxutils.prepare_input_source(source)

        self.prepareParser(source)
        stream = source.getCharacterStream()
        if stream is None:
            stream = source.getByteStream()
        while True:
            chunk = stream.read(self._bufsize)
            if not chunk:
                break
            self.feed(chunk)
        self.close()

    def feed(self, data):
        """Hand the raw XML in data to the parser and have it parsed,
        emitting the corresponding events.  An XML construct may be
        split across several calls to feed.

        feed may raise SAXException.  This implementation always raises
        NotImplementedError."""
        raise NotImplementedError("This method must be implemented!")

    def prepareParser(self, source):
        """Called by the parse implementation so that the SAX 2.0
        driver can prepare itself for parsing.

        This implementation always raises NotImplementedError."""
        raise NotImplementedError("prepareParser must be overridden!")

    def close(self):
        """Called once the entire XML document has been passed to the
        parser through the feed method, to tell the parser that there
        are no more data.  This lets the parser perform its final
        checks on the document and empty its internal data buffer.

        The parser is not ready to parse another document until the
        reset method has been called.

        close may raise SAXException.  This implementation always
        raises NotImplementedError."""
        raise NotImplementedError("This method must be implemented!")

    def reset(self):
        """Called after close to reset the parser so that it is ready
        to parse new documents.  The results of calling parse or feed
        after close without calling reset are undefined.

        This implementation always raises NotImplementedError."""
        raise NotImplementedError("This method must be implemented!")
162n/a
163n/a# ===== LOCATOR =====
164n/a
class Locator:
    """Interface that ties a SAX event to a location in the document.

    A locator object returns valid results only during calls to
    DocumentHandler methods; at any other time the results are
    unpredictable.
    """

    def getColumnNumber(self):
        """Return the column number where the current event ends.

        This implementation returns -1.
        """
        return -1

    def getLineNumber(self):
        """Return the line number where the current event ends.

        This implementation returns -1.
        """
        return -1

    def getPublicId(self):
        """Return the public identifier for the current event.

        This implementation returns None.
        """
        return None

    def getSystemId(self):
        """Return the system identifier for the current event.

        This implementation returns None.
        """
        return None
186n/a
187n/a# ===== INPUTSOURCE =====
188n/a
class InputSource:
    """Bundle of the information an XMLReader needs to read entities.

    An instance may carry the public identifier, the system
    identifier, a byte stream (possibly with character encoding
    information) and/or the character stream of an entity.

    Applications create objects of this class to pass to the
    XMLReader.parse method and to return from
    EntityResolver.resolveEntity.

    An InputSource belongs to the application: the XMLReader is not
    allowed to modify InputSource objects passed to it by the
    application, although it may make copies and modify those.
    """

    def __init__(self, system_id=None):
        # Everything except the system identifier starts out unset.
        self.__system_id = system_id
        self.__public_id = None
        self.__encoding = None
        self.__bytefile = None
        self.__charfile = None

    def setPublicId(self, public_id):
        """Set the public identifier of this InputSource."""
        self.__public_id = public_id

    def getPublicId(self):
        """Return the public identifier of this InputSource."""
        return self.__public_id

    def setSystemId(self, system_id):
        """Set the system identifier of this InputSource."""
        self.__system_id = system_id

    def getSystemId(self):
        """Return the system identifier of this InputSource."""
        return self.__system_id

    def setEncoding(self, encoding):
        """Set the character encoding of this InputSource.

        The encoding must be a string that is acceptable in an XML
        encoding declaration (see section 4.3.3 of the XML
        recommendation).

        If the InputSource also contains a character stream, its
        encoding attribute is ignored.
        """
        self.__encoding = encoding

    def getEncoding(self):
        """Return the character encoding of this InputSource."""
        return self.__encoding

    def setByteStream(self, bytefile):
        """Set the byte stream for this input source: a Python
        file-like object that does not perform byte-to-character
        conversion.

        The SAX parser ignores the byte stream when a character stream
        is also specified, but prefers a byte stream over opening a URI
        connection itself.

        An application that knows the character encoding of the byte
        stream should set it with the setEncoding method.
        """
        self.__bytefile = bytefile

    def getByteStream(self):
        """Return the byte stream for this input source.

        The getEncoding method returns the character encoding for this
        byte stream, or None if it is unknown.
        """
        return self.__bytefile

    def setCharacterStream(self, charfile):
        """Set the character stream for this input source.  (The
        stream must be a Python 2.0 Unicode-wrapped file-like object
        that performs conversion to Unicode strings.)

        When a character stream is specified, the SAX parser ignores
        any byte stream and does not attempt to open a URI connection
        to the system identifier.
        """
        self.__charfile = charfile

    def getCharacterStream(self):
        """Return the character stream for this input source."""
        return self.__charfile
275n/a
276n/a# ===== ATTRIBUTESIMPL =====
277n/a
class AttributesImpl:
    """Attribute collection backed by a single name-to-value mapping."""

    def __init__(self, attrs):
        """Non-NS-aware implementation.

        attrs should be of the form {name : value}.  The mapping is
        stored as given, not copied.
        """
        self._attrs = attrs

    def getLength(self):
        """Return the number of attributes."""
        return len(self._attrs)

    def getType(self, name):
        """Return "CDATA" for every name; name is not looked up."""
        return "CDATA"

    def getValue(self, name):
        """Return the value stored for name."""
        value = self._attrs[name]
        return value

    def getValueByQName(self, name):
        """Return the value stored for name, which is used directly as
        the lookup key."""
        value = self._attrs[name]
        return value

    def getNameByQName(self, name):
        """Return name unchanged if it is present; raise KeyError otherwise."""
        if name in self._attrs:
            return name
        raise KeyError(name)

    def getQNameByName(self, name):
        """Return name unchanged if it is present; raise KeyError otherwise."""
        if name in self._attrs:
            return name
        raise KeyError(name)

    def getNames(self):
        """Return the attribute names as a new list."""
        names = self._attrs.keys()
        return list(names)

    def getQNames(self):
        """Return the attribute names as a new list."""
        names = self._attrs.keys()
        return list(names)

    def __len__(self):
        return len(self._attrs)

    def __getitem__(self, name):
        return self._attrs[name]

    def keys(self):
        """Return the attribute names as a new list."""
        names = self._attrs.keys()
        return list(names)

    def __contains__(self, name):
        return name in self._attrs

    def get(self, name, alternative=None):
        """Return the value for name, or alternative if it is absent."""
        return self._attrs.get(name, alternative)

    def copy(self):
        """Return a new instance of the same class built from the same
        underlying mapping object."""
        cls = self.__class__
        return cls(self._attrs)

    def items(self):
        """Return the (name, value) pairs as a new list."""
        pairs = self._attrs.items()
        return list(pairs)

    def values(self):
        """Return the attribute values as a new list."""
        vals = self._attrs.values()
        return list(vals)
337n/a
338n/a# ===== ATTRIBUTESNSIMPL =====
339n/a
class AttributesNSImpl(AttributesImpl):
    """Attribute collection keyed by (ns_uri, lname) pairs, with a
    second mapping from those pairs to qualified names."""

    def __init__(self, attrs, qnames):
        """NS-aware implementation.

        attrs should be of the form {(ns_uri, lname): value, ...}.
        qnames of the form {(ns_uri, lname): qname, ...}.
        Both mappings are stored as given, not copied.
        """
        self._attrs = attrs
        self._qnames = qnames

    def getValueByQName(self, name):
        """Return the value of the first attribute whose qualified name
        equals name; raise KeyError(name) if no qualified name matches."""
        for ns_key, qualified in self._qnames.items():
            if qualified != name:
                continue
            return self._attrs[ns_key]

        raise KeyError(name)

    def getNameByQName(self, name):
        """Return the (ns_uri, lname) key of the first attribute whose
        qualified name equals name; raise KeyError(name) if none matches."""
        for ns_key, qualified in self._qnames.items():
            if qualified != name:
                continue
            return ns_key

        raise KeyError(name)

    def getQNameByName(self, name):
        """Return the qualified name stored for the (ns_uri, lname) key."""
        qualified = self._qnames[name]
        return qualified

    def getQNames(self):
        """Return the qualified names as a new list."""
        qualified_names = self._qnames.values()
        return list(qualified_names)

    def copy(self):
        """Return a new instance of the same class built from the same
        two underlying mapping objects."""
        cls = self.__class__
        return cls(self._attrs, self._qnames)
372n/a
373n/a
def _test():
    """Smoke test: instantiate XMLReader, IncrementalParser and Locator
    with no arguments, discarding the instances."""
    for cls in (XMLReader, IncrementalParser, Locator):
        cls()


# Run the smoke test only when this module is executed as a script.
if __name__ == "__main__":
    _test()