» Core Development > Code coverage > Modules/expat/xmltok_impl.c

Python code coverage for Modules/expat/xmltok_impl.c

# count content
1n/a/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
2n/a See the file COPYING for copying permission.
3n/a*/
4n/a
5n/a/* This file is included! */
6n/a#ifdef XML_TOK_IMPL_C
7n/a
/* Fallback definition: if nothing defined IS_INVALID_CHAR before this point,
   the check expands to the constant 0, i.e. no multi-byte sequence is ever
   reported as invalid by this test. */
8n/a#ifndef IS_INVALID_CHAR
9n/a#define IS_INVALID_CHAR(enc, ptr, n) (0)
10n/a#endif
11n/a
/* Expands to the `case BT_LEAD<n>:` arm of a switch on a byte type.
   Besides its parameters it uses `enc` and `end` from the expansion site.
   - fewer than n bytes left before `end`: returns XML_TOK_PARTIAL_CHAR
   - IS_INVALID_CHAR(enc, ptr, n) is non-zero: stores ptr in *nextTokPtr
     and returns XML_TOK_INVALID
   - otherwise: advances ptr by n and breaks out of the switch. */
12n/a#define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \
13n/a case BT_LEAD ## n: \
14n/a if (end - ptr < n) \
15n/a return XML_TOK_PARTIAL_CHAR; \
16n/a if (IS_INVALID_CHAR(enc, ptr, n)) { \
17n/a *(nextTokPtr) = (ptr); \
18n/a return XML_TOK_INVALID; \
19n/a } \
20n/a ptr += n; \
21n/a break;
22n/a
/* Expands to the switch arms that reject bad input: the 2-, 3- and 4-byte
   INVALID_LEAD_CASE arms, followed by arms for BT_NONXML, BT_MALFORM and
   BT_TRAIL that store ptr in *nextTokPtr and return XML_TOK_INVALID. */
23n/a#define INVALID_CASES(ptr, nextTokPtr) \
24n/a INVALID_LEAD_CASE(2, ptr, nextTokPtr) \
25n/a INVALID_LEAD_CASE(3, ptr, nextTokPtr) \
26n/a INVALID_LEAD_CASE(4, ptr, nextTokPtr) \
27n/a case BT_NONXML: \
28n/a case BT_MALFORM: \
29n/a case BT_TRAIL: \
30n/a *(nextTokPtr) = (ptr); \
31n/a return XML_TOK_INVALID;
32n/a
/* Expands to the `case BT_LEAD<n>:` arm used while scanning a name.
   - fewer than n bytes left before end: returns XML_TOK_PARTIAL_CHAR
   - IS_NAME_CHAR(enc, ptr, n) is zero: stores ptr in *nextTokPtr and
     returns XML_TOK_INVALID
   - otherwise: advances ptr by n and breaks out of the switch. */
33n/a#define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr) \
34n/a case BT_LEAD ## n: \
35n/a if (end - ptr < n) \
36n/a return XML_TOK_PARTIAL_CHAR; \
37n/a if (!IS_NAME_CHAR(enc, ptr, n)) { \
38n/a *nextTokPtr = ptr; \
39n/a return XML_TOK_INVALID; \
40n/a } \
41n/a ptr += n; \
42n/a break;
43n/a
/* Expands to every switch arm that accepts one name character.
   BT_NONASCII is first checked with IS_NAME_CHAR_MINBPC (XML_TOK_INVALID at
   ptr on failure) and then, having no break, falls into the shared arm for
   BT_NMSTRT, BT_HEX, BT_DIGIT, BT_NAME and BT_MINUS, which advances ptr by
   MINBPC(enc).  The 2-, 3- and 4-byte lead types are handled by
   CHECK_NAME_CASE. */
44n/a#define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \
45n/a case BT_NONASCII: \
46n/a if (!IS_NAME_CHAR_MINBPC(enc, ptr)) { \
47n/a *nextTokPtr = ptr; \
48n/a return XML_TOK_INVALID; \
49n/a } \
50n/a case BT_NMSTRT: \
51n/a case BT_HEX: \
52n/a case BT_DIGIT: \
53n/a case BT_NAME: \
54n/a case BT_MINUS: \
55n/a ptr += MINBPC(enc); \
56n/a break; \
57n/a CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \
58n/a CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \
59n/a CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr)
60n/a
/* Expands to the `case BT_LEAD<n>:` arm used for the first character of a
   name.
   - fewer than n bytes left before end: returns XML_TOK_PARTIAL_CHAR
   - IS_NMSTRT_CHAR(enc, ptr, n) is zero: stores ptr in *nextTokPtr and
     returns XML_TOK_INVALID
   - otherwise: advances ptr by n and breaks out of the switch. */
61n/a#define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \
62n/a case BT_LEAD ## n: \
63n/a if (end - ptr < n) \
64n/a return XML_TOK_PARTIAL_CHAR; \
65n/a if (!IS_NMSTRT_CHAR(enc, ptr, n)) { \
66n/a *nextTokPtr = ptr; \
67n/a return XML_TOK_INVALID; \
68n/a } \
69n/a ptr += n; \
70n/a break;
71n/a
/* Expands to every switch arm that accepts a name-start character.
   BT_NONASCII is first checked with IS_NMSTRT_CHAR_MINBPC (XML_TOK_INVALID
   at ptr on failure) and then, having no break, falls into the shared arm
   for BT_NMSTRT and BT_HEX, which advances ptr by MINBPC(enc).  The 2-, 3-
   and 4-byte lead types are handled by CHECK_NMSTRT_CASE. */
72n/a#define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \
73n/a case BT_NONASCII: \
74n/a if (!IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { \
75n/a *nextTokPtr = ptr; \
76n/a return XML_TOK_INVALID; \
77n/a } \
78n/a case BT_NMSTRT: \
79n/a case BT_HEX: \
80n/a ptr += MINBPC(enc); \
81n/a break; \
82n/a CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \
83n/a CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \
84n/a CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr)
85n/a
/* Fallback definition: if PREFIX was not defined before this point, the
   function names below are used exactly as written. */
86n/a#ifndef PREFIX
87n/a#define PREFIX(ident) ident
88n/a#endif
89n/a
90n/a/* ptr points to character following "<!-" */
91n/a
/* Scan the remainder of a comment.
   enc        - encoding handed to the byte-classification macros
   ptr, end   - range of input to scan
   nextTokPtr - out: set whenever XML_TOK_COMMENT or XML_TOK_INVALID is
                returned
   Returns:
   - XML_TOK_COMMENT, with *nextTokPtr just past the closing ASCII_GT
   - XML_TOK_INVALID, with *nextTokPtr at the offending position, when the
     first character is not ASCII_MINUS, when INVALID_CASES rejects a
     character, or when two ASCII_MINUS are not followed by ASCII_GT
   - XML_TOK_PARTIAL when the input ends first (INVALID_CASES may instead
     return XML_TOK_PARTIAL_CHAR inside a multi-byte character). */
92n/astatic int PTRCALL
93n/aPREFIX(scanComment)(const ENCODING *enc, const char *ptr,
94n/a const char *end, const char **nextTokPtr)
95n/a{
96n/a if (ptr != end) {
   /* the character at ptr has to be the second ASCII_MINUS of the opener */
97n/a if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
98n/a *nextTokPtr = ptr;
99n/a return XML_TOK_INVALID;
100n/a }
101n/a ptr += MINBPC(enc);
102n/a while (ptr != end) {
103n/a switch (BYTE_TYPE(enc, ptr)) {
104n/a INVALID_CASES(ptr, nextTokPtr)
105n/a case BT_MINUS:
     /* one minus seen: a second one must be followed by ASCII_GT */
106n/a if ((ptr += MINBPC(enc)) == end)
107n/a return XML_TOK_PARTIAL;
108n/a if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
109n/a if ((ptr += MINBPC(enc)) == end)
110n/a return XML_TOK_PARTIAL;
111n/a if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
112n/a *nextTokPtr = ptr;
113n/a return XML_TOK_INVALID;
114n/a }
115n/a *nextTokPtr = ptr + MINBPC(enc);
116n/a return XML_TOK_COMMENT;
117n/a }
     /* single minus: the character after it is re-examined by the loop */
118n/a break;
119n/a default:
120n/a ptr += MINBPC(enc);
121n/a break;
122n/a }
123n/a }
124n/a }
125n/a return XML_TOK_PARTIAL;
126n/a}
127n/a
128n/a/* ptr points to character following "<!" */
129n/a
/* Scan the start of a declaration.
   enc        - encoding handed to the byte-classification macros
   ptr, end   - range of input to scan
   nextTokPtr - out: set on every return other than XML_TOK_PARTIAL
                (for BT_MINUS, whatever scanComment sets)
   Returns:
   - the result of scanComment when the first character is BT_MINUS
   - XML_TOK_COND_SECT_OPEN, *nextTokPtr past the BT_LSQB, when the first
     character is BT_LSQB
   - XML_TOK_DECL_OPEN, *nextTokPtr at the terminating character, after a
     run of BT_NMSTRT/BT_HEX characters ended by whitespace or by an
     acceptable BT_PERCNT
   - XML_TOK_INVALID, *nextTokPtr at the offending character, otherwise
   - XML_TOK_PARTIAL when the input ends first. */
130n/astatic int PTRCALL
131n/aPREFIX(scanDecl)(const ENCODING *enc, const char *ptr,
132n/a const char *end, const char **nextTokPtr)
133n/a{
134n/a if (ptr == end)
135n/a return XML_TOK_PARTIAL;
136n/a switch (BYTE_TYPE(enc, ptr)) {
137n/a case BT_MINUS:
138n/a return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
139n/a case BT_LSQB:
140n/a *nextTokPtr = ptr + MINBPC(enc);
141n/a return XML_TOK_COND_SECT_OPEN;
142n/a case BT_NMSTRT:
143n/a case BT_HEX:
144n/a ptr += MINBPC(enc);
145n/a break;
146n/a default:
147n/a *nextTokPtr = ptr;
148n/a return XML_TOK_INVALID;
149n/a }
  /* consume the rest of the declaration keyword */
150n/a while (ptr != end) {
151n/a switch (BYTE_TYPE(enc, ptr)) {
152n/a case BT_PERCNT:
    /* need one character of lookahead after the percent sign */
153n/a if (ptr + MINBPC(enc) == end)
154n/a return XML_TOK_PARTIAL;
155n/a /* don't allow <!ENTITY% foo "whatever"> */
156n/a switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) {
157n/a case BT_S: case BT_CR: case BT_LF: case BT_PERCNT:
158n/a *nextTokPtr = ptr;
159n/a return XML_TOK_INVALID;
160n/a }
161n/a /* fall through */
162n/a case BT_S: case BT_CR: case BT_LF:
163n/a *nextTokPtr = ptr;
164n/a return XML_TOK_DECL_OPEN;
165n/a case BT_NMSTRT:
166n/a case BT_HEX:
167n/a ptr += MINBPC(enc);
168n/a break;
169n/a default:
170n/a *nextTokPtr = ptr;
171n/a return XML_TOK_INVALID;
172n/a }
173n/a }
174n/a return XML_TOK_PARTIAL;
175n/a}
176n/a
/* Classify the processing-instruction target occupying [ptr, end).
   enc    - encoding handed to BYTE_TO_ASCII / MINBPC
   tokPtr - out: always written; XML_TOK_PI unless the target is exactly
            ASCII_x ASCII_m ASCII_l, in which case XML_TOK_XML_DECL
   Returns 1 when the target is acceptable, 0 when it spells those three
   letters with at least one of them in its upper-case form (ASCII_X,
   ASCII_M, ASCII_L).  Any target that is not three MINBPC units long, or
   whose letters differ, is accepted as an ordinary PI. */
177n/astatic int PTRCALL
178n/aPREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr,
179n/a const char *end, int *tokPtr)
180n/a{
  /* set to 1 as soon as any of the three letters is upper case */
181n/a int upper = 0;
182n/a *tokPtr = XML_TOK_PI;
183n/a if (end - ptr != MINBPC(enc)*3)
184n/a return 1;
185n/a switch (BYTE_TO_ASCII(enc, ptr)) {
186n/a case ASCII_x:
187n/a break;
188n/a case ASCII_X:
189n/a upper = 1;
190n/a break;
191n/a default:
192n/a return 1;
193n/a }
194n/a ptr += MINBPC(enc);
195n/a switch (BYTE_TO_ASCII(enc, ptr)) {
196n/a case ASCII_m:
197n/a break;
198n/a case ASCII_M:
199n/a upper = 1;
200n/a break;
201n/a default:
202n/a return 1;
203n/a }
204n/a ptr += MINBPC(enc);
205n/a switch (BYTE_TO_ASCII(enc, ptr)) {
206n/a case ASCII_l:
207n/a break;
208n/a case ASCII_L:
209n/a upper = 1;
210n/a break;
211n/a default:
212n/a return 1;
213n/a }
  /* the three letters matched; a mixed/upper-case spelling is rejected */
214n/a if (upper)
215n/a return 0;
216n/a *tokPtr = XML_TOK_XML_DECL;
217n/a return 1;
218n/a}
219n/a
220n/a/* ptr points to character following "<?" */
221n/a
/* Scan a processing instruction.
   enc        - encoding handed to the byte-classification macros
   ptr, end   - range of input to scan; ptr is also remembered as the start
                of the target name
   nextTokPtr - out: set whenever a token or XML_TOK_INVALID is returned
   Returns:
   - the token chosen by checkPiTarget (XML_TOK_PI or XML_TOK_XML_DECL),
     with *nextTokPtr just past the ASCII_GT that follows a BT_QUEST
   - XML_TOK_INVALID, *nextTokPtr at the offending position, for a bad name
     character, a target rejected by checkPiTarget, an invalid character in
     the body, or a BT_QUEST directly after the target that is not followed
     by ASCII_GT
   - XML_TOK_PARTIAL when the input ends first (the name/invalid-character
     macros may return XML_TOK_PARTIAL_CHAR). */
222n/astatic int PTRCALL
223n/aPREFIX(scanPi)(const ENCODING *enc, const char *ptr,
224n/a const char *end, const char **nextTokPtr)
225n/a{
226n/a int tok;
227n/a const char *target = ptr;
228n/a if (ptr == end)
229n/a return XML_TOK_PARTIAL;
  /* first character of the target must be a name-start character */
230n/a switch (BYTE_TYPE(enc, ptr)) {
231n/a CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
232n/a default:
233n/a *nextTokPtr = ptr;
234n/a return XML_TOK_INVALID;
235n/a }
236n/a while (ptr != end) {
237n/a switch (BYTE_TYPE(enc, ptr)) {
238n/a CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
239n/a case BT_S: case BT_CR: case BT_LF:
    /* whitespace ends the target, which spans [target, ptr) */
240n/a if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
241n/a *nextTokPtr = ptr;
242n/a return XML_TOK_INVALID;
243n/a }
244n/a ptr += MINBPC(enc);
    /* scan the body until a BT_QUEST immediately followed by ASCII_GT */
245n/a while (ptr != end) {
246n/a switch (BYTE_TYPE(enc, ptr)) {
247n/a INVALID_CASES(ptr, nextTokPtr)
248n/a case BT_QUEST:
249n/a ptr += MINBPC(enc);
250n/a if (ptr == end)
251n/a return XML_TOK_PARTIAL;
252n/a if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
253n/a *nextTokPtr = ptr + MINBPC(enc);
254n/a return tok;
255n/a }
256n/a break;
257n/a default:
258n/a ptr += MINBPC(enc);
259n/a break;
260n/a }
261n/a }
262n/a return XML_TOK_PARTIAL;
263n/a case BT_QUEST:
    /* target directly followed by BT_QUEST: only ASCII_GT may come next */
264n/a if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
265n/a *nextTokPtr = ptr;
266n/a return XML_TOK_INVALID;
267n/a }
268n/a ptr += MINBPC(enc);
269n/a if (ptr == end)
270n/a return XML_TOK_PARTIAL;
271n/a if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
272n/a *nextTokPtr = ptr + MINBPC(enc);
273n/a return tok;
274n/a }
275n/a /* fall through */
276n/a default:
277n/a *nextTokPtr = ptr;
278n/a return XML_TOK_INVALID;
279n/a }
280n/a }
281n/a return XML_TOK_PARTIAL;
282n/a}
283n/a
/* Match the six characters listed in CDATA_LSQB starting at ptr.
   enc        - encoding handed to CHAR_MATCHES / MINBPC
   ptr, end   - range of input to scan
   nextTokPtr - out: set on XML_TOK_INVALID and XML_TOK_CDATA_SECT_OPEN
   Returns:
   - XML_TOK_PARTIAL when fewer than 6 * MINBPC(enc) bytes are available
   - XML_TOK_INVALID, *nextTokPtr at the first mismatching character
   - XML_TOK_CDATA_SECT_OPEN, *nextTokPtr just past the six characters. */
284n/astatic int PTRCALL
285n/aPREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr,
286n/a const char *end, const char **nextTokPtr)
287n/a{
288n/a static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A,
289n/a ASCII_T, ASCII_A, ASCII_LSQB };
290n/a int i;
291n/a /* CDATA[ */
292n/a if (end - ptr < 6 * MINBPC(enc))
293n/a return XML_TOK_PARTIAL;
  /* compare character by character, advancing one MINBPC unit each time */
294n/a for (i = 0; i < 6; i++, ptr += MINBPC(enc)) {
295n/a if (!CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) {
296n/a *nextTokPtr = ptr;
297n/a return XML_TOK_INVALID;
298n/a }
299n/a }
300n/a *nextTokPtr = ptr;
301n/a return XML_TOK_CDATA_SECT_OPEN;
302n/a}
303n/a
/* Return the next token from [ptr, end) using the CDATA-section rules below.
   enc        - encoding handed to the byte-classification macros
   ptr, end   - range of input to scan
   nextTokPtr - out: set on every return except XML_TOK_NONE and
                XML_TOK_PARTIAL
   Returns:
   - XML_TOK_NONE when the range is empty
   - XML_TOK_PARTIAL when no whole MINBPC unit is available, or the input
     ends inside a possible terminator or right after a BT_CR
   - XML_TOK_CDATA_SECT_CLOSE for two ASCII_RSQB followed by ASCII_GT at ptr
   - XML_TOK_DATA_NEWLINE for BT_CR, BT_CR + BT_LF, or BT_LF at ptr
   - XML_TOK_INVALID (or XML_TOK_PARTIAL_CHAR) via INVALID_CASES when the
     first character is bad
   - XML_TOK_DATA_CHARS otherwise, with *nextTokPtr at the first character
     that ends the run (invalid/incomplete sequence, BT_CR, BT_LF, BT_RSQB)
     or at end. */
304n/astatic int PTRCALL
305n/aPREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr,
306n/a const char *end, const char **nextTokPtr)
307n/a{
308n/a if (ptr == end)
309n/a return XML_TOK_NONE;
  /* For multi-byte units, drop trailing bytes that do not form a whole unit.
     NOTE(review): the mask arithmetic presumes MINBPC(enc) is a power of
     two - confirm against the encoding definitions. */
310n/a if (MINBPC(enc) > 1) {
311n/a size_t n = end - ptr;
312n/a if (n & (MINBPC(enc) - 1)) {
313n/a n &= ~(MINBPC(enc) - 1);
314n/a if (n == 0)
315n/a return XML_TOK_PARTIAL;
316n/a end = ptr + n;
317n/a }
318n/a }
  /* the first character decides whether this is a special token */
319n/a switch (BYTE_TYPE(enc, ptr)) {
320n/a case BT_RSQB:
321n/a ptr += MINBPC(enc);
322n/a if (ptr == end)
323n/a return XML_TOK_PARTIAL;
324n/a if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
325n/a break;
326n/a ptr += MINBPC(enc);
327n/a if (ptr == end)
328n/a return XML_TOK_PARTIAL;
329n/a if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
    /* not a terminator: step back so only the first BT_RSQB is consumed */
330n/a ptr -= MINBPC(enc);
331n/a break;
332n/a }
333n/a *nextTokPtr = ptr + MINBPC(enc);
334n/a return XML_TOK_CDATA_SECT_CLOSE;
335n/a case BT_CR:
336n/a ptr += MINBPC(enc);
337n/a if (ptr == end)
338n/a return XML_TOK_PARTIAL;
339n/a if (BYTE_TYPE(enc, ptr) == BT_LF)
340n/a ptr += MINBPC(enc);
341n/a *nextTokPtr = ptr;
342n/a return XML_TOK_DATA_NEWLINE;
343n/a case BT_LF:
344n/a *nextTokPtr = ptr + MINBPC(enc);
345n/a return XML_TOK_DATA_NEWLINE;
346n/a INVALID_CASES(ptr, nextTokPtr)
347n/a default:
348n/a ptr += MINBPC(enc);
349n/a break;
350n/a }
  /* extend the run of ordinary data as far as possible */
351n/a while (ptr != end) {
352n/a switch (BYTE_TYPE(enc, ptr)) {
353n/a#define LEAD_CASE(n) \
354n/a case BT_LEAD ## n: \
355n/a if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
356n/a *nextTokPtr = ptr; \
357n/a return XML_TOK_DATA_CHARS; \
358n/a } \
359n/a ptr += n; \
360n/a break;
361n/a LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
362n/a#undef LEAD_CASE
363n/a case BT_NONXML:
364n/a case BT_MALFORM:
365n/a case BT_TRAIL:
366n/a case BT_CR:
367n/a case BT_LF:
368n/a case BT_RSQB:
    /* stop before this character; the data seen so far is returned */
369n/a *nextTokPtr = ptr;
370n/a return XML_TOK_DATA_CHARS;
371n/a default:
372n/a ptr += MINBPC(enc);
373n/a break;
374n/a }
375n/a }
376n/a *nextTokPtr = ptr;
377n/a return XML_TOK_DATA_CHARS;
378n/a}
379n/a
380n/a/* ptr points to character following "</" */
381n/a
/* Scan an end tag: a name, optional whitespace, then BT_GT.
   enc        - encoding handed to the byte-classification macros
   ptr, end   - range of input to scan
   nextTokPtr - out: set on XML_TOK_END_TAG and XML_TOK_INVALID
   Returns:
   - XML_TOK_END_TAG, *nextTokPtr just past the BT_GT
   - XML_TOK_INVALID, *nextTokPtr at the offending character, for a bad
     name character or anything other than whitespace/BT_GT after the name
   - XML_TOK_PARTIAL when the input ends first (the name macros may return
     XML_TOK_PARTIAL_CHAR).
   With XML_NS defined, BT_COLON is accepted anywhere after the first
   character of the name. */
382n/astatic int PTRCALL
383n/aPREFIX(scanEndTag)(const ENCODING *enc, const char *ptr,
384n/a const char *end, const char **nextTokPtr)
385n/a{
386n/a if (ptr == end)
387n/a return XML_TOK_PARTIAL;
388n/a switch (BYTE_TYPE(enc, ptr)) {
389n/a CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
390n/a default:
391n/a *nextTokPtr = ptr;
392n/a return XML_TOK_INVALID;
393n/a }
394n/a while (ptr != end) {
395n/a switch (BYTE_TYPE(enc, ptr)) {
396n/a CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
397n/a case BT_S: case BT_CR: case BT_LF:
    /* after the name only whitespace and the closing BT_GT are allowed */
398n/a for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
399n/a switch (BYTE_TYPE(enc, ptr)) {
400n/a case BT_S: case BT_CR: case BT_LF:
401n/a break;
402n/a case BT_GT:
403n/a *nextTokPtr = ptr + MINBPC(enc);
404n/a return XML_TOK_END_TAG;
405n/a default:
406n/a *nextTokPtr = ptr;
407n/a return XML_TOK_INVALID;
408n/a }
409n/a }
410n/a return XML_TOK_PARTIAL;
411n/a#ifdef XML_NS
412n/a case BT_COLON:
413n/a /* no need to check qname syntax here,
414n/a since end-tag must match exactly */
415n/a ptr += MINBPC(enc);
416n/a break;
417n/a#endif
418n/a case BT_GT:
419n/a *nextTokPtr = ptr + MINBPC(enc);
420n/a return XML_TOK_END_TAG;
421n/a default:
422n/a *nextTokPtr = ptr;
423n/a return XML_TOK_INVALID;
424n/a }
425n/a }
426n/a return XML_TOK_PARTIAL;
427n/a}
428n/a
429n/a/* ptr points to character following "&#X" */
430n/a
/* Scan the digits of a hexadecimal character reference up to BT_SEMI.
   enc        - encoding handed to BYTE_TYPE / MINBPC
   ptr, end   - range of input to scan
   nextTokPtr - out: set on XML_TOK_CHAR_REF and XML_TOK_INVALID
   Returns:
   - XML_TOK_CHAR_REF, *nextTokPtr just past the BT_SEMI, after at least one
     BT_DIGIT/BT_HEX character
   - XML_TOK_INVALID, *nextTokPtr at the first character that is neither
     BT_DIGIT, BT_HEX nor (after the first digit) BT_SEMI
   - XML_TOK_PARTIAL when the input ends first. */
431n/astatic int PTRCALL
432n/aPREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr,
433n/a const char *end, const char **nextTokPtr)
434n/a{
435n/a if (ptr != end) {
   /* at least one digit is required before the terminator */
436n/a switch (BYTE_TYPE(enc, ptr)) {
437n/a case BT_DIGIT:
438n/a case BT_HEX:
439n/a break;
440n/a default:
441n/a *nextTokPtr = ptr;
442n/a return XML_TOK_INVALID;
443n/a }
444n/a for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
445n/a switch (BYTE_TYPE(enc, ptr)) {
446n/a case BT_DIGIT:
447n/a case BT_HEX:
448n/a break;
449n/a case BT_SEMI:
450n/a *nextTokPtr = ptr + MINBPC(enc);
451n/a return XML_TOK_CHAR_REF;
452n/a default:
453n/a *nextTokPtr = ptr;
454n/a return XML_TOK_INVALID;
455n/a }
456n/a }
457n/a }
458n/a return XML_TOK_PARTIAL;
459n/a}
460n/a
461n/a/* ptr points to character following "&#" */
462n/a
/* Scan a numeric character reference up to BT_SEMI.
   enc        - encoding handed to the byte-classification macros
   ptr, end   - range of input to scan
   nextTokPtr - out: set on XML_TOK_CHAR_REF and XML_TOK_INVALID
   If the first character matches ASCII_x the rest is delegated to
   scanHexCharRef.  Otherwise returns:
   - XML_TOK_CHAR_REF, *nextTokPtr just past the BT_SEMI, after at least one
     BT_DIGIT character
   - XML_TOK_INVALID, *nextTokPtr at the first character that is neither
     BT_DIGIT nor (after the first digit) BT_SEMI
   - XML_TOK_PARTIAL when the input ends first. */
463n/astatic int PTRCALL
464n/aPREFIX(scanCharRef)(const ENCODING *enc, const char *ptr,
465n/a const char *end, const char **nextTokPtr)
466n/a{
467n/a if (ptr != end) {
468n/a if (CHAR_MATCHES(enc, ptr, ASCII_x))
469n/a return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
   /* decimal form: at least one digit is required */
470n/a switch (BYTE_TYPE(enc, ptr)) {
471n/a case BT_DIGIT:
472n/a break;
473n/a default:
474n/a *nextTokPtr = ptr;
475n/a return XML_TOK_INVALID;
476n/a }
477n/a for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
478n/a switch (BYTE_TYPE(enc, ptr)) {
479n/a case BT_DIGIT:
480n/a break;
481n/a case BT_SEMI:
482n/a *nextTokPtr = ptr + MINBPC(enc);
483n/a return XML_TOK_CHAR_REF;
484n/a default:
485n/a *nextTokPtr = ptr;
486n/a return XML_TOK_INVALID;
487n/a }
488n/a }
489n/a }
490n/a return XML_TOK_PARTIAL;
491n/a}
492n/a
493n/a/* ptr points to character following "&" */
494n/a
/* Scan a reference: either a named entity reference or, when the first
   character is BT_NUM, a character reference handled by scanCharRef.
   enc        - encoding handed to the byte-classification macros
   ptr, end   - range of input to scan
   nextTokPtr - out: set on XML_TOK_ENTITY_REF and XML_TOK_INVALID
   Returns:
   - XML_TOK_ENTITY_REF, *nextTokPtr just past the BT_SEMI ending the name
   - the result of scanCharRef for a BT_NUM first character
   - XML_TOK_INVALID, *nextTokPtr at the offending character
   - XML_TOK_PARTIAL when the input ends first (the name macros may return
     XML_TOK_PARTIAL_CHAR). */
495n/astatic int PTRCALL
496n/aPREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end,
497n/a const char **nextTokPtr)
498n/a{
499n/a if (ptr == end)
500n/a return XML_TOK_PARTIAL;
501n/a switch (BYTE_TYPE(enc, ptr)) {
502n/a CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
503n/a case BT_NUM:
504n/a return PREFIX(scanCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
505n/a default:
506n/a *nextTokPtr = ptr;
507n/a return XML_TOK_INVALID;
508n/a }
  /* remaining name characters, terminated by BT_SEMI */
509n/a while (ptr != end) {
510n/a switch (BYTE_TYPE(enc, ptr)) {
511n/a CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
512n/a case BT_SEMI:
513n/a *nextTokPtr = ptr + MINBPC(enc);
514n/a return XML_TOK_ENTITY_REF;
515n/a default:
516n/a *nextTokPtr = ptr;
517n/a return XML_TOK_INVALID;
518n/a }
519n/a }
520n/a return XML_TOK_PARTIAL;
521n/a}
522n/a
523n/a/* ptr points to character following first character of attribute name */
524n/a
/* Scan the attribute list of a start tag through to the end of the tag.
   enc        - encoding handed to the byte-classification macros
   ptr, end   - range of input to scan
   nextTokPtr - out: set on XML_TOK_INVALID and on both success tokens
   The outer loop consumes one attribute name; whitespace or BT_EQUALS then
   leads into the value, which must be quoted with BT_QUOT or BT_APOS and
   may contain references (checked by scanRef) but no BT_LT.  After the
   closing quote the tag either ends or, after whitespace, another
   attribute name starts and the outer loop resumes.
   Returns:
   - XML_TOK_START_TAG_WITH_ATTS, *nextTokPtr just past BT_GT
   - XML_TOK_EMPTY_ELEMENT_WITH_ATTS, *nextTokPtr just past the ASCII_GT
     that follows BT_SOL
   - XML_TOK_INVALID, *nextTokPtr at the offending position
   - a non-positive result of scanRef, passed through unchanged
   - XML_TOK_PARTIAL when the input ends first (the character macros may
     return XML_TOK_PARTIAL_CHAR).
   With XML_NS defined, at most one BT_COLON is allowed per attribute name
   and it must be followed by a name-start character. */
525n/astatic int PTRCALL
526n/aPREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
527n/a const char **nextTokPtr)
528n/a{
529n/a#ifdef XML_NS
530n/a int hadColon = 0;
531n/a#endif
532n/a while (ptr != end) {
533n/a switch (BYTE_TYPE(enc, ptr)) {
534n/a CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
535n/a#ifdef XML_NS
536n/a case BT_COLON:
    /* a second colon in the same attribute name is an error */
537n/a if (hadColon) {
538n/a *nextTokPtr = ptr;
539n/a return XML_TOK_INVALID;
540n/a }
541n/a hadColon = 1;
542n/a ptr += MINBPC(enc);
543n/a if (ptr == end)
544n/a return XML_TOK_PARTIAL;
545n/a switch (BYTE_TYPE(enc, ptr)) {
546n/a CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
547n/a default:
548n/a *nextTokPtr = ptr;
549n/a return XML_TOK_INVALID;
550n/a }
551n/a break;
552n/a#endif
553n/a case BT_S: case BT_CR: case BT_LF:
    /* whitespace after the name: skip it, then BT_EQUALS is required */
554n/a for (;;) {
555n/a int t;
556n/a
557n/a ptr += MINBPC(enc);
558n/a if (ptr == end)
559n/a return XML_TOK_PARTIAL;
560n/a t = BYTE_TYPE(enc, ptr);
561n/a if (t == BT_EQUALS)
562n/a break;
563n/a switch (t) {
564n/a case BT_S:
565n/a case BT_LF:
566n/a case BT_CR:
567n/a break;
568n/a default:
569n/a *nextTokPtr = ptr;
570n/a return XML_TOK_INVALID;
571n/a }
572n/a }
573n/a /* fall through */
574n/a case BT_EQUALS:
575n/a {
     /* byte type of the opening quote; the value ends at the same type */
576n/a int open;
577n/a#ifdef XML_NS
578n/a hadColon = 0;
579n/a#endif
     /* skip whitespace between BT_EQUALS and the opening quote */
580n/a for (;;) {
581n/a ptr += MINBPC(enc);
582n/a if (ptr == end)
583n/a return XML_TOK_PARTIAL;
584n/a open = BYTE_TYPE(enc, ptr);
585n/a if (open == BT_QUOT || open == BT_APOS)
586n/a break;
587n/a switch (open) {
588n/a case BT_S:
589n/a case BT_LF:
590n/a case BT_CR:
591n/a break;
592n/a default:
593n/a *nextTokPtr = ptr;
594n/a return XML_TOK_INVALID;
595n/a }
596n/a }
597n/a ptr += MINBPC(enc);
598n/a /* in attribute value */
599n/a for (;;) {
600n/a int t;
601n/a if (ptr == end)
602n/a return XML_TOK_PARTIAL;
603n/a t = BYTE_TYPE(enc, ptr);
604n/a if (t == open)
605n/a break;
606n/a switch (t) {
607n/a INVALID_CASES(ptr, nextTokPtr)
608n/a case BT_AMP:
609n/a {
        /* scanRef writes its next-token position straight into ptr, so a
           successful reference leaves ptr just past it */
610n/a int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr);
611n/a if (tok <= 0) {
612n/a if (tok == XML_TOK_INVALID)
613n/a *nextTokPtr = ptr;
614n/a return tok;
615n/a }
616n/a break;
617n/a }
618n/a case BT_LT:
619n/a *nextTokPtr = ptr;
620n/a return XML_TOK_INVALID;
621n/a default:
622n/a ptr += MINBPC(enc);
623n/a break;
624n/a }
625n/a }
     /* step over the closing quote and look at what follows it */
626n/a ptr += MINBPC(enc);
627n/a if (ptr == end)
628n/a return XML_TOK_PARTIAL;
629n/a switch (BYTE_TYPE(enc, ptr)) {
630n/a case BT_S:
631n/a case BT_CR:
632n/a case BT_LF:
633n/a break;
634n/a case BT_SOL:
635n/a goto sol;
636n/a case BT_GT:
637n/a goto gt;
638n/a default:
639n/a *nextTokPtr = ptr;
640n/a return XML_TOK_INVALID;
641n/a }
642n/a /* ptr points to closing quote */
643n/a for (;;) {
644n/a ptr += MINBPC(enc);
645n/a if (ptr == end)
646n/a return XML_TOK_PARTIAL;
647n/a switch (BYTE_TYPE(enc, ptr)) {
       /* a name-start character begins the next attribute: the macro
          consumes it and the breaks below return to the outer loop */
648n/a CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
649n/a case BT_S: case BT_CR: case BT_LF:
650n/a continue;
651n/a case BT_GT:
652n/a gt:
653n/a *nextTokPtr = ptr + MINBPC(enc);
654n/a return XML_TOK_START_TAG_WITH_ATTS;
655n/a case BT_SOL:
656n/a sol:
657n/a ptr += MINBPC(enc);
658n/a if (ptr == end)
659n/a return XML_TOK_PARTIAL;
660n/a if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
661n/a *nextTokPtr = ptr;
662n/a return XML_TOK_INVALID;
663n/a }
664n/a *nextTokPtr = ptr + MINBPC(enc);
665n/a return XML_TOK_EMPTY_ELEMENT_WITH_ATTS;
666n/a default:
667n/a *nextTokPtr = ptr;
668n/a return XML_TOK_INVALID;
669n/a }
670n/a break;
671n/a }
672n/a break;
673n/a }
674n/a default:
675n/a *nextTokPtr = ptr;
676n/a return XML_TOK_INVALID;
677n/a }
678n/a }
679n/a return XML_TOK_PARTIAL;
680n/a}
681n/a
682n/a/* ptr points to character following "<" */
683n/a
/* Scan markup inside content, dispatching on the first character.
   enc        - encoding handed to the byte-classification macros
   ptr, end   - range of input to scan
   nextTokPtr - out: set on XML_TOK_INVALID and on the tag tokens (the
                delegated scanners set it themselves)
   Dispatch on the first character:
   - BT_EXCL followed by BT_MINUS -> scanComment, by BT_LSQB ->
     scanCdataSection, anything else -> XML_TOK_INVALID
   - BT_QUEST -> scanPi
   - BT_SOL   -> scanEndTag
   - a name-start character -> start tag, scanned here
   For a start tag returns XML_TOK_START_TAG_NO_ATTS (past BT_GT),
   XML_TOK_EMPTY_ELEMENT_NO_ATTS (past the ASCII_GT following BT_SOL), or
   the result of scanAtts once an attribute name begins after whitespace.
   Returns XML_TOK_INVALID with *nextTokPtr at the offending character, and
   XML_TOK_PARTIAL when the input ends first (the name macros may return
   XML_TOK_PARTIAL_CHAR).
   With XML_NS defined, at most one BT_COLON is allowed in the element name
   and it must be followed by a name-start character. */
684n/astatic int PTRCALL
685n/aPREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
686n/a const char **nextTokPtr)
687n/a{
688n/a#ifdef XML_NS
689n/a int hadColon;
690n/a#endif
691n/a if (ptr == end)
692n/a return XML_TOK_PARTIAL;
693n/a switch (BYTE_TYPE(enc, ptr)) {
694n/a CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
695n/a case BT_EXCL:
696n/a if ((ptr += MINBPC(enc)) == end)
697n/a return XML_TOK_PARTIAL;
698n/a switch (BYTE_TYPE(enc, ptr)) {
699n/a case BT_MINUS:
700n/a return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
701n/a case BT_LSQB:
702n/a return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc),
703n/a end, nextTokPtr);
704n/a }
705n/a *nextTokPtr = ptr;
706n/a return XML_TOK_INVALID;
707n/a case BT_QUEST:
708n/a return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
709n/a case BT_SOL:
710n/a return PREFIX(scanEndTag)(enc, ptr + MINBPC(enc), end, nextTokPtr);
711n/a default:
712n/a *nextTokPtr = ptr;
713n/a return XML_TOK_INVALID;
714n/a }
715n/a#ifdef XML_NS
716n/a hadColon = 0;
717n/a#endif
718n/a /* we have a start-tag */
719n/a while (ptr != end) {
720n/a switch (BYTE_TYPE(enc, ptr)) {
721n/a CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
722n/a#ifdef XML_NS
723n/a case BT_COLON:
724n/a if (hadColon) {
725n/a *nextTokPtr = ptr;
726n/a return XML_TOK_INVALID;
727n/a }
728n/a hadColon = 1;
729n/a ptr += MINBPC(enc);
730n/a if (ptr == end)
731n/a return XML_TOK_PARTIAL;
732n/a switch (BYTE_TYPE(enc, ptr)) {
733n/a CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
734n/a default:
735n/a *nextTokPtr = ptr;
736n/a return XML_TOK_INVALID;
737n/a }
738n/a break;
739n/a#endif
740n/a case BT_S: case BT_CR: case BT_LF:
741n/a {
     /* whitespace after the element name: skip it and see what follows */
742n/a ptr += MINBPC(enc);
743n/a while (ptr != end) {
744n/a switch (BYTE_TYPE(enc, ptr)) {
       /* name-start character: the macro consumes it and breaks out of
          the switch, reaching the scanAtts call below */
745n/a CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
746n/a case BT_GT:
747n/a goto gt;
748n/a case BT_SOL:
749n/a goto sol;
750n/a case BT_S: case BT_CR: case BT_LF:
751n/a ptr += MINBPC(enc);
752n/a continue;
753n/a default:
754n/a *nextTokPtr = ptr;
755n/a return XML_TOK_INVALID;
756n/a }
757n/a return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr);
758n/a }
759n/a return XML_TOK_PARTIAL;
760n/a }
761n/a case BT_GT:
762n/a gt:
763n/a *nextTokPtr = ptr + MINBPC(enc);
764n/a return XML_TOK_START_TAG_NO_ATTS;
765n/a case BT_SOL:
766n/a sol:
767n/a ptr += MINBPC(enc);
768n/a if (ptr == end)
769n/a return XML_TOK_PARTIAL;
770n/a if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
771n/a *nextTokPtr = ptr;
772n/a return XML_TOK_INVALID;
773n/a }
774n/a *nextTokPtr = ptr + MINBPC(enc);
775n/a return XML_TOK_EMPTY_ELEMENT_NO_ATTS;
776n/a default:
777n/a *nextTokPtr = ptr;
778n/a return XML_TOK_INVALID;
779n/a }
780n/a }
781n/a return XML_TOK_PARTIAL;
782n/a}
783n/a
/* Return the next token of element content from [ptr, end).
   enc        - encoding handed to the byte-classification macros
   ptr, end   - range of input to scan
   nextTokPtr - out: set on every return made directly here except
                XML_TOK_NONE, XML_TOK_PARTIAL, XML_TOK_TRAILING_CR and
                XML_TOK_TRAILING_RSQB (delegated scanners set it themselves)
   Returns:
   - XML_TOK_NONE when the range is empty
   - XML_TOK_PARTIAL when no whole MINBPC unit is available
   - the result of scanLt for BT_LT, of scanRef for BT_AMP
   - XML_TOK_DATA_NEWLINE for BT_CR, BT_CR + BT_LF, or BT_LF;
     XML_TOK_TRAILING_CR when BT_CR is the last character available
   - XML_TOK_TRAILING_RSQB when the input ends after one or two leading
     ASCII_RSQB
   - XML_TOK_INVALID for two ASCII_RSQB followed by ASCII_GT (*nextTokPtr at
     the ASCII_GT), or via INVALID_CASES for a bad first character
   - XML_TOK_DATA_CHARS otherwise, *nextTokPtr at the first character that
     ends the run of character data, or at end. */
784n/astatic int PTRCALL
785n/aPREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
786n/a const char **nextTokPtr)
787n/a{
788n/a if (ptr == end)
789n/a return XML_TOK_NONE;
  /* For multi-byte units, drop trailing bytes that do not form a whole unit.
     NOTE(review): the mask arithmetic presumes MINBPC(enc) is a power of
     two - confirm against the encoding definitions. */
790n/a if (MINBPC(enc) > 1) {
791n/a size_t n = end - ptr;
792n/a if (n & (MINBPC(enc) - 1)) {
793n/a n &= ~(MINBPC(enc) - 1);
794n/a if (n == 0)
795n/a return XML_TOK_PARTIAL;
796n/a end = ptr + n;
797n/a }
798n/a }
799n/a switch (BYTE_TYPE(enc, ptr)) {
800n/a case BT_LT:
801n/a return PREFIX(scanLt)(enc, ptr + MINBPC(enc), end, nextTokPtr);
802n/a case BT_AMP:
803n/a return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
804n/a case BT_CR:
805n/a ptr += MINBPC(enc);
806n/a if (ptr == end)
807n/a return XML_TOK_TRAILING_CR;
808n/a if (BYTE_TYPE(enc, ptr) == BT_LF)
809n/a ptr += MINBPC(enc);
810n/a *nextTokPtr = ptr;
811n/a return XML_TOK_DATA_NEWLINE;
812n/a case BT_LF:
813n/a *nextTokPtr = ptr + MINBPC(enc);
814n/a return XML_TOK_DATA_NEWLINE;
815n/a case BT_RSQB:
816n/a ptr += MINBPC(enc);
817n/a if (ptr == end)
818n/a return XML_TOK_TRAILING_RSQB;
819n/a if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
820n/a break;
821n/a ptr += MINBPC(enc);
822n/a if (ptr == end)
823n/a return XML_TOK_TRAILING_RSQB;
824n/a if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
    /* harmless pair: step back so only the first BT_RSQB is consumed */
825n/a ptr -= MINBPC(enc);
826n/a break;
827n/a }
828n/a *nextTokPtr = ptr;
829n/a return XML_TOK_INVALID;
830n/a INVALID_CASES(ptr, nextTokPtr)
831n/a default:
832n/a ptr += MINBPC(enc);
833n/a break;
834n/a }
  /* extend the run of character data as far as possible */
835n/a while (ptr != end) {
836n/a switch (BYTE_TYPE(enc, ptr)) {
837n/a#define LEAD_CASE(n) \
838n/a case BT_LEAD ## n: \
839n/a if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
840n/a *nextTokPtr = ptr; \
841n/a return XML_TOK_DATA_CHARS; \
842n/a } \
843n/a ptr += n; \
844n/a break;
845n/a LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
846n/a#undef LEAD_CASE
847n/a case BT_RSQB:
    /* With enough lookahead, a BT_RSQB that does not start the forbidden
       three-character sequence is ordinary data.  Without enough lookahead
       the run stops before it (fall through below). */
848n/a if (ptr + MINBPC(enc) != end) {
849n/a if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) {
850n/a ptr += MINBPC(enc);
851n/a break;
852n/a }
853n/a if (ptr + 2*MINBPC(enc) != end) {
854n/a if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) {
855n/a ptr += MINBPC(enc);
856n/a break;
857n/a }
858n/a *nextTokPtr = ptr + 2*MINBPC(enc);
859n/a return XML_TOK_INVALID;
860n/a }
861n/a }
862n/a /* fall through */
863n/a case BT_AMP:
864n/a case BT_LT:
865n/a case BT_NONXML:
866n/a case BT_MALFORM:
867n/a case BT_TRAIL:
868n/a case BT_CR:
869n/a case BT_LF:
870n/a *nextTokPtr = ptr;
871n/a return XML_TOK_DATA_CHARS;
872n/a default:
873n/a ptr += MINBPC(enc);
874n/a break;
875n/a }
876n/a }
877n/a *nextTokPtr = ptr;
878n/a return XML_TOK_DATA_CHARS;
879n/a}
880n/a
881n/a/* ptr points to character following "%" */
882n/a
/* Scan what follows a percent sign: a parameter-entity reference or a
   stand-alone percent token.
   enc        - encoding handed to the byte-classification macros
   ptr, end   - range of input to scan
   nextTokPtr - out: set on every return except XML_TOK_PARTIAL
   Returns:
   - XML_TOK_PERCENT, *nextTokPtr at ptr, when the first character is
     whitespace or BT_PERCNT
   - XML_TOK_PARAM_ENTITY_REF, *nextTokPtr just past the BT_SEMI ending the
     name
   - XML_TOK_INVALID, *nextTokPtr at the offending character
   - XML_TOK_PARTIAL when the input ends first (the name macros may return
     XML_TOK_PARTIAL_CHAR). */
883n/astatic int PTRCALL
884n/aPREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end,
885n/a const char **nextTokPtr)
886n/a{
887n/a if (ptr == end)
888n/a return XML_TOK_PARTIAL;
889n/a switch (BYTE_TYPE(enc, ptr)) {
890n/a CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
891n/a case BT_S: case BT_LF: case BT_CR: case BT_PERCNT:
892n/a *nextTokPtr = ptr;
893n/a return XML_TOK_PERCENT;
894n/a default:
895n/a *nextTokPtr = ptr;
896n/a return XML_TOK_INVALID;
897n/a }
  /* remaining name characters, terminated by BT_SEMI */
898n/a while (ptr != end) {
899n/a switch (BYTE_TYPE(enc, ptr)) {
900n/a CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
901n/a case BT_SEMI:
902n/a *nextTokPtr = ptr + MINBPC(enc);
903n/a return XML_TOK_PARAM_ENTITY_REF;
904n/a default:
905n/a *nextTokPtr = ptr;
906n/a return XML_TOK_INVALID;
907n/a }
908n/a }
909n/a return XML_TOK_PARTIAL;
910n/a}
911n/a
/* Scan the name that follows a BT_NUM character in the prolog.
   enc        - encoding handed to the byte-classification macros
   ptr, end   - range of input to scan
   nextTokPtr - out: set on XML_TOK_POUND_NAME and XML_TOK_INVALID
   Returns:
   - XML_TOK_POUND_NAME, *nextTokPtr at the delimiter, when the name is
     ended by whitespace, BT_RPAR, BT_GT, BT_PERCNT or BT_VERBAR
   - -XML_TOK_POUND_NAME (negated, *nextTokPtr untouched) when the input
     ends while still inside the name
   - XML_TOK_INVALID, *nextTokPtr at the offending character
   - XML_TOK_PARTIAL when the range is empty (the name macros may return
     XML_TOK_PARTIAL_CHAR). */
912n/astatic int PTRCALL
913n/aPREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end,
914n/a const char **nextTokPtr)
915n/a{
916n/a if (ptr == end)
917n/a return XML_TOK_PARTIAL;
918n/a switch (BYTE_TYPE(enc, ptr)) {
919n/a CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
920n/a default:
921n/a *nextTokPtr = ptr;
922n/a return XML_TOK_INVALID;
923n/a }
924n/a while (ptr != end) {
925n/a switch (BYTE_TYPE(enc, ptr)) {
926n/a CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
927n/a case BT_CR: case BT_LF: case BT_S:
928n/a case BT_RPAR: case BT_GT: case BT_PERCNT: case BT_VERBAR:
929n/a *nextTokPtr = ptr;
930n/a return XML_TOK_POUND_NAME;
931n/a default:
932n/a *nextTokPtr = ptr;
933n/a return XML_TOK_INVALID;
934n/a }
935n/a }
936n/a return -XML_TOK_POUND_NAME;
937n/a}
938n/a
/* Scan a quoted literal up to the matching close quote.
   open       - byte type of the opening quote (callers in this file pass
                BT_QUOT or BT_APOS)
   enc        - encoding handed to the byte-classification macros
   ptr, end   - range of input to scan
   nextTokPtr - out: set once the closing quote has a following character,
                and by INVALID_CASES on a bad character
   Returns:
   - XML_TOK_LITERAL, *nextTokPtr just past the closing quote, when the
     next character is whitespace, BT_GT, BT_PERCNT or BT_LSQB
   - XML_TOK_INVALID, *nextTokPtr just past the closing quote, when any
     other character follows it; or via INVALID_CASES inside the literal
   - -XML_TOK_LITERAL (negated) when the closing quote is the last
     character available
   - XML_TOK_PARTIAL when the input ends before the closing quote. */
939n/astatic int PTRCALL
940n/aPREFIX(scanLit)(int open, const ENCODING *enc,
941n/a const char *ptr, const char *end,
942n/a const char **nextTokPtr)
943n/a{
944n/a while (ptr != end) {
945n/a int t = BYTE_TYPE(enc, ptr);
946n/a switch (t) {
947n/a INVALID_CASES(ptr, nextTokPtr)
948n/a case BT_QUOT:
949n/a case BT_APOS:
950n/a ptr += MINBPC(enc);
    /* the other kind of quote is ordinary content of the literal */
951n/a if (t != open)
952n/a break;
953n/a if (ptr == end)
954n/a return -XML_TOK_LITERAL;
955n/a *nextTokPtr = ptr;
956n/a switch (BYTE_TYPE(enc, ptr)) {
957n/a case BT_S: case BT_CR: case BT_LF:
958n/a case BT_GT: case BT_PERCNT: case BT_LSQB:
959n/a return XML_TOK_LITERAL;
960n/a default:
961n/a return XML_TOK_INVALID;
962n/a }
963n/a default:
964n/a ptr += MINBPC(enc);
965n/a break;
966n/a }
967n/a }
968n/a return XML_TOK_PARTIAL;
969n/a}
970n/a
/* Return the next prolog token from [ptr, end).
   enc        - encoding handed to the byte-classification macros
   ptr, end   - range of input to scan
   nextTokPtr - out: set on most returns made directly here; not set for
                XML_TOK_NONE, XML_TOK_PARTIAL, XML_TOK_PARTIAL_CHAR or the
                negated tokens other than -XML_TOK_PROLOG_S (delegated
                scanners set it themselves)
   The first character selects the token:
   - BT_QUOT / BT_APOS  -> scanLit
   - BT_LT              -> scanDecl (after BT_EXCL), scanPi (after
                           BT_QUEST), or XML_TOK_INSTANCE_START with
                           *nextTokPtr back at the BT_LT when a name-start
                           type follows
   - whitespace         -> XML_TOK_PROLOG_S covering the whole run
   - BT_PERCNT          -> scanPercent
   - BT_NUM             -> scanPoundName
   - punctuation        -> the matching single token (comma, brackets,
                           parentheses with optional occurrence indicator,
                           BT_VERBAR, BT_GT)
   - name / name-token characters -> XML_TOK_NAME or XML_TOK_NMTOKEN, then
                           refined by the second loop
   A negated token is returned when the input ends at a point where more
   input could still extend or change the token.  XML_TOK_INVALID is
   returned with *nextTokPtr at the offending character. */
971n/astatic int PTRCALL
972n/aPREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
973n/a const char **nextTokPtr)
974n/a{
975n/a int tok;
976n/a if (ptr == end)
977n/a return XML_TOK_NONE;
  /* For multi-byte units, drop trailing bytes that do not form a whole unit.
     NOTE(review): the mask arithmetic presumes MINBPC(enc) is a power of
     two - confirm against the encoding definitions. */
978n/a if (MINBPC(enc) > 1) {
979n/a size_t n = end - ptr;
980n/a if (n & (MINBPC(enc) - 1)) {
981n/a n &= ~(MINBPC(enc) - 1);
982n/a if (n == 0)
983n/a return XML_TOK_PARTIAL;
984n/a end = ptr + n;
985n/a }
986n/a }
987n/a switch (BYTE_TYPE(enc, ptr)) {
988n/a case BT_QUOT:
989n/a return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr);
990n/a case BT_APOS:
991n/a return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr);
992n/a case BT_LT:
993n/a {
994n/a ptr += MINBPC(enc);
995n/a if (ptr == end)
996n/a return XML_TOK_PARTIAL;
997n/a switch (BYTE_TYPE(enc, ptr)) {
998n/a case BT_EXCL:
999n/a return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1000n/a case BT_QUEST:
1001n/a return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1002n/a case BT_NMSTRT:
1003n/a case BT_HEX:
1004n/a case BT_NONASCII:
1005n/a case BT_LEAD2:
1006n/a case BT_LEAD3:
1007n/a case BT_LEAD4:
      /* report the position of the BT_LT itself, not the character after */
1008n/a *nextTokPtr = ptr - MINBPC(enc);
1009n/a return XML_TOK_INSTANCE_START;
1010n/a }
1011n/a *nextTokPtr = ptr;
1012n/a return XML_TOK_INVALID;
1013n/a }
1014n/a case BT_CR:
1015n/a if (ptr + MINBPC(enc) == end) {
1016n/a *nextTokPtr = end;
1017n/a /* indicate that this might be part of a CR/LF pair */
1018n/a return -XML_TOK_PROLOG_S;
1019n/a }
1020n/a /* fall through */
1021n/a case BT_S: case BT_LF:
    /* consume the whole run of whitespace */
1022n/a for (;;) {
1023n/a ptr += MINBPC(enc);
1024n/a if (ptr == end)
1025n/a break;
1026n/a switch (BYTE_TYPE(enc, ptr)) {
1027n/a case BT_S: case BT_LF:
1028n/a break;
1029n/a case BT_CR:
1030n/a /* don't split CR/LF pair */
1031n/a if (ptr + MINBPC(enc) != end)
1032n/a break;
1033n/a /* fall through */
1034n/a default:
1035n/a *nextTokPtr = ptr;
1036n/a return XML_TOK_PROLOG_S;
1037n/a }
1038n/a }
1039n/a *nextTokPtr = ptr;
1040n/a return XML_TOK_PROLOG_S;
1041n/a case BT_PERCNT:
1042n/a return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1043n/a case BT_COMMA:
1044n/a *nextTokPtr = ptr + MINBPC(enc);
1045n/a return XML_TOK_COMMA;
1046n/a case BT_LSQB:
1047n/a *nextTokPtr = ptr + MINBPC(enc);
1048n/a return XML_TOK_OPEN_BRACKET;
1049n/a case BT_RSQB:
    /* a lone BT_RSQB is XML_TOK_CLOSE_BRACKET; two ASCII_RSQB followed by
       ASCII_GT form XML_TOK_COND_SECT_CLOSE */
1050n/a ptr += MINBPC(enc);
1051n/a if (ptr == end)
1052n/a return -XML_TOK_CLOSE_BRACKET;
1053n/a if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
1054n/a if (ptr + MINBPC(enc) == end)
1055n/a return XML_TOK_PARTIAL;
1056n/a if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) {
1057n/a *nextTokPtr = ptr + 2*MINBPC(enc);
1058n/a return XML_TOK_COND_SECT_CLOSE;
1059n/a }
1060n/a }
1061n/a *nextTokPtr = ptr;
1062n/a return XML_TOK_CLOSE_BRACKET;
1063n/a case BT_LPAR:
1064n/a *nextTokPtr = ptr + MINBPC(enc);
1065n/a return XML_TOK_OPEN_PAREN;
1066n/a case BT_RPAR:
    /* the character after BT_RPAR may be an occurrence indicator */
1067n/a ptr += MINBPC(enc);
1068n/a if (ptr == end)
1069n/a return -XML_TOK_CLOSE_PAREN;
1070n/a switch (BYTE_TYPE(enc, ptr)) {
1071n/a case BT_AST:
1072n/a *nextTokPtr = ptr + MINBPC(enc);
1073n/a return XML_TOK_CLOSE_PAREN_ASTERISK;
1074n/a case BT_QUEST:
1075n/a *nextTokPtr = ptr + MINBPC(enc);
1076n/a return XML_TOK_CLOSE_PAREN_QUESTION;
1077n/a case BT_PLUS:
1078n/a *nextTokPtr = ptr + MINBPC(enc);
1079n/a return XML_TOK_CLOSE_PAREN_PLUS;
1080n/a case BT_CR: case BT_LF: case BT_S:
1081n/a case BT_GT: case BT_COMMA: case BT_VERBAR:
1082n/a case BT_RPAR:
1083n/a *nextTokPtr = ptr;
1084n/a return XML_TOK_CLOSE_PAREN;
1085n/a }
1086n/a *nextTokPtr = ptr;
1087n/a return XML_TOK_INVALID;
1088n/a case BT_VERBAR:
1089n/a *nextTokPtr = ptr + MINBPC(enc);
1090n/a return XML_TOK_OR;
1091n/a case BT_GT:
1092n/a *nextTokPtr = ptr + MINBPC(enc);
1093n/a return XML_TOK_DECL_CLOSE;
1094n/a case BT_NUM:
1095n/a return PREFIX(scanPoundName)(enc, ptr + MINBPC(enc), end, nextTokPtr);
  /* multi-byte first character: a name-start character begins a
     XML_TOK_NAME, any other name character begins a XML_TOK_NMTOKEN */
1096n/a#define LEAD_CASE(n) \
1097n/a case BT_LEAD ## n: \
1098n/a if (end - ptr < n) \
1099n/a return XML_TOK_PARTIAL_CHAR; \
1100n/a if (IS_NMSTRT_CHAR(enc, ptr, n)) { \
1101n/a ptr += n; \
1102n/a tok = XML_TOK_NAME; \
1103n/a break; \
1104n/a } \
1105n/a if (IS_NAME_CHAR(enc, ptr, n)) { \
1106n/a ptr += n; \
1107n/a tok = XML_TOK_NMTOKEN; \
1108n/a break; \
1109n/a } \
1110n/a *nextTokPtr = ptr; \
1111n/a return XML_TOK_INVALID;
1112n/a LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1113n/a#undef LEAD_CASE
1114n/a case BT_NMSTRT:
1115n/a case BT_HEX:
1116n/a tok = XML_TOK_NAME;
1117n/a ptr += MINBPC(enc);
1118n/a break;
1119n/a case BT_DIGIT:
1120n/a case BT_NAME:
1121n/a case BT_MINUS:
1122n/a#ifdef XML_NS
1123n/a case BT_COLON:
1124n/a#endif
1125n/a tok = XML_TOK_NMTOKEN;
1126n/a ptr += MINBPC(enc);
1127n/a break;
1128n/a case BT_NONASCII:
1129n/a if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) {
1130n/a ptr += MINBPC(enc);
1131n/a tok = XML_TOK_NAME;
1132n/a break;
1133n/a }
1134n/a if (IS_NAME_CHAR_MINBPC(enc, ptr)) {
1135n/a ptr += MINBPC(enc);
1136n/a tok = XML_TOK_NMTOKEN;
1137n/a break;
1138n/a }
1139n/a /* fall through */
1140n/a default:
1141n/a *nextTokPtr = ptr;
1142n/a return XML_TOK_INVALID;
1143n/a }
  /* rest of a name or name token; tok holds the classification so far */
1144n/a while (ptr != end) {
1145n/a switch (BYTE_TYPE(enc, ptr)) {
1146n/a CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
1147n/a case BT_GT: case BT_RPAR: case BT_COMMA:
1148n/a case BT_VERBAR: case BT_LSQB: case BT_PERCNT:
1149n/a case BT_S: case BT_CR: case BT_LF:
1150n/a *nextTokPtr = ptr;
1151n/a return tok;
1152n/a#ifdef XML_NS
1153n/a case BT_COLON:
    /* first colon in a name: it becomes XML_TOK_PREFIXED_NAME when a name
       character follows, otherwise XML_TOK_NMTOKEN; a colon inside a
       prefixed name turns it into XML_TOK_NMTOKEN */
1154n/a ptr += MINBPC(enc);
1155n/a switch (tok) {
1156n/a case XML_TOK_NAME:
1157n/a if (ptr == end)
1158n/a return XML_TOK_PARTIAL;
1159n/a tok = XML_TOK_PREFIXED_NAME;
1160n/a switch (BYTE_TYPE(enc, ptr)) {
1161n/a CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
1162n/a default:
1163n/a tok = XML_TOK_NMTOKEN;
1164n/a break;
1165n/a }
1166n/a break;
1167n/a case XML_TOK_PREFIXED_NAME:
1168n/a tok = XML_TOK_NMTOKEN;
1169n/a break;
1170n/a }
1171n/a break;
1172n/a#endif
    /* an occurrence indicator after XML_TOK_NMTOKEN is rejected */
1173n/a case BT_PLUS:
1174n/a if (tok == XML_TOK_NMTOKEN) {
1175n/a *nextTokPtr = ptr;
1176n/a return XML_TOK_INVALID;
1177n/a }
1178n/a *nextTokPtr = ptr + MINBPC(enc);
1179n/a return XML_TOK_NAME_PLUS;
1180n/a case BT_AST:
1181n/a if (tok == XML_TOK_NMTOKEN) {
1182n/a *nextTokPtr = ptr;
1183n/a return XML_TOK_INVALID;
1184n/a }
1185n/a *nextTokPtr = ptr + MINBPC(enc);
1186n/a return XML_TOK_NAME_ASTERISK;
1187n/a case BT_QUEST:
1188n/a if (tok == XML_TOK_NMTOKEN) {
1189n/a *nextTokPtr = ptr;
1190n/a return XML_TOK_INVALID;
1191n/a }
1192n/a *nextTokPtr = ptr + MINBPC(enc);
1193n/a return XML_TOK_NAME_QUESTION;
1194n/a default:
1195n/a *nextTokPtr = ptr;
1196n/a return XML_TOK_INVALID;
1197n/a }
1198n/a }
  /* input ended inside the name: return the token negated */
1199n/a return -tok;
1200n/a}
1201n/a
1202n/astatic int PTRCALL
1203n/aPREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr,
1204n/a const char *end, const char **nextTokPtr)
1205n/a{
1206n/a const char *start;
1207n/a if (ptr == end)
1208n/a return XML_TOK_NONE;
1209n/a start = ptr;
1210n/a while (ptr != end) {
1211n/a switch (BYTE_TYPE(enc, ptr)) {
1212n/a#define LEAD_CASE(n) \
1213n/a case BT_LEAD ## n: ptr += n; break;
1214n/a LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1215n/a#undef LEAD_CASE
1216n/a case BT_AMP:
1217n/a if (ptr == start)
1218n/a return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1219n/a *nextTokPtr = ptr;
1220n/a return XML_TOK_DATA_CHARS;
1221n/a case BT_LT:
1222n/a /* this is for inside entity references */
1223n/a *nextTokPtr = ptr;
1224n/a return XML_TOK_INVALID;
1225n/a case BT_LF:
1226n/a if (ptr == start) {
1227n/a *nextTokPtr = ptr + MINBPC(enc);
1228n/a return XML_TOK_DATA_NEWLINE;
1229n/a }
1230n/a *nextTokPtr = ptr;
1231n/a return XML_TOK_DATA_CHARS;
1232n/a case BT_CR:
1233n/a if (ptr == start) {
1234n/a ptr += MINBPC(enc);
1235n/a if (ptr == end)
1236n/a return XML_TOK_TRAILING_CR;
1237n/a if (BYTE_TYPE(enc, ptr) == BT_LF)
1238n/a ptr += MINBPC(enc);
1239n/a *nextTokPtr = ptr;
1240n/a return XML_TOK_DATA_NEWLINE;
1241n/a }
1242n/a *nextTokPtr = ptr;
1243n/a return XML_TOK_DATA_CHARS;
1244n/a case BT_S:
1245n/a if (ptr == start) {
1246n/a *nextTokPtr = ptr + MINBPC(enc);
1247n/a return XML_TOK_ATTRIBUTE_VALUE_S;
1248n/a }
1249n/a *nextTokPtr = ptr;
1250n/a return XML_TOK_DATA_CHARS;
1251n/a default:
1252n/a ptr += MINBPC(enc);
1253n/a break;
1254n/a }
1255n/a }
1256n/a *nextTokPtr = ptr;
1257n/a return XML_TOK_DATA_CHARS;
1258n/a}
1259n/a
1260n/astatic int PTRCALL
1261n/aPREFIX(entityValueTok)(const ENCODING *enc, const char *ptr,
1262n/a const char *end, const char **nextTokPtr)
1263n/a{
1264n/a const char *start;
1265n/a if (ptr == end)
1266n/a return XML_TOK_NONE;
1267n/a start = ptr;
1268n/a while (ptr != end) {
1269n/a switch (BYTE_TYPE(enc, ptr)) {
1270n/a#define LEAD_CASE(n) \
1271n/a case BT_LEAD ## n: ptr += n; break;
1272n/a LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1273n/a#undef LEAD_CASE
1274n/a case BT_AMP:
1275n/a if (ptr == start)
1276n/a return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1277n/a *nextTokPtr = ptr;
1278n/a return XML_TOK_DATA_CHARS;
1279n/a case BT_PERCNT:
1280n/a if (ptr == start) {
1281n/a int tok = PREFIX(scanPercent)(enc, ptr + MINBPC(enc),
1282n/a end, nextTokPtr);
1283n/a return (tok == XML_TOK_PERCENT) ? XML_TOK_INVALID : tok;
1284n/a }
1285n/a *nextTokPtr = ptr;
1286n/a return XML_TOK_DATA_CHARS;
1287n/a case BT_LF:
1288n/a if (ptr == start) {
1289n/a *nextTokPtr = ptr + MINBPC(enc);
1290n/a return XML_TOK_DATA_NEWLINE;
1291n/a }
1292n/a *nextTokPtr = ptr;
1293n/a return XML_TOK_DATA_CHARS;
1294n/a case BT_CR:
1295n/a if (ptr == start) {
1296n/a ptr += MINBPC(enc);
1297n/a if (ptr == end)
1298n/a return XML_TOK_TRAILING_CR;
1299n/a if (BYTE_TYPE(enc, ptr) == BT_LF)
1300n/a ptr += MINBPC(enc);
1301n/a *nextTokPtr = ptr;
1302n/a return XML_TOK_DATA_NEWLINE;
1303n/a }
1304n/a *nextTokPtr = ptr;
1305n/a return XML_TOK_DATA_CHARS;
1306n/a default:
1307n/a ptr += MINBPC(enc);
1308n/a break;
1309n/a }
1310n/a }
1311n/a *nextTokPtr = ptr;
1312n/a return XML_TOK_DATA_CHARS;
1313n/a}
1314n/a
1315n/a#ifdef XML_DTD
1316n/a
1317n/astatic int PTRCALL
1318n/aPREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr,
1319n/a const char *end, const char **nextTokPtr)
1320n/a{
1321n/a int level = 0;
1322n/a if (MINBPC(enc) > 1) {
1323n/a size_t n = end - ptr;
1324n/a if (n & (MINBPC(enc) - 1)) {
1325n/a n &= ~(MINBPC(enc) - 1);
1326n/a end = ptr + n;
1327n/a }
1328n/a }
1329n/a while (ptr != end) {
1330n/a switch (BYTE_TYPE(enc, ptr)) {
1331n/a INVALID_CASES(ptr, nextTokPtr)
1332n/a case BT_LT:
1333n/a if ((ptr += MINBPC(enc)) == end)
1334n/a return XML_TOK_PARTIAL;
1335n/a if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) {
1336n/a if ((ptr += MINBPC(enc)) == end)
1337n/a return XML_TOK_PARTIAL;
1338n/a if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) {
1339n/a ++level;
1340n/a ptr += MINBPC(enc);
1341n/a }
1342n/a }
1343n/a break;
1344n/a case BT_RSQB:
1345n/a if ((ptr += MINBPC(enc)) == end)
1346n/a return XML_TOK_PARTIAL;
1347n/a if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
1348n/a if ((ptr += MINBPC(enc)) == end)
1349n/a return XML_TOK_PARTIAL;
1350n/a if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
1351n/a ptr += MINBPC(enc);
1352n/a if (level == 0) {
1353n/a *nextTokPtr = ptr;
1354n/a return XML_TOK_IGNORE_SECT;
1355n/a }
1356n/a --level;
1357n/a }
1358n/a }
1359n/a break;
1360n/a default:
1361n/a ptr += MINBPC(enc);
1362n/a break;
1363n/a }
1364n/a }
1365n/a return XML_TOK_PARTIAL;
1366n/a}
1367n/a
1368n/a#endif /* XML_DTD */
1369n/a
1370n/astatic int PTRCALL
1371n/aPREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
1372n/a const char **badPtr)
1373n/a{
1374n/a ptr += MINBPC(enc);
1375n/a end -= MINBPC(enc);
1376n/a for (; ptr != end; ptr += MINBPC(enc)) {
1377n/a switch (BYTE_TYPE(enc, ptr)) {
1378n/a case BT_DIGIT:
1379n/a case BT_HEX:
1380n/a case BT_MINUS:
1381n/a case BT_APOS:
1382n/a case BT_LPAR:
1383n/a case BT_RPAR:
1384n/a case BT_PLUS:
1385n/a case BT_COMMA:
1386n/a case BT_SOL:
1387n/a case BT_EQUALS:
1388n/a case BT_QUEST:
1389n/a case BT_CR:
1390n/a case BT_LF:
1391n/a case BT_SEMI:
1392n/a case BT_EXCL:
1393n/a case BT_AST:
1394n/a case BT_PERCNT:
1395n/a case BT_NUM:
1396n/a#ifdef XML_NS
1397n/a case BT_COLON:
1398n/a#endif
1399n/a break;
1400n/a case BT_S:
1401n/a if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) {
1402n/a *badPtr = ptr;
1403n/a return 0;
1404n/a }
1405n/a break;
1406n/a case BT_NAME:
1407n/a case BT_NMSTRT:
1408n/a if (!(BYTE_TO_ASCII(enc, ptr) & ~0x7f))
1409n/a break;
1410n/a default:
1411n/a switch (BYTE_TO_ASCII(enc, ptr)) {
1412n/a case 0x24: /* $ */
1413n/a case 0x40: /* @ */
1414n/a break;
1415n/a default:
1416n/a *badPtr = ptr;
1417n/a return 0;
1418n/a }
1419n/a break;
1420n/a }
1421n/a }
1422n/a return 1;
1423n/a}
1424n/a
1425n/a/* This must only be called for a well-formed start-tag or empty
1426n/a element tag. Returns the number of attributes. Pointers to the
1427n/a first attsMax attributes are stored in atts.
1428n/a*/
1429n/a
1430n/astatic int PTRCALL
1431n/aPREFIX(getAtts)(const ENCODING *enc, const char *ptr,
1432n/a int attsMax, ATTRIBUTE *atts)
1433n/a{
1434n/a enum { other, inName, inValue } state = inName;
1435n/a int nAtts = 0;
1436n/a int open = 0; /* defined when state == inValue;
1437n/a initialization just to shut up compilers */
1438n/a
1439n/a for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) {
1440n/a switch (BYTE_TYPE(enc, ptr)) {
1441n/a#define START_NAME \
1442n/a if (state == other) { \
1443n/a if (nAtts < attsMax) { \
1444n/a atts[nAtts].name = ptr; \
1445n/a atts[nAtts].normalized = 1; \
1446n/a } \
1447n/a state = inName; \
1448n/a }
1449n/a#define LEAD_CASE(n) \
1450n/a case BT_LEAD ## n: START_NAME ptr += (n - MINBPC(enc)); break;
1451n/a LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1452n/a#undef LEAD_CASE
1453n/a case BT_NONASCII:
1454n/a case BT_NMSTRT:
1455n/a case BT_HEX:
1456n/a START_NAME
1457n/a break;
1458n/a#undef START_NAME
1459n/a case BT_QUOT:
1460n/a if (state != inValue) {
1461n/a if (nAtts < attsMax)
1462n/a atts[nAtts].valuePtr = ptr + MINBPC(enc);
1463n/a state = inValue;
1464n/a open = BT_QUOT;
1465n/a }
1466n/a else if (open == BT_QUOT) {
1467n/a state = other;
1468n/a if (nAtts < attsMax)
1469n/a atts[nAtts].valueEnd = ptr;
1470n/a nAtts++;
1471n/a }
1472n/a break;
1473n/a case BT_APOS:
1474n/a if (state != inValue) {
1475n/a if (nAtts < attsMax)
1476n/a atts[nAtts].valuePtr = ptr + MINBPC(enc);
1477n/a state = inValue;
1478n/a open = BT_APOS;
1479n/a }
1480n/a else if (open == BT_APOS) {
1481n/a state = other;
1482n/a if (nAtts < attsMax)
1483n/a atts[nAtts].valueEnd = ptr;
1484n/a nAtts++;
1485n/a }
1486n/a break;
1487n/a case BT_AMP:
1488n/a if (nAtts < attsMax)
1489n/a atts[nAtts].normalized = 0;
1490n/a break;
1491n/a case BT_S:
1492n/a if (state == inName)
1493n/a state = other;
1494n/a else if (state == inValue
1495n/a && nAtts < attsMax
1496n/a && atts[nAtts].normalized
1497n/a && (ptr == atts[nAtts].valuePtr
1498n/a || BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE
1499n/a || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE
1500n/a || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open))
1501n/a atts[nAtts].normalized = 0;
1502n/a break;
1503n/a case BT_CR: case BT_LF:
1504n/a /* This case ensures that the first attribute name is counted
1505n/a Apart from that we could just change state on the quote. */
1506n/a if (state == inName)
1507n/a state = other;
1508n/a else if (state == inValue && nAtts < attsMax)
1509n/a atts[nAtts].normalized = 0;
1510n/a break;
1511n/a case BT_GT:
1512n/a case BT_SOL:
1513n/a if (state != inValue)
1514n/a return nAtts;
1515n/a break;
1516n/a default:
1517n/a break;
1518n/a }
1519n/a }
1520n/a /* not reached */
1521n/a}
1522n/a
1523n/astatic int PTRFASTCALL
1524n/aPREFIX(charRefNumber)(const ENCODING *enc, const char *ptr)
1525n/a{
1526n/a int result = 0;
1527n/a /* skip &# */
1528n/a ptr += 2*MINBPC(enc);
1529n/a if (CHAR_MATCHES(enc, ptr, ASCII_x)) {
1530n/a for (ptr += MINBPC(enc);
1531n/a !CHAR_MATCHES(enc, ptr, ASCII_SEMI);
1532n/a ptr += MINBPC(enc)) {
1533n/a int c = BYTE_TO_ASCII(enc, ptr);
1534n/a switch (c) {
1535n/a case ASCII_0: case ASCII_1: case ASCII_2: case ASCII_3: case ASCII_4:
1536n/a case ASCII_5: case ASCII_6: case ASCII_7: case ASCII_8: case ASCII_9:
1537n/a result <<= 4;
1538n/a result |= (c - ASCII_0);
1539n/a break;
1540n/a case ASCII_A: case ASCII_B: case ASCII_C:
1541n/a case ASCII_D: case ASCII_E: case ASCII_F:
1542n/a result <<= 4;
1543n/a result += 10 + (c - ASCII_A);
1544n/a break;
1545n/a case ASCII_a: case ASCII_b: case ASCII_c:
1546n/a case ASCII_d: case ASCII_e: case ASCII_f:
1547n/a result <<= 4;
1548n/a result += 10 + (c - ASCII_a);
1549n/a break;
1550n/a }
1551n/a if (result >= 0x110000)
1552n/a return -1;
1553n/a }
1554n/a }
1555n/a else {
1556n/a for (; !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) {
1557n/a int c = BYTE_TO_ASCII(enc, ptr);
1558n/a result *= 10;
1559n/a result += (c - ASCII_0);
1560n/a if (result >= 0x110000)
1561n/a return -1;
1562n/a }
1563n/a }
1564n/a return checkCharRefNumber(result);
1565n/a}
1566n/a
1567n/astatic int PTRCALL
1568n/aPREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr,
1569n/a const char *end)
1570n/a{
1571n/a switch ((end - ptr)/MINBPC(enc)) {
1572n/a case 2:
1573n/a if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) {
1574n/a switch (BYTE_TO_ASCII(enc, ptr)) {
1575n/a case ASCII_l:
1576n/a return ASCII_LT;
1577n/a case ASCII_g:
1578n/a return ASCII_GT;
1579n/a }
1580n/a }
1581n/a break;
1582n/a case 3:
1583n/a if (CHAR_MATCHES(enc, ptr, ASCII_a)) {
1584n/a ptr += MINBPC(enc);
1585n/a if (CHAR_MATCHES(enc, ptr, ASCII_m)) {
1586n/a ptr += MINBPC(enc);
1587n/a if (CHAR_MATCHES(enc, ptr, ASCII_p))
1588n/a return ASCII_AMP;
1589n/a }
1590n/a }
1591n/a break;
1592n/a case 4:
1593n/a switch (BYTE_TO_ASCII(enc, ptr)) {
1594n/a case ASCII_q:
1595n/a ptr += MINBPC(enc);
1596n/a if (CHAR_MATCHES(enc, ptr, ASCII_u)) {
1597n/a ptr += MINBPC(enc);
1598n/a if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
1599n/a ptr += MINBPC(enc);
1600n/a if (CHAR_MATCHES(enc, ptr, ASCII_t))
1601n/a return ASCII_QUOT;
1602n/a }
1603n/a }
1604n/a break;
1605n/a case ASCII_a:
1606n/a ptr += MINBPC(enc);
1607n/a if (CHAR_MATCHES(enc, ptr, ASCII_p)) {
1608n/a ptr += MINBPC(enc);
1609n/a if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
1610n/a ptr += MINBPC(enc);
1611n/a if (CHAR_MATCHES(enc, ptr, ASCII_s))
1612n/a return ASCII_APOS;
1613n/a }
1614n/a }
1615n/a break;
1616n/a }
1617n/a }
1618n/a return 0;
1619n/a}
1620n/a
1621n/astatic int PTRCALL
1622n/aPREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2)
1623n/a{
1624n/a for (;;) {
1625n/a switch (BYTE_TYPE(enc, ptr1)) {
1626n/a#define LEAD_CASE(n) \
1627n/a case BT_LEAD ## n: \
1628n/a if (*ptr1++ != *ptr2++) \
1629n/a return 0;
1630n/a LEAD_CASE(4) LEAD_CASE(3) LEAD_CASE(2)
1631n/a#undef LEAD_CASE
1632n/a /* fall through */
1633n/a if (*ptr1++ != *ptr2++)
1634n/a return 0;
1635n/a break;
1636n/a case BT_NONASCII:
1637n/a case BT_NMSTRT:
1638n/a#ifdef XML_NS
1639n/a case BT_COLON:
1640n/a#endif
1641n/a case BT_HEX:
1642n/a case BT_DIGIT:
1643n/a case BT_NAME:
1644n/a case BT_MINUS:
1645n/a if (*ptr2++ != *ptr1++)
1646n/a return 0;
1647n/a if (MINBPC(enc) > 1) {
1648n/a if (*ptr2++ != *ptr1++)
1649n/a return 0;
1650n/a if (MINBPC(enc) > 2) {
1651n/a if (*ptr2++ != *ptr1++)
1652n/a return 0;
1653n/a if (MINBPC(enc) > 3) {
1654n/a if (*ptr2++ != *ptr1++)
1655n/a return 0;
1656n/a }
1657n/a }
1658n/a }
1659n/a break;
1660n/a default:
1661n/a if (MINBPC(enc) == 1 && *ptr1 == *ptr2)
1662n/a return 1;
1663n/a switch (BYTE_TYPE(enc, ptr2)) {
1664n/a case BT_LEAD2:
1665n/a case BT_LEAD3:
1666n/a case BT_LEAD4:
1667n/a case BT_NONASCII:
1668n/a case BT_NMSTRT:
1669n/a#ifdef XML_NS
1670n/a case BT_COLON:
1671n/a#endif
1672n/a case BT_HEX:
1673n/a case BT_DIGIT:
1674n/a case BT_NAME:
1675n/a case BT_MINUS:
1676n/a return 0;
1677n/a default:
1678n/a return 1;
1679n/a }
1680n/a }
1681n/a }
1682n/a /* not reached */
1683n/a}
1684n/a
1685n/astatic int PTRCALL
1686n/aPREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1,
1687n/a const char *end1, const char *ptr2)
1688n/a{
1689n/a for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) {
1690n/a if (ptr1 == end1)
1691n/a return 0;
1692n/a if (!CHAR_MATCHES(enc, ptr1, *ptr2))
1693n/a return 0;
1694n/a }
1695n/a return ptr1 == end1;
1696n/a}
1697n/a
1698n/astatic int PTRFASTCALL
1699n/aPREFIX(nameLength)(const ENCODING *enc, const char *ptr)
1700n/a{
1701n/a const char *start = ptr;
1702n/a for (;;) {
1703n/a switch (BYTE_TYPE(enc, ptr)) {
1704n/a#define LEAD_CASE(n) \
1705n/a case BT_LEAD ## n: ptr += n; break;
1706n/a LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1707n/a#undef LEAD_CASE
1708n/a case BT_NONASCII:
1709n/a case BT_NMSTRT:
1710n/a#ifdef XML_NS
1711n/a case BT_COLON:
1712n/a#endif
1713n/a case BT_HEX:
1714n/a case BT_DIGIT:
1715n/a case BT_NAME:
1716n/a case BT_MINUS:
1717n/a ptr += MINBPC(enc);
1718n/a break;
1719n/a default:
1720n/a return (int)(ptr - start);
1721n/a }
1722n/a }
1723n/a}
1724n/a
1725n/astatic const char * PTRFASTCALL
1726n/aPREFIX(skipS)(const ENCODING *enc, const char *ptr)
1727n/a{
1728n/a for (;;) {
1729n/a switch (BYTE_TYPE(enc, ptr)) {
1730n/a case BT_LF:
1731n/a case BT_CR:
1732n/a case BT_S:
1733n/a ptr += MINBPC(enc);
1734n/a break;
1735n/a default:
1736n/a return ptr;
1737n/a }
1738n/a }
1739n/a}
1740n/a
1741n/astatic void PTRCALL
1742n/aPREFIX(updatePosition)(const ENCODING *enc,
1743n/a const char *ptr,
1744n/a const char *end,
1745n/a POSITION *pos)
1746n/a{
1747n/a while (ptr < end) {
1748n/a switch (BYTE_TYPE(enc, ptr)) {
1749n/a#define LEAD_CASE(n) \
1750n/a case BT_LEAD ## n: \
1751n/a ptr += n; \
1752n/a break;
1753n/a LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1754n/a#undef LEAD_CASE
1755n/a case BT_LF:
1756n/a pos->columnNumber = (XML_Size)-1;
1757n/a pos->lineNumber++;
1758n/a ptr += MINBPC(enc);
1759n/a break;
1760n/a case BT_CR:
1761n/a pos->lineNumber++;
1762n/a ptr += MINBPC(enc);
1763n/a if (ptr != end && BYTE_TYPE(enc, ptr) == BT_LF)
1764n/a ptr += MINBPC(enc);
1765n/a pos->columnNumber = (XML_Size)-1;
1766n/a break;
1767n/a default:
1768n/a ptr += MINBPC(enc);
1769n/a break;
1770n/a }
1771n/a pos->columnNumber++;
1772n/a }
1773n/a}
1774n/a
1775n/a#undef DO_LEAD_CASE
1776n/a#undef MULTIBYTE_CASES
1777n/a#undef INVALID_CASES
1778n/a#undef CHECK_NAME_CASE
1779n/a#undef CHECK_NAME_CASES
1780n/a#undef CHECK_NMSTRT_CASE
1781n/a#undef CHECK_NMSTRT_CASES
1782n/a
1783n/a#endif /* XML_TOK_IMPL_C */