| 1 | 1 | import pprint |
|---|
| 2 | 1 | import re |
|---|
| 3 | 1 | import unittest |
|---|
| 4 | 1 | from test import test_support |
|---|
| 5 | 1 | sgmllib = test_support.import_module('sgmllib', deprecated=True) |
|---|
| 6 | n/a | |
|---|
| 7 | n/a | |
|---|
class EventCollector(sgmllib.SGMLParser):
    """SGMLParser subclass that records each parser callback as a tuple.

    Every handler appends a tuple to ``self.events``; the first item of the
    tuple names the kind of event ("starttag", "endtag", "data", ...).
    """

    def __init__(self):
        # The event list and its bound ``append`` are set up before the
        # base-class initializer runs.
        self.events = []
        self.append = self.events.append
        sgmllib.SGMLParser.__init__(self)

    def get_events(self):
        """Return the recorded events with adjacent "data" events merged.

        Normalizing the list this way keeps buffer artefacts from splitting
        a run of contiguous characters into several events.  The merged
        list is also stored back on ``self.events``.
        """
        merged = []
        for event in self.events:
            kind = event[0]
            # merged[-1] always carries the kind of the previously seen
            # event, so no separate "previous kind" variable is needed.
            if kind == "data" and merged and merged[-1][0] == "data":
                merged[-1] = ("data", merged[-1][1] + event[1])
            else:
                merged.append(event)
        self.events = merged
        return merged

    # structure markup

    def unknown_starttag(self, tag, attrs):
        """Record a start tag together with its attribute list."""
        self.append(("starttag", tag, attrs))

    def unknown_endtag(self, tag):
        """Record an end tag."""
        self.append(("endtag", tag))

    # all other markup

    def handle_comment(self, data):
        """Record a comment."""
        self.append(("comment", data))

    def handle_charref(self, data):
        """Record a character reference."""
        self.append(("charref", data))

    def handle_data(self, data):
        """Record a chunk of character data."""
        self.append(("data", data))

    def handle_decl(self, decl):
        """Record a declaration."""
        self.append(("decl", decl))

    def handle_entityref(self, data):
        """Record an entity reference."""
        self.append(("entityref", data))

    def handle_pi(self, data):
        """Record a processing instruction."""
        self.append(("pi", data))

    def unknown_decl(self, decl):
        """Record a declaration reported through unknown_decl()."""
        self.append(("unknown decl", decl))
|---|
| 60 | n/a | |
|---|
| 61 | n/a | |
|---|
class CDATAEventCollector(EventCollector):
    """EventCollector with a dedicated handler for <cdata> start tags."""

    def start_cdata(self, attrs):
        """Record the <cdata> start tag, then call ``setliteral()``."""
        event = ("starttag", "cdata", attrs)
        self.append(event)
        self.setliteral()
|---|
| 66 | n/a | |
|---|
| 67 | n/a | |
|---|
class HTMLEntityCollector(EventCollector):
    """EventCollector that also records calls to the conversion hooks."""

    entity_or_charref = re.compile(
        '(?:&([a-zA-Z][-.a-zA-Z0-9]*)'
        '|&#(x[0-9a-zA-Z]+|[0-9]+))(;?)'
    )

    def convert_charref(self, name):
        """Record the call; delegate to the base class unless *name*
        starts with "x", in which case None is returned.
        """
        self.append(("charref", "convert", name))
        if name[0] == "x":
            return None
        return EventCollector.convert_charref(self, name)

    def convert_codepoint(self, codepoint):
        """Record the call and invoke the base implementation.

        The base-class result is not returned, so this method itself
        always returns None.
        """
        self.append(("codepoint", "convert", codepoint))
        EventCollector.convert_codepoint(self, codepoint)

    def convert_entityref(self, name):
        """Record the call and return the base-class conversion."""
        self.append(("entityref", "convert", name))
        return EventCollector.convert_entityref(self, name)

    # The two handlers below record that they were called and then pass
    # the call along to the default SGMLParser implementation so that its
    # actions can be recorded as well.

    def handle_charref(self, data):
        """Record the character reference, then run the default handler."""
        self.append(("charref", data))
        sgmllib.SGMLParser.handle_charref(self, data)

    def handle_entityref(self, data):
        """Record the entity reference, then run the default handler."""
        self.append(("entityref", data))
        sgmllib.SGMLParser.handle_entityref(self, data)
|---|
| 97 | n/a | |
|---|
| 98 | n/a | |
|---|
class SGMLParserTestCase(unittest.TestCase):
    """Event-level tests for sgmllib.SGMLParser.

    Each test feeds markup to a collector parser and compares the list of
    recorded events with the expected list.
    """

    # Parser class instantiated by get_events(); individual tests rebind
    # it on the instance to use a more specialised collector.
    collector = EventCollector

    def get_events(self, source):
        """Feed every item of *source* to a new collector; return its events.

        *source* is iterated and each item is fed separately, so a plain
        string is fed to the parser one character at a time.
        """
        parser = self.collector()
        for s in source:
            parser.feed(s)
        parser.close()
        return parser.get_events()

    def check_events(self, source, expected_events):
        """Fail unless parsing *source* yields exactly *expected_events*."""
        events = self.get_events(source)
        if events != expected_events:
            self.fail("received events did not match expected events\n"
                      "Expected:\n" + pprint.pformat(expected_events) +
                      "\nReceived:\n" + pprint.pformat(events))

    def check_parse_error(self, source):
        """Fail unless feeding *source* raises sgmllib.SGMLParseError.

        This helper always uses a plain EventCollector, not
        ``self.collector``.
        """
        parser = EventCollector()
        try:
            parser.feed(source)
            parser.close()
        except sgmllib.SGMLParseError:
            pass
        else:
            self.fail("expected SGMLParseError for %r\nReceived:\n%s"
                      % (source, pprint.pformat(parser.get_events())))

    def test_doctype_decl_internal(self):
        inside = """\
DOCTYPE html PUBLIC '-//W3C//DTD HTML 4.01//EN'
SYSTEM 'http://www.w3.org/TR/html401/strict.dtd' [
<!ELEMENT html - O EMPTY>
<!ATTLIST html
version CDATA #IMPLIED
profile CDATA 'DublinCore'>
<!NOTATION datatype SYSTEM 'http://xml.python.org/notations/python-module'>
<!ENTITY myEntity 'internal parsed entity'>
<!ENTITY anEntity SYSTEM 'http://xml.python.org/entities/something.xml'>
<!ENTITY % paramEntity 'name|name|name'>
%paramEntity;
<!-- comment -->
]"""
        self.check_events(["<!%s>" % inside], [
            ("decl", inside),
            ])

    def test_doctype_decl_external(self):
        inside = "DOCTYPE html PUBLIC '-//W3C//DTD HTML 4.01//EN'"
        self.check_events("<!%s>" % inside, [
            ("decl", inside),
            ])

    def test_underscore_in_attrname(self):
        """Make sure attribute names with underscores are accepted"""
        # SF bug #436621
        self.check_events("<a has_under _under>", [
            ("starttag", "a", [("has_under", "has_under"),
                               ("_under", "_under")]),
            ])

    def test_underscore_in_tagname(self):
        """Make sure tag names with underscores are accepted"""
        # SF bug #436621
        self.check_events("<has_under></has_under>", [
            ("starttag", "has_under", []),
            ("endtag", "has_under"),
            ])

    def test_quotes_in_unquoted_attrs(self):
        """Be sure quotes in unquoted attributes are made part of the value"""
        # SF bug #436621
        self.check_events("<a href=foo'bar\"baz>", [
            ("starttag", "a", [("href", "foo'bar\"baz")]),
            ])

    def test_xhtml_empty_tag(self):
        """Handling of XHTML-style empty start tags"""
        self.check_events("<br />text<i></i>", [
            ("starttag", "br", []),
            ("data", "text"),
            ("starttag", "i", []),
            ("endtag", "i"),
            ])

    def test_processing_instruction_only(self):
        self.check_events("<?processing instruction>", [
            ("pi", "processing instruction"),
            ])

    def test_bad_nesting(self):
        self.check_events("<a><b></a></b>", [
            ("starttag", "a", []),
            ("starttag", "b", []),
            ("endtag", "a"),
            ("endtag", "b"),
            ])

    def test_bare_ampersands(self):
        self.check_events("this text & contains & ampersands &", [
            ("data", "this text & contains & ampersands &"),
            ])

    def test_bare_pointy_brackets(self):
        self.check_events("this < text > contains < bare>pointy< brackets", [
            ("data", "this < text > contains < bare>pointy< brackets"),
            ])

    def test_attr_syntax(self):
        output = [
            ("starttag", "a", [("b", "v"), ("c", "v"), ("d", "v"), ("e", "e")])
            ]
        self.check_events("""<a b='v' c="v" d=v e>""", output)
        self.check_events("""<a  b = 'v' c = "v" d = v e>""", output)
        self.check_events("""<a\nb\n=\n'v'\nc\n=\n"v"\nd\n=\nv\ne>""", output)
        self.check_events("""<a\tb\t=\t'v'\tc\t=\t"v"\td\t=\tv\te>""", output)

    def test_attr_values(self):
        self.check_events("""<a b='xxx\n\txxx' c="yyy\t\nyyy" d='\txyz\n'>""",
                          [("starttag", "a", [("b", "xxx\n\txxx"),
                                              ("c", "yyy\t\nyyy"),
                                              ("d", "\txyz\n")])
                           ])
        self.check_events("""<a b='' c="">""", [
            ("starttag", "a", [("b", ""), ("c", "")]),
            ])
        # URL construction stuff from RFC 1808:
        safe = "$-_.+"
        extra = "!*'(),"
        reserved = ";/?:@&="
        url = "http://example.com:8080/path/to/file?%s%s%s" % (
            safe, extra, reserved)
        self.check_events("""<e a=%s>""" % url, [
            ("starttag", "e", [("a", url)]),
            ])
        # Regression test for SF patch #669683.
        self.check_events("<e a=rgb(1,2,3)>", [
            ("starttag", "e", [("a", "rgb(1,2,3)")]),
            ])

    def test_attr_values_entities(self):
        """Substitution of entities and charrefs in attribute values"""
        # SF bug #1452246
        # The input must contain the references themselves (not the
        # characters they stand for); otherwise nothing is substituted.
        source = (
            "<a b=&lt; c=&lt;&gt; d=&lt-&gt; e='&lt; '\n"
            "f=\"&xxx;\" g='&#32;&#33;' h='&#500;'\n"
            "i='x?a=b&c=d;'\n"
            "j='&amp;#42;' k='&#38;#42;'>"
        )
        self.check_events(source,
            [("starttag", "a", [("b", "<"),
                                ("c", "<>"),
                                ("d", "&lt->"),
                                ("e", "< "),
                                ("f", "&xxx;"),
                                ("g", " !"),
                                ("h", "&#500;"),
                                ("i", "x?a=b&c=d;"),
                                ("j", "&#42;"),
                                ("k", "&#42;"),
                                ])])

    def test_convert_overrides(self):
        # This checks that the character and entity reference
        # conversion helpers are called at the documented times.  No
        # attempt is made to really change what the parser accepts.
        #
        self.collector = HTMLEntityCollector
        self.check_events(('<a title="&ldquo;test&#x201d;">foo</a>'
                           '&foobar;&#42;'), [
            ('entityref', 'convert', 'ldquo'),
            ('charref', 'convert', 'x201d'),
            ('starttag', 'a', [('title', '&ldquo;test&#x201d;')]),
            ('data', 'foo'),
            ('endtag', 'a'),
            ('entityref', 'foobar'),
            ('entityref', 'convert', 'foobar'),
            ('charref', '42'),
            ('charref', 'convert', '42'),
            ('codepoint', 'convert', 42),
            ])

    def test_attr_funky_names(self):
        self.check_events("""<a a.b='v' c:d=v e-f=v>""", [
            ("starttag", "a", [("a.b", "v"), ("c:d", "v"), ("e-f", "v")]),
            ])

    def test_attr_value_ip6_url(self):
        # http://www.python.org/sf/853506
        self.check_events(("<a href='http://[1080::8:800:200C:417A]/'>"
                           "<a href=http://[1080::8:800:200C:417A]/>"), [
            ("starttag", "a", [("href", "http://[1080::8:800:200C:417A]/")]),
            ("starttag", "a", [("href", "http://[1080::8:800:200C:417A]/")]),
            ])

    def test_weird_starttags(self):
        self.check_events("<a<a>", [
            ("starttag", "a", []),
            ("starttag", "a", []),
            ])
        self.check_events("</a<a>", [
            ("endtag", "a"),
            ("starttag", "a", []),
            ])

    def test_declaration_junk_chars(self):
        self.check_parse_error("<!DOCTYPE foo $ >")

    def test_get_starttag_text(self):
        s = """<foobar   \n   one="1"\ttwo=2   >"""
        self.check_events(s, [
            ("starttag", "foobar", [("one", "1"), ("two", "2")]),
            ])

    def test_cdata_content(self):
        s = ("<cdata> <!-- not a comment --> &not-an-entity-ref; </cdata>"
             "<notcdata> <!-- comment --> </notcdata>")
        self.collector = CDATAEventCollector
        self.check_events(s, [
            ("starttag", "cdata", []),
            ("data", " <!-- not a comment --> &not-an-entity-ref; "),
            ("endtag", "cdata"),
            ("starttag", "notcdata", []),
            ("data", " "),
            ("comment", " comment "),
            ("data", " "),
            ("endtag", "notcdata"),
            ])
        s = """<cdata> <not a='start tag'> </cdata>"""
        self.check_events(s, [
            ("starttag", "cdata", []),
            ("data", " <not a='start tag'> "),
            ("endtag", "cdata"),
            ])

    def test_illegal_declarations(self):
        s = 'abc<!spacer type="block" height="25">def'
        self.check_events(s, [
            ("data", "abc"),
            ("unknown decl", 'spacer type="block" height="25"'),
            ("data", "def"),
            ])

    def test_enumerated_attr_type(self):
        s = "<!DOCTYPE doc [<!ATTLIST doc attr (a | b) >]>"
        self.check_events(s, [
            ('decl', 'DOCTYPE doc [<!ATTLIST doc attr (a | b) >]'),
            ])

    def test_read_chunks(self):
        # SF bug #1541697, this caused sgml parser to hang
        # Just verify this code doesn't cause a hang.
        CHUNK = 1024  # increasing this to 8212 makes the problem go away

        fp = sgmllib.SGMLParser()
        # The with-block closes the input file even if feed() raises.
        with open(test_support.findfile('sgml_input.html')) as f:
            while True:
                data = f.read(CHUNK)
                fp.feed(data)
                # A short (or empty) read means the end of the file.
                if len(data) != CHUNK:
                    break

    def test_only_decode_ascii(self):
        # SF bug #1651995, make sure non-ascii character references are not decoded
        s = '<signs exclamation="&#33" copyright="&#169" quoteleft="&#8216;">'
        self.check_events(s, [
            ('starttag', 'signs',
             [('exclamation', '!'), ('copyright', '&#169'),
              ('quoteleft', '&#8216;')]),
            ])

    # XXX These tests have been disabled by prefixing their names with
    # an underscore.  The first two exercise outstanding bugs in the
    # sgmllib module, and the third exhibits questionable behavior
    # that needs to be carefully considered before changing it.

    def _test_starttag_end_boundary(self):
        self.check_events("<a b='<'>", [("starttag", "a", [("b", "<")])])
        self.check_events("<a b='>'>", [("starttag", "a", [("b", ">")])])

    def _test_buffer_artefacts(self):
        output = [("starttag", "a", [("b", "<")])]
        self.check_events(["<a b='<'>"], output)
        self.check_events(["<a ", "b='<'>"], output)
        self.check_events(["<a b", "='<'>"], output)
        self.check_events(["<a b=", "'<'>"], output)
        self.check_events(["<a b='<", "'>"], output)
        self.check_events(["<a b='<'", ">"], output)

        output = [("starttag", "a", [("b", ">")])]
        self.check_events(["<a b='>'>"], output)
        self.check_events(["<a ", "b='>'>"], output)
        self.check_events(["<a b", "='>'>"], output)
        self.check_events(["<a b=", "'>'>"], output)
        self.check_events(["<a b='>", "'>"], output)
        self.check_events(["<a b='>'", ">"], output)

        output = [("comment", "abc")]
        self.check_events(["", "<!--abc-->"], output)
        self.check_events(["<", "!--abc-->"], output)
        self.check_events(["<!", "--abc-->"], output)
        self.check_events(["<!-", "-abc-->"], output)
        self.check_events(["<!--", "abc-->"], output)
        self.check_events(["<!--a", "bc-->"], output)
        self.check_events(["<!--ab", "c-->"], output)
        self.check_events(["<!--abc", "-->"], output)
        self.check_events(["<!--abc-", "->"], output)
        self.check_events(["<!--abc--", ">"], output)
        self.check_events(["<!--abc-->", ""], output)

    def _test_starttag_junk_chars(self):
        self.check_parse_error("<")
        self.check_parse_error("<>")
        self.check_parse_error("</$>")
        self.check_parse_error("</")
        self.check_parse_error("</a")
        self.check_parse_error("<$")
        self.check_parse_error("<$>")
        self.check_parse_error("<!")
        self.check_parse_error("<a $>")
        self.check_parse_error("<a")
        self.check_parse_error("<a foo='bar'")
        self.check_parse_error("<a foo='bar")
        self.check_parse_error("<a foo='>'")
        self.check_parse_error("<a foo='>")
        self.check_parse_error("<a foo=>")
|---|
| 432 | n/a | |
|---|
| 433 | n/a | |
|---|
def test_main():
    """Run SGMLParserTestCase through test_support.run_unittest()."""
    test_support.run_unittest(SGMLParserTestCase)
|---|
| 436 | n/a | |
|---|
| 437 | n/a | |
|---|
# Run the test suite when this file is executed as a script.
if __name__ == '__main__':
    test_main()
|---|