From 777c1f248777f5e25716b86c24c89bd715ca5a5d Mon Sep 17 00:00:00 2001 From: Anthony Vanelverdinghe Date: Wed, 12 Aug 2020 18:05:31 +0200 Subject: [PATCH 01/30] Break cyclic dependency 2f61c94 introduced a cyclic dependency between saxtree & htmlparser. --- src/nu/validator/saxtree/DocumentFragment.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/nu/validator/saxtree/DocumentFragment.java b/src/nu/validator/saxtree/DocumentFragment.java index a9e2db84..9f496bf3 100644 --- a/src/nu/validator/saxtree/DocumentFragment.java +++ b/src/nu/validator/saxtree/DocumentFragment.java @@ -23,7 +23,7 @@ package nu.validator.saxtree; -import nu.validator.htmlparser.impl.LocatorImpl; +import org.xml.sax.ext.Locator2Impl; /** * A document fragment. @@ -37,7 +37,7 @@ public final class DocumentFragment extends ParentNode { * The constructor. */ public DocumentFragment() { - super(new LocatorImpl()); + super(new Locator2Impl()); } /** From 52edf7099ced25e1a58909d3d8c19960a2251ba6 Mon Sep 17 00:00:00 2001 From: Anthony Vanelverdinghe Date: Wed, 12 Aug 2020 18:45:40 +0200 Subject: [PATCH 02/30] Remove unnecessary superinterface Locator 2f61c94 merely appended Locator2, without removing its now-unnecessary superinterface. 
--- src/nu/validator/htmlparser/impl/LocatorImpl.java | 2 +- src/nu/validator/htmlparser/impl/Tokenizer.java | 3 +-- src/nu/validator/htmlparser/io/HtmlInputStreamReader.java | 4 +--- src/nu/validator/htmlparser/io/MetaSniffer.java | 2 +- src/nu/validator/saxtree/LocatorImpl.java | 2 +- src/nu/validator/saxtree/Node.java | 2 +- src/nu/validator/saxtree/TreeParser.java | 2 +- 7 files changed, 7 insertions(+), 10 deletions(-) diff --git a/src/nu/validator/htmlparser/impl/LocatorImpl.java b/src/nu/validator/htmlparser/impl/LocatorImpl.java index b334aa6a..29bb8c72 100644 --- a/src/nu/validator/htmlparser/impl/LocatorImpl.java +++ b/src/nu/validator/htmlparser/impl/LocatorImpl.java @@ -26,7 +26,7 @@ import org.xml.sax.Locator; import org.xml.sax.ext.Locator2; -public class LocatorImpl implements Locator, Locator2 { +public class LocatorImpl implements Locator2 { private final String systemId; diff --git a/src/nu/validator/htmlparser/impl/Tokenizer.java b/src/nu/validator/htmlparser/impl/Tokenizer.java index 13fd56b1..345cef28 100755 --- a/src/nu/validator/htmlparser/impl/Tokenizer.java +++ b/src/nu/validator/htmlparser/impl/Tokenizer.java @@ -36,7 +36,6 @@ package nu.validator.htmlparser.impl; import org.xml.sax.ErrorHandler; -import org.xml.sax.Locator; import org.xml.sax.ext.Locator2; import org.xml.sax.SAXException; import org.xml.sax.SAXParseException; @@ -67,7 +66,7 @@ * @version $Id$ * @author hsivonen */ -public class Tokenizer implements Locator, Locator2 { +public class Tokenizer implements Locator2 { private static final int DATA_AND_RCDATA_MASK = ~1; diff --git a/src/nu/validator/htmlparser/io/HtmlInputStreamReader.java b/src/nu/validator/htmlparser/io/HtmlInputStreamReader.java index 3de1af2a..2971a27e 100755 --- a/src/nu/validator/htmlparser/io/HtmlInputStreamReader.java +++ b/src/nu/validator/htmlparser/io/HtmlInputStreamReader.java @@ -41,7 +41,6 @@ import nu.validator.htmlparser.impl.Tokenizer; import org.xml.sax.ErrorHandler; -import 
org.xml.sax.Locator; import org.xml.sax.ext.Locator2; import org.xml.sax.SAXException; import org.xml.sax.SAXParseException; @@ -58,8 +57,7 @@ * @version $Id$ * @author hsivonen */ -public final class HtmlInputStreamReader extends Reader implements - ByteReadable, Locator, Locator2 { +public final class HtmlInputStreamReader extends Reader implements ByteReadable, Locator2 { private static final int SNIFFING_LIMIT = 1024; diff --git a/src/nu/validator/htmlparser/io/MetaSniffer.java b/src/nu/validator/htmlparser/io/MetaSniffer.java index 9deaef7a..47fbbfbc 100755 --- a/src/nu/validator/htmlparser/io/MetaSniffer.java +++ b/src/nu/validator/htmlparser/io/MetaSniffer.java @@ -34,7 +34,7 @@ import org.xml.sax.SAXException; import org.xml.sax.SAXParseException; -public class MetaSniffer extends MetaScanner implements Locator, Locator2 { +public class MetaSniffer extends MetaScanner implements Locator2 { private Encoding characterEncoding = null; diff --git a/src/nu/validator/saxtree/LocatorImpl.java b/src/nu/validator/saxtree/LocatorImpl.java index b6416f1a..6a1f81ba 100644 --- a/src/nu/validator/saxtree/LocatorImpl.java +++ b/src/nu/validator/saxtree/LocatorImpl.java @@ -31,7 +31,7 @@ * @version $Id$ * @author hsivonen */ -public final class LocatorImpl implements Locator, Locator2 { +public final class LocatorImpl implements Locator2 { /** * The system id. diff --git a/src/nu/validator/saxtree/Node.java b/src/nu/validator/saxtree/Node.java index c9292dd2..1ead08b4 100644 --- a/src/nu/validator/saxtree/Node.java +++ b/src/nu/validator/saxtree/Node.java @@ -35,7 +35,7 @@ * @version $Id$ * @author hsivonen */ -public abstract class Node implements Locator, Locator2 { +public abstract class Node implements Locator2 { /** * The system id. 
diff --git a/src/nu/validator/saxtree/TreeParser.java b/src/nu/validator/saxtree/TreeParser.java index 6f86f7f0..29792f84 100644 --- a/src/nu/validator/saxtree/TreeParser.java +++ b/src/nu/validator/saxtree/TreeParser.java @@ -35,7 +35,7 @@ * @version $Id$ * @author hsivonen */ -public final class TreeParser implements Locator, Locator2 { +public final class TreeParser implements Locator2 { /** * The content handler. From 223c87c5dd2ac7efcf542b7a29b5af3dbbd3e120 Mon Sep 17 00:00:00 2001 From: Anthony Vanelverdinghe Date: Thu, 23 Apr 2020 19:58:08 +0200 Subject: [PATCH 03/30] Clean up: doc --- doc/README | 15 - doc/named-character-references.html | 4 - doc/tokenization.txt | 1147 -------------- doc/tree-construction.txt | 2201 --------------------------- 4 files changed, 3367 deletions(-) delete mode 100644 doc/README delete mode 100644 doc/named-character-references.html delete mode 100644 doc/tokenization.txt delete mode 100644 doc/tree-construction.txt diff --git a/doc/README b/doc/README deleted file mode 100644 index e0132a41..00000000 --- a/doc/README +++ /dev/null @@ -1,15 +0,0 @@ -tokenization.txt represents the state of the spec implemented in Tokenizer.java. - -To get a diffable version corresponding to the current spec: -lynx -display_charset=utf-8 -dump -nolist http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html > current.txt - -tree-construction.txt represents the state of the spec implemented in TreeBuilder.java. - -To get a diffable version corresponding to the current spec: -lynx -display_charset=utf-8 -dump -nolist http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html > current.txt - - -The text of the files in this directory comes from the WHATWG HTML 5 spec -which carries the following notice: -© Copyright 2004-2010 Apple Computer, Inc., Mozilla Foundation, and Opera Software ASA. -You are granted a license to use, reproduce and create derivative works of this document. 
diff --git a/doc/named-character-references.html b/doc/named-character-references.html deleted file mode 100644 index 5f05a991..00000000 --- a/doc/named-character-references.html +++ /dev/null @@ -1,4 +0,0 @@ - - -
Name Character(s) Glyph
AElig; U+000C6 Æ
AMP; U+00026 &
Aacute; U+000C1 Á
Abreve; U+00102 Ă
Acirc; U+000C2 Â
Acy; U+00410 А
Afr; U+1D504 𝔄
Agrave; U+000C0 À
Alpha; U+00391 Α
Amacr; U+00100 Ā
And; U+02A53
Aogon; U+00104 Ą
Aopf; U+1D538 𝔸
ApplyFunction; U+02061
Aring; U+000C5 Å
Ascr; U+1D49C 𝒜
Assign; U+02254
Atilde; U+000C3 Ã
Auml; U+000C4 Ä
Backslash; U+02216
Barv; U+02AE7
Barwed; U+02306
Bcy; U+00411 Б
Because; U+02235
Bernoullis; U+0212C
Beta; U+00392 Β
Bfr; U+1D505 𝔅
Bopf; U+1D539 𝔹
Breve; U+002D8 ˘
Bscr; U+0212C
Bumpeq; U+0224E
CHcy; U+00427 Ч
COPY; U+000A9 ©
Cacute; U+00106 Ć
Cap; U+022D2
CapitalDifferentialD; U+02145
Cayleys; U+0212D
Ccaron; U+0010C Č
Ccedil; U+000C7 Ç
Ccirc; U+00108 Ĉ
Cconint; U+02230
Cdot; U+0010A Ċ
Cedilla; U+000B8 ¸
CenterDot; U+000B7 ·
Cfr; U+0212D
Chi; U+003A7 Χ
CircleDot; U+02299
CircleMinus; U+02296
CirclePlus; U+02295
CircleTimes; U+02297
ClockwiseContourIntegral; U+02232
CloseCurlyDoubleQuote; U+0201D
CloseCurlyQuote; U+02019
Colon; U+02237
Colone; U+02A74
Congruent; U+02261
Conint; U+0222F
ContourIntegral; U+0222E
Copf; U+02102
Coproduct; U+02210
CounterClockwiseContourIntegral; U+02233
Cross; U+02A2F
Cscr; U+1D49E 𝒞
Cup; U+022D3
CupCap; U+0224D
DD; U+02145
DDotrahd; U+02911
DJcy; U+00402 Ђ
DScy; U+00405 Ѕ
DZcy; U+0040F Џ
Dagger; U+02021
Darr; U+021A1
Dashv; U+02AE4
Dcaron; U+0010E Ď
Dcy; U+00414 Д
Del; U+02207
Delta; U+00394 Δ
Dfr; U+1D507 𝔇
DiacriticalAcute; U+000B4 ´
DiacriticalDot; U+002D9 ˙
DiacriticalDoubleAcute; U+002DD ˝
DiacriticalGrave; U+00060 `
DiacriticalTilde; U+002DC ˜
Diamond; U+022C4
DifferentialD; U+02146
Dopf; U+1D53B 𝔻
Dot; U+000A8 ¨
DotDot; U+020DC ◌⃜
DotEqual; U+02250
DoubleContourIntegral; U+0222F
DoubleDot; U+000A8 ¨
DoubleDownArrow; U+021D3
DoubleLeftArrow; U+021D0
DoubleLeftRightArrow; U+021D4
DoubleLeftTee; U+02AE4
DoubleLongLeftArrow; U+027F8
DoubleLongLeftRightArrow; U+027FA
DoubleLongRightArrow; U+027F9
DoubleRightArrow; U+021D2
DoubleRightTee; U+022A8
DoubleUpArrow; U+021D1
DoubleUpDownArrow; U+021D5
DoubleVerticalBar; U+02225
DownArrow; U+02193
DownArrowBar; U+02913
DownArrowUpArrow; U+021F5
DownBreve; U+00311 ◌̑
DownLeftRightVector; U+02950
DownLeftTeeVector; U+0295E
DownLeftVector; U+021BD
DownLeftVectorBar; U+02956
DownRightTeeVector; U+0295F
DownRightVector; U+021C1
DownRightVectorBar; U+02957
DownTee; U+022A4
DownTeeArrow; U+021A7
Downarrow; U+021D3
Dscr; U+1D49F 𝒟
Dstrok; U+00110 Đ
ENG; U+0014A Ŋ
ETH; U+000D0 Ð
Eacute; U+000C9 É
Ecaron; U+0011A Ě
Ecirc; U+000CA Ê
Ecy; U+0042D Э
Edot; U+00116 Ė
Efr; U+1D508 𝔈
Egrave; U+000C8 È
Element; U+02208
Emacr; U+00112 Ē
EmptySmallSquare; U+025FB
EmptyVerySmallSquare; U+025AB
Eogon; U+00118 Ę
Eopf; U+1D53C 𝔼
Epsilon; U+00395 Ε
Equal; U+02A75
EqualTilde; U+02242
Equilibrium; U+021CC
Escr; U+02130
Esim; U+02A73
Eta; U+00397 Η
Euml; U+000CB Ë
Exists; U+02203
ExponentialE; U+02147
Fcy; U+00424 Ф
Ffr; U+1D509 𝔉
FilledSmallSquare; U+025FC
FilledVerySmallSquare; U+025AA
Fopf; U+1D53D 𝔽
ForAll; U+02200
Fouriertrf; U+02131
Fscr; U+02131
GJcy; U+00403 Ѓ
GT; U+0003E >
Gamma; U+00393 Γ
Gammad; U+003DC Ϝ
Gbreve; U+0011E Ğ
Gcedil; U+00122 Ģ
Gcirc; U+0011C Ĝ
Gcy; U+00413 Г
Gdot; U+00120 Ġ
Gfr; U+1D50A 𝔊
Gg; U+022D9
Gopf; U+1D53E 𝔾
GreaterEqual; U+02265
GreaterEqualLess; U+022DB
GreaterFullEqual; U+02267
GreaterGreater; U+02AA2
GreaterLess; U+02277
GreaterSlantEqual; U+02A7E
GreaterTilde; U+02273
Gscr; U+1D4A2 𝒢
Gt; U+0226B
HARDcy; U+0042A Ъ
Hacek; U+002C7 ˇ
Hat; U+0005E ^
Hcirc; U+00124 Ĥ
Hfr; U+0210C
HilbertSpace; U+0210B
Hopf; U+0210D
HorizontalLine; U+02500
Hscr; U+0210B
Hstrok; U+00126 Ħ
HumpDownHump; U+0224E
HumpEqual; U+0224F
IEcy; U+00415 Е
IJlig; U+00132 IJ
IOcy; U+00401 Ё
Iacute; U+000CD Í
Icirc; U+000CE Î
Icy; U+00418 И
Idot; U+00130 İ
Ifr; U+02111
Igrave; U+000CC Ì
Im; U+02111
Imacr; U+0012A Ī
ImaginaryI; U+02148
Implies; U+021D2
Int; U+0222C
Integral; U+0222B
Intersection; U+022C2
InvisibleComma; U+02063
InvisibleTimes; U+02062
Iogon; U+0012E Į
Iopf; U+1D540 𝕀
Iota; U+00399 Ι
Iscr; U+02110
Itilde; U+00128 Ĩ
Iukcy; U+00406 І
Iuml; U+000CF Ï
Jcirc; U+00134 Ĵ
Jcy; U+00419 Й
Jfr; U+1D50D 𝔍
Jopf; U+1D541 𝕁
Jscr; U+1D4A5 𝒥
Jsercy; U+00408 Ј
Jukcy; U+00404 Є
KHcy; U+00425 Х
KJcy; U+0040C Ќ
Kappa; U+0039A Κ
Kcedil; U+00136 Ķ
Kcy; U+0041A К
Kfr; U+1D50E 𝔎
Kopf; U+1D542 𝕂
Kscr; U+1D4A6 𝒦
LJcy; U+00409 Љ
LT; U+0003C <
Lacute; U+00139 Ĺ
Lambda; U+0039B Λ
Lang; U+027EA
Laplacetrf; U+02112
Larr; U+0219E
Lcaron; U+0013D Ľ
Lcedil; U+0013B Ļ
Lcy; U+0041B Л
LeftAngleBracket; U+027E8
LeftArrow; U+02190
LeftArrowBar; U+021E4
LeftArrowRightArrow; U+021C6
LeftCeiling; U+02308
LeftDoubleBracket; U+027E6
LeftDownTeeVector; U+02961
LeftDownVector; U+021C3
LeftDownVectorBar; U+02959
LeftFloor; U+0230A
LeftRightArrow; U+02194
LeftRightVector; U+0294E
LeftTee; U+022A3
LeftTeeArrow; U+021A4
LeftTeeVector; U+0295A
LeftTriangle; U+022B2
LeftTriangleBar; U+029CF
LeftTriangleEqual; U+022B4
LeftUpDownVector; U+02951
LeftUpTeeVector; U+02960
LeftUpVector; U+021BF
LeftUpVectorBar; U+02958
LeftVector; U+021BC
LeftVectorBar; U+02952
Leftarrow; U+021D0
Leftrightarrow; U+021D4
LessEqualGreater; U+022DA
LessFullEqual; U+02266
LessGreater; U+02276
LessLess; U+02AA1
LessSlantEqual; U+02A7D
LessTilde; U+02272
Lfr; U+1D50F 𝔏
Ll; U+022D8
Lleftarrow; U+021DA
Lmidot; U+0013F Ŀ
LongLeftArrow; U+027F5
LongLeftRightArrow; U+027F7
LongRightArrow; U+027F6
Longleftarrow; U+027F8
Longleftrightarrow; U+027FA
Longrightarrow; U+027F9
Lopf; U+1D543 𝕃
LowerLeftArrow; U+02199
LowerRightArrow; U+02198
Lscr; U+02112
Lsh; U+021B0
Lstrok; U+00141 Ł
Lt; U+0226A
Map; U+02905
Mcy; U+0041C М
MediumSpace; U+0205F
Mellintrf; U+02133
Mfr; U+1D510 𝔐
MinusPlus; U+02213
Mopf; U+1D544 𝕄
Mscr; U+02133
Mu; U+0039C Μ
NJcy; U+0040A Њ
Nacute; U+00143 Ń
Ncaron; U+00147 Ň
Ncedil; U+00145 Ņ
Ncy; U+0041D Н
NegativeMediumSpace; U+0200B
NegativeThickSpace; U+0200B
NegativeThinSpace; U+0200B
NegativeVeryThinSpace; U+0200B
NestedGreaterGreater; U+0226B
NestedLessLess; U+0226A
NewLine; U+0000A
Nfr; U+1D511 𝔑
NoBreak; U+02060
NonBreakingSpace; U+000A0  
Nopf; U+02115
Not; U+02AEC
NotCongruent; U+02262
NotCupCap; U+0226D
NotDoubleVerticalBar; U+02226
NotElement; U+02209
NotEqual; U+02260
NotEqualTilde; U+02242 U+00338 ≂̸
NotExists; U+02204
NotGreater; U+0226F
NotGreaterEqual; U+02271
NotGreaterFullEqual; U+02267 U+00338 ≧̸
NotGreaterGreater; U+0226B U+00338 ≫̸
NotGreaterLess; U+02279
NotGreaterSlantEqual; U+02A7E U+00338 ⩾̸
NotGreaterTilde; U+02275
NotHumpDownHump; U+0224E U+00338 ≎̸
NotHumpEqual; U+0224F U+00338 ≏̸
NotLeftTriangle; U+022EA
NotLeftTriangleBar; U+029CF U+00338 ⧏̸
NotLeftTriangleEqual; U+022EC
NotLess; U+0226E
NotLessEqual; U+02270
NotLessGreater; U+02278
NotLessLess; U+0226A U+00338 ≪̸
NotLessSlantEqual; U+02A7D U+00338 ⩽̸
NotLessTilde; U+02274
NotNestedGreaterGreater; U+02AA2 U+00338 ⪢̸
NotNestedLessLess; U+02AA1 U+00338 ⪡̸
NotPrecedes; U+02280
NotPrecedesEqual; U+02AAF U+00338 ⪯̸
NotPrecedesSlantEqual; U+022E0
NotReverseElement; U+0220C
NotRightTriangle; U+022EB
NotRightTriangleBar; U+029D0 U+00338 ⧐̸
NotRightTriangleEqual; U+022ED
NotSquareSubset; U+0228F U+00338 ⊏̸
NotSquareSubsetEqual; U+022E2
NotSquareSuperset; U+02290 U+00338 ⊐̸
NotSquareSupersetEqual; U+022E3
NotSubset; U+02282 U+020D2 ⊂⃒
NotSubsetEqual; U+02288
NotSucceeds; U+02281
NotSucceedsEqual; U+02AB0 U+00338 ⪰̸
NotSucceedsSlantEqual; U+022E1
NotSucceedsTilde; U+0227F U+00338 ≿̸
NotSuperset; U+02283 U+020D2 ⊃⃒
NotSupersetEqual; U+02289
NotTilde; U+02241
NotTildeEqual; U+02244
NotTildeFullEqual; U+02247
NotTildeTilde; U+02249
NotVerticalBar; U+02224
Nscr; U+1D4A9 𝒩
Ntilde; U+000D1 Ñ
Nu; U+0039D Ν
OElig; U+00152 Œ
Oacute; U+000D3 Ó
Ocirc; U+000D4 Ô
Ocy; U+0041E О
Odblac; U+00150 Ő
Ofr; U+1D512 𝔒
Ograve; U+000D2 Ò
Omacr; U+0014C Ō
Omega; U+003A9 Ω
Omicron; U+0039F Ο
Oopf; U+1D546 𝕆
OpenCurlyDoubleQuote; U+0201C
OpenCurlyQuote; U+02018
Or; U+02A54
Oscr; U+1D4AA 𝒪
Oslash; U+000D8 Ø
Otilde; U+000D5 Õ
Otimes; U+02A37
Ouml; U+000D6 Ö
OverBar; U+0203E
OverBrace; U+023DE
OverBracket; U+023B4
OverParenthesis; U+023DC
PartialD; U+02202
Pcy; U+0041F П
Pfr; U+1D513 𝔓
Phi; U+003A6 Φ
Pi; U+003A0 Π
PlusMinus; U+000B1 ±
Poincareplane; U+0210C
Popf; U+02119
Pr; U+02ABB
Precedes; U+0227A
PrecedesEqual; U+02AAF
PrecedesSlantEqual; U+0227C
PrecedesTilde; U+0227E
Prime; U+02033
Product; U+0220F
Proportion; U+02237
Proportional; U+0221D
Pscr; U+1D4AB 𝒫
Psi; U+003A8 Ψ
QUOT; U+00022 "
Qfr; U+1D514 𝔔
Qopf; U+0211A
Qscr; U+1D4AC 𝒬
RBarr; U+02910
REG; U+000AE ®
Racute; U+00154 Ŕ
Rang; U+027EB
Rarr; U+021A0
Rarrtl; U+02916
Rcaron; U+00158 Ř
Rcedil; U+00156 Ŗ
Rcy; U+00420 Р
Re; U+0211C
ReverseElement; U+0220B
ReverseEquilibrium; U+021CB
ReverseUpEquilibrium; U+0296F
Rfr; U+0211C
Rho; U+003A1 Ρ
RightAngleBracket; U+027E9
RightArrow; U+02192
RightArrowBar; U+021E5
RightArrowLeftArrow; U+021C4
RightCeiling; U+02309
RightDoubleBracket; U+027E7
RightDownTeeVector; U+0295D
RightDownVector; U+021C2
RightDownVectorBar; U+02955
RightFloor; U+0230B
RightTee; U+022A2
RightTeeArrow; U+021A6
RightTeeVector; U+0295B
RightTriangle; U+022B3
RightTriangleBar; U+029D0
RightTriangleEqual; U+022B5
RightUpDownVector; U+0294F
RightUpTeeVector; U+0295C
RightUpVector; U+021BE
RightUpVectorBar; U+02954
RightVector; U+021C0
RightVectorBar; U+02953
Rightarrow; U+021D2
Ropf; U+0211D
RoundImplies; U+02970
Rrightarrow; U+021DB
Rscr; U+0211B
Rsh; U+021B1
RuleDelayed; U+029F4
SHCHcy; U+00429 Щ
SHcy; U+00428 Ш
SOFTcy; U+0042C Ь
Sacute; U+0015A Ś
Sc; U+02ABC
Scaron; U+00160 Š
Scedil; U+0015E Ş
Scirc; U+0015C Ŝ
Scy; U+00421 С
Sfr; U+1D516 𝔖
ShortDownArrow; U+02193
ShortLeftArrow; U+02190
ShortRightArrow; U+02192
ShortUpArrow; U+02191
Sigma; U+003A3 Σ
SmallCircle; U+02218
Sopf; U+1D54A 𝕊
Sqrt; U+0221A
Square; U+025A1
SquareIntersection; U+02293
SquareSubset; U+0228F
SquareSubsetEqual; U+02291
SquareSuperset; U+02290
SquareSupersetEqual; U+02292
SquareUnion; U+02294
Sscr; U+1D4AE 𝒮
Star; U+022C6
Sub; U+022D0
Subset; U+022D0
SubsetEqual; U+02286
Succeeds; U+0227B
SucceedsEqual; U+02AB0
SucceedsSlantEqual; U+0227D
SucceedsTilde; U+0227F
SuchThat; U+0220B
Sum; U+02211
Sup; U+022D1
Superset; U+02283
SupersetEqual; U+02287
Supset; U+022D1
THORN; U+000DE Þ
TRADE; U+02122
TSHcy; U+0040B Ћ
TScy; U+00426 Ц
Tab; U+00009
Tau; U+003A4 Τ
Tcaron; U+00164 Ť
Tcedil; U+00162 Ţ
Tcy; U+00422 Т
Tfr; U+1D517 𝔗
Therefore; U+02234
Theta; U+00398 Θ
ThickSpace; U+0205F U+0200A   
ThinSpace; U+02009
Tilde; U+0223C
TildeEqual; U+02243
TildeFullEqual; U+02245
TildeTilde; U+02248
Topf; U+1D54B 𝕋
TripleDot; U+020DB ◌⃛
Tscr; U+1D4AF 𝒯
Tstrok; U+00166 Ŧ
Uacute; U+000DA Ú
Uarr; U+0219F
Uarrocir; U+02949
Ubrcy; U+0040E Ў
Ubreve; U+0016C Ŭ
Ucirc; U+000DB Û
Ucy; U+00423 У
Udblac; U+00170 Ű
Ufr; U+1D518 𝔘
Ugrave; U+000D9 Ù
Umacr; U+0016A Ū
UnderBar; U+0005F _
UnderBrace; U+023DF
UnderBracket; U+023B5
UnderParenthesis; U+023DD
Union; U+022C3
UnionPlus; U+0228E
Uogon; U+00172 Ų
Uopf; U+1D54C 𝕌
UpArrow; U+02191
UpArrowBar; U+02912
UpArrowDownArrow; U+021C5
UpDownArrow; U+02195
UpEquilibrium; U+0296E
UpTee; U+022A5
UpTeeArrow; U+021A5
Uparrow; U+021D1
Updownarrow; U+021D5
UpperLeftArrow; U+02196
UpperRightArrow; U+02197
Upsi; U+003D2 ϒ
Upsilon; U+003A5 Υ
Uring; U+0016E Ů
Uscr; U+1D4B0 𝒰
Utilde; U+00168 Ũ
Uuml; U+000DC Ü
VDash; U+022AB
Vbar; U+02AEB
Vcy; U+00412 В
Vdash; U+022A9
Vdashl; U+02AE6
Vee; U+022C1
Verbar; U+02016
Vert; U+02016
VerticalBar; U+02223
VerticalLine; U+0007C |
VerticalSeparator; U+02758
VerticalTilde; U+02240
VeryThinSpace; U+0200A
Vfr; U+1D519 𝔙
Vopf; U+1D54D 𝕍
Vscr; U+1D4B1 𝒱
Vvdash; U+022AA
Wcirc; U+00174 Ŵ
Wedge; U+022C0
Wfr; U+1D51A 𝔚
Wopf; U+1D54E 𝕎
Wscr; U+1D4B2 𝒲
Xfr; U+1D51B 𝔛
Xi; U+0039E Ξ
Xopf; U+1D54F 𝕏
Xscr; U+1D4B3 𝒳
YAcy; U+0042F Я
YIcy; U+00407 Ї
YUcy; U+0042E Ю
Yacute; U+000DD Ý
Ycirc; U+00176 Ŷ
Ycy; U+0042B Ы
Yfr; U+1D51C 𝔜
Yopf; U+1D550 𝕐
Yscr; U+1D4B4 𝒴
Yuml; U+00178 Ÿ
ZHcy; U+00416 Ж
Zacute; U+00179 Ź
Zcaron; U+0017D Ž
Zcy; U+00417 З
Zdot; U+0017B Ż
ZeroWidthSpace; U+0200B
Zeta; U+00396 Ζ
Zfr; U+02128
Zopf; U+02124
Zscr; U+1D4B5 𝒵
aacute; U+000E1 á
abreve; U+00103 ă
ac; U+0223E
acE; U+0223E U+00333 ∾̳
acd; U+0223F
acirc; U+000E2 â
acute; U+000B4 ´
acy; U+00430 а
aelig; U+000E6 æ
af; U+02061
afr; U+1D51E 𝔞
agrave; U+000E0 à
alefsym; U+02135
aleph; U+02135
alpha; U+003B1 α
amacr; U+00101 ā
amalg; U+02A3F ⨿
amp; U+00026 &
and; U+02227
andand; U+02A55
andd; U+02A5C
andslope; U+02A58
andv; U+02A5A
ang; U+02220
ange; U+029A4
angle; U+02220
angmsd; U+02221
angmsdaa; U+029A8
angmsdab; U+029A9
angmsdac; U+029AA
angmsdad; U+029AB
angmsdae; U+029AC
angmsdaf; U+029AD
angmsdag; U+029AE
angmsdah; U+029AF
angrt; U+0221F
angrtvb; U+022BE
angrtvbd; U+0299D
angsph; U+02222
angst; U+000C5 Å
angzarr; U+0237C
aogon; U+00105 ą
aopf; U+1D552 𝕒
ap; U+02248
apE; U+02A70
apacir; U+02A6F
ape; U+0224A
apid; U+0224B
apos; U+00027 '
approx; U+02248
approxeq; U+0224A
aring; U+000E5 å
ascr; U+1D4B6 𝒶
ast; U+0002A *
asymp; U+02248
asympeq; U+0224D
atilde; U+000E3 ã
auml; U+000E4 ä
awconint; U+02233
awint; U+02A11
bNot; U+02AED
backcong; U+0224C
backepsilon; U+003F6 ϶
backprime; U+02035
backsim; U+0223D
backsimeq; U+022CD
barvee; U+022BD
barwed; U+02305
barwedge; U+02305
bbrk; U+023B5
bbrktbrk; U+023B6
bcong; U+0224C
bcy; U+00431 б
bdquo; U+0201E
becaus; U+02235
because; U+02235
bemptyv; U+029B0
bepsi; U+003F6 ϶
bernou; U+0212C
beta; U+003B2 β
beth; U+02136
between; U+0226C
bfr; U+1D51F 𝔟
bigcap; U+022C2
bigcirc; U+025EF
bigcup; U+022C3
bigodot; U+02A00
bigoplus; U+02A01
bigotimes; U+02A02
bigsqcup; U+02A06
bigstar; U+02605
bigtriangledown; U+025BD
bigtriangleup; U+025B3
biguplus; U+02A04
bigvee; U+022C1
bigwedge; U+022C0
bkarow; U+0290D
blacklozenge; U+029EB
blacksquare; U+025AA
blacktriangle; U+025B4
blacktriangledown; U+025BE
blacktriangleleft; U+025C2
blacktriangleright; U+025B8
blank; U+02423
blk12; U+02592
blk14; U+02591
blk34; U+02593
block; U+02588
bne; U+0003D U+020E5 =⃥
bnequiv; U+02261 U+020E5 ≡⃥
bnot; U+02310
bopf; U+1D553 𝕓
bot; U+022A5
bottom; U+022A5
bowtie; U+022C8
boxDL; U+02557
boxDR; U+02554
boxDl; U+02556
boxDr; U+02553
boxH; U+02550
boxHD; U+02566
boxHU; U+02569
boxHd; U+02564
boxHu; U+02567
boxUL; U+0255D
boxUR; U+0255A
boxUl; U+0255C
boxUr; U+02559
boxV; U+02551
boxVH; U+0256C
boxVL; U+02563
boxVR; U+02560
boxVh; U+0256B
boxVl; U+02562
boxVr; U+0255F
boxbox; U+029C9
boxdL; U+02555
boxdR; U+02552
boxdl; U+02510
boxdr; U+0250C
boxh; U+02500
boxhD; U+02565
boxhU; U+02568
boxhd; U+0252C
boxhu; U+02534
boxminus; U+0229F
boxplus; U+0229E
boxtimes; U+022A0
boxuL; U+0255B
boxuR; U+02558
boxul; U+02518
boxur; U+02514
boxv; U+02502
boxvH; U+0256A
boxvL; U+02561
boxvR; U+0255E
boxvh; U+0253C
boxvl; U+02524
boxvr; U+0251C
bprime; U+02035
breve; U+002D8 ˘
brvbar; U+000A6 ¦
bscr; U+1D4B7 𝒷
bsemi; U+0204F
bsim; U+0223D
bsime; U+022CD
bsol; U+0005C \
bsolb; U+029C5
bsolhsub; U+027C8
bull; U+02022
bullet; U+02022
bump; U+0224E
bumpE; U+02AAE
bumpe; U+0224F
bumpeq; U+0224F
cacute; U+00107 ć
cap; U+02229
capand; U+02A44
capbrcup; U+02A49
capcap; U+02A4B
capcup; U+02A47
capdot; U+02A40
caps; U+02229 U+0FE00 ∩︀
caret; U+02041
caron; U+002C7 ˇ
ccaps; U+02A4D
ccaron; U+0010D č
ccedil; U+000E7 ç
ccirc; U+00109 ĉ
ccups; U+02A4C
ccupssm; U+02A50
cdot; U+0010B ċ
cedil; U+000B8 ¸
cemptyv; U+029B2
cent; U+000A2 ¢
centerdot; U+000B7 ·
cfr; U+1D520 𝔠
chcy; U+00447 ч
check; U+02713
checkmark; U+02713
chi; U+003C7 χ
cir; U+025CB
cirE; U+029C3
circ; U+002C6 ˆ
circeq; U+02257
circlearrowleft; U+021BA
circlearrowright; U+021BB
circledR; U+000AE ®
circledS; U+024C8
circledast; U+0229B
circledcirc; U+0229A
circleddash; U+0229D
cire; U+02257
cirfnint; U+02A10
cirmid; U+02AEF
cirscir; U+029C2
clubs; U+02663
clubsuit; U+02663
colon; U+0003A :
colone; U+02254
coloneq; U+02254
comma; U+0002C ,
commat; U+00040 @
comp; U+02201
compfn; U+02218
complement; U+02201
complexes; U+02102
cong; U+02245
congdot; U+02A6D
conint; U+0222E
copf; U+1D554 𝕔
coprod; U+02210
copy; U+000A9 ©
copysr; U+02117
crarr; U+021B5
cross; U+02717
cscr; U+1D4B8 𝒸
csub; U+02ACF
csube; U+02AD1
csup; U+02AD0
csupe; U+02AD2
ctdot; U+022EF
cudarrl; U+02938
cudarrr; U+02935
cuepr; U+022DE
cuesc; U+022DF
cularr; U+021B6
cularrp; U+0293D
cup; U+0222A
cupbrcap; U+02A48
cupcap; U+02A46
cupcup; U+02A4A
cupdot; U+0228D
cupor; U+02A45
cups; U+0222A U+0FE00 ∪︀
curarr; U+021B7
curarrm; U+0293C
curlyeqprec; U+022DE
curlyeqsucc; U+022DF
curlyvee; U+022CE
curlywedge; U+022CF
curren; U+000A4 ¤
curvearrowleft; U+021B6
curvearrowright; U+021B7
cuvee; U+022CE
cuwed; U+022CF
cwconint; U+02232
cwint; U+02231
cylcty; U+0232D
dArr; U+021D3
dHar; U+02965
dagger; U+02020
daleth; U+02138
darr; U+02193
dash; U+02010
dashv; U+022A3
dbkarow; U+0290F
dblac; U+002DD ˝
dcaron; U+0010F ď
dcy; U+00434 д
dd; U+02146
ddagger; U+02021
ddarr; U+021CA
ddotseq; U+02A77
deg; U+000B0 °
delta; U+003B4 δ
demptyv; U+029B1
dfisht; U+0297F ⥿
dfr; U+1D521 𝔡
dharl; U+021C3
dharr; U+021C2
diam; U+022C4
diamond; U+022C4
diamondsuit; U+02666
diams; U+02666
die; U+000A8 ¨
digamma; U+003DD ϝ
disin; U+022F2
div; U+000F7 ÷
divide; U+000F7 ÷
divideontimes; U+022C7
divonx; U+022C7
djcy; U+00452 ђ
dlcorn; U+0231E
dlcrop; U+0230D
dollar; U+00024 $
dopf; U+1D555 𝕕
dot; U+002D9 ˙
doteq; U+02250
doteqdot; U+02251
dotminus; U+02238
dotplus; U+02214
dotsquare; U+022A1
doublebarwedge; U+02306
downarrow; U+02193
downdownarrows; U+021CA
downharpoonleft; U+021C3
downharpoonright; U+021C2
drbkarow; U+02910
drcorn; U+0231F
drcrop; U+0230C
dscr; U+1D4B9 𝒹
dscy; U+00455 ѕ
dsol; U+029F6
dstrok; U+00111 đ
dtdot; U+022F1
dtri; U+025BF
dtrif; U+025BE
duarr; U+021F5
duhar; U+0296F
dwangle; U+029A6
dzcy; U+0045F џ
dzigrarr; U+027FF
eDDot; U+02A77
eDot; U+02251
eacute; U+000E9 é
easter; U+02A6E
ecaron; U+0011B ě
ecir; U+02256
ecirc; U+000EA ê
ecolon; U+02255
ecy; U+0044D э
edot; U+00117 ė
ee; U+02147
efDot; U+02252
efr; U+1D522 𝔢
eg; U+02A9A
egrave; U+000E8 è
egs; U+02A96
egsdot; U+02A98
el; U+02A99
elinters; U+023E7
ell; U+02113
els; U+02A95
elsdot; U+02A97
emacr; U+00113 ē
empty; U+02205
emptyset; U+02205
emptyv; U+02205
emsp; U+02003
emsp13; U+02004
emsp14; U+02005
eng; U+0014B ŋ
ensp; U+02002
eogon; U+00119 ę
eopf; U+1D556 𝕖
epar; U+022D5
eparsl; U+029E3
eplus; U+02A71
epsi; U+003B5 ε
epsilon; U+003B5 ε
epsiv; U+003F5 ϵ
eqcirc; U+02256
eqcolon; U+02255
eqsim; U+02242
eqslantgtr; U+02A96
eqslantless; U+02A95
equals; U+0003D =
equest; U+0225F
equiv; U+02261
equivDD; U+02A78
eqvparsl; U+029E5
erDot; U+02253
erarr; U+02971
escr; U+0212F
esdot; U+02250
esim; U+02242
eta; U+003B7 η
eth; U+000F0 ð
euml; U+000EB ë
euro; U+020AC
excl; U+00021 !
exist; U+02203
expectation; U+02130
exponentiale; U+02147
fallingdotseq; U+02252
fcy; U+00444 ф
female; U+02640
ffilig; U+0FB03
fflig; U+0FB00
ffllig; U+0FB04
ffr; U+1D523 𝔣
filig; U+0FB01
fjlig; U+00066 U+0006A fj
flat; U+0266D
fllig; U+0FB02
fltns; U+025B1
fnof; U+00192 ƒ
fopf; U+1D557 𝕗
forall; U+02200
fork; U+022D4
forkv; U+02AD9
fpartint; U+02A0D
frac12; U+000BD ½
frac13; U+02153
frac14; U+000BC ¼
frac15; U+02155
frac16; U+02159
frac18; U+0215B
frac23; U+02154
frac25; U+02156
frac34; U+000BE ¾
frac35; U+02157
frac38; U+0215C
frac45; U+02158
frac56; U+0215A
frac58; U+0215D
frac78; U+0215E
frasl; U+02044
frown; U+02322
fscr; U+1D4BB 𝒻
gE; U+02267
gEl; U+02A8C
gacute; U+001F5 ǵ
gamma; U+003B3 γ
gammad; U+003DD ϝ
gap; U+02A86
gbreve; U+0011F ğ
gcirc; U+0011D ĝ
gcy; U+00433 г
gdot; U+00121 ġ
ge; U+02265
gel; U+022DB
geq; U+02265
geqq; U+02267
geqslant; U+02A7E
ges; U+02A7E
gescc; U+02AA9
gesdot; U+02A80
gesdoto; U+02A82
gesdotol; U+02A84
gesl; U+022DB U+0FE00 ⋛︀
gesles; U+02A94
gfr; U+1D524 𝔤
gg; U+0226B
ggg; U+022D9
gimel; U+02137
gjcy; U+00453 ѓ
gl; U+02277
glE; U+02A92
gla; U+02AA5
glj; U+02AA4
gnE; U+02269
gnap; U+02A8A
gnapprox; U+02A8A
gne; U+02A88
gneq; U+02A88
gneqq; U+02269
gnsim; U+022E7
gopf; U+1D558 𝕘
grave; U+00060 `
gscr; U+0210A
gsim; U+02273
gsime; U+02A8E
gsiml; U+02A90
gt; U+0003E >
gtcc; U+02AA7
gtcir; U+02A7A
gtdot; U+022D7
gtlPar; U+02995
gtquest; U+02A7C
gtrapprox; U+02A86
gtrarr; U+02978
gtrdot; U+022D7
gtreqless; U+022DB
gtreqqless; U+02A8C
gtrless; U+02277
gtrsim; U+02273
gvertneqq; U+02269 U+0FE00 ≩︀
gvnE; U+02269 U+0FE00 ≩︀
hArr; U+021D4
hairsp; U+0200A
half; U+000BD ½
hamilt; U+0210B
hardcy; U+0044A ъ
harr; U+02194
harrcir; U+02948
harrw; U+021AD
hbar; U+0210F
hcirc; U+00125 ĥ
hearts; U+02665
heartsuit; U+02665
hellip; U+02026
hercon; U+022B9
hfr; U+1D525 𝔥
hksearow; U+02925
hkswarow; U+02926
hoarr; U+021FF
homtht; U+0223B
hookleftarrow; U+021A9
hookrightarrow; U+021AA
hopf; U+1D559 𝕙
horbar; U+02015
hscr; U+1D4BD 𝒽
hslash; U+0210F
hstrok; U+00127 ħ
hybull; U+02043
hyphen; U+02010
iacute; U+000ED í
ic; U+02063
icirc; U+000EE î
icy; U+00438 и
iecy; U+00435 е
iexcl; U+000A1 ¡
iff; U+021D4
ifr; U+1D526 𝔦
igrave; U+000EC ì
ii; U+02148
iiiint; U+02A0C
iiint; U+0222D
iinfin; U+029DC
iiota; U+02129
ijlig; U+00133 ij
imacr; U+0012B ī
image; U+02111
imagline; U+02110
imagpart; U+02111
imath; U+00131 ı
imof; U+022B7
imped; U+001B5 Ƶ
in; U+02208
incare; U+02105
infin; U+0221E
infintie; U+029DD
inodot; U+00131 ı
int; U+0222B
intcal; U+022BA
integers; U+02124
intercal; U+022BA
intlarhk; U+02A17
intprod; U+02A3C
iocy; U+00451 ё
iogon; U+0012F į
iopf; U+1D55A 𝕚
iota; U+003B9 ι
iprod; U+02A3C
iquest; U+000BF ¿
iscr; U+1D4BE 𝒾
isin; U+02208
isinE; U+022F9
isindot; U+022F5
isins; U+022F4
isinsv; U+022F3
isinv; U+02208
it; U+02062
itilde; U+00129 ĩ
iukcy; U+00456 і
iuml; U+000EF ï
jcirc; U+00135 ĵ
jcy; U+00439 й
jfr; U+1D527 𝔧
jmath; U+00237 ȷ
jopf; U+1D55B 𝕛
jscr; U+1D4BF 𝒿
jsercy; U+00458 ј
jukcy; U+00454 є
kappa; U+003BA κ
kappav; U+003F0 ϰ
kcedil; U+00137 ķ
kcy; U+0043A к
kfr; U+1D528 𝔨
kgreen; U+00138 ĸ
khcy; U+00445 х
kjcy; U+0045C ќ
kopf; U+1D55C 𝕜
kscr; U+1D4C0 𝓀
lAarr; U+021DA
lArr; U+021D0
lAtail; U+0291B
lBarr; U+0290E
lE; U+02266
lEg; U+02A8B
lHar; U+02962
lacute; U+0013A ĺ
laemptyv; U+029B4
lagran; U+02112
lambda; U+003BB λ
lang; U+027E8
langd; U+02991
langle; U+027E8
lap; U+02A85
laquo; U+000AB «
larr; U+02190
larrb; U+021E4
larrbfs; U+0291F
larrfs; U+0291D
larrhk; U+021A9
larrlp; U+021AB
larrpl; U+02939
larrsim; U+02973
larrtl; U+021A2
lat; U+02AAB
latail; U+02919
late; U+02AAD
lates; U+02AAD U+0FE00 ⪭︀
lbarr; U+0290C
lbbrk; U+02772
lbrace; U+0007B {
lbrack; U+0005B [
lbrke; U+0298B
lbrksld; U+0298F
lbrkslu; U+0298D
lcaron; U+0013E ľ
lcedil; U+0013C ļ
lceil; U+02308
lcub; U+0007B {
lcy; U+0043B л
ldca; U+02936
ldquo; U+0201C
ldquor; U+0201E
ldrdhar; U+02967
ldrushar; U+0294B
ldsh; U+021B2
le; U+02264
leftarrow; U+02190
leftarrowtail; U+021A2
leftharpoondown; U+021BD
leftharpoonup; U+021BC
leftleftarrows; U+021C7
leftrightarrow; U+02194
leftrightarrows; U+021C6
leftrightharpoons; U+021CB
leftrightsquigarrow; U+021AD
leftthreetimes; U+022CB
leg; U+022DA
leq; U+02264
leqq; U+02266
leqslant; U+02A7D
les; U+02A7D
lescc; U+02AA8
lesdot; U+02A7F ⩿
lesdoto; U+02A81
lesdotor; U+02A83
lesg; U+022DA U+0FE00 ⋚︀
lesges; U+02A93
lessapprox; U+02A85
lessdot; U+022D6
lesseqgtr; U+022DA
lesseqqgtr; U+02A8B
lessgtr; U+02276
lesssim; U+02272
lfisht; U+0297C
lfloor; U+0230A
lfr; U+1D529 𝔩
lg; U+02276
lgE; U+02A91
lhard; U+021BD
lharu; U+021BC
lharul; U+0296A
lhblk; U+02584
ljcy; U+00459 љ
ll; U+0226A
llarr; U+021C7
llcorner; U+0231E
llhard; U+0296B
lltri; U+025FA
lmidot; U+00140 ŀ
lmoust; U+023B0
lmoustache; U+023B0
lnE; U+02268
lnap; U+02A89
lnapprox; U+02A89
lne; U+02A87
lneq; U+02A87
lneqq; U+02268
lnsim; U+022E6
loang; U+027EC
loarr; U+021FD
lobrk; U+027E6
longleftarrow; U+027F5
longleftrightarrow; U+027F7
longmapsto; U+027FC
longrightarrow; U+027F6
looparrowleft; U+021AB
looparrowright; U+021AC
lopar; U+02985
lopf; U+1D55D 𝕝
loplus; U+02A2D
lotimes; U+02A34
lowast; U+02217
lowbar; U+0005F _
loz; U+025CA
lozenge; U+025CA
lozf; U+029EB
lpar; U+00028 (
lparlt; U+02993
lrarr; U+021C6
lrcorner; U+0231F
lrhar; U+021CB
lrhard; U+0296D
lrm; U+0200E
lrtri; U+022BF
lsaquo; U+02039
lscr; U+1D4C1 𝓁
lsh; U+021B0
lsim; U+02272
lsime; U+02A8D
lsimg; U+02A8F
lsqb; U+0005B [
lsquo; U+02018
lsquor; U+0201A
lstrok; U+00142 ł
lt; U+0003C <
ltcc; U+02AA6
ltcir; U+02A79
ltdot; U+022D6
lthree; U+022CB
ltimes; U+022C9
ltlarr; U+02976
ltquest; U+02A7B
ltrPar; U+02996
ltri; U+025C3
ltrie; U+022B4
ltrif; U+025C2
lurdshar; U+0294A
luruhar; U+02966
lvertneqq; U+02268 U+0FE00 ≨︀
lvnE; U+02268 U+0FE00 ≨︀
mDDot; U+0223A
macr; U+000AF ¯
male; U+02642
malt; U+02720
maltese; U+02720
map; U+021A6
mapsto; U+021A6
mapstodown; U+021A7
mapstoleft; U+021A4
mapstoup; U+021A5
marker; U+025AE
mcomma; U+02A29
mcy; U+0043C м
mdash; U+02014
measuredangle; U+02221
mfr; U+1D52A 𝔪
mho; U+02127
micro; U+000B5 µ
mid; U+02223
midast; U+0002A *
midcir; U+02AF0
middot; U+000B7 ·
minus; U+02212
minusb; U+0229F
minusd; U+02238
minusdu; U+02A2A
mlcp; U+02ADB
mldr; U+02026
mnplus; U+02213
models; U+022A7
mopf; U+1D55E 𝕞
mp; U+02213
mscr; U+1D4C2 𝓂
mstpos; U+0223E
mu; U+003BC μ
multimap; U+022B8
mumap; U+022B8
nGg; U+022D9 U+00338 ⋙̸
nGt; U+0226B U+020D2 ≫⃒
nGtv; U+0226B U+00338 ≫̸
nLeftarrow; U+021CD
nLeftrightarrow; U+021CE
nLl; U+022D8 U+00338 ⋘̸
nLt; U+0226A U+020D2 ≪⃒
nLtv; U+0226A U+00338 ≪̸
nRightarrow; U+021CF
nVDash; U+022AF
nVdash; U+022AE
nabla; U+02207
nacute; U+00144 ń
nang; U+02220 U+020D2 ∠⃒
nap; U+02249
napE; U+02A70 U+00338 ⩰̸
napid; U+0224B U+00338 ≋̸
napos; U+00149 ʼn
napprox; U+02249
natur; U+0266E
natural; U+0266E
naturals; U+02115
nbsp; U+000A0  
nbump; U+0224E U+00338 ≎̸
nbumpe; U+0224F U+00338 ≏̸
ncap; U+02A43
ncaron; U+00148 ň
ncedil; U+00146 ņ
ncong; U+02247
ncongdot; U+02A6D U+00338 ⩭̸
ncup; U+02A42
ncy; U+0043D н
ndash; U+02013
ne; U+02260
neArr; U+021D7
nearhk; U+02924
nearr; U+02197
nearrow; U+02197
nedot; U+02250 U+00338 ≐̸
nequiv; U+02262
nesear; U+02928
nesim; U+02242 U+00338 ≂̸
nexist; U+02204
nexists; U+02204
nfr; U+1D52B 𝔫
ngE; U+02267 U+00338 ≧̸
nge; U+02271
ngeq; U+02271
ngeqq; U+02267 U+00338 ≧̸
ngeqslant; U+02A7E U+00338 ⩾̸
nges; U+02A7E U+00338 ⩾̸
ngsim; U+02275
ngt; U+0226F
ngtr; U+0226F
nhArr; U+021CE
nharr; U+021AE
nhpar; U+02AF2
ni; U+0220B
nis; U+022FC
nisd; U+022FA
niv; U+0220B
njcy; U+0045A њ
nlArr; U+021CD
nlE; U+02266 U+00338 ≦̸
nlarr; U+0219A
nldr; U+02025
nle; U+02270
nleftarrow; U+0219A
nleftrightarrow; U+021AE
nleq; U+02270
nleqq; U+02266 U+00338 ≦̸
nleqslant; U+02A7D U+00338 ⩽̸
nles; U+02A7D U+00338 ⩽̸
nless; U+0226E
nlsim; U+02274
nlt; U+0226E
nltri; U+022EA
nltrie; U+022EC
nmid; U+02224
nopf; U+1D55F 𝕟
not; U+000AC ¬
notin; U+02209
notinE; U+022F9 U+00338 ⋹̸
notindot; U+022F5 U+00338 ⋵̸
notinva; U+02209
notinvb; U+022F7
notinvc; U+022F6
notni; U+0220C
notniva; U+0220C
notnivb; U+022FE
notnivc; U+022FD
npar; U+02226
nparallel; U+02226
nparsl; U+02AFD U+020E5 ⫽⃥
npart; U+02202 U+00338 ∂̸
npolint; U+02A14
npr; U+02280
nprcue; U+022E0
npre; U+02AAF U+00338 ⪯̸
nprec; U+02280
npreceq; U+02AAF U+00338 ⪯̸
nrArr; U+021CF
nrarr; U+0219B
nrarrc; U+02933 U+00338 ⤳̸
nrarrw; U+0219D U+00338 ↝̸
nrightarrow; U+0219B
nrtri; U+022EB
nrtrie; U+022ED
nsc; U+02281
nsccue; U+022E1
nsce; U+02AB0 U+00338 ⪰̸
nscr; U+1D4C3 𝓃
nshortmid; U+02224
nshortparallel; U+02226
nsim; U+02241
nsime; U+02244
nsimeq; U+02244
nsmid; U+02224
nspar; U+02226
nsqsube; U+022E2
nsqsupe; U+022E3
nsub; U+02284
nsubE; U+02AC5 U+00338 ⫅̸
nsube; U+02288
nsubset; U+02282 U+020D2 ⊂⃒
nsubseteq; U+02288
nsubseteqq; U+02AC5 U+00338 ⫅̸
nsucc; U+02281
nsucceq; U+02AB0 U+00338 ⪰̸
nsup; U+02285
nsupE; U+02AC6 U+00338 ⫆̸
nsupe; U+02289
nsupset; U+02283 U+020D2 ⊃⃒
nsupseteq; U+02289
nsupseteqq; U+02AC6 U+00338 ⫆̸
ntgl; U+02279
ntilde; U+000F1 ñ
ntlg; U+02278
ntriangleleft; U+022EA
ntrianglelefteq; U+022EC
ntriangleright; U+022EB
ntrianglerighteq; U+022ED
nu; U+003BD ν
num; U+00023 #
numero; U+02116
numsp; U+02007
nvDash; U+022AD
nvHarr; U+02904
nvap; U+0224D U+020D2 ≍⃒
nvdash; U+022AC
nvge; U+02265 U+020D2 ≥⃒
nvgt; U+0003E U+020D2 >⃒
nvinfin; U+029DE
nvlArr; U+02902
nvle; U+02264 U+020D2 ≤⃒
nvlt; U+0003C U+020D2 <⃒
nvltrie; U+022B4 U+020D2 ⊴⃒
nvrArr; U+02903
nvrtrie; U+022B5 U+020D2 ⊵⃒
nvsim; U+0223C U+020D2 ∼⃒
nwArr; U+021D6
nwarhk; U+02923
nwarr; U+02196
nwarrow; U+02196
nwnear; U+02927
oS; U+024C8
oacute; U+000F3 ó
oast; U+0229B
ocir; U+0229A
ocirc; U+000F4 ô
ocy; U+0043E о
odash; U+0229D
odblac; U+00151 ő
odiv; U+02A38
odot; U+02299
odsold; U+029BC
oelig; U+00153 œ
ofcir; U+029BF ⦿
ofr; U+1D52C 𝔬
ogon; U+002DB ˛
ograve; U+000F2 ò
ogt; U+029C1
ohbar; U+029B5
ohm; U+003A9 Ω
oint; U+0222E
olarr; U+021BA
olcir; U+029BE
olcross; U+029BB
oline; U+0203E
olt; U+029C0
omacr; U+0014D ō
omega; U+003C9 ω
omicron; U+003BF ο
omid; U+029B6
ominus; U+02296
oopf; U+1D560 𝕠
opar; U+029B7
operp; U+029B9
oplus; U+02295
or; U+02228
orarr; U+021BB
ord; U+02A5D
order; U+02134
orderof; U+02134
ordf; U+000AA ª
ordm; U+000BA º
origof; U+022B6
oror; U+02A56
orslope; U+02A57
orv; U+02A5B
oscr; U+02134
oslash; U+000F8 ø
osol; U+02298
otilde; U+000F5 õ
otimes; U+02297
otimesas; U+02A36
ouml; U+000F6 ö
ovbar; U+0233D
par; U+02225
para; U+000B6
parallel; U+02225
parsim; U+02AF3
parsl; U+02AFD
part; U+02202
pcy; U+0043F п
percnt; U+00025 %
period; U+0002E .
permil; U+02030
perp; U+022A5
pertenk; U+02031
pfr; U+1D52D 𝔭
phi; U+003C6 φ
phiv; U+003D5 ϕ
phmmat; U+02133
phone; U+0260E
pi; U+003C0 π
pitchfork; U+022D4
piv; U+003D6 ϖ
planck; U+0210F
planckh; U+0210E
plankv; U+0210F
plus; U+0002B +
plusacir; U+02A23
plusb; U+0229E
pluscir; U+02A22
plusdo; U+02214
plusdu; U+02A25
pluse; U+02A72
plusmn; U+000B1 ±
plussim; U+02A26
plustwo; U+02A27
pm; U+000B1 ±
pointint; U+02A15
popf; U+1D561 𝕡
pound; U+000A3 £
pr; U+0227A
prE; U+02AB3
prap; U+02AB7
prcue; U+0227C
pre; U+02AAF
prec; U+0227A
precapprox; U+02AB7
preccurlyeq; U+0227C
preceq; U+02AAF
precnapprox; U+02AB9
precneqq; U+02AB5
precnsim; U+022E8
precsim; U+0227E
prime; U+02032
primes; U+02119
prnE; U+02AB5
prnap; U+02AB9
prnsim; U+022E8
prod; U+0220F
profalar; U+0232E
profline; U+02312
profsurf; U+02313
prop; U+0221D
propto; U+0221D
prsim; U+0227E
prurel; U+022B0
pscr; U+1D4C5 𝓅
psi; U+003C8 ψ
puncsp; U+02008
qfr; U+1D52E 𝔮
qint; U+02A0C
qopf; U+1D562 𝕢
qprime; U+02057
qscr; U+1D4C6 𝓆
quaternions; U+0210D
quatint; U+02A16
quest; U+0003F ?
questeq; U+0225F
quot; U+00022 "
rAarr; U+021DB
rArr; U+021D2
rAtail; U+0291C
rBarr; U+0290F
rHar; U+02964
race; U+0223D U+00331 ∽̱
racute; U+00155 ŕ
radic; U+0221A
raemptyv; U+029B3
rang; U+027E9
rangd; U+02992
range; U+029A5
rangle; U+027E9
raquo; U+000BB »
rarr; U+02192
rarrap; U+02975
rarrb; U+021E5
rarrbfs; U+02920
rarrc; U+02933
rarrfs; U+0291E
rarrhk; U+021AA
rarrlp; U+021AC
rarrpl; U+02945
rarrsim; U+02974
rarrtl; U+021A3
rarrw; U+0219D
ratail; U+0291A
ratio; U+02236
rationals; U+0211A
rbarr; U+0290D
rbbrk; U+02773
rbrace; U+0007D }
rbrack; U+0005D ]
rbrke; U+0298C
rbrksld; U+0298E
rbrkslu; U+02990
rcaron; U+00159 ř
rcedil; U+00157 ŗ
rceil; U+02309
rcub; U+0007D }
rcy; U+00440 р
rdca; U+02937
rdldhar; U+02969
rdquo; U+0201D
rdquor; U+0201D
rdsh; U+021B3
real; U+0211C
realine; U+0211B
realpart; U+0211C
reals; U+0211D
rect; U+025AD
reg; U+000AE ®
rfisht; U+0297D
rfloor; U+0230B
rfr; U+1D52F 𝔯
rhard; U+021C1
rharu; U+021C0
rharul; U+0296C
rho; U+003C1 ρ
rhov; U+003F1 ϱ
rightarrow; U+02192
rightarrowtail; U+021A3
rightharpoondown; U+021C1
rightharpoonup; U+021C0
rightleftarrows; U+021C4
rightleftharpoons; U+021CC
rightrightarrows; U+021C9
rightsquigarrow; U+0219D
rightthreetimes; U+022CC
ring; U+002DA ˚
risingdotseq; U+02253
rlarr; U+021C4
rlhar; U+021CC
rlm; U+0200F
rmoust; U+023B1
rmoustache; U+023B1
rnmid; U+02AEE
roang; U+027ED
roarr; U+021FE
robrk; U+027E7
ropar; U+02986
ropf; U+1D563 𝕣
roplus; U+02A2E
rotimes; U+02A35
rpar; U+00029 )
rpargt; U+02994
rppolint; U+02A12
rrarr; U+021C9
rsaquo; U+0203A
rscr; U+1D4C7 𝓇
rsh; U+021B1
rsqb; U+0005D ]
rsquo; U+02019
rsquor; U+02019
rthree; U+022CC
rtimes; U+022CA
rtri; U+025B9
rtrie; U+022B5
rtrif; U+025B8
rtriltri; U+029CE
ruluhar; U+02968
rx; U+0211E
sacute; U+0015B ś
sbquo; U+0201A
sc; U+0227B
scE; U+02AB4
scap; U+02AB8
scaron; U+00161 š
sccue; U+0227D
sce; U+02AB0
scedil; U+0015F ş
scirc; U+0015D ŝ
scnE; U+02AB6
scnap; U+02ABA
scnsim; U+022E9
scpolint; U+02A13
scsim; U+0227F
scy; U+00441 с
sdot; U+022C5
sdotb; U+022A1
sdote; U+02A66
seArr; U+021D8
searhk; U+02925
searr; U+02198
searrow; U+02198
sect; U+000A7 §
semi; U+0003B ;
seswar; U+02929
setminus; U+02216
setmn; U+02216
sext; U+02736
sfr; U+1D530 𝔰
sfrown; U+02322
sharp; U+0266F
shchcy; U+00449 щ
shcy; U+00448 ш
shortmid; U+02223
shortparallel; U+02225
shy; U+000AD ­
sigma; U+003C3 σ
sigmaf; U+003C2 ς
sigmav; U+003C2 ς
sim; U+0223C
simdot; U+02A6A
sime; U+02243
simeq; U+02243
simg; U+02A9E
simgE; U+02AA0
siml; U+02A9D
simlE; U+02A9F
simne; U+02246
simplus; U+02A24
simrarr; U+02972
slarr; U+02190
smallsetminus; U+02216
smashp; U+02A33
smeparsl; U+029E4
smid; U+02223
smile; U+02323
smt; U+02AAA
smte; U+02AAC
smtes; U+02AAC U+0FE00 ⪬︀
softcy; U+0044C ь
sol; U+0002F /
solb; U+029C4
solbar; U+0233F
sopf; U+1D564 𝕤
spades; U+02660
spadesuit; U+02660
spar; U+02225
sqcap; U+02293
sqcaps; U+02293 U+0FE00 ⊓︀
sqcup; U+02294
sqcups; U+02294 U+0FE00 ⊔︀
sqsub; U+0228F
sqsube; U+02291
sqsubset; U+0228F
sqsubseteq; U+02291
sqsup; U+02290
sqsupe; U+02292
sqsupset; U+02290
sqsupseteq; U+02292
squ; U+025A1
square; U+025A1
squarf; U+025AA
squf; U+025AA
srarr; U+02192
sscr; U+1D4C8 𝓈
ssetmn; U+02216
ssmile; U+02323
sstarf; U+022C6
star; U+02606
starf; U+02605
straightepsilon; U+003F5 ϵ
straightphi; U+003D5 ϕ
strns; U+000AF ¯
sub; U+02282
subE; U+02AC5
subdot; U+02ABD
sube; U+02286
subedot; U+02AC3
submult; U+02AC1
subnE; U+02ACB
subne; U+0228A
subplus; U+02ABF ⪿
subrarr; U+02979
subset; U+02282
subseteq; U+02286
subseteqq; U+02AC5
subsetneq; U+0228A
subsetneqq; U+02ACB
subsim; U+02AC7
subsub; U+02AD5
subsup; U+02AD3
succ; U+0227B
succapprox; U+02AB8
succcurlyeq; U+0227D
succeq; U+02AB0
succnapprox; U+02ABA
succneqq; U+02AB6
succnsim; U+022E9
succsim; U+0227F
sum; U+02211
sung; U+0266A
sup; U+02283
sup1; U+000B9 ¹
sup2; U+000B2 ²
sup3; U+000B3 ³
supE; U+02AC6
supdot; U+02ABE
supdsub; U+02AD8
supe; U+02287
supedot; U+02AC4
suphsol; U+027C9
suphsub; U+02AD7
suplarr; U+0297B
supmult; U+02AC2
supnE; U+02ACC
supne; U+0228B
supplus; U+02AC0
supset; U+02283
supseteq; U+02287
supseteqq; U+02AC6
supsetneq; U+0228B
supsetneqq; U+02ACC
supsim; U+02AC8
supsub; U+02AD4
supsup; U+02AD6
swArr; U+021D9
swarhk; U+02926
swarr; U+02199
swarrow; U+02199
swnwar; U+0292A
szlig; U+000DF ß
target; U+02316
tau; U+003C4 τ
tbrk; U+023B4
tcaron; U+00165 ť
tcedil; U+00163 ţ
tcy; U+00442 т
tdot; U+020DB ◌⃛
telrec; U+02315
tfr; U+1D531 𝔱
there4; U+02234
therefore; U+02234
theta; U+003B8 θ
thetasym; U+003D1 ϑ
thetav; U+003D1 ϑ
thickapprox; U+02248
thicksim; U+0223C
thinsp; U+02009
thkap; U+02248
thksim; U+0223C
thorn; U+000FE þ
tilde; U+002DC ˜
times; U+000D7 ×
timesb; U+022A0
timesbar; U+02A31
timesd; U+02A30
tint; U+0222D
toea; U+02928
top; U+022A4
topbot; U+02336
topcir; U+02AF1
topf; U+1D565 𝕥
topfork; U+02ADA
tosa; U+02929
tprime; U+02034
trade; U+02122
triangle; U+025B5
triangledown; U+025BF
triangleleft; U+025C3
trianglelefteq; U+022B4
triangleq; U+0225C
triangleright; U+025B9
trianglerighteq; U+022B5
tridot; U+025EC
trie; U+0225C
triminus; U+02A3A
triplus; U+02A39
trisb; U+029CD
tritime; U+02A3B
trpezium; U+023E2
tscr; U+1D4C9 𝓉
tscy; U+00446 ц
tshcy; U+0045B ћ
tstrok; U+00167 ŧ
twixt; U+0226C
twoheadleftarrow; U+0219E
twoheadrightarrow; U+021A0
uArr; U+021D1
uHar; U+02963
uacute; U+000FA ú
uarr; U+02191
ubrcy; U+0045E ў
ubreve; U+0016D ŭ
ucirc; U+000FB û
ucy; U+00443 у
udarr; U+021C5
udblac; U+00171 ű
udhar; U+0296E
ufisht; U+0297E
ufr; U+1D532 𝔲
ugrave; U+000F9 ù
uharl; U+021BF
uharr; U+021BE
uhblk; U+02580
ulcorn; U+0231C
ulcorner; U+0231C
ulcrop; U+0230F
ultri; U+025F8
umacr; U+0016B ū
uml; U+000A8 ¨
uogon; U+00173 ų
uopf; U+1D566 𝕦
uparrow; U+02191
updownarrow; U+02195
upharpoonleft; U+021BF
upharpoonright; U+021BE
uplus; U+0228E
upsi; U+003C5 υ
upsih; U+003D2 ϒ
upsilon; U+003C5 υ
upuparrows; U+021C8
urcorn; U+0231D
urcorner; U+0231D
urcrop; U+0230E
uring; U+0016F ů
urtri; U+025F9
uscr; U+1D4CA 𝓊
utdot; U+022F0
utilde; U+00169 ũ
utri; U+025B5
utrif; U+025B4
uuarr; U+021C8
uuml; U+000FC ü
uwangle; U+029A7
vArr; U+021D5
vBar; U+02AE8
vBarv; U+02AE9
vDash; U+022A8
vangrt; U+0299C
varepsilon; U+003F5 ϵ
varkappa; U+003F0 ϰ
varnothing; U+02205
varphi; U+003D5 ϕ
varpi; U+003D6 ϖ
varpropto; U+0221D
varr; U+02195
varrho; U+003F1 ϱ
varsigma; U+003C2 ς
varsubsetneq; U+0228A U+0FE00 ⊊︀
varsubsetneqq; U+02ACB U+0FE00 ⫋︀
varsupsetneq; U+0228B U+0FE00 ⊋︀
varsupsetneqq; U+02ACC U+0FE00 ⫌︀
vartheta; U+003D1 ϑ
vartriangleleft; U+022B2
vartriangleright; U+022B3
vcy; U+00432 в
vdash; U+022A2
vee; U+02228
veebar; U+022BB
veeeq; U+0225A
vellip; U+022EE
verbar; U+0007C |
vert; U+0007C |
vfr; U+1D533 𝔳
vltri; U+022B2
vnsub; U+02282 U+020D2 ⊂⃒
vnsup; U+02283 U+020D2 ⊃⃒
vopf; U+1D567 𝕧
vprop; U+0221D
vrtri; U+022B3
vscr; U+1D4CB 𝓋
vsubnE; U+02ACB U+0FE00 ⫋︀
vsubne; U+0228A U+0FE00 ⊊︀
vsupnE; U+02ACC U+0FE00 ⫌︀
vsupne; U+0228B U+0FE00 ⊋︀
vzigzag; U+0299A
wcirc; U+00175 ŵ
wedbar; U+02A5F
wedge; U+02227
wedgeq; U+02259
weierp; U+02118
wfr; U+1D534 𝔴
wopf; U+1D568 𝕨
wp; U+02118
wr; U+02240
wreath; U+02240
wscr; U+1D4CC 𝓌
xcap; U+022C2
xcirc; U+025EF
xcup; U+022C3
xdtri; U+025BD
xfr; U+1D535 𝔵
xhArr; U+027FA
xharr; U+027F7
xi; U+003BE ξ
xlArr; U+027F8
xlarr; U+027F5
xmap; U+027FC
xnis; U+022FB
xodot; U+02A00
xopf; U+1D569 𝕩
xoplus; U+02A01
xotime; U+02A02
xrArr; U+027F9
xrarr; U+027F6
xscr; U+1D4CD 𝓍
xsqcup; U+02A06
xuplus; U+02A04
xutri; U+025B3
xvee; U+022C1
xwedge; U+022C0
yacute; U+000FD ý
yacy; U+0044F я
ycirc; U+00177 ŷ
ycy; U+0044B ы
yen; U+000A5 ¥
yfr; U+1D536 𝔶
yicy; U+00457 ї
yopf; U+1D56A 𝕪
yscr; U+1D4CE 𝓎
yucy; U+0044E ю
yuml; U+000FF ÿ
zacute; U+0017A ź
zcaron; U+0017E ž
zcy; U+00437 з
zdot; U+0017C ż
zeetrf; U+02128
zeta; U+003B6 ζ
zfr; U+1D537 𝔷
zhcy; U+00436 ж
zigrarr; U+021DD
zopf; U+1D56B 𝕫
zscr; U+1D4CF 𝓏
zwj; U+0200D
zwnj; U+0200C
AElig U+000C6 Æ
AMP U+00026 &
Aacute U+000C1 Á
Acirc U+000C2 Â
Agrave U+000C0 À
Aring U+000C5 Å
Atilde U+000C3 Ã
Auml U+000C4 Ä
COPY U+000A9 ©
Ccedil U+000C7 Ç
ETH U+000D0 Ð
Eacute U+000C9 É
Ecirc U+000CA Ê
Egrave U+000C8 È
Euml U+000CB Ë
GT U+0003E >
Iacute U+000CD Í
Icirc U+000CE Î
Igrave U+000CC Ì
Iuml U+000CF Ï
LT U+0003C <
Ntilde U+000D1 Ñ
Oacute U+000D3 Ó
Ocirc U+000D4 Ô
Ograve U+000D2 Ò
Oslash U+000D8 Ø
Otilde U+000D5 Õ
Ouml U+000D6 Ö
QUOT U+00022 "
REG U+000AE ®
THORN U+000DE Þ
Uacute U+000DA Ú
Ucirc U+000DB Û
Ugrave U+000D9 Ù
Uuml U+000DC Ü
Yacute U+000DD Ý
aacute U+000E1 á
acirc U+000E2 â
acute U+000B4 ´
aelig U+000E6 æ
agrave U+000E0 à
amp U+00026 &
aring U+000E5 å
atilde U+000E3 ã
auml U+000E4 ä
brvbar U+000A6 ¦
ccedil U+000E7 ç
cedil U+000B8 ¸
cent U+000A2 ¢
copy U+000A9 ©
curren U+000A4 ¤
deg U+000B0 °
divide U+000F7 ÷
eacute U+000E9 é
ecirc U+000EA ê
egrave U+000E8 è
eth U+000F0 ð
euml U+000EB ë
frac12 U+000BD ½
frac14 U+000BC ¼
frac34 U+000BE ¾
gt U+0003E >
iacute U+000ED í
icirc U+000EE î
iexcl U+000A1 ¡
igrave U+000EC ì
iquest U+000BF ¿
iuml U+000EF ï
laquo U+000AB «
lt U+0003C <
macr U+000AF ¯
micro U+000B5 µ
middot U+000B7 ·
nbsp U+000A0  
not U+000AC ¬
ntilde U+000F1 ñ
oacute U+000F3 ó
ocirc U+000F4 ô
ograve U+000F2 ò
ordf U+000AA ª
ordm U+000BA º
oslash U+000F8 ø
otilde U+000F5 õ
ouml U+000F6 ö
para U+000B6
plusmn U+000B1 ±
pound U+000A3 £
quot U+00022 "
raquo U+000BB »
reg U+000AE ®
sect U+000A7 §
shy U+000AD ­
sup1 U+000B9 ¹
sup2 U+000B2 ²
sup3 U+000B3 ³
szlig U+000DF ß
thorn U+000FE þ
times U+000D7 ×
uacute U+000FA ú
ucirc U+000FB û
ugrave U+000F9 ù
uml U+000A8 ¨
uuml U+000FC ü
yacute U+000FD ý
yen U+000A5 ¥
yuml U+000FF ÿ
"), set the escape flag to false. - - In any case, emit the input character as a character token. Stay - in the data state. - - EOF - Emit an end-of-file token. - - Anything else - Emit the input character as a character token. Stay in the data - state. - - 8.2.4.2 Character reference data state - - (This cannot happen if the content model flag is set to the CDATA - state.) - - Attempt to consume a character reference, with no additional allowed - character. - - If nothing is returned, emit a U+0026 AMPERSAND character token. - - Otherwise, emit the character token that was returned. - - Finally, switch to the data state. - - 8.2.4.3 Tag open state - - The behavior of this state depends on the content model flag. - - If the content model flag is set to the RCDATA or CDATA states - Consume the next input character. If it is a U+002F SOLIDUS (/) - character, switch to the close tag open state. Otherwise, emit a - U+003C LESS-THAN SIGN character token and reconsume the current - input character in the data state. - - If the content model flag is set to the PCDATA state - Consume the next input character: - - U+0021 EXCLAMATION MARK (!) - Switch to the markup declaration open state. - - U+002F SOLIDUS (/) - Switch to the close tag open state. - - U+0041 LATIN CAPITAL LETTER A through to U+005A LATIN CAPITAL - LETTER Z - Create a new start tag token, set its tag name to the - lowercase version of the input character (add 0x0020 to - the character's code point), then switch to the tag name - state. (Don't emit the token yet; further details will be - filled in before it is emitted.) - - U+0061 LATIN SMALL LETTER A through to U+007A LATIN SMALL LETTER Z - Create a new start tag token, set its tag name to the - input character, then switch to the tag name state. (Don't - emit the token yet; further details will be filled in - before it is emitted.) - - U+003E GREATER-THAN SIGN (>) - Parse error. 
Emit a U+003C LESS-THAN SIGN character token - and a U+003E GREATER-THAN SIGN character token. Switch to - the data state. - - U+003F QUESTION MARK (?) - Parse error. Switch to the bogus comment state. - - Anything else - Parse error. Emit a U+003C LESS-THAN SIGN character token - and reconsume the current input character in the data - state. - - 8.2.4.4 Close tag open state - - If the content model flag is set to the RCDATA or CDATA states but no - start tag token has ever been emitted by this instance of the tokeniser - (fragment case), or, if the content model flag is set to the RCDATA or - CDATA states and the next few characters do not match the tag name of - the last start tag token emitted (compared in an ASCII case-insensitive - manner), or if they do but they are not immediately followed by one of - the following characters: - * U+0009 CHARACTER TABULATION - * U+000A LINE FEED (LF) - * U+000C FORM FEED (FF) - * U+0020 SPACE - * U+003E GREATER-THAN SIGN (>) - * U+002F SOLIDUS (/) - * EOF - - ...then emit a U+003C LESS-THAN SIGN character token, a U+002F SOLIDUS - character token, and switch to the data state to process the next input - character. - - Otherwise, if the content model flag is set to the PCDATA state, or if - the next few characters do match that tag name, consume the next input - character: - - U+0041 LATIN CAPITAL LETTER A through to U+005A LATIN CAPITAL LETTER Z - Create a new end tag token, set its tag name to the lowercase - version of the input character (add 0x0020 to the character's - code point), then switch to the tag name state. (Don't emit the - token yet; further details will be filled in before it is - emitted.) - - U+0061 LATIN SMALL LETTER A through to U+007A LATIN SMALL LETTER Z - Create a new end tag token, set its tag name to the input - character, then switch to the tag name state. (Don't emit the - token yet; further details will be filled in before it is - emitted.) - - U+003E GREATER-THAN SIGN (>) - Parse error. 
Switch to the data state. - - EOF - Parse error. Emit a U+003C LESS-THAN SIGN character token and a - U+002F SOLIDUS character token. Reconsume the EOF character in - the data state. - - Anything else - Parse error. Switch to the bogus comment state. - - 8.2.4.5 Tag name state - - Consume the next input character: - - U+0009 CHARACTER TABULATION - U+000A LINE FEED (LF) - U+000C FORM FEED (FF) - U+0020 SPACE - Switch to the before attribute name state. - - U+002F SOLIDUS (/) - Switch to the self-closing start tag state. - - U+003E GREATER-THAN SIGN (>) - Emit the current tag token. Switch to the data state. - - U+0041 LATIN CAPITAL LETTER A through to U+005A LATIN CAPITAL LETTER Z - Append the lowercase version of the current input character (add - 0x0020 to the character's code point) to the current tag token's - tag name. Stay in the tag name state. - - EOF - Parse error. Emit the current tag token. Reconsume the EOF - character in the data state. - - Anything else - Append the current input character to the current tag token's - tag name. Stay in the tag name state. - - 8.2.4.6 Before attribute name state - - Consume the next input character: - - U+0009 CHARACTER TABULATION - U+000A LINE FEED (LF) - U+000C FORM FEED (FF) - U+0020 SPACE - Stay in the before attribute name state. - - U+002F SOLIDUS (/) - Switch to the self-closing start tag state. - - U+003E GREATER-THAN SIGN (>) - Emit the current tag token. Switch to the data state. - - U+0041 LATIN CAPITAL LETTER A through to U+005A LATIN CAPITAL LETTER Z - Start a new attribute in the current tag token. Set that - attribute's name to the lowercase version of the current input - character (add 0x0020 to the character's code point), and its - value to the empty string. Switch to the attribute name state. - - U+0022 QUOTATION MARK (") - U+0027 APOSTROPHE (') - U+003D EQUALS SIGN (=) - Parse error. Treat it as per the "anything else" entry below. - - EOF - Parse error. Emit the current tag token. 
Reconsume the EOF - character in the data state. - - Anything else - Start a new attribute in the current tag token. Set that - attribute's name to the current input character, and its value - to the empty string. Switch to the attribute name state. - - 8.2.4.7 Attribute name state - - Consume the next input character: - - U+0009 CHARACTER TABULATION - U+000A LINE FEED (LF) - U+000C FORM FEED (FF) - U+0020 SPACE - Switch to the after attribute name state. - - U+002F SOLIDUS (/) - Switch to the self-closing start tag state. - - U+003D EQUALS SIGN (=) - Switch to the before attribute value state. - - U+003E GREATER-THAN SIGN (>) - Emit the current tag token. Switch to the data state. - - U+0041 LATIN CAPITAL LETTER A through to U+005A LATIN CAPITAL LETTER Z - Append the lowercase version of the current input character (add - 0x0020 to the character's code point) to the current attribute's - name. Stay in the attribute name state. - - U+0022 QUOTATION MARK (") - U+0027 APOSTROPHE (') - Parse error. Treat it as per the "anything else" entry below. - - EOF - Parse error. Emit the current tag token. Reconsume the EOF - character in the data state. - - Anything else - Append the current input character to the current attribute's - name. Stay in the attribute name state. - - When the user agent leaves the attribute name state (and before - emitting the tag token, if appropriate), the complete attribute's name - must be compared to the other attributes on the same token; if there is - already an attribute on the token with the exact same name, then this - is a parse error and the new attribute must be dropped, along with the - value that gets associated with it (if any). - - 8.2.4.8 After attribute name state - - Consume the next input character: - - U+0009 CHARACTER TABULATION - U+000A LINE FEED (LF) - U+000C FORM FEED (FF) - U+0020 SPACE - Stay in the after attribute name state. - - U+002F SOLIDUS (/) - Switch to the self-closing start tag state. 
- - U+003D EQUALS SIGN (=) - Switch to the before attribute value state. - - U+003E GREATER-THAN SIGN (>) - Emit the current tag token. Switch to the data state. - - U+0041 LATIN CAPITAL LETTER A through to U+005A LATIN CAPITAL LETTER Z - Start a new attribute in the current tag token. Set that - attribute's name to the lowercase version of the current input - character (add 0x0020 to the character's code point), and its - value to the empty string. Switch to the attribute name state. - - U+0022 QUOTATION MARK (") - U+0027 APOSTROPHE (') - Parse error. Treat it as per the "anything else" entry below. - - EOF - Parse error. Emit the current tag token. Reconsume the EOF - character in the data state. - - Anything else - Start a new attribute in the current tag token. Set that - attribute's name to the current input character, and its value - to the empty string. Switch to the attribute name state. - - 8.2.4.9 Before attribute value state - - Consume the next input character: - - U+0009 CHARACTER TABULATION - U+000A LINE FEED (LF) - U+000C FORM FEED (FF) - U+0020 SPACE - Stay in the before attribute value state. - - U+0022 QUOTATION MARK (") - Switch to the attribute value (double-quoted) state. - - U+0026 AMPERSAND (&) - Switch to the attribute value (unquoted) state and reconsume - this input character. - - U+0027 APOSTROPHE (') - Switch to the attribute value (single-quoted) state. - - U+003E GREATER-THAN SIGN (>) - Parse error. Emit the current tag token. Switch to the data - state. - - U+003D EQUALS SIGN (=) - Parse error. Treat it as per the "anything else" entry below. - - EOF - Parse error. Emit the current tag token. Reconsume the character - in the data state. - - Anything else - Append the current input character to the current attribute's - value. Switch to the attribute value (unquoted) state. 
- - 8.2.4.10 Attribute value (double-quoted) state - - Consume the next input character: - - U+0022 QUOTATION MARK (") - Switch to the after attribute value (quoted) state. - - U+0026 AMPERSAND (&) - Switch to the character reference in attribute value state, with - the additional allowed character being U+0022 QUOTATION MARK - ("). - - EOF - Parse error. Emit the current tag token. Reconsume the character - in the data state. - - Anything else - Append the current input character to the current attribute's - value. Stay in the attribute value (double-quoted) state. - - 8.2.4.11 Attribute value (single-quoted) state - - Consume the next input character: - - U+0027 APOSTROPHE (') - Switch to the after attribute value (quoted) state. - - U+0026 AMPERSAND (&) - Switch to the character reference in attribute value state, with - the additional allowed character being U+0027 APOSTROPHE ('). - - EOF - Parse error. Emit the current tag token. Reconsume the character - in the data state. - - Anything else - Append the current input character to the current attribute's - value. Stay in the attribute value (single-quoted) state. - - 8.2.4.12 Attribute value (unquoted) state - - Consume the next input character: - - U+0009 CHARACTER TABULATION - U+000A LINE FEED (LF) - U+000C FORM FEED (FF) - U+0020 SPACE - Switch to the before attribute name state. - - U+0026 AMPERSAND (&) - Switch to the character reference in attribute value state, with - no additional allowed character. - - U+003E GREATER-THAN SIGN (>) - Emit the current tag token. Switch to the data state. - - U+0022 QUOTATION MARK (") - U+0027 APOSTROPHE (') - U+003D EQUALS SIGN (=) - Parse error. Treat it as per the "anything else" entry below. - - EOF - Parse error. Emit the current tag token. Reconsume the character - in the data state. - - Anything else - Append the current input character to the current attribute's - value. Stay in the attribute value (unquoted) state. 
- - 8.2.4.13 Character reference in attribute value state - - Attempt to consume a character reference. - - If nothing is returned, append a U+0026 AMPERSAND character to the - current attribute's value. - - Otherwise, append the returned character token to the current - attribute's value. - - Finally, switch back to the attribute value state that you were in when - you were switched into this state. - - 8.2.4.14 After attribute value (quoted) state - - Consume the next input character: - - U+0009 CHARACTER TABULATION - U+000A LINE FEED (LF) - U+000C FORM FEED (FF) - U+0020 SPACE - Switch to the before attribute name state. - - U+002F SOLIDUS (/) - Switch to the self-closing start tag state. - - U+003E GREATER-THAN SIGN (>) - Emit the current tag token. Switch to the data state. - - EOF - Parse error. Emit the current tag token. Reconsume the EOF - character in the data state. - - Anything else - Parse error. Reconsume the character in the before attribute - name state. - - 8.2.4.15 Self-closing start tag state - - Consume the next input character: - - U+003E GREATER-THAN SIGN (>) - Set the self-closing flag of the current tag token. Emit the - current tag token. Switch to the data state. - - EOF - Parse error. Emit the current tag token. Reconsume the EOF - character in the data state. - - Anything else - Parse error. Reconsume the character in the before attribute - name state. - - 8.2.4.16 Bogus comment state - - (This can only happen if the content model flag is set to the PCDATA - state.) - - Consume every character up to and including the first U+003E - GREATER-THAN SIGN character (>) or the end of the file (EOF), whichever - comes first. Emit a comment token whose data is the concatenation of - all the characters starting from and including the character that - caused the state machine to switch into the bogus comment state, up to - and including the character immediately before the last consumed - character (i.e.
up to the character just before the U+003E or EOF - character). (If the comment was started by the end of the file (EOF), - the token is empty.) - - Switch to the data state. - - If the end of the file was reached, reconsume the EOF character. - - 8.2.4.17 Markup declaration open state - - (This can only happen if the content model flag is set to the PCDATA - state.) - - If the next two characters are both U+002D HYPHEN-MINUS (-) characters, - consume those two characters, create a comment token whose data is the - empty string, and switch to the comment start state. - - Otherwise, if the next seven characters are an ASCII case-insensitive - match for the word "DOCTYPE", then consume those characters and switch - to the DOCTYPE state. - - Otherwise, if the insertion mode is "in foreign content" and the - current node is not an element in the HTML namespace and the next seven - characters are an ASCII case-sensitive match for the string "[CDATA[" - (the five uppercase letters "CDATA" with a U+005B LEFT SQUARE BRACKET - character before and after), then consume those characters and switch - to the CDATA section state (which is unrelated to the content model - flag's CDATA state). - - Otherwise, this is a parse error. Switch to the bogus comment state. - The next character that is consumed, if any, is the first character - that will be in the comment. - - 8.2.4.18 Comment start state - - Consume the next input character: - - U+002D HYPHEN-MINUS (-) - Switch to the comment start dash state. - - U+003E GREATER-THAN SIGN (>) - Parse error. Emit the comment token. Switch to the data state. - - EOF - Parse error. Emit the comment token. Reconsume the EOF character - in the data state. - - Anything else - Append the input character to the comment token's data. Switch - to the comment state. - - 8.2.4.19 Comment start dash state - - Consume the next input character: - - U+002D HYPHEN-MINUS (-) - Switch to the comment end state - - U+003E GREATER-THAN SIGN (>) - Parse error. 
Emit the comment token. Switch to the data state. - - EOF - Parse error. Emit the comment token. Reconsume the EOF character - in the data state. - - Anything else - Append a U+002D HYPHEN-MINUS (-) character and the input - character to the comment token's data. Switch to the comment - state. - - 8.2.4.20 Comment state - - Consume the next input character: - - U+002D HYPHEN-MINUS (-) - Switch to the comment end dash state - - EOF - Parse error. Emit the comment token. Reconsume the EOF character - in the data state. - - Anything else - Append the input character to the comment token's data. Stay in - the comment state. - - 8.2.4.21 Comment end dash state - - Consume the next input character: - - U+002D HYPHEN-MINUS (-) - Switch to the comment end state - - EOF - Parse error. Emit the comment token. Reconsume the EOF character - in the data state. - - Anything else - Append a U+002D HYPHEN-MINUS (-) character and the input - character to the comment token's data. Switch to the comment - state. - - 8.2.4.22 Comment end state - - Consume the next input character: - - U+003E GREATER-THAN SIGN (>) - Emit the comment token. Switch to the data state. - - U+002D HYPHEN-MINUS (-) - Parse error. Append a U+002D HYPHEN-MINUS (-) character to the - comment token's data. Stay in the comment end state. - - EOF - Parse error. Emit the comment token. Reconsume the EOF character - in the data state. - - Anything else - Parse error. Append two U+002D HYPHEN-MINUS (-) characters and - the input character to the comment token's data. Switch to the - comment state. - - 8.2.4.23 DOCTYPE state - - Consume the next input character: - - U+0009 CHARACTER TABULATION - U+000A LINE FEED (LF) - U+000C FORM FEED (FF) - U+0020 SPACE - Switch to the before DOCTYPE name state. - - Anything else - Parse error. Reconsume the current character in the before - DOCTYPE name state. 
- - 8.2.4.24 Before DOCTYPE name state - - Consume the next input character: - - U+0009 CHARACTER TABULATION - U+000A LINE FEED (LF) - U+000C FORM FEED (FF) - U+0020 SPACE - Stay in the before DOCTYPE name state. - - U+003E GREATER-THAN SIGN (>) - Parse error. Create a new DOCTYPE token. Set its force-quirks - flag to on. Emit the token. Switch to the data state. - - U+0041 LATIN CAPITAL LETTER A through to U+005A LATIN CAPITAL LETTER Z - Create a new DOCTYPE token. Set the token's name to the - lowercase version of the input character (add 0x0020 to the - character's code point). Switch to the DOCTYPE name state. - - EOF - Parse error. Create a new DOCTYPE token. Set its force-quirks - flag to on. Emit the token. Reconsume the EOF character in the - data state. - - Anything else - Create a new DOCTYPE token. Set the token's name to the current - input character. Switch to the DOCTYPE name state. - - 8.2.4.25 DOCTYPE name state - - Consume the next input character: - - U+0009 CHARACTER TABULATION - U+000A LINE FEED (LF) - U+000C FORM FEED (FF) - U+0020 SPACE - Switch to the after DOCTYPE name state. - - U+003E GREATER-THAN SIGN (>) - Emit the current DOCTYPE token. Switch to the data state. - - U+0041 LATIN CAPITAL LETTER A through to U+005A LATIN CAPITAL LETTER Z - Append the lowercase version of the input character (add 0x0020 - to the character's code point) to the current DOCTYPE token's - name. Stay in the DOCTYPE name state. - - EOF - Parse error. Set the DOCTYPE token's force-quirks flag to on. - Emit that DOCTYPE token. Reconsume the EOF character in the data - state. - - Anything else - Append the current input character to the current DOCTYPE - token's name. Stay in the DOCTYPE name state. - - 8.2.4.26 After DOCTYPE name state - - Consume the next input character: - - U+0009 CHARACTER TABULATION - U+000A LINE FEED (LF) - U+000C FORM FEED (FF) - U+0020 SPACE - Stay in the after DOCTYPE name state. 
- - U+003E GREATER-THAN SIGN (>) - Emit the current DOCTYPE token. Switch to the data state. - - EOF - Parse error. Set the DOCTYPE token's force-quirks flag to on. - Emit that DOCTYPE token. Reconsume the EOF character in the data - state. - - Anything else - If the six characters starting from the current input character - are an ASCII case-insensitive match for the word "PUBLIC", then - consume those characters and switch to the before DOCTYPE public - identifier state. - - Otherwise, if the six characters starting from the current input - character are an ASCII case-insensitive match for the word - "SYSTEM", then consume those characters and switch to the before - DOCTYPE system identifier state. - - Otherwise, this is a parse error. Set the DOCTYPE token's - force-quirks flag to on. Switch to the bogus DOCTYPE state. - - 8.2.4.27 Before DOCTYPE public identifier state - - Consume the next input character: - - U+0009 CHARACTER TABULATION - U+000A LINE FEED (LF) - U+000C FORM FEED (FF) - U+0020 SPACE - Stay in the before DOCTYPE public identifier state. - - U+0022 QUOTATION MARK (") - Set the DOCTYPE token's public identifier to the empty string - (not missing), then switch to the DOCTYPE public identifier - (double-quoted) state. - - U+0027 APOSTROPHE (') - Set the DOCTYPE token's public identifier to the empty string - (not missing), then switch to the DOCTYPE public identifier - (single-quoted) state. - - U+003E GREATER-THAN SIGN (>) - Parse error. Set the DOCTYPE token's force-quirks flag to on. - Emit that DOCTYPE token. Switch to the data state. - - EOF - Parse error. Set the DOCTYPE token's force-quirks flag to on. - Emit that DOCTYPE token. Reconsume the EOF character in the data - state. - - Anything else - Parse error. Set the DOCTYPE token's force-quirks flag to on. - Switch to the bogus DOCTYPE state.
- - 8.2.4.28 DOCTYPE public identifier (double-quoted) state - - Consume the next input character: - - U+0022 QUOTATION MARK (") - Switch to the after DOCTYPE public identifier state. - - U+003E GREATER-THAN SIGN (>) - Parse error. Set the DOCTYPE token's force-quirks flag to on. - Emit that DOCTYPE token. Switch to the data state. - - EOF - Parse error. Set the DOCTYPE token's force-quirks flag to on. - Emit that DOCTYPE token. Reconsume the EOF character in the data - state. - - Anything else - Append the current input character to the current DOCTYPE - token's public identifier. Stay in the DOCTYPE public identifier - (double-quoted) state. - - 8.2.4.29 DOCTYPE public identifier (single-quoted) state - - Consume the next input character: - - U+0027 APOSTROPHE (') - Switch to the after DOCTYPE public identifier state. - - U+003E GREATER-THAN SIGN (>) - Parse error. Set the DOCTYPE token's force-quirks flag to on. - Emit that DOCTYPE token. Switch to the data state. - - EOF - Parse error. Set the DOCTYPE token's force-quirks flag to on. - Emit that DOCTYPE token. Reconsume the EOF character in the data - state. - - Anything else - Append the current input character to the current DOCTYPE - token's public identifier. Stay in the DOCTYPE public identifier - (single-quoted) state. - - 8.2.4.30 After DOCTYPE public identifier state - - Consume the next input character: - - U+0009 CHARACTER TABULATION - U+000A LINE FEED (LF) - U+000C FORM FEED (FF) - U+0020 SPACE - Stay in the after DOCTYPE public identifier state. - - U+0022 QUOTATION MARK (") - Set the DOCTYPE token's system identifier to the empty string - (not missing), then switch to the DOCTYPE system identifier - (double-quoted) state. - - U+0027 APOSTROPHE (') - Set the DOCTYPE token's system identifier to the empty string - (not missing), then switch to the DOCTYPE system identifier - (single-quoted) state. - - U+003E GREATER-THAN SIGN (>) - Emit the current DOCTYPE token. Switch to the data state. 
- - EOF - Parse error. Set the DOCTYPE token's force-quirks flag to on. - Emit that DOCTYPE token. Reconsume the EOF character in the data - state. - - Anything else - Parse error. Set the DOCTYPE token's force-quirks flag to on. - Switch to the bogus DOCTYPE state. - - 8.2.4.31 Before DOCTYPE system identifier state - - Consume the next input character: - - U+0009 CHARACTER TABULATION - U+000A LINE FEED (LF) - U+000C FORM FEED (FF) - U+0020 SPACE - Stay in the before DOCTYPE system identifier state. - - U+0022 QUOTATION MARK (") - Set the DOCTYPE token's system identifier to the empty string - (not missing), then switch to the DOCTYPE system identifier - (double-quoted) state. - - U+0027 APOSTROPHE (') - Set the DOCTYPE token's system identifier to the empty string - (not missing), then switch to the DOCTYPE system identifier - (single-quoted) state. - - U+003E GREATER-THAN SIGN (>) - Parse error. Set the DOCTYPE token's force-quirks flag to on. - Emit that DOCTYPE token. Switch to the data state. - - EOF - Parse error. Set the DOCTYPE token's force-quirks flag to on. - Emit that DOCTYPE token. Reconsume the EOF character in the data - state. - - Anything else - Parse error. Set the DOCTYPE token's force-quirks flag to on. - Switch to the bogus DOCTYPE state. - - 8.2.4.32 DOCTYPE system identifier (double-quoted) state - - Consume the next input character: - - U+0022 QUOTATION MARK (") - Switch to the after DOCTYPE system identifier state. - - U+003E GREATER-THAN SIGN (>) - Parse error. Set the DOCTYPE token's force-quirks flag to on. - Emit that DOCTYPE token. Switch to the data state. - - EOF - Parse error. Set the DOCTYPE token's force-quirks flag to on. - Emit that DOCTYPE token. Reconsume the EOF character in the data - state. - - Anything else - Append the current input character to the current DOCTYPE - token's system identifier. Stay in the DOCTYPE system identifier - (double-quoted) state. 
- - 8.2.4.33 DOCTYPE system identifier (single-quoted) state - - Consume the next input character: - - U+0027 APOSTROPHE (') - Switch to the after DOCTYPE system identifier state. - - U+003E GREATER-THAN SIGN (>) - Parse error. Set the DOCTYPE token's force-quirks flag to on. - Emit that DOCTYPE token. Switch to the data state. - - EOF - Parse error. Set the DOCTYPE token's force-quirks flag to on. - Emit that DOCTYPE token. Reconsume the EOF character in the data - state. - - Anything else - Append the current input character to the current DOCTYPE - token's system identifier. Stay in the DOCTYPE system identifier - (single-quoted) state. - - 8.2.4.34 After DOCTYPE system identifier state - - Consume the next input character: - - U+0009 CHARACTER TABULATION - U+000A LINE FEED (LF) - U+000C FORM FEED (FF) - U+0020 SPACE - Stay in the after DOCTYPE system identifier state. - - U+003E GREATER-THAN SIGN (>) - Emit the current DOCTYPE token. Switch to the data state. - - EOF - Parse error. Set the DOCTYPE token's force-quirks flag to on. - Emit that DOCTYPE token. Reconsume the EOF character in the data - state. - - Anything else - Parse error. Switch to the bogus DOCTYPE state. (This does not - set the DOCTYPE token's force-quirks flag to on.) - - 8.2.4.35 Bogus DOCTYPE state - - Consume the next input character: - - U+003E GREATER-THAN SIGN (>) - Emit the DOCTYPE token. Switch to the data state. - - EOF - Emit the DOCTYPE token. Reconsume the EOF character in the data - state. - - Anything else - Stay in the bogus DOCTYPE state. - - 8.2.4.36 CDATA section state - - (This can only happen if the content model flag is set to the PCDATA - state, and is unrelated to the content model flag's CDATA state.) - - Consume every character up to the next occurrence of the three - character sequence U+005D RIGHT SQUARE BRACKET U+005D RIGHT SQUARE - BRACKET U+003E GREATER-THAN SIGN (]]>), or the end of the file (EOF), - whichever comes first. 
Emit a series of character tokens consisting of - all the characters consumed except the matching three character - sequence at the end (if one was found before the end of the file). - - Switch to the data state. - - If the end of the file was reached, reconsume the EOF character. - - 8.2.4.37 Tokenizing character references - - This section defines how to consume a character reference. This - definition is used when parsing character references in text and in - attributes. - - The behavior depends on the identity of the next character (the one - immediately after the U+0026 AMPERSAND character): - - U+0009 CHARACTER TABULATION - U+000A LINE FEED (LF) - U+000C FORM FEED (FF) - U+0020 SPACE - U+003C LESS-THAN SIGN - U+0026 AMPERSAND - EOF - The additional allowed character, if there is one - Not a character reference. No characters are consumed, and - nothing is returned. (This is not an error, either.) - - U+0023 NUMBER SIGN (#) - Consume the U+0023 NUMBER SIGN. - - The behavior further depends on the character after the U+0023 - NUMBER SIGN: - - U+0078 LATIN SMALL LETTER X - U+0058 LATIN CAPITAL LETTER X - Consume the X. - - Follow the steps below, but using the range of characters - U+0030 DIGIT ZERO through to U+0039 DIGIT NINE, U+0061 - LATIN SMALL LETTER A through to U+0066 LATIN SMALL LETTER - F, and U+0041 LATIN CAPITAL LETTER A, through to U+0046 - LATIN CAPITAL LETTER F (in other words, 0-9, A-F, a-f). - - When it comes to interpreting the number, interpret it as - a hexadecimal number. - - Anything else - Follow the steps below, but using the range of characters - U+0030 DIGIT ZERO through to U+0039 DIGIT NINE (i.e. just - 0-9). - - When it comes to interpreting the number, interpret it as - a decimal number. - - Consume as many characters as match the range of characters - given above. - - If no characters match the range, then don't consume any - characters (and unconsume the U+0023 NUMBER SIGN character and, - if appropriate, the X character). 
This is a parse error; nothing - is returned. - - Otherwise, if the next character is a U+003B SEMICOLON, consume - that too. If it isn't, there is a parse error. - - If one or more characters match the range, then take them all - and interpret the string of characters as a number (either - hexadecimal or decimal as appropriate). - - If that number is one of the numbers in the first column of the - following table, then this is a parse error. Find the row with - that number in the first column, and return a character token - for the Unicode character given in the second column of that - row. - - Number Unicode character - 0x0D U+000A LINE FEED (LF) - 0x80 U+20AC EURO SIGN ('€') - 0x81 U+FFFD REPLACEMENT CHARACTER - 0x82 U+201A SINGLE LOW-9 QUOTATION MARK ('‚') - 0x83 U+0192 LATIN SMALL LETTER F WITH HOOK ('ƒ') - 0x84 U+201E DOUBLE LOW-9 QUOTATION MARK ('„') - 0x85 U+2026 HORIZONTAL ELLIPSIS ('…') - 0x86 U+2020 DAGGER ('†') - 0x87 U+2021 DOUBLE DAGGER ('‡') - 0x88 U+02C6 MODIFIER LETTER CIRCUMFLEX ACCENT ('ˆ') - 0x89 U+2030 PER MILLE SIGN ('‰') - 0x8A U+0160 LATIN CAPITAL LETTER S WITH CARON ('Š') - 0x8B U+2039 SINGLE LEFT-POINTING ANGLE QUOTATION MARK ('‹') - 0x8C U+0152 LATIN CAPITAL LIGATURE OE ('Œ') - 0x8D U+FFFD REPLACEMENT CHARACTER - 0x8E U+017D LATIN CAPITAL LETTER Z WITH CARON ('Ž') - 0x8F U+FFFD REPLACEMENT CHARACTER - 0x90 U+FFFD REPLACEMENT CHARACTER - 0x91 U+2018 LEFT SINGLE QUOTATION MARK ('‘') - 0x92 U+2019 RIGHT SINGLE QUOTATION MARK ('’') - 0x93 U+201C LEFT DOUBLE QUOTATION MARK ('“') - 0x94 U+201D RIGHT DOUBLE QUOTATION MARK ('”') - 0x95 U+2022 BULLET ('•') - 0x96 U+2013 EN DASH ('–') - 0x97 U+2014 EM DASH ('—') - 0x98 U+02DC SMALL TILDE ('˜') - 0x99 U+2122 TRADE MARK SIGN ('™') - 0x9A U+0161 LATIN SMALL LETTER S WITH CARON ('š') - 0x9B U+203A SINGLE RIGHT-POINTING ANGLE QUOTATION MARK ('›') - 0x9C U+0153 LATIN SMALL LIGATURE OE ('œ') - 0x9D U+FFFD REPLACEMENT CHARACTER - 0x9E U+017E LATIN SMALL LETTER Z WITH CARON ('ž') - 0x9F U+0178 LATIN CAPITAL 
LETTER Y WITH DIAERESIS ('Ÿ') - - Otherwise, if the number is in the range 0x0000 to 0x0008, - 0x000E to 0x001F, 0x007F to 0x009F, 0xD800 to 0xDFFF, 0xFDD0 to - 0xFDEF, or is one of 0x000B, 0xFFFE, 0xFFFF, 0x1FFFE, 0x1FFFF, - 0x2FFFE, 0x2FFFF, 0x3FFFE, 0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, - 0x5FFFF, 0x6FFFE, 0x6FFFF, 0x7FFFE, 0x7FFFF, 0x8FFFE, 0x8FFFF, - 0x9FFFE, 0x9FFFF, 0xAFFFE, 0xAFFFF, 0xBFFFE, 0xBFFFF, 0xCFFFE, - 0xCFFFF, 0xDFFFE, 0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF, - 0x10FFFE, or 0x10FFFF, or is higher than 0x10FFFF, then this is - a parse error; return a character token for the U+FFFD - REPLACEMENT CHARACTER character instead. - - Otherwise, return a character token for the Unicode character - whose code point is that number. - - Anything else - Consume the maximum number of characters possible, with the - consumed characters matching one of the identifiers in the first - column of the named character references table (in a - case-sensitive manner). - - If no match can be made, then this is a parse error. No - characters are consumed, and nothing is returned. - - If the last character matched is not a U+003B SEMICOLON (;), - there is a parse error. - - If the character reference is being consumed as part of an - attribute, and the last character matched is not a U+003B - SEMICOLON (;), and the next character is in the range U+0030 - DIGIT ZERO to U+0039 DIGIT NINE, U+0041 LATIN CAPITAL LETTER A - to U+005A LATIN CAPITAL LETTER Z, or U+0061 LATIN SMALL LETTER A - to U+007A LATIN SMALL LETTER Z, then, for historical reasons, - all the characters that were matched after the U+0026 AMPERSAND - (&) must be unconsumed, and nothing is returned. - - Otherwise, return a character token for the character - corresponding to the character reference name (as given by the - second column of the named character references table). - - If the markup contains I'm &notit; I tell you, the character - reference is parsed as "not", as in, I'm ¬it; I tell you.
But if - the markup was I'm &notin; I tell you, the character reference - would be parsed as "notin;", resulting in I'm ∉ I tell you. diff --git a/doc/tree-construction.txt b/doc/tree-construction.txt deleted file mode 100644 index 0febf147..00000000 --- a/doc/tree-construction.txt +++ /dev/null @@ -1,2201 +0,0 @@ - #8.2.4 Tokenization Table of contents 8.4 Serializing HTML fragments - - WHATWG - -HTML 5 - -Draft Recommendation — 13 January 2009 - - ← 8.2.4 Tokenization – Table of contents – 8.4 Serializing HTML - fragments → - - 8.2.5 Tree construction - - The input to the tree construction stage is a sequence of tokens from - the tokenization stage. The tree construction stage is associated with - a DOM Document object when a parser is created. The "output" of this - stage consists of dynamically modifying or extending that document's - DOM tree. - - This specification does not define when an interactive user agent has - to render the Document so that it is available to the user, or when it - has to begin accepting user input. - - As each token is emitted from the tokeniser, the user agent must - process the token according to the rules given in the section - corresponding to the current insertion mode. - - When the steps below require the UA to insert a character into a node, - if that node has a child immediately before where the character is to - be inserted, and that child is a Text node, and that Text node was the - last node that the parser inserted into the document, then the - character must be appended to that Text node; otherwise, a new Text - node whose data is just that character must be inserted in the - appropriate place. - - DOM mutation events must not fire for changes caused by the UA parsing - the document. (Conceptually, the parser is not mutating the DOM, it is - constructing it.) This includes the parsing of any content inserted - using document.write() and document.writeln() calls.
[DOM3EVENTS] - - Not all of the tag names mentioned below are conformant tag names in - this specification; many are included to handle legacy content. They - still form part of the algorithm that implementations are required to - implement to claim conformance. - - The algorithm described below places no limit on the depth of the DOM - tree generated, or on the length of tag names, attribute names, - attribute values, text nodes, etc. While implementors are encouraged to - avoid arbitrary limits, it is recognized that practical concerns will - likely force user agents to impose nesting depths. - - 8.2.5.1 Creating and inserting elements - - When the steps below require the UA to create an element for a token in - a particular namespace, the UA must create a node implementing the - interface appropriate for the element type corresponding to the tag - name of the token in the given namespace (as given in the specification - that defines that element, e.g. for an a element in the HTML namespace, - this specification defines it to be the HTMLAnchorElement interface), - with the tag name being the name of that element, with the node being - in the given namespace, and with the attributes on the node being those - given in the given token. - - The interface appropriate for an element in the HTML namespace that is - not defined in this specification is HTMLElement. The interface - appropriate for an element in another namespace that is not defined by - that namespace's specification is Element. - - When a resettable element is created in this manner, its reset - algorithm must be invoked once the attributes are set. (This - initializes the element's value and checkedness based on the element's - attributes.) 
- __________________________________________________________________ - - When the steps below require the UA to insert an HTML element for a - token, the UA must first create an element for the token in the HTML - namespace, and then append this node to the current node, and push it - onto the stack of open elements so that it is the new current node. - - The steps below may also require that the UA insert an HTML element in - a particular place, in which case the UA must follow the same steps - except that it must insert or append the new node in the location - specified instead of appending it to the current node. (This happens in - particular during the parsing of tables with invalid content.) - - If an element created by the insert an HTML element algorithm is a - form-associated element, and the form element pointer is not null, and - the newly created element doesn't have a form attribute, the user agent - must associate the newly created element with the form element pointed - to by the form element pointer before inserting it wherever it is to be - inserted. - __________________________________________________________________ - - When the steps below require the UA to insert a foreign element for a - token, the UA must first create an element for the token in the given - namespace, and then append this node to the current node, and push it - onto the stack of open elements so that it is the new current node. If - the newly created element has an xmlns attribute in the XMLNS namespace - whose value is not exactly the same as the element's namespace, that is - a parse error. - - When the steps below require the user agent to adjust MathML attributes - for a token, then, if the token has an attribute named definitionurl, - change its name to definitionURL (note the case difference). 
- - When the steps below require the user agent to adjust foreign - attributes for a token, then, if any of the attributes on the token - match the strings given in the first column of the following table, let - the attribute be a namespaced attribute, with the prefix being the - string given in the corresponding cell in the second column, the local - name being the string given in the corresponding cell in the third - column, and the namespace being the namespace given in the - corresponding cell in the fourth column. (This fixes the use of - namespaced attributes, in particular xml:lang.) - - Attribute name Prefix Local name Namespace - xlink:actuate xlink actuate XLink namespace - xlink:arcrole xlink arcrole XLink namespace - xlink:href xlink href XLink namespace - xlink:role xlink role XLink namespace - xlink:show xlink show XLink namespace - xlink:title xlink title XLink namespace - xlink:type xlink type XLink namespace - xml:base xml base XML namespace - xml:lang xml lang XML namespace - xml:space xml space XML namespace - xmlns (none) xmlns XMLNS namespace - xmlns:xlink xmlns xlink XMLNS namespace - __________________________________________________________________ - - The generic CDATA element parsing algorithm and the generic RCDATA - element parsing algorithm consist of the following steps. These - algorithms are always invoked in response to a start tag token. - 1. Insert an HTML element for the token. - 2. If the algorithm that was invoked is the generic CDATA element - parsing algorithm, switch the tokeniser's content model flag to the - CDATA state; otherwise the algorithm invoked was the generic RCDATA - element parsing algorithm, switch the tokeniser's content model - flag to the RCDATA state. - 3. Let the original insertion mode be the current insertion mode. - 4. Then, switch the insertion mode to "in CDATA/RCDATA". 
- - 8.2.5.2 Closing elements that have implied end tags - - When the steps below require the UA to generate implied end tags, then, - while the current node is a dd element, a dt element, an li element, an - option element, an optgroup element, a p element, an rp element, or an - rt element, the UA must pop the current node off the stack of open - elements. - - If a step requires the UA to generate implied end tags but lists an - element to exclude from the process, then the UA must perform the above - steps as if that element was not in the above list. - - 8.2.5.3 Foster parenting - - Foster parenting happens when content is misnested in tables. - - When a node node is to be foster parented, the node node must be - inserted into the foster parent element, and the current table must be - marked as tainted. (Once the current table has been tainted, whitespace - characters are inserted into the foster parent element instead of the - current node.) - - The foster parent element is the parent element of the last table - element in the stack of open elements, if there is a table element and - it has such a parent element. If there is no table element in the stack - of open elements (fragment case), then the foster parent element is the - first element in the stack of open elements (the html element). - Otherwise, if there is a table element in the stack of open elements, - but the last table element in the stack of open elements has no parent, - or its parent node is not an element, then the foster parent element is - the element before the last table element in the stack of open - elements. - - If the foster parent element is the parent element of the last table - element in the stack of open elements, then node must be inserted - immediately before the last table element in the stack of open elements - in the foster parent element; otherwise, node must be appended to the - foster parent element. 
- - 8.2.5.4 The "initial" insertion mode - - When the insertion mode is "initial", tokens must be handled as - follows: - - A character token that is one of U+0009 CHARACTER TABULATION, - U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020 SPACE - Ignore the token. - - A comment token - Append a Comment node to the Document object with the data - attribute set to the data given in the comment token. - - A DOCTYPE token - If the DOCTYPE token's name is not a case-sensitive match for - the string "html", or if the token's public identifier is - neither missing nor a case-sensitive match for the string - "XSLT-compat", or if the token's system identifier is not - missing, then there is a parse error (this is the DOCTYPE parse - error). Conformance checkers may, instead of reporting this - error, switch to a conformance checking mode for another - language (e.g. based on the DOCTYPE token a conformance checker - could recognize that the document is an HTML4-era document, and - defer to an HTML4 conformance checker.) - - Append a DocumentType node to the Document node, with the name - attribute set to the name given in the DOCTYPE token; the - publicId attribute set to the public identifier given in the - DOCTYPE token, or the empty string if the public identifier was - missing; the systemId attribute set to the system identifier - given in the DOCTYPE token, or the empty string if the system - identifier was missing; and the other attributes specific to - DocumentType objects set to null and empty lists as appropriate. - Associate the DocumentType node with the Document object so that - it is returned as the value of the doctype attribute of the - Document object. - - Then, if the DOCTYPE token matches one of the conditions in the - following list, then set the document to quirks mode: - - + The force-quirks flag is set to on. - + The name is set to anything other than "HTML".
- + The public identifier starts with: "+//Silmaril//dtd html Pro - v0r11 19970101//" - + The public identifier starts with: "-//AdvaSoft Ltd//DTD HTML - 3.0 asWedit + extensions//" - + The public identifier starts with: "-//AS//DTD HTML 3.0 - asWedit + extensions//" - + The public identifier starts with: "-//IETF//DTD HTML 2.0 - Level 1//" - + The public identifier starts with: "-//IETF//DTD HTML 2.0 - Level 2//" - + The public identifier starts with: "-//IETF//DTD HTML 2.0 - Strict Level 1//" - + The public identifier starts with: "-//IETF//DTD HTML 2.0 - Strict Level 2//" - + The public identifier starts with: "-//IETF//DTD HTML 2.0 - Strict//" - + The public identifier starts with: "-//IETF//DTD HTML 2.0//" - + The public identifier starts with: "-//IETF//DTD HTML 2.1E//" - + The public identifier starts with: "-//IETF//DTD HTML 3.0//" - + The public identifier starts with: "-//IETF//DTD HTML 3.2 - Final//" - + The public identifier starts with: "-//IETF//DTD HTML 3.2//" - + The public identifier starts with: "-//IETF//DTD HTML 3//" - + The public identifier starts with: "-//IETF//DTD HTML Level - 0//" - + The public identifier starts with: "-//IETF//DTD HTML Level - 1//" - + The public identifier starts with: "-//IETF//DTD HTML Level - 2//" - + The public identifier starts with: "-//IETF//DTD HTML Level - 3//" - + The public identifier starts with: "-//IETF//DTD HTML Strict - Level 0//" - + The public identifier starts with: "-//IETF//DTD HTML Strict - Level 1//" - + The public identifier starts with: "-//IETF//DTD HTML Strict - Level 2//" - + The public identifier starts with: "-//IETF//DTD HTML Strict - Level 3//" - + The public identifier starts with: "-//IETF//DTD HTML - Strict//" - + The public identifier starts with: "-//IETF//DTD HTML//" - + The public identifier starts with: "-//Metrius//DTD Metrius - Presentational//" - + The public identifier starts with: "-//Microsoft//DTD Internet - Explorer 2.0 HTML Strict//" - + The public identifier starts with: 
"-//Microsoft//DTD Internet - Explorer 2.0 HTML//" - + The public identifier starts with: "-//Microsoft//DTD Internet - Explorer 2.0 Tables//" - + The public identifier starts with: "-//Microsoft//DTD Internet - Explorer 3.0 HTML Strict//" - + The public identifier starts with: "-//Microsoft//DTD Internet - Explorer 3.0 HTML//" - + The public identifier starts with: "-//Microsoft//DTD Internet - Explorer 3.0 Tables//" - + The public identifier starts with: "-//Netscape Comm. - Corp.//DTD HTML//" - + The public identifier starts with: "-//Netscape Comm. - Corp.//DTD Strict HTML//" - + The public identifier starts with: "-//O'Reilly and - Associates//DTD HTML 2.0//" - + The public identifier starts with: "-//O'Reilly and - Associates//DTD HTML Extended 1.0//" - + The public identifier starts with: "-//O'Reilly and - Associates//DTD HTML Extended Relaxed 1.0//" - + The public identifier starts with: "-//SoftQuad Software//DTD - HoTMetaL PRO 6.0::19990601::extensions to HTML 4.0//" - + The public identifier starts with: "-//SoftQuad//DTD HoTMetaL - PRO 4.0::19971010::extensions to HTML 4.0//" - + The public identifier starts with: "-//Spyglass//DTD HTML 2.0 - Extended//" - + The public identifier starts with: "-//SQ//DTD HTML 2.0 - HoTMetaL + extensions//" - + The public identifier starts with: "-//Sun Microsystems - Corp.//DTD HotJava HTML//" - + The public identifier starts with: "-//Sun Microsystems - Corp.//DTD HotJava Strict HTML//" - + The public identifier starts with: "-//W3C//DTD HTML 3 - 1995-03-24//" - + The public identifier starts with: "-//W3C//DTD HTML 3.2 - Draft//" - + The public identifier starts with: "-//W3C//DTD HTML 3.2 - Final//" - + The public identifier starts with: "-//W3C//DTD HTML 3.2//" - + The public identifier starts with: "-//W3C//DTD HTML 3.2S - Draft//" - + The public identifier starts with: "-//W3C//DTD HTML 4.0 - Frameset//" - + The public identifier starts with: "-//W3C//DTD HTML 4.0 - Transitional//" - + The public identifier 
starts with: "-//W3C//DTD HTML - Experimental 19960712//" - + The public identifier starts with: "-//W3C//DTD HTML - Experimental 970421//" - + The public identifier starts with: "-//W3C//DTD W3 HTML//" - + The public identifier starts with: "-//W3O//DTD W3 HTML 3.0//" - + The public identifier is set to: "-//W3O//DTD W3 HTML Strict - 3.0//EN//" - + The public identifier starts with: "-//WebTechs//DTD Mozilla - HTML 2.0//" - + The public identifier starts with: "-//WebTechs//DTD Mozilla - HTML//" - + The public identifier is set to: "-/W3C/DTD HTML 4.0 - Transitional/EN" - + The public identifier is set to: "HTML" - + The system identifier is set to: - "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd" - + The system identifier is missing and the public identifier - starts with: "-//W3C//DTD HTML 4.01 Frameset//" - + The system identifier is missing and the public identifier - starts with: "-//W3C//DTD HTML 4.01 Transitional//" - - Otherwise, if the DOCTYPE token matches one of the conditions in - the following list, then set the document to limited quirks - mode: - - + The public identifier starts with: "-//W3C//DTD XHTML 1.0 - Frameset//" - + The public identifier starts with: "-//W3C//DTD XHTML 1.0 - Transitional//" - + The system identifier is not missing and the public identifier - starts with: "-//W3C//DTD HTML 4.01 Frameset//" - + The system identifier is not missing and the public identifier - starts with: "-//W3C//DTD HTML 4.01 Transitional//" - - The name, system identifier, and public identifier strings must - be compared to the values given in the lists above in an ASCII - case-insensitive manner. A system identifier whose value is the - empty string is not considered missing for the purposes of the - conditions above. - - Then, switch the insertion mode to "before html". - - Anything else - Parse error. - - Set the document to quirks mode. - - Switch the insertion mode to "before html", then reprocess the - current token. 
- - 8.2.5.5 The "before html" insertion mode - - When the insertion mode is "before html", tokens must be handled as - follows: - - A DOCTYPE token - Parse error. Ignore the token. - - A comment token - Append a Comment node to the Document object with the data - attribute set to the data given in the comment token. - - A character token that is one of U+0009 CHARACTER TABULATION, - U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020 SPACE - Ignore the token. - - A start tag whose tag name is "html" - Create an element for the token in the HTML namespace. Append it - to the Document object. Put this element in the stack of open - elements. - - If the token has an attribute "manifest", then resolve the value - of that attribute to an absolute URL, and if that is successful, - run the application cache selection algorithm with the resulting - absolute URL. Otherwise, if there is no such attribute or - resolving it fails, run the application cache selection - algorithm with no manifest. The algorithm must be passed the - Document object. - - Switch the insertion mode to "before head". - - Anything else - Create an HTMLElement node with the tag name html, in the HTML - namespace. Append it to the Document object. Put this element in - the stack of open elements. - - Run the application cache selection algorithm with no manifest, - passing it the Document object. - - Switch the insertion mode to "before head", then reprocess the - current token. - - Should probably make end tags be ignored, so that "</head><!-- --><html>" puts the comment before the root node (or should we?) - - The root element can end up being removed from the Document object, - e.g. by scripts; nothing in particular happens in such cases, content - continues being appended to the nodes as described in the next section.
- - 8.2.5.6 The "before head" insertion mode - - When the insertion mode is "before head", tokens must be handled as - follows: - - A character token that is one of one of U+0009 CHARACTER TABULATION, - U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020 SPACE - Ignore the token. - - A comment token - Append a Comment node to the current node with the data - attribute set to the data given in the comment token. - - A DOCTYPE token - Parse error. Ignore the token. - - A start tag whose tag name is "html" - Process the token using the rules for the "in body" insertion - mode. - - A start tag whose tag name is "head" - Insert an HTML element for the token. - - Set the head element pointer to the newly created head element. - - Switch the insertion mode to "in head". - - An end tag whose tag name is one of: "head", "br" - Act as if a start tag token with the tag name "head" and no - attributes had been seen, then reprocess the current token. - - Any other end tag - Parse error. Ignore the token. - - Anything else - Act as if a start tag token with the tag name "head" and no - attributes had been seen, then reprocess the current token. - - This will result in an empty head element being generated, with - the current token being reprocessed in the "after head" - insertion mode. - - 8.2.5.7 The "in head" insertion mode - - When the insertion mode is "in head", tokens must be handled as - follows: - - A character token that is one of one of U+0009 CHARACTER TABULATION, - U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020 SPACE - Insert the character into the current node. - - A comment token - Append a Comment node to the current node with the data - attribute set to the data given in the comment token. - - A DOCTYPE token - Parse error. Ignore the token. - - A start tag whose tag name is "html" - Process the token using the rules for the "in body" insertion - mode. 
- - A start tag whose tag name is one of: "base", "command", "eventsource", - "link" - Insert an HTML element for the token. Immediately pop the - current node off the stack of open elements. - - Acknowledge the token's self-closing flag, if it is set. - - A start tag whose tag name is "meta" - Insert an HTML element for the token. Immediately pop the - current node off the stack of open elements. - - Acknowledge the token's self-closing flag, if it is set. - - If the element has a charset attribute, and its value is a - supported encoding, and the confidence is currently tentative, - then change the encoding to the encoding given by the value of - the charset attribute. - - Otherwise, if the element has a content attribute, and applying - the algorithm for extracting an encoding from a Content-Type to - its value returns a supported encoding encoding, and the - confidence is currently tentative, then change the encoding to - the encoding encoding. - - A start tag whose tag name is "title" - Follow the generic RCDATA element parsing algorithm. - - A start tag whose tag name is "noscript", if the scripting flag is - enabled - - A start tag whose tag name is one of: "noframes", "style" - Follow the generic CDATA element parsing algorithm. - - A start tag whose tag name is "noscript", if the scripting flag is - disabled - Insert an HTML element for the token. - - Switch the insertion mode to "in head noscript". - - A start tag whose tag name is "script" - - 1. Create an element for the token in the HTML namespace. - 2. Mark the element as being "parser-inserted". - This ensures that, if the script is external, any - document.write() calls in the script will execute in-line, - instead of blowing the document away, as would happen in most - other cases. It also prevents the script from executing until - the end tag is seen. - 3. If the parser was originally created for the HTML fragment - parsing algorithm, then mark the script element as "already - executed". 
(fragment case) - 4. Append the new element to the current node. - 5. Switch the tokeniser's content model flag to the CDATA state. - 6. Let the original insertion mode be the current insertion mode. - 7. Switch the insertion mode to "in CDATA/RCDATA". - - An end tag whose tag name is "head" - Pop the current node (which will be the head element) off the - stack of open elements. - - Switch the insertion mode to "after head". - - An end tag whose tag name is "br" - Act as described in the "anything else" entry below. - - A start tag whose tag name is "head" - Any other end tag - Parse error. Ignore the token. - - Anything else - Act as if an end tag token with the tag name "head" had been - seen, and reprocess the current token. - - In certain UAs, some elements don't trigger the "in body" mode - straight away, but instead get put into the head. Do we want to - copy that? - - 8.2.5.8 The "in head noscript" insertion mode - - When the insertion mode is "in head noscript", tokens must be handled - as follows: - - A DOCTYPE token - Parse error. Ignore the token. - - A start tag whose tag name is "html" - Process the token using the rules for the "in body" insertion - mode. - - An end tag whose tag name is "noscript" - Pop the current node (which will be a noscript element) from the - stack of open elements; the new current node will be a head - element. - - Switch the insertion mode to "in head". - - A character token that is one of one of U+0009 CHARACTER TABULATION, - U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020 SPACE - - A comment token - A start tag whose tag name is one of: "link", "meta", "noframes", - "style" - Process the token using the rules for the "in head" insertion - mode. - - An end tag whose tag name is "br" - Act as described in the "anything else" entry below. - - A start tag whose tag name is one of: "head", "noscript" - Any other end tag - Parse error. Ignore the token. - - Anything else - Parse error. 
Act as if an end tag with the tag name "noscript" - had been seen and reprocess the current token. - - 8.2.5.9 The "after head" insertion mode - - When the insertion mode is "after head", tokens must be handled as - follows: - - A character token that is one of one of U+0009 CHARACTER TABULATION, - U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020 SPACE - Insert the character into the current node. - - A comment token - Append a Comment node to the current node with the data - attribute set to the data given in the comment token. - - A DOCTYPE token - Parse error. Ignore the token. - - A start tag whose tag name is "html" - Process the token using the rules for the "in body" insertion - mode. - - A start tag whose tag name is "body" - Insert an HTML element for the token. - - Switch the insertion mode to "in body". - - A start tag whose tag name is "frameset" - Insert an HTML element for the token. - - Switch the insertion mode to "in frameset". - - A start tag token whose tag name is one of: "base", "link", "meta", - "noframes", "script", "style", "title" - Parse error. - - Push the node pointed to by the head element pointer onto the - stack of open elements. - - Process the token using the rules for the "in head" insertion - mode. - - Remove the node pointed to by the head element pointer from the - stack of open elements. - - An end tag whose tag name is "br" - Act as described in the "anything else" entry below. - - A start tag whose tag name is "head" - Any other end tag - Parse error. Ignore the token. - - Anything else - Act as if a start tag token with the tag name "body" and no - attributes had been seen, and then reprocess the current token. - - 8.2.5.10 The "in body" insertion mode - - When the insertion mode is "in body", tokens must be handled as - follows: - - A character token - Reconstruct the active formatting elements, if any. - - Insert the token's character into the current node. 
- - A comment token - Append a Comment node to the current node with the data - attribute set to the data given in the comment token. - - A DOCTYPE token - Parse error. Ignore the token. - - A start tag whose tag name is "html" - Parse error. For each attribute on the token, check to see if - the attribute is already present on the top element of the stack - of open elements. If it is not, add the attribute and its - corresponding value to that element. - - A start tag token whose tag name is one of: "base", "command", - "eventsource", "link", "meta", "noframes", "script", "style", - "title" - Process the token using the rules for the "in head" insertion - mode. - - A start tag whose tag name is "body" - Parse error. - - If the second element on the stack of open elements is not a - body element, or, if the stack of open elements has only one - node on it, then ignore the token. (fragment case) - - Otherwise, for each attribute on the token, check to see if the - attribute is already present on the body element (the second - element) on the stack of open elements. If it is not, add the - attribute and its corresponding value to that element. - - An end-of-file token - If there is a node in the stack of open elements that is not - either a dd element, a dt element, an li element, a p element, a - tbody element, a td element, a tfoot element, a th element, a - thead element, a tr element, the body element, or the html - element, then this is a parse error. - - Stop parsing. - - An end tag whose tag name is "body" - If the stack of open elements does not have a body element in - scope, this is a parse error; ignore the token. - - Otherwise, if there is a node in the stack of open elements that - is not either a dd element, a dt element, an li element, a p - element, a tbody element, a td element, a tfoot element, a th - element, a thead element, a tr element, the body element, or the - html element, then this is a parse error. 
- - Switch the insertion mode to "after body". - - An end tag whose tag name is "html" - Act as if an end tag with tag name "body" had been seen, then, - if that token wasn't ignored, reprocess the current token. - - The fake end tag token here can only be ignored in the fragment - case. - - A start tag whose tag name is one of: "address", "article", "aside", - "blockquote", "center", "datagrid", "details", "dialog", "dir", - "div", "dl", "fieldset", "figure", "footer", "header", "menu", - "nav", "ol", "p", "section", "ul" - If the stack of open elements has a p element in scope, then act - as if an end tag with the tag name "p" had been seen. - - Insert an HTML element for the token. - - A start tag whose tag name is one of: "h1", "h2", "h3", "h4", "h5", - "h6" - If the stack of open elements has a p element in scope, then act - as if an end tag with the tag name "p" had been seen. - - If the current node is an element whose tag name is one of "h1", - "h2", "h3", "h4", "h5", or "h6", then this is a parse error; pop - the current node off the stack of open elements. - - Insert an HTML element for the token. - - A start tag whose tag name is one of: "pre", "listing" - If the stack of open elements has a p element in scope, then act - as if an end tag with the tag name "p" had been seen. - - Insert an HTML element for the token. - - If the next token is a U+000A LINE FEED (LF) character token, - then ignore that token and move on to the next one. (Newlines at - the start of pre blocks are ignored as an authoring - convenience.) - - A start tag whose tag name is "form" - If the form element pointer is not null, then this is a parse - error; ignore the token. - - Otherwise: - - If the stack of open elements has a p element in scope, then act - as if an end tag with the tag name "p" had been seen. - - Insert an HTML element for the token, and set the form element - pointer to point to the element created. 
- - A start tag whose tag name is "li" - Run the following algorithm: - - 1. Initialize node to be the current node (the bottommost node of - the stack). - 2. If node is an li element, then act as if an end tag with the - tag name "li" had been seen, then jump to the last step. - 3. If node is not in the formatting category, and is not in the - phrasing category, and is not an address, div, or p element, - then jump to the last step. - 4. Otherwise, set node to the previous entry in the stack of open - elements and return to step 2. - 5. This is the last step. - If the stack of open elements has a p element in scope, then - act as if an end tag with the tag name "p" had been seen. - Finally, insert an HTML element for the token. - - A start tag whose tag name is one of: "dd", "dt" - Run the following algorithm: - - 1. Initialize node to be the current node (the bottommost node of - the stack). - 2. If node is a dd or dt element, then act as if an end tag with - the same tag name as node had been seen, then jump to the last - step. - 3. If node is not in the formatting category, and is not in the - phrasing category, and is not an address, div, or p element, - then jump to the last step. - 4. Otherwise, set node to the previous entry in the stack of open - elements and return to step 2. - 5. This is the last step. - If the stack of open elements has a p element in scope, then - act as if an end tag with the tag name "p" had been seen. - Finally, insert an HTML element for the token. - - A start tag whose tag name is "plaintext" - If the stack of open elements has a p element in scope, then act - as if an end tag with the tag name "p" had been seen. - - Insert an HTML element for the token. - - Switch the content model flag to the PLAINTEXT state. 
- - Once a start tag with the tag name "plaintext" has been seen, - that will be the last token ever seen other than character - tokens (and the end-of-file token), because there is no way to - switch the content model flag out of the PLAINTEXT state. - - An end tag whose tag name is one of: "address", "article", "aside", - "blockquote", "center", "datagrid", "details", "dialog", "dir", - "div", "dl", "fieldset", "figure", "footer", "header", - "listing", "menu", "nav", "ol", "pre", "section", "ul" - If the stack of open elements does not have an element in scope - with the same tag name as that of the token, then this is a - parse error; ignore the token. - - Otherwise, run these steps: - - 1. Generate implied end tags. - 2. If the current node is not an element with the same tag name - as that of the token, then this is a parse error. - 3. Pop elements from the stack of open elements until an element - with the same tag name as the token has been popped from the - stack. - - An end tag whose tag name is "form" - Let node be the element that the form element pointer is set to. - - Set the form element pointer to null. - - If node is null or the stack of open elements does not have node - in scope, then this is a parse error; ignore the token. - - Otherwise, run these steps: - - 1. Generate implied end tags. - 2. If the current node is not node, then this is a parse error. - 3. Remove node from the stack of open elements. - - An end tag whose tag name is "p" - If the stack of open elements does not have an element in scope - with the same tag name as that of the token, then this is a - parse error; act as if a start tag with the tag name p had been - seen, then reprocess the current token. - - Otherwise, run these steps: - - 1. Generate implied end tags, except for elements with the same - tag name as the token. - 2. If the current node is not an element with the same tag name - as that of the token, then this is a parse error. - 3. 
Pop elements from the stack of open elements until an element - with the same tag name as the token has been popped from the - stack. - - An end tag whose tag name is one of: "dd", "dt", "li" - If the stack of open elements does not have an element in scope - with the same tag name as that of the token, then this is a - parse error; ignore the token. - - Otherwise, run these steps: - - 1. Generate implied end tags, except for elements with the same - tag name as the token. - 2. If the current node is not an element with the same tag name - as that of the token, then this is a parse error. - 3. Pop elements from the stack of open elements until an element - with the same tag name as the token has been popped from the - stack. - - An end tag whose tag name is one of: "h1", "h2", "h3", "h4", "h5", "h6" - If the stack of open elements does not have an element in scope - whose tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6", - then this is a parse error; ignore the token. - - Otherwise, run these steps: - - 1. Generate implied end tags. - 2. If the current node is not an element with the same tag name - as that of the token, then this is a parse error. - 3. Pop elements from the stack of open elements until an element - whose tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6" - has been popped from the stack. - - An end tag whose tag name is "sarcasm" - Take a deep breath, then act as described in the "any other end - tag" entry below. 
- - A start tag whose tag name is "a" - If the list of active formatting elements contains an element - whose tag name is "a" between the end of the list and the last - marker on the list (or the start of the list if there is no - marker on the list), then this is a parse error; act as if an - end tag with the tag name "a" had been seen, then remove that - element from the list of active formatting elements and the - stack of open elements if the end tag didn't already remove it - (it might not have if the element is not in table scope). - - In the non-conforming stream - <a href="a">a<table><a href="b">b</table>
x, the first a element - would be closed upon seeing the second one, and the "x" - character would be inside a link to "b", not to "a". This is - despite the fact that the outer a element is not in table scope - (meaning that a regular end tag at the start of the table - wouldn't close the outer a element). - - Reconstruct the active formatting elements, if any. - - Insert an HTML element for the token. Add that element to the - list of active formatting elements. - - A start tag whose tag name is one of: "b", "big", "em", "font", "i", - "s", "small", "strike", "strong", "tt", "u" - Reconstruct the active formatting elements, if any. - - Insert an HTML element for the token. Add that element to the - list of active formatting elements. - - A start tag whose tag name is "nobr" - Reconstruct the active formatting elements, if any. - - If the stack of open elements has a nobr element in scope, then - this is a parse error; act as if an end tag with the tag name - "nobr" had been seen, then once again reconstruct the active - formatting elements, if any. - - Insert an HTML element for the token. Add that element to the - list of active formatting elements. - - An end tag whose tag name is one of: "a", "b", "big", "em", "font", - "i", "nobr", "s", "small", "strike", "strong", "tt", "u" - Follow these steps: - - 1. Let the formatting element be the last element in the list of - active formatting elements that: - o is between the end of the list and the last scope marker - in the list, if any, or the start of the list otherwise, - and - o has the same tag name as the token. - If there is no such node, or, if that node is also in the - stack of open elements but the element is not in scope, then - this is a parse error; ignore the token, and abort these - steps. - Otherwise, if there is such a node, but that node is not in - the stack of open elements, then this is a parse error; remove - the element from the list, and abort these steps. 
- Otherwise, there is a formatting element and that element is - in the stack and is in scope. If the element is not the - current node, this is a parse error. In any case, proceed with - the algorithm as written in the following steps. - 2. Let the furthest block be the topmost node in the stack of - open elements that is lower in the stack than the formatting - element, and is not an element in the phrasing or formatting - categories. There might not be one. - 3. If there is no furthest block, then the UA must skip the - subsequent steps and instead just pop all the nodes from the - bottom of the stack of open elements, from the current node up - to and including the formatting element, and remove the - formatting element from the list of active formatting - elements. - 4. Let the common ancestor be the element immediately above the - formatting element in the stack of open elements. - 5. If the furthest block has a parent node, then remove the - furthest block from its parent node. - 6. Let a bookmark note the position of the formatting element in - the list of active formatting elements relative to the - elements on either side of it in the list. - 7. Let node and last node be the furthest block. Follow these - steps: - 1. Let node be the element immediately above node in the - stack of open elements. - 2. If node is not in the list of active formatting elements, - then remove node from the stack of open elements and then - go back to step 1. - 3. Otherwise, if node is the formatting element, then go to - the next step in the overall algorithm. - 4. Otherwise, if last node is the furthest block, then move - the aforementioned bookmark to be immediately after the - node in the list of active formatting elements. - 5. 
If node has any children, perform a shallow clone of - node, replace the entry for node in the list of active - formatting elements with an entry for the clone, replace - the entry for node in the stack of open elements with an - entry for the clone, and let node be the clone. - 6. Insert last node into node, first removing it from its - previous parent node if any. - 7. Let last node be node. - 8. Return to step 1 of this inner set of steps. - 8. If the common ancestor node is a table, tbody, tfoot, thead, - or tr element, then, foster parent whatever last node ended up - being in the previous step. - Otherwise, append whatever last node ended up being in the - previous step to the common ancestor node, first removing it - from its previous parent node if any. - 9. Perform a shallow clone of the formatting element. - 10. Take all of the child nodes of the furthest block and append - them to the clone created in the last step. - 11. Append that clone to the furthest block. - 12. Remove the formatting element from the list of active - formatting elements, and insert the clone into the list of - active formatting elements at the position of the - aforementioned bookmark. - 13. Remove the formatting element from the stack of open elements, - and insert the clone into the stack of open elements - immediately below the position of the furthest block in that - stack. - 14. Jump back to step 1 in this series of steps. - - The way these steps are defined, only elements in the formatting - category ever get cloned by this algorithm. - - Because of the way this algorithm causes elements to change - parents, it has been dubbed the "adoption agency algorithm" (in - contrast with other possible algorithms for dealing with - misnested content, which included the "incest algorithm", the - "secret affair algorithm", and the "Heisenberg algorithm").
- - A start tag whose tag name is "button" - If the stack of open elements has a button element in scope, - then this is a parse error; act as if an end tag with the tag - name "button" had been seen, then reprocess the token. - - Otherwise: - - Reconstruct the active formatting elements, if any. - - Insert an HTML element for the token. - - Insert a marker at the end of the list of active formatting - elements. - - A start tag token whose tag name is one of: "applet", "marquee", - "object" - Reconstruct the active formatting elements, if any. - - Insert an HTML element for the token. - - Insert a marker at the end of the list of active formatting - elements. - - An end tag token whose tag name is one of: "applet", "button", - "marquee", "object" - If the stack of open elements does not have an element in scope - with the same tag name as that of the token, then this is a - parse error; ignore the token. - - Otherwise, run these steps: - - 1. Generate implied end tags. - 2. If the current node is not an element with the same tag name - as that of the token, then this is a parse error. - 3. Pop elements from the stack of open elements until an element - with the same tag name as the token has been popped from the - stack. - 4. Clear the list of active formatting elements up to the last - marker. - - A start tag whose tag name is "xmp" - Reconstruct the active formatting elements, if any. - - Follow the generic CDATA element parsing algorithm. - - A start tag whose tag name is "table" - If the stack of open elements has a p element in scope, then act - as if an end tag with the tag name "p" had been seen. - - Insert an HTML element for the token. - - Switch the insertion mode to "in table". - - A start tag whose tag name is one of: "area", "basefont", "bgsound", - "br", "embed", "img", "input", "spacer", "wbr" - Reconstruct the active formatting elements, if any. - - Insert an HTML element for the token. 
Immediately pop the - current node off the stack of open elements. - - Acknowledge the token's self-closing flag, if it is set. - - A start tag whose tag name is one of: "param", "source" - Insert an HTML element for the token. Immediately pop the - current node off the stack of open elements. - - Acknowledge the token's self-closing flag, if it is set. - - A start tag whose tag name is "hr" - If the stack of open elements has a p element in scope, then act - as if an end tag with the tag name "p" had been seen. - - Insert an HTML element for the token. Immediately pop the - current node off the stack of open elements. - - Acknowledge the token's self-closing flag, if it is set. - - A start tag whose tag name is "image" - Parse error. Change the token's tag name to "img" and reprocess - it. (Don't ask.) - - A start tag whose tag name is "isindex" - Parse error. - - If the form element pointer is not null, then ignore the token. - - Otherwise: - - Acknowledge the token's self-closing flag, if it is set. - - Act as if a start tag token with the tag name "form" had been - seen. - - If the token has an attribute called "action", set the action - attribute on the resulting form element to the value of the - "action" attribute of the token. - - Act as if a start tag token with the tag name "hr" had been - seen. - - Act as if a start tag token with the tag name "p" had been seen. - - Act as if a start tag token with the tag name "label" had been - seen. - - Act as if a stream of character tokens had been seen (see below - for what they should say). - - Act as if a start tag token with the tag name "input" had been - seen, with all the attributes from the "isindex" token except - "name", "action", and "prompt". Set the name attribute of the - resulting input element to the value "isindex". - - Act as if a stream of character tokens had been seen (see below - for what they should say). - - Act as if an end tag token with the tag name "label" had been - seen. 
- - Act as if an end tag token with the tag name "p" had been seen. - - Act as if a start tag token with the tag name "hr" had been - seen. - - Act as if an end tag token with the tag name "form" had been - seen. - - If the token has an attribute with the name "prompt", then the - first stream of characters must be the same string as given in - that attribute, and the second stream of characters must be - empty. Otherwise, the two streams of character tokens together - should, together with the input element, express the equivalent - of "This is a searchable index. Insert your search keywords - here: (input field)" in the user's preferred language. - - A start tag whose tag name is "textarea" - - 1. Insert an HTML element for the token. - 2. If the next token is a U+000A LINE FEED (LF) character token, - then ignore that token and move on to the next one. (Newlines - at the start of textarea elements are ignored as an authoring - convenience.) - 3. Switch the tokeniser's content model flag to the RCDATA state. - 4. Let the original insertion mode be the current insertion mode. - 5. Switch the insertion mode to "in CDATA/RCDATA". - - A start tag whose tag name is one of: "iframe", "noembed" - A start tag whose tag name is "noscript", if the scripting flag is - enabled - Follow the generic CDATA element parsing algorithm. - - A start tag whose tag name is "select" - Reconstruct the active formatting elements, if any. - - Insert an HTML element for the token. - - If the insertion mode is one of "in table", "in caption", "in - column group", "in table body", "in row", or "in cell", then - switch the insertion mode to "in select in table". Otherwise, - switch the insertion mode to "in select". - - A start tag whose tag name is one of: "optgroup", "option" - If the stack of open elements has an option element in scope, - then act as if an end tag with the tag name "option" had been - seen. - - Reconstruct the active formatting elements, if any.
- - Insert an HTML element for the token. - - A start tag whose tag name is one of: "rp", "rt" - If the stack of open elements has a ruby element in scope, then - generate implied end tags. If the current node is not then a - ruby element, this is a parse error; pop all the nodes from the - current node up to the node immediately before the bottommost - ruby element on the stack of open elements. - - Insert an HTML element for the token. - - An end tag whose tag name is "br" - Parse error. Act as if a start tag token with the tag name "br" - had been seen. Ignore the end tag token. - - A start tag whose tag name is "math" - Reconstruct the active formatting elements, if any. - - Adjust MathML attributes for the token. (This fixes the case of - MathML attributes that are not all lowercase.) - - Adjust foreign attributes for the token. (This fixes the use of - namespaced attributes, in particular XLink.) - - Insert a foreign element for the token, in the MathML namespace. - - If the token has its self-closing flag set, pop the current node - off the stack of open elements and acknowledge the token's - self-closing flag. - - Otherwise, let the secondary insertion mode be the current - insertion mode, and then switch the insertion mode to "in - foreign content". - - A start tag whose tag name is one of: "caption", "col", "colgroup", - "frame", "frameset", "head", "tbody", "td", "tfoot", "th", - "thead", "tr" - Parse error. Ignore the token. - - Any other start tag - Reconstruct the active formatting elements, if any. - - Insert an HTML element for the token. - - This element will be a phrasing element. - - Any other end tag - Run the following steps: - - 1. Initialize node to be the current node (the bottommost node of - the stack). - 2. If node has the same tag name as the end tag token, then: - 1. Generate implied end tags. - 2. If the tag name of the end tag token does not match the - tag name of the current node, this is a parse error. - 3. 
Pop all the nodes from the current node up to node, - including node, then stop these steps. - 3. Otherwise, if node is in neither the formatting category nor - the phrasing category, then this is a parse error; ignore the - token, and abort these steps. - 4. Set node to the previous entry in the stack of open elements. - 5. Return to step 2. - - 8.2.5.11 The "in CDATA/RCDATA" insertion mode - - When the insertion mode is "in CDATA/RCDATA", tokens must be handled as - follows: - - A character token - Insert the token's character into the current node. - - An end-of-file token - Parse error. - - If the current node is a script element, mark the script element - as "already executed". - - Pop the current node off the stack of open elements. - - Switch the insertion mode to the original insertion mode and - reprocess the current token. - - An end tag whose tag name is "script" - Let script be the current node (which will be a script element). - - Pop the current node off the stack of open elements. - - Switch the insertion mode to the original insertion mode. - - Let the old insertion point have the same value as the current - insertion point. Let the insertion point be just before the next - input character. - - Increment the parser's script nesting level by one. - - Run the script. This might cause some script to execute, which - might cause new characters to be inserted into the tokeniser, - and might cause the tokeniser to output more tokens, resulting - in a reentrant invocation of the parser. - - Decrement the parser's script nesting level by one. If the - parser's script nesting level is zero, then set the parser pause - flag to false. - - Let the insertion point have the value of the old insertion - point. (In other words, restore the insertion point to the value - it had before the previous paragraph. This value might be the - "undefined" value.) 
- - At this stage, if there is a pending external script, then: - - If the tree construction stage is being called reentrantly, say - from a call to document.write(): - Set the parser pause flag to true, and abort the - processing of any nested invocations of the tokeniser, - yielding control back to the caller. (Tokenization will - resume when the caller returns to the "outer" tree - construction stage.) - - Otherwise: - Follow these steps: - - 1. Let the script be the pending external script. There is - no longer a pending external script. - 2. Pause until the script has completed loading. - 3. Let the insertion point be just before the next input - character. - 4. Execute the script. - 5. Let the insertion point be undefined again. - 6. If there is once again a pending external script, then - repeat these steps from step 1. - - Any other end tag - Pop the current node off the stack of open elements. - - Switch the insertion mode to the original insertion mode. - - 8.2.5.12 The "in table" insertion mode - - When the insertion mode is "in table", tokens must be handled as - follows: - - A character token that is one of one of U+0009 CHARACTER TABULATION, - U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020 SPACE - If the current table is tainted, then act as described in the - "anything else" entry below. - - Otherwise, insert the character into the current node. - - A comment token - Append a Comment node to the current node with the data - attribute set to the data given in the comment token. - - A DOCTYPE token - Parse error. Ignore the token. - - A start tag whose tag name is "caption" - Clear the stack back to a table context. (See below.) - - Insert a marker at the end of the list of active formatting - elements. - - Insert an HTML element for the token, then switch the insertion - mode to "in caption". - - A start tag whose tag name is "colgroup" - Clear the stack back to a table context. (See below.) 
- - Insert an HTML element for the token, then switch the insertion - mode to "in column group". - - A start tag whose tag name is "col" - Act as if a start tag token with the tag name "colgroup" had - been seen, then reprocess the current token. - - A start tag whose tag name is one of: "tbody", "tfoot", "thead" - Clear the stack back to a table context. (See below.) - - Insert an HTML element for the token, then switch the insertion - mode to "in table body". - - A start tag whose tag name is one of: "td", "th", "tr" - Act as if a start tag token with the tag name "tbody" had been - seen, then reprocess the current token. - - A start tag whose tag name is "table" - Parse error. Act as if an end tag token with the tag name - "table" had been seen, then, if that token wasn't ignored, - reprocess the current token. - - The fake end tag token here can only be ignored in the fragment - case. - - An end tag whose tag name is "table" - If the stack of open elements does not have an element in table - scope with the same tag name as the token, this is a parse - error. Ignore the token. (fragment case) - - Otherwise: - - Pop elements from this stack until a table element has been - popped from the stack. - - Reset the insertion mode appropriately. - - An end tag whose tag name is one of: "body", "caption", "col", - "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr" - Parse error. Ignore the token. - - A start tag whose tag name is one of: "style", "script" - If the current table is tainted then act as described in the - "anything else" entry below. - - Otherwise, process the token using the rules for the "in head" - insertion mode. - - A start tag whose tag name is "input" - If the token does not have an attribute with the name "type", or - if it does, but that attribute's value is not an ASCII - case-insensitive match for the string "hidden", or, if the - current table is tainted, then: act as described in the - "anything else" entry below. 
- - Otherwise: - - Parse error. - - Insert an HTML element for the token. - - Pop that input element off the stack of open elements. - - An end-of-file token - If the current node is not the root html element, then this is a - parse error. - - It can only be the current node in the fragment case. - - Stop parsing. - - Anything else - Parse error. Process the token using the rules for the "in body" - insertion mode, except that if the current node is a table, - tbody, tfoot, thead, or tr element, then, whenever a node would - be inserted into the current node, it must instead be foster - parented. - - When the steps above require the UA to clear the stack back to a table - context, it means that the UA must, while the current node is not a - table element or an html element, pop elements from the stack of open - elements. - - The current node being an html element after this process is a fragment - case. - - 8.2.5.13 The "in caption" insertion mode - - When the insertion mode is "in caption", tokens must be handled as - follows: - - An end tag whose tag name is "caption" - If the stack of open elements does not have an element in table - scope with the same tag name as the token, this is a parse - error. Ignore the token. (fragment case) - - Otherwise: - - Generate implied end tags. - - Now, if the current node is not a caption element, then this is - a parse error. - - Pop elements from this stack until a caption element has been - popped from the stack. - - Clear the list of active formatting elements up to the last - marker. - - Switch the insertion mode to "in table". - - A start tag whose tag name is one of: "caption", "col", "colgroup", - "tbody", "td", "tfoot", "th", "thead", "tr" - - An end tag whose tag name is "table" - Parse error. Act as if an end tag with the tag name "caption" - had been seen, then, if that token wasn't ignored, reprocess the - current token. - - The fake end tag token here can only be ignored in the fragment - case. 
- - An end tag whose tag name is one of: "body", "col", "colgroup", "html", - "tbody", "td", "tfoot", "th", "thead", "tr" - Parse error. Ignore the token. - - Anything else - Process the token using the rules for the "in body" insertion - mode. - - 8.2.5.14 The "in column group" insertion mode - - When the insertion mode is "in column group", tokens must be handled as - follows: - - A character token that is one of one of U+0009 CHARACTER TABULATION, - U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020 SPACE - Insert the character into the current node. - - A comment token - Append a Comment node to the current node with the data - attribute set to the data given in the comment token. - - A DOCTYPE token - Parse error. Ignore the token. - - A start tag whose tag name is "html" - Process the token using the rules for the "in body" insertion - mode. - - A start tag whose tag name is "col" - Insert an HTML element for the token. Immediately pop the - current node off the stack of open elements. - - Acknowledge the token's self-closing flag, if it is set. - - An end tag whose tag name is "colgroup" - If the current node is the root html element, then this is a - parse error; ignore the token. (fragment case) - - Otherwise, pop the current node (which will be a colgroup - element) from the stack of open elements. Switch the insertion - mode to "in table". - - An end tag whose tag name is "col" - Parse error. Ignore the token. - - An end-of-file token - If the current node is the root html element, then stop parsing. - (fragment case) - - Otherwise, act as described in the "anything else" entry below. - - Anything else - Act as if an end tag with the tag name "colgroup" had been seen, - and then, if that token wasn't ignored, reprocess the current - token. - - The fake end tag token here can only be ignored in the fragment - case. 
- - 8.2.5.15 The "in table body" insertion mode - - When the insertion mode is "in table body", tokens must be handled as - follows: - - A start tag whose tag name is "tr" - Clear the stack back to a table body context. (See below.) - - Insert an HTML element for the token, then switch the insertion - mode to "in row". - - A start tag whose tag name is one of: "th", "td" - Parse error. Act as if a start tag with the tag name "tr" had - been seen, then reprocess the current token. - - An end tag whose tag name is one of: "tbody", "tfoot", "thead" - If the stack of open elements does not have an element in table - scope with the same tag name as the token, this is a parse - error. Ignore the token. - - Otherwise: - - Clear the stack back to a table body context. (See below.) - - Pop the current node from the stack of open elements. Switch the - insertion mode to "in table". - - A start tag whose tag name is one of: "caption", "col", "colgroup", - "tbody", "tfoot", "thead" - - An end tag whose tag name is "table" - If the stack of open elements does not have a tbody, thead, or - tfoot element in table scope, this is a parse error. Ignore the - token. (fragment case) - - Otherwise: - - Clear the stack back to a table body context. (See below.) - - Act as if an end tag with the same tag name as the current node - ("tbody", "tfoot", or "thead") had been seen, then reprocess the - current token. - - An end tag whose tag name is one of: "body", "caption", "col", - "colgroup", "html", "td", "th", "tr" - Parse error. Ignore the token. - - Anything else - Process the token using the rules for the "in table" insertion - mode. - - When the steps above require the UA to clear the stack back to a table - body context, it means that the UA must, while the current node is not - a tbody, tfoot, thead, or html element, pop elements from the stack of - open elements. - - The current node being an html element after this process is a fragment - case. 
- - 8.2.5.16 The "in row" insertion mode - - When the insertion mode is "in row", tokens must be handled as follows: - - A start tag whose tag name is one of: "th", "td" - Clear the stack back to a table row context. (See below.) - - Insert an HTML element for the token, then switch the insertion - mode to "in cell". - - Insert a marker at the end of the list of active formatting - elements. - - An end tag whose tag name is "tr" - If the stack of open elements does not have an element in table - scope with the same tag name as the token, this is a parse - error. Ignore the token. (fragment case) - - Otherwise: - - Clear the stack back to a table row context. (See below.) - - Pop the current node (which will be a tr element) from the stack - of open elements. Switch the insertion mode to "in table body". - - A start tag whose tag name is one of: "caption", "col", "colgroup", - "tbody", "tfoot", "thead", "tr" - - An end tag whose tag name is "table" - Act as if an end tag with the tag name "tr" had been seen, then, - if that token wasn't ignored, reprocess the current token. - - The fake end tag token here can only be ignored in the fragment - case. - - An end tag whose tag name is one of: "tbody", "tfoot", "thead" - If the stack of open elements does not have an element in table - scope with the same tag name as the token, this is a parse - error. Ignore the token. - - Otherwise, act as if an end tag with the tag name "tr" had been - seen, then reprocess the current token. - - An end tag whose tag name is one of: "body", "caption", "col", - "colgroup", "html", "td", "th" - Parse error. Ignore the token. - - Anything else - Process the token using the rules for the "in table" insertion - mode. - - When the steps above require the UA to clear the stack back to a table - row context, it means that the UA must, while the current node is not a - tr element or an html element, pop elements from the stack of open - elements. 
- - The current node being an html element after this process is a fragment - case. - - 8.2.5.17 The "in cell" insertion mode - - When the insertion mode is "in cell", tokens must be handled as - follows: - - An end tag whose tag name is one of: "td", "th" - If the stack of open elements does not have an element in table - scope with the same tag name as that of the token, then this is - a parse error and the token must be ignored. - - Otherwise: - - Generate implied end tags. - - Now, if the current node is not an element with the same tag - name as the token, then this is a parse error. - - Pop elements from this stack until an element with the same tag - name as the token has been popped from the stack. - - Clear the list of active formatting elements up to the last - marker. - - Switch the insertion mode to "in row". (The current node will be - a tr element at this point.) - - A start tag whose tag name is one of: "caption", "col", "colgroup", - "tbody", "td", "tfoot", "th", "thead", "tr" - If the stack of open elements does not have a td or th element - in table scope, then this is a parse error; ignore the token. - (fragment case) - - Otherwise, close the cell (see below) and reprocess the current - token. - - An end tag whose tag name is one of: "body", "caption", "col", - "colgroup", "html" - Parse error. Ignore the token. - - An end tag whose tag name is one of: "table", "tbody", "tfoot", - "thead", "tr" - If the stack of open elements does not have an element in table - scope with the same tag name as that of the token (which can - only happen for "tbody", "tfoot" and "thead", or, in the - fragment case), then this is a parse error and the token must be - ignored. - - Otherwise, close the cell (see below) and reprocess the current - token. - - Anything else - Process the token using the rules for the "in body" insertion - mode. - - Where the steps above say to close the cell, they mean to run the - following algorithm: - 1. 
If the stack of open elements has a td element in table scope, then - act as if an end tag token with the tag name "td" had been seen. - 2. Otherwise, the stack of open elements will have a th element in - table scope; act as if an end tag token with the tag name "th" had - been seen. - - The stack of open elements cannot have both a td and a th element in - table scope at the same time, nor can it have neither when the - insertion mode is "in cell". - - 8.2.5.18 The "in select" insertion mode - - When the insertion mode is "in select", tokens must be handled as - follows: - - A character token - Insert the token's character into the current node. - - A comment token - Append a Comment node to the current node with the data - attribute set to the data given in the comment token. - - A DOCTYPE token - Parse error. Ignore the token. - - A start tag whose tag name is "html" - Process the token using the rules for the "in body" insertion - mode. - - A start tag whose tag name is "option" - If the current node is an option element, act as if an end tag - with the tag name "option" had been seen. - - Insert an HTML element for the token. - - A start tag whose tag name is "optgroup" - If the current node is an option element, act as if an end tag - with the tag name "option" had been seen. - - If the current node is an optgroup element, act as if an end tag - with the tag name "optgroup" had been seen. - - Insert an HTML element for the token. - - An end tag whose tag name is "optgroup" - First, if the current node is an option element, and the node - immediately before it in the stack of open elements is an - optgroup element, then act as if an end tag with the tag name - "option" had been seen. - - If the current node is an optgroup element, then pop that node - from the stack of open elements. Otherwise, this is a parse - error; ignore the token. 
- - An end tag whose tag name is "option" - If the current node is an option element, then pop that node - from the stack of open elements. Otherwise, this is a parse - error; ignore the token. - - An end tag whose tag name is "select" - If the stack of open elements does not have an element in table - scope with the same tag name as the token, this is a parse - error. Ignore the token. (fragment case) - - Otherwise: - - Pop elements from the stack of open elements until a select - element has been popped from the stack. - - Reset the insertion mode appropriately. - - A start tag whose tag name is "select" - Parse error. Act as if the token had been an end tag with the - tag name "select" instead. - - A start tag whose tag name is one of: "input", "textarea" - Parse error. Act as if an end tag with the tag name "select" had - been seen, and reprocess the token. - - A start tag token whose tag name is "script" - Process the token using the rules for the "in head" insertion - mode. - - An end-of-file token - If the current node is not the root html element, then this is a - parse error. - - It can only be the current node in the fragment case. - - Stop parsing. - - Anything else - Parse error. Ignore the token. - - 8.2.5.19 The "in select in table" insertion mode - - When the insertion mode is "in select in table", tokens must be handled - as follows: - - A start tag whose tag name is one of: "caption", "table", "tbody", - "tfoot", "thead", "tr", "td", "th" - Parse error. Act as if an end tag with the tag name "select" had - been seen, and reprocess the token. - - An end tag whose tag name is one of: "caption", "table", "tbody", - "tfoot", "thead", "tr", "td", "th" - Parse error. - - If the stack of open elements has an element in table scope with - the same tag name as that of the token, then act as if an end - tag with the tag name "select" had been seen, and reprocess the - token. Otherwise, ignore the token. 
- - Anything else - Process the token using the rules for the "in select" insertion - mode. - - 8.2.5.20 The "in foreign content" insertion mode - - When the insertion mode is "in foreign content", tokens must be handled - as follows: - - A character token - Insert the token's character into the current node. - - A comment token - Append a Comment node to the current node with the data - attribute set to the data given in the comment token. - - A DOCTYPE token - Parse error. Ignore the token. - - A start tag whose tag name is neither "mglyph" nor "malignmark", if the - current node is an mi element in the MathML namespace. - - A start tag whose tag name is neither "mglyph" nor "malignmark", if the - current node is an mo element in the MathML namespace. - - A start tag whose tag name is neither "mglyph" nor "malignmark", if the - current node is an mn element in the MathML namespace. - - A start tag whose tag name is neither "mglyph" nor "malignmark", if the - current node is an ms element in the MathML namespace. - - A start tag whose tag name is neither "mglyph" nor "malignmark", if the - current node is an mtext element in the MathML namespace. - - A start tag, if the current node is an element in the HTML namespace. - An end tag - Process the token using the rules for the secondary insertion - mode. - - If, after doing so, the insertion mode is still "in foreign - content", but there is no element in scope that has a namespace - other than the HTML namespace, switch the insertion mode to the - secondary insertion mode. 
- - A start tag whose tag name is one of: "b", "big", "blockquote", "body", - "br", "center", "code", "dd", "div", "dl", "dt", "em", "embed", - "h1", "h2", "h3", "h4", "h5", "h6", "head", "hr", "i", "img", - "li", "listing", "menu", "meta", "nobr", "ol", "p", "pre", - "ruby", "s", "small", "span", "strong", "strike", "sub", "sup", - "table", "tt", "u", "ul", "var" - - A start tag whose tag name is "font", if the token has any attributes - named "color", "face", or "size" - - An end-of-file token - Parse error. - - Pop elements from the stack of open elements until the current - node is in the HTML namespace. - - Switch the insertion mode to the secondary insertion mode, and - reprocess the token. - - Any other start tag - If the current node is an element in the MathML namespace, - adjust MathML attributes for the token. (This fixes the case of - MathML attributes that are not all lowercase.) - - Adjust foreign attributes for the token. (This fixes the use of - namespaced attributes, in particular XLink in SVG.) - - Insert a foreign element for the token, in the same namespace as - the current node. - - If the token has its self-closing flag set, pop the current node - off the stack of open elements and acknowledge the token's - self-closing flag. - - 8.2.5.21 The "after body" insertion mode - - When the insertion mode is "after body", tokens must be handled as - follows: - - A character token that is one of one of U+0009 CHARACTER TABULATION, - U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020 SPACE - Process the token using the rules for the "in body" insertion - mode. - - A comment token - Append a Comment node to the first element in the stack of open - elements (the html element), with the data attribute set to the - data given in the comment token. - - A DOCTYPE token - Parse error. Ignore the token. - - A start tag whose tag name is "html" - Process the token using the rules for the "in body" insertion - mode. 
- - An end tag whose tag name is "html" - If the parser was originally created as part of the HTML - fragment parsing algorithm, this is a parse error; ignore the - token. (fragment case) - - Otherwise, switch the insertion mode to "after after body". - - An end-of-file token - Stop parsing. - - Anything else - Parse error. Switch the insertion mode to "in body" and - reprocess the token. - - 8.2.5.22 The "in frameset" insertion mode - - When the insertion mode is "in frameset", tokens must be handled as - follows: - - A character token that is one of one of U+0009 CHARACTER TABULATION, - U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020 SPACE - Insert the character into the current node. - - A comment token - Append a Comment node to the current node with the data - attribute set to the data given in the comment token. - - A DOCTYPE token - Parse error. Ignore the token. - - A start tag whose tag name is "html" - Process the token using the rules for the "in body" insertion - mode. - - A start tag whose tag name is "frameset" - Insert an HTML element for the token. - - An end tag whose tag name is "frameset" - If the current node is the root html element, then this is a - parse error; ignore the token. (fragment case) - - Otherwise, pop the current node from the stack of open elements. - - If the parser was not originally created as part of the HTML - fragment parsing algorithm (fragment case), and the current node - is no longer a frameset element, then switch the insertion mode - to "after frameset". - - A start tag whose tag name is "frame" - Insert an HTML element for the token. Immediately pop the - current node off the stack of open elements. - - Acknowledge the token's self-closing flag, if it is set. - - A start tag whose tag name is "noframes" - Process the token using the rules for the "in head" insertion - mode. - - An end-of-file token - If the current node is not the root html element, then this is a - parse error. 
- - It can only be the current node in the fragment case. - - Stop parsing. - - Anything else - Parse error. Ignore the token. - - 8.2.5.23 The "after frameset" insertion mode - - When the insertion mode is "after frameset", tokens must be handled as - follows: - - A character token that is one of one of U+0009 CHARACTER TABULATION, - U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020 SPACE - Insert the character into the current node. - - A comment token - Append a Comment node to the current node with the data - attribute set to the data given in the comment token. - - A DOCTYPE token - Parse error. Ignore the token. - - A start tag whose tag name is "html" - Process the token using the rules for the "in body" insertion - mode. - - An end tag whose tag name is "html" - Switch the insertion mode to "after after frameset". - - A start tag whose tag name is "noframes" - Process the token using the rules for the "in head" insertion - mode. - - An end-of-file token - Stop parsing. - - Anything else - Parse error. Ignore the token. - - This doesn't handle UAs that don't support frames, or that do support - frames but want to show the NOFRAMES content. Supporting the former is - easy; supporting the latter is harder. - - 8.2.5.24 The "after after body" insertion mode - - When the insertion mode is "after after body", tokens must be handled - as follows: - - A comment token - Append a Comment node to the Document object with the data - attribute set to the data given in the comment token. - - A DOCTYPE token - A character token that is one of one of U+0009 CHARACTER TABULATION, - U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020 SPACE - - A start tag whose tag name is "html" - Process the token using the rules for the "in body" insertion - mode. - - An end-of-file token - Stop parsing. - - Anything else - Parse error. Switch the insertion mode to "in body" and - reprocess the token. 
- - 8.2.5.25 The "after after frameset" insertion mode - - When the insertion mode is "after after frameset", tokens must be - handled as follows: - - A comment token - Append a Comment node to the Document object with the data - attribute set to the data given in the comment token. - - A DOCTYPE token - A character token that is one of one of U+0009 CHARACTER TABULATION, - U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020 SPACE - - A start tag whose tag name is "html" - Process the token using the rules for the "in body" insertion - mode. - - An end-of-file token - Stop parsing. - - A start tag whose tag name is "noframes" - Process the token using the rules for the "in head" insertion - mode. - - Anything else - Parse error. Ignore the token. - - 8.2.6 The end - - Once the user agent stops parsing the document, the user agent must - follow the steps in this section. - - First, the current document readiness must be set to "interactive". - - Then, the rules for when a script completes loading start applying - (script execution is no longer managed by the parser). - - If any of the scripts in the list of scripts that will execute as soon - as possible have completed loading, or if the list of scripts that will - execute asynchronously is not empty and the first script in that list - has completed loading, then the user agent must act as if those scripts - just completed loading, following the rules given for that in the - script element definition. - - Then, if the list of scripts that will execute when the document has - finished parsing is not empty, and the first item in this list has - already completed loading, then the user agent must act as if that - script just finished loading. - - By this point, there will be no scripts that have loaded but have not - yet been executed. - - The user agent must then fire a simple event called DOMContentLoaded at - the Document. 
- - Once everything that delays the load event has completed, the user - agent must set the current document readiness to "complete", and then - fire a load event at the body element. - - delaying the load event for things like image loads allows for intranet - port scans (even without javascript!). Should we really encode that - into the spec? - - 8.2.7 Coercing an HTML DOM into an infoset - - When an application uses an HTML parser in conjunction with an XML - pipeline, it is possible that the constructed DOM is not compatible - with the XML tool chain in certain subtle ways. For example, an XML - toolchain might not be able to represent attributes with the name - xmlns, since they conflict with the Namespaces in XML syntax. There is - also some data that the HTML parser generates that isn't included in - the DOM itself. This section specifies some rules for handling these - issues. - - If the XML API being used doesn't support DOCTYPEs, the tool may drop - DOCTYPEs altogether. - - If the XML API doesn't support attributes in no namespace that are - named "xmlns", attributes whose names start with "xmlns:", or - attributes in the XMLNS namespace, then the tool may drop such - attributes. - - The tool may annotate the output with any namespace declarations - required for proper operation. - - If the XML API being used restricts the allowable characters in the - local names of elements and attributes, then the tool may map all - element and attribute local names that the API wouldn't support to a - set of names that are allowed, by replacing any character that isn't - supported with the uppercase letter U and the five digits of the - character's Unicode codepoint when expressed in hexadecimal, using - digits 0-9 and capital letters A-F as the symbols, in increasing - numeric order. 
- - For example, the element name foo start tag will be closed - by a end tag, and never by a end tag, even if - the user agent is using the rules above to then generate an actual - element in the DOM with the name aU0003AU0003A for that start tag. - - 8.3 Namespaces - - The HTML namespace is: http://www.w3.org/1999/xhtml - - The MathML namespace is: http://www.w3.org/1998/Math/MathML - - The SVG namespace is: http://www.w3.org/2000/svg - - The XLink namespace is: http://www.w3.org/1999/xlink - - The XML namespace is: http://www.w3.org/XML/1998/namespace - - The XMLNS namespace is: http://www.w3.org/2000/xmlns/ From 78674ec1e831bb6116fb0b22ee811976351f923b Mon Sep 17 00:00:00 2001 From: Anthony Vanelverdinghe Date: Thu, 23 Apr 2020 19:58:13 +0200 Subject: [PATCH 04/30] Clean up: gwt-src et al. --- HtmlParser-compile | 3 - HtmlParser-compile-detailed | 3 - HtmlParser-compile-detailed.launch | 24 - HtmlParser-compile.launch | 22 - HtmlParser-linux | 3 - HtmlParser-shell | 3 - HtmlParser.launch | 23 - .../validator/htmlparser/HtmlParser.gwt.xml | 12 - .../htmlparser/gwt/BrowserTreeBuilder.java | 477 ------------------ .../validator/htmlparser/gwt/HtmlParser.java | 265 ---------- .../htmlparser/gwt/HtmlParserModule.java | 87 ---- .../htmlparser/gwt/ParseEndListener.java | 46 -- .../htmlparser/public/HtmlParser.html | 225 --------- .../public/LICENSE.Live-DOM-viewer.txt | 25 - .../nu/validator/htmlparser/public/blank.html | 2 - 15 files changed, 1220 deletions(-) delete mode 100755 HtmlParser-compile delete mode 100755 HtmlParser-compile-detailed delete mode 100644 HtmlParser-compile-detailed.launch delete mode 100644 HtmlParser-compile.launch delete mode 100755 HtmlParser-linux delete mode 100755 HtmlParser-shell delete mode 100644 HtmlParser.launch delete mode 100644 gwt-src/nu/validator/htmlparser/HtmlParser.gwt.xml delete mode 100644 gwt-src/nu/validator/htmlparser/gwt/BrowserTreeBuilder.java delete mode 100644 gwt-src/nu/validator/htmlparser/gwt/HtmlParser.java 
delete mode 100644 gwt-src/nu/validator/htmlparser/gwt/HtmlParserModule.java delete mode 100644 gwt-src/nu/validator/htmlparser/gwt/ParseEndListener.java delete mode 100644 gwt-src/nu/validator/htmlparser/public/HtmlParser.html delete mode 100644 gwt-src/nu/validator/htmlparser/public/LICENSE.Live-DOM-viewer.txt delete mode 100644 gwt-src/nu/validator/htmlparser/public/blank.html diff --git a/HtmlParser-compile b/HtmlParser-compile deleted file mode 100755 index 3e867827..00000000 --- a/HtmlParser-compile +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/sh -APPDIR=`dirname $0`; -java -XstartOnFirstThread -Xmx256M -cp "$APPDIR/src:$APPDIR/gwt-src:$APPDIR/super:/Developer/gwt-mac-1.5.1/gwt-user.jar:/Developer/gwt-mac-1.5.1/gwt-dev-mac.jar" com.google.gwt.dev.GWTCompiler -out "$APPDIR/www" "$@" nu.validator.htmlparser.HtmlParser; diff --git a/HtmlParser-compile-detailed b/HtmlParser-compile-detailed deleted file mode 100755 index a4102d64..00000000 --- a/HtmlParser-compile-detailed +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/sh -APPDIR=`dirname $0`; -java -XstartOnFirstThread -Xmx256M -cp "$APPDIR/src:$APPDIR/gwt-src:$APPDIR/super:/Developer/gwt-mac-1.5.1/gwt-user.jar:/Developer/gwt-mac-1.5.1/gwt-dev-mac.jar" com.google.gwt.dev.GWTCompiler -style DETAILED -out "$APPDIR/www" "$@" nu.validator.htmlparser.HtmlParser; diff --git a/HtmlParser-compile-detailed.launch b/HtmlParser-compile-detailed.launch deleted file mode 100644 index 0347fd6c..00000000 --- a/HtmlParser-compile-detailed.launch +++ /dev/null @@ -1,24 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/HtmlParser-compile.launch b/HtmlParser-compile.launch deleted file mode 100644 index 54e7bc33..00000000 --- a/HtmlParser-compile.launch +++ /dev/null @@ -1,22 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - diff --git a/HtmlParser-linux b/HtmlParser-linux deleted file mode 100755 index 0a9e9def..00000000 --- a/HtmlParser-linux +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/sh -APPDIR=`dirname $0`; -java -Xmx256M -cp 
"$APPDIR/src:$APPDIR/gwt-src:$APPDIR/super:$APPDIR/bin:/home/hsivonen/gwt-linux-1.5.1/gwt-user.jar:/home/hsivonen/gwt-linux-1.5.1/gwt-dev-linux.jar" com.google.gwt.dev.GWTShell -out "$APPDIR/www" "$@" nu.validator.htmlparser.HtmlParser/HtmlParser.html; diff --git a/HtmlParser-shell b/HtmlParser-shell deleted file mode 100755 index ffcf2e29..00000000 --- a/HtmlParser-shell +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/sh -APPDIR=`dirname $0`; -java -XstartOnFirstThread -Xmx256M -cp "$APPDIR/src:$APPDIR/gwt-src:$APPDIR/super:$APPDIR/bin:/Developer/gwt-mac-1.5.1/gwt-user.jar:/Developer/gwt-mac-1.5.1/gwt-dev-mac.jar" com.google.gwt.dev.GWTShell -out "$APPDIR/www" "$@" nu.validator.htmlparser.HtmlParser/HtmlParser.html; diff --git a/HtmlParser.launch b/HtmlParser.launch deleted file mode 100644 index 9335abf6..00000000 --- a/HtmlParser.launch +++ /dev/null @@ -1,23 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - diff --git a/gwt-src/nu/validator/htmlparser/HtmlParser.gwt.xml b/gwt-src/nu/validator/htmlparser/HtmlParser.gwt.xml deleted file mode 100644 index 1eab09c2..00000000 --- a/gwt-src/nu/validator/htmlparser/HtmlParser.gwt.xml +++ /dev/null @@ -1,12 +0,0 @@ - - - - - - - - - - - - diff --git a/gwt-src/nu/validator/htmlparser/gwt/BrowserTreeBuilder.java b/gwt-src/nu/validator/htmlparser/gwt/BrowserTreeBuilder.java deleted file mode 100644 index 2eaa6764..00000000 --- a/gwt-src/nu/validator/htmlparser/gwt/BrowserTreeBuilder.java +++ /dev/null @@ -1,477 +0,0 @@ -/* - * Copyright (c) 2007 Henri Sivonen - * Copyright (c) 2008-2017 Mozilla Foundation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, 
subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -package nu.validator.htmlparser.gwt; - -import java.util.LinkedList; - -import nu.validator.htmlparser.common.DocumentMode; -import nu.validator.htmlparser.impl.CoalescingTreeBuilder; -import nu.validator.htmlparser.impl.HtmlAttributes; - -import org.xml.sax.SAXException; - -import com.google.gwt.core.client.JavaScriptException; -import com.google.gwt.core.client.JavaScriptObject; - -class BrowserTreeBuilder extends CoalescingTreeBuilder { - - private JavaScriptObject document; - - private JavaScriptObject script; - - private JavaScriptObject placeholder; - - private boolean readyToRun; - - private final LinkedList scriptStack = new LinkedList(); - - private class ScriptHolder { - private final JavaScriptObject script; - - private final JavaScriptObject placeholder; - - /** - * @param script - * @param placeholder - */ - public ScriptHolder(JavaScriptObject script, - JavaScriptObject placeholder) { - this.script = script; - this.placeholder = placeholder; - } - - /** - * Returns the script. - * - * @return the script - */ - public JavaScriptObject getScript() { - return script; - } - - /** - * Returns the placeholder. 
- * - * @return the placeholder - */ - public JavaScriptObject getPlaceholder() { - return placeholder; - } - } - - protected BrowserTreeBuilder(JavaScriptObject document) { - super(); - this.document = document; - installExplorerCreateElementNS(document); - } - - private static native boolean installExplorerCreateElementNS( - JavaScriptObject doc) /*-{ - if (!doc.createElementNS) { - doc.createElementNS = function (uri, local) { - if ("http://www.w3.org/1999/xhtml" == uri) { - return doc.createElement(local); - } else if ("http://www.w3.org/1998/Math/MathML" == uri) { - if (!doc.mathplayerinitialized) { - var obj = document.createElement("object"); - obj.setAttribute("id", "mathplayer"); - obj.setAttribute("classid", "clsid:32F66A20-7614-11D4-BD11-00104BD3F987"); - document.getElementsByTagName("head")[0].appendChild(obj); - document.namespaces.add("m", "http://www.w3.org/1998/Math/MathML", "#mathplayer"); - doc.mathplayerinitialized = true; - } - return doc.createElement("m:" + local); - } else if ("http://www.w3.org/2000/svg" == uri) { - if (!doc.renesisinitialized) { - var obj = document.createElement("object"); - obj.setAttribute("id", "renesis"); - obj.setAttribute("classid", "clsid:AC159093-1683-4BA2-9DCF-0C350141D7F2"); - document.getElementsByTagName("head")[0].appendChild(obj); - document.namespaces.add("s", "http://www.w3.org/2000/svg", "#renesis"); - doc.renesisinitialized = true; - } - return doc.createElement("s:" + local); - } else { - // throw - } - } - } - }-*/; - - private static native boolean hasAttributeNS(JavaScriptObject element, - String uri, String localName) /*-{ - return element.hasAttributeNS(uri, localName); - }-*/; - - private static native void setAttributeNS(JavaScriptObject element, - String uri, String localName, String value) /*-{ - element.setAttributeNS(uri, localName, value); - }-*/; - - @Override protected void addAttributesToElement(JavaScriptObject element, - HtmlAttributes attributes) throws SAXException { - try { - for 
(int i = 0; i < attributes.getLength(); i++) { - String localName = attributes.getLocalNameNoBoundsCheck(i); - String uri = attributes.getURINoBoundsCheck(i); - if (!hasAttributeNS(element, uri, localName)) { - setAttributeNS(element, uri, localName, - attributes.getValueNoBoundsCheck(i)); - } - } - } catch (JavaScriptException e) { - fatal(e); - } - } - - private static native void appendChild(JavaScriptObject parent, - JavaScriptObject child) /*-{ - parent.appendChild(child); - }-*/; - - private static native JavaScriptObject createTextNode(JavaScriptObject doc, - String text) /*-{ - return doc.createTextNode(text); - }-*/; - - private static native JavaScriptObject getLastChild(JavaScriptObject node) /*-{ - return node.lastChild; - }-*/; - - private static native void extendTextNode(JavaScriptObject node, String text) /*-{ - node.data += text; - }-*/; - - @Override protected void appendCharacters(JavaScriptObject parent, - String text) throws SAXException { - try { - if (parent == placeholder) { - appendChild(script, createTextNode(document, text)); - - } - JavaScriptObject lastChild = getLastChild(parent); - if (lastChild != null && getNodeType(lastChild) == 3) { - extendTextNode(lastChild, text); - return; - } - appendChild(parent, createTextNode(document, text)); - } catch (JavaScriptException e) { - fatal(e); - } - } - - private static native boolean hasChildNodes(JavaScriptObject element) /*-{ - return element.hasChildNodes(); - }-*/; - - private static native JavaScriptObject getFirstChild( - JavaScriptObject element) /*-{ - return element.firstChild; - }-*/; - - @Override protected void appendChildrenToNewParent( - JavaScriptObject oldParent, JavaScriptObject newParent) - throws SAXException { - try { - while (hasChildNodes(oldParent)) { - appendChild(newParent, getFirstChild(oldParent)); - } - } catch (JavaScriptException e) { - fatal(e); - } - } - - private static native JavaScriptObject createComment(JavaScriptObject doc, - String text) /*-{ - return 
doc.createComment(text); - }-*/; - - @Override protected void appendComment(JavaScriptObject parent, - String comment) throws SAXException { - try { - if (parent == placeholder) { - appendChild(script, createComment(document, comment)); - } - appendChild(parent, createComment(document, comment)); - } catch (JavaScriptException e) { - fatal(e); - } - } - - @Override protected void appendCommentToDocument(String comment) - throws SAXException { - try { - appendChild(document, createComment(document, comment)); - } catch (JavaScriptException e) { - fatal(e); - } - } - - private static native JavaScriptObject createElementNS( - JavaScriptObject doc, String ns, String local) /*-{ - return doc.createElementNS(ns, local); - }-*/; - - @Override protected JavaScriptObject createElement(String ns, String name, - HtmlAttributes attributes) throws SAXException { - try { - JavaScriptObject rv = createElementNS(document, ns, name); - for (int i = 0; i < attributes.getLength(); i++) { - setAttributeNS(rv, attributes.getURINoBoundsCheck(i), - attributes.getLocalNameNoBoundsCheck(i), - attributes.getValueNoBoundsCheck(i)); - } - - if ("script" == name) { - if (placeholder != null) { - scriptStack.addLast(new ScriptHolder(script, placeholder)); - } - script = rv; - placeholder = createElementNS(document, - "http://n.validator.nu/placeholder/", "script"); - rv = placeholder; - for (int i = 0; i < attributes.getLength(); i++) { - setAttributeNS(rv, attributes.getURINoBoundsCheck(i), - attributes.getLocalNameNoBoundsCheck(i), - attributes.getValueNoBoundsCheck(i)); - } - } - - return rv; - } catch (JavaScriptException e) { - fatal(e); - throw new RuntimeException("Unreachable"); - } - } - - @Override protected JavaScriptObject createHtmlElementSetAsRoot( - HtmlAttributes attributes) throws SAXException { - try { - JavaScriptObject rv = createElementNS(document, - "http://www.w3.org/1999/xhtml", "html"); - for (int i = 0; i < attributes.getLength(); i++) { - setAttributeNS(rv, 
attributes.getURINoBoundsCheck(i), - attributes.getLocalNameNoBoundsCheck(i), - attributes.getValueNoBoundsCheck(i)); - } - appendChild(document, rv); - return rv; - } catch (JavaScriptException e) { - fatal(e); - throw new RuntimeException("Unreachable"); - } - } - - private static native JavaScriptObject getParentNode( - JavaScriptObject element) /*-{ - return element.parentNode; - }-*/; - - @Override protected void appendElement(JavaScriptObject child, - JavaScriptObject newParent) throws SAXException { - try { - if (newParent == placeholder) { - appendChild(script, cloneNodeDeep(child)); - } - appendChild(newParent, child); - } catch (JavaScriptException e) { - fatal(e); - } - } - - @Override protected boolean hasChildren(JavaScriptObject element) - throws SAXException { - try { - return hasChildNodes(element); - } catch (JavaScriptException e) { - fatal(e); - throw new RuntimeException("Unreachable"); - } - } - - private static native void insertBeforeNative(JavaScriptObject parent, - JavaScriptObject child, JavaScriptObject sibling) /*-{ - parent.insertBefore(child, sibling); - }-*/; - - private static native int getNodeType(JavaScriptObject node) /*-{ - return node.nodeType; - }-*/; - - private static native JavaScriptObject cloneNodeDeep(JavaScriptObject node) /*-{ - return node.cloneNode(true); - }-*/; - - /** - * Returns the document. 
- * - * @return the document - */ - JavaScriptObject getDocument() { - JavaScriptObject rv = document; - document = null; - return rv; - } - - private static native JavaScriptObject createDocumentFragment( - JavaScriptObject doc) /*-{ - return doc.createDocumentFragment(); - }-*/; - - JavaScriptObject getDocumentFragment() { - JavaScriptObject rv = createDocumentFragment(document); - JavaScriptObject rootElt = getFirstChild(document); - while (hasChildNodes(rootElt)) { - appendChild(rv, getFirstChild(rootElt)); - } - document = null; - return rv; - } - - /** - * @see nu.validator.htmlparser.impl.TreeBuilder#createJavaScriptObject(String, - * java.lang.String, org.xml.sax.Attributes, java.lang.Object) - */ - @Override protected JavaScriptObject createElement(String ns, String name, - HtmlAttributes attributes, JavaScriptObject form) - throws SAXException { - try { - JavaScriptObject rv = createElement(ns, name, attributes); - // rv.setUserData("nu.validator.form-pointer", form, null); - return rv; - } catch (JavaScriptException e) { - fatal(e); - return null; - } - } - - /** - * @see nu.validator.htmlparser.impl.TreeBuilder#start() - */ - @Override protected void start(boolean fragment) throws SAXException { - script = null; - placeholder = null; - readyToRun = false; - } - - protected void documentMode(DocumentMode mode, String publicIdentifier, - String systemIdentifier) - throws SAXException { - // document.setUserData("nu.validator.document-mode", mode, null); - } - - /** - * @see nu.validator.htmlparser.impl.TreeBuilder#elementPopped(java.lang.String, - * java.lang.String, java.lang.Object) - */ - @Override protected void elementPopped(String ns, String name, - JavaScriptObject node) throws SAXException { - if (node == placeholder) { - readyToRun = true; - requestSuspension(); - } - } - - private static native void replace(JavaScriptObject oldNode, - JavaScriptObject newNode) /*-{ - oldNode.parentNode.replaceChild(newNode, oldNode); - }-*/; - - private static 
native JavaScriptObject getPreviousSibling(JavaScriptObject node) /*-{ - return node.previousSibling; - }-*/; - - void maybeRunScript() { - if (readyToRun) { - readyToRun = false; - replace(placeholder, script); - if (scriptStack.isEmpty()) { - script = null; - placeholder = null; - } else { - ScriptHolder scriptHolder = scriptStack.removeLast(); - script = scriptHolder.getScript(); - placeholder = scriptHolder.getPlaceholder(); - } - } - } - - @Override protected void insertFosterParentedCharacters(String text, - JavaScriptObject table, JavaScriptObject stackParent) - throws SAXException { - try { - JavaScriptObject parent = getParentNode(table); - if (parent != null) { // always an element if not null - JavaScriptObject previousSibling = getPreviousSibling(table); - if (previousSibling != null - && getNodeType(previousSibling) == 3) { - extendTextNode(previousSibling, text); - return; - } - insertBeforeNative(parent, createTextNode(document, text), table); - return; - } - JavaScriptObject lastChild = getLastChild(stackParent); - if (lastChild != null && getNodeType(lastChild) == 3) { - extendTextNode(lastChild, text); - return; - } - appendChild(stackParent, createTextNode(document, text)); - } catch (JavaScriptException e) { - fatal(e); - } - } - - @Override protected void insertFosterParentedChild(JavaScriptObject child, - JavaScriptObject table, JavaScriptObject stackParent) - throws SAXException { - JavaScriptObject parent = getParentNode(table); - try { - if (parent != null && getNodeType(parent) == 1) { - insertBeforeNative(parent, child, table); - } else { - appendChild(stackParent, child); - } - } catch (JavaScriptException e) { - fatal(e); - } - } - - private static native void removeChild(JavaScriptObject parent, - JavaScriptObject child) /*-{ - parent.removeChild(child); - }-*/; - - @Override protected void detachFromParent(JavaScriptObject element) - throws SAXException { - try { - JavaScriptObject parent = getParentNode(element); - if (parent != 
null) { - removeChild(parent, element); - } - } catch (JavaScriptException e) { - fatal(e); - } - } -} diff --git a/gwt-src/nu/validator/htmlparser/gwt/HtmlParser.java b/gwt-src/nu/validator/htmlparser/gwt/HtmlParser.java deleted file mode 100644 index 1d71cdfd..00000000 --- a/gwt-src/nu/validator/htmlparser/gwt/HtmlParser.java +++ /dev/null @@ -1,265 +0,0 @@ -/* - * Copyright (c) 2007 Henri Sivonen - * Copyright (c) 2007-2008 Mozilla Foundation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- */ - -package nu.validator.htmlparser.gwt; - -import java.util.LinkedList; - -import nu.validator.htmlparser.common.XmlViolationPolicy; -import nu.validator.htmlparser.impl.ErrorReportingTokenizer; -import nu.validator.htmlparser.impl.Tokenizer; -import nu.validator.htmlparser.impl.UTF16Buffer; - -import org.xml.sax.ErrorHandler; -import org.xml.sax.SAXException; -import org.xml.sax.SAXParseException; - -import com.google.gwt.core.client.JavaScriptObject; -import com.google.gwt.user.client.Timer; - -/** - * This class implements an HTML5 parser that exposes data through the DOM - * interface. - * - *

By default, when using the constructor without arguments, the - * this parser treats XML 1.0-incompatible infosets as fatal errors. - * This corresponds to - * FATAL as the general XML violation policy. To make the parser - * support non-conforming HTML fully per the HTML 5 spec while on the other - * hand potentially violating the DOM API contract, set the general XML - * violation policy to ALLOW. This does not work with a standard - * DOM implementation. Handling all input without fatal errors and without - * violating the DOM API contract is possible by setting - * the general XML violation policy to ALTER_INFOSET. This - * makes the parser non-conforming but is probably the most useful - * setting for most applications. - * - *

The doctype is not represented in the tree. - * - *

The document mode is represented as user data DocumentMode - * object with the key nu.validator.document-mode on the document - * node. - * - *

The form pointer is also stored as user data with the key - * nu.validator.form-pointer. - * - * @version $Id: HtmlDocumentBuilder.java 255 2008-05-29 08:57:38Z hsivonen $ - * @author hsivonen - */ -public class HtmlParser { - - private static final int CHUNK_SIZE = 512; - - private final Tokenizer tokenizer; - - private final BrowserTreeBuilder domTreeBuilder; - - private final StringBuilder documentWriteBuffer = new StringBuilder(); - - private ErrorHandler errorHandler; - - private UTF16Buffer stream; - - private int streamLength; - - private boolean lastWasCR; - - private boolean ending; - - private ParseEndListener parseEndListener; - - private final LinkedList bufferStack = new LinkedList(); - - /** - * Instantiates the parser - * - * @param implementation - * the DOM implementation - * @param xmlPolicy the policy - */ - public HtmlParser(JavaScriptObject document) { - this.domTreeBuilder = new BrowserTreeBuilder(document); - this.tokenizer = new ErrorReportingTokenizer(domTreeBuilder); - this.domTreeBuilder.setNamePolicy(XmlViolationPolicy.ALTER_INFOSET); - this.tokenizer.setCommentPolicy(XmlViolationPolicy.ALTER_INFOSET); - this.tokenizer.setContentNonXmlCharPolicy(XmlViolationPolicy.ALTER_INFOSET); - this.tokenizer.setContentSpacePolicy(XmlViolationPolicy.ALTER_INFOSET); - this.tokenizer.setNamePolicy(XmlViolationPolicy.ALTER_INFOSET); - this.tokenizer.setXmlnsPolicy(XmlViolationPolicy.ALTER_INFOSET); - } - - /** - * Parses a document from a SAX InputSource. 
- * @param is the source - * @return the doc - * @see javax.xml.parsers.DocumentBuilder#parse(org.xml.sax.InputSource) - */ - public void parse(String source, ParseEndListener callback) throws SAXException { - parseEndListener = callback; - domTreeBuilder.setFragmentContext(null); - tokenize(source, null); - } - - /** - * @param is - * @throws SAXException - * @throws IOException - * @throws MalformedURLException - */ - private void tokenize(String source, String context) throws SAXException { - lastWasCR = false; - ending = false; - documentWriteBuffer.setLength(0); - streamLength = source.length(); - stream = new UTF16Buffer(source.toCharArray(), 0, - (streamLength < CHUNK_SIZE ? streamLength : CHUNK_SIZE)); - bufferStack.clear(); - push(stream); - domTreeBuilder.setFragmentContext(context == null ? null : context.intern()); - tokenizer.start(); - pump(); - } - - private void pump() throws SAXException { - if (ending) { - tokenizer.end(); - domTreeBuilder.getDocument(); // drops the internal reference - parseEndListener.parseComplete(); - // Don't schedule timeout - return; - } - - int docWriteLen = documentWriteBuffer.length(); - if (docWriteLen > 0) { - char[] newBuf = new char[docWriteLen]; - documentWriteBuffer.getChars(0, docWriteLen, newBuf, 0); - push(new UTF16Buffer(newBuf, 0, docWriteLen)); - documentWriteBuffer.setLength(0); - } - - for (;;) { - UTF16Buffer buffer = peek(); - if (!buffer.hasMore()) { - if (buffer == stream) { - if (buffer.getEnd() == streamLength) { - // Stop parsing - tokenizer.eof(); - ending = true; - break; - } else { - int newEnd = buffer.getStart() + CHUNK_SIZE; - buffer.setEnd(newEnd < streamLength ? 
newEnd - : streamLength); - continue; - } - } else { - pop(); - continue; - } - } - // now we have a non-empty buffer - buffer.adjust(lastWasCR); - lastWasCR = false; - if (buffer.hasMore()) { - lastWasCR = tokenizer.tokenizeBuffer(buffer); - domTreeBuilder.maybeRunScript(); - break; - } else { - continue; - } - } - - // schedule - Timer timer = new Timer() { - - @Override public void run() { - try { - pump(); - } catch (SAXException e) { - ending = true; - if (errorHandler != null) { - try { - errorHandler.fatalError(new SAXParseException( - e.getMessage(), null, null, -1, -1, e)); - } catch (SAXException e1) { - } - } - } - } - - }; - timer.schedule(1); - } - - private void push(UTF16Buffer buffer) { - bufferStack.addLast(buffer); - } - - private UTF16Buffer peek() { - return bufferStack.getLast(); - } - - private void pop() { - bufferStack.removeLast(); - } - - public void documentWrite(String text) throws SAXException { - UTF16Buffer buffer = new UTF16Buffer(text.toCharArray(), 0, text.length()); - while (buffer.hasMore()) { - buffer.adjust(lastWasCR); - lastWasCR = false; - if (buffer.hasMore()) { - lastWasCR = tokenizer.tokenizeBuffer(buffer); - domTreeBuilder.maybeRunScript(); - } - } - } - - /** - * @see javax.xml.parsers.DocumentBuilder#setErrorHandler(org.xml.sax.ErrorHandler) - */ - public void setErrorHandler(ErrorHandler errorHandler) { - this.errorHandler = errorHandler; - domTreeBuilder.setErrorHandler(errorHandler); - tokenizer.setErrorHandler(errorHandler); - } - - /** - * Sets whether comment nodes appear in the tree. - * @param ignoreComments true to ignore comments - * @see nu.validator.htmlparser.impl.TreeBuilder#setIgnoringComments(boolean) - */ - public void setIgnoringComments(boolean ignoreComments) { - domTreeBuilder.setIgnoringComments(ignoreComments); - } - - /** - * Sets whether the parser considers scripting to be enabled for noscript treatment. 
- * @param scriptingEnabled true to enable - * @see nu.validator.htmlparser.impl.TreeBuilder#setScriptingEnabled(boolean) - */ - public void setScriptingEnabled(boolean scriptingEnabled) { - domTreeBuilder.setScriptingEnabled(scriptingEnabled); - } - -} diff --git a/gwt-src/nu/validator/htmlparser/gwt/HtmlParserModule.java b/gwt-src/nu/validator/htmlparser/gwt/HtmlParserModule.java deleted file mode 100644 index 255a02d1..00000000 --- a/gwt-src/nu/validator/htmlparser/gwt/HtmlParserModule.java +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Copyright (c) 2008 Mozilla Foundation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- */ - -package nu.validator.htmlparser.gwt; - -import org.xml.sax.SAXException; - -import com.google.gwt.core.client.EntryPoint; -import com.google.gwt.core.client.JavaScriptObject; - -public class HtmlParserModule implements EntryPoint { - - private static native void zapChildren(JavaScriptObject node) /*-{ - while (node.hasChildNodes()) { - node.removeChild(node.lastChild); - } - }-*/; - - private static native void installDocWrite(JavaScriptObject doc, HtmlParser parser) /*-{ - doc.write = function() { - if (arguments.length == 0) { - return; - } - var text = arguments[0]; - for (var i = 1; i < arguments.length; i++) { - text += arguments[i]; - } - parser.@nu.validator.htmlparser.gwt.HtmlParser::documentWrite(Ljava/lang/String;)(text); - } - doc.writeln = function() { - if (arguments.length == 0) { - parser.@nu.validator.htmlparser.gwt.HtmlParser::documentWrite(Ljava/lang/String;)("\n"); - return; - } - var text = arguments[0]; - for (var i = 1; i < arguments.length; i++) { - text += arguments[i]; - } - text += "\n"; - parser.@nu.validator.htmlparser.gwt.HtmlParser::documentWrite(Ljava/lang/String;)(text); - } - }-*/; - - @SuppressWarnings("unused") - private static void parseHtmlDocument(String source, JavaScriptObject document, JavaScriptObject readyCallback, JavaScriptObject errorHandler) throws SAXException { - if (readyCallback == null) { - readyCallback = JavaScriptObject.createFunction(); - } - zapChildren(document); - HtmlParser parser = new HtmlParser(document); - parser.setScriptingEnabled(true); - // XXX error handler - - installDocWrite(document, parser); - - parser.parse(source, new ParseEndListener(readyCallback)); - } - - private static native void exportEntryPoints() /*-{ - $wnd.parseHtmlDocument = @nu.validator.htmlparser.gwt.HtmlParserModule::parseHtmlDocument(Ljava/lang/String;Lcom/google/gwt/core/client/JavaScriptObject;Lcom/google/gwt/core/client/JavaScriptObject;Lcom/google/gwt/core/client/JavaScriptObject;); - }-*/; - - - public void 
onModuleLoad() { - exportEntryPoints(); - } - -} diff --git a/gwt-src/nu/validator/htmlparser/gwt/ParseEndListener.java b/gwt-src/nu/validator/htmlparser/gwt/ParseEndListener.java deleted file mode 100644 index 43235c5b..00000000 --- a/gwt-src/nu/validator/htmlparser/gwt/ParseEndListener.java +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright (c) 2008 Mozilla Foundation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- */ - -package nu.validator.htmlparser.gwt; - -import com.google.gwt.core.client.JavaScriptObject; - -public class ParseEndListener { - - private final JavaScriptObject callback; - - /** - * @param callback - */ - public ParseEndListener(JavaScriptObject callback) { - this.callback = callback; - } - - public void parseComplete() { - call(callback); - } - - private static native void call(JavaScriptObject callback) /*-{ - callback(); - }-*/; - -} diff --git a/gwt-src/nu/validator/htmlparser/public/HtmlParser.html b/gwt-src/nu/validator/htmlparser/public/HtmlParser.html deleted file mode 100644 index 4d9cde81..00000000 --- a/gwt-src/nu/validator/htmlparser/public/HtmlParser.html +++ /dev/null @@ -1,225 +0,0 @@ - - - - Live DOM Viewer - - - - - - -

Live DOM Viewer

-

Markup to test (, upload, download, hide):

-

-

DOM view (hide, refresh):

-
    -

    Rendered view: (hide):

    -

    -

    innerHTML view: (show, refresh):

    - -

    Log: (hide):

    -
    Script not loaded.
    - -

    This script puts a function w(s) into the - global scope of the test page, where s is a string to - output to the log. Also, five files are accessible in the current - directory for test purposes: image (a GIF image), - flash (a Flash file), script (a JS file), - style (a CSS file), and document (an HTML - file).

    - - \ No newline at end of file diff --git a/gwt-src/nu/validator/htmlparser/public/LICENSE.Live-DOM-viewer.txt b/gwt-src/nu/validator/htmlparser/public/LICENSE.Live-DOM-viewer.txt deleted file mode 100644 index bd2f4fcf..00000000 --- a/gwt-src/nu/validator/htmlparser/public/LICENSE.Live-DOM-viewer.txt +++ /dev/null @@ -1,25 +0,0 @@ -From: -http://software.hixie.ch/utilities/js/live-dom-viewer/LICENSE -regarding the upstream of HtmlParser.html: - -The MIT License - -Copyright (c) 2000, 2006, 2008 Ian Hickson and various contributors - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
diff --git a/gwt-src/nu/validator/htmlparser/public/blank.html b/gwt-src/nu/validator/htmlparser/public/blank.html deleted file mode 100644 index a8756c9f..00000000 --- a/gwt-src/nu/validator/htmlparser/public/blank.html +++ /dev/null @@ -1,2 +0,0 @@ - - \ No newline at end of file From db75e902da2701dc41bb514081c6ccd7ce5fa4a2 Mon Sep 17 00:00:00 2001 From: Anthony Vanelverdinghe Date: Thu, 23 Apr 2020 19:58:16 +0200 Subject: [PATCH 05/30] Clean up: mozilla-export-scripts --- mozilla-export-scripts/README.txt | 25 -------- mozilla-export-scripts/export-all.sh | 24 ------- mozilla-export-scripts/export-java-srcs.sh | 25 -------- mozilla-export-scripts/export-translator.sh | 24 ------- mozilla-export-scripts/make-translator-jar.sh | 63 ------------------- mozilla-export-scripts/util.sh | 23 ------- 6 files changed, 184 deletions(-) delete mode 100644 mozilla-export-scripts/README.txt delete mode 100644 mozilla-export-scripts/export-all.sh delete mode 100644 mozilla-export-scripts/export-java-srcs.sh delete mode 100644 mozilla-export-scripts/export-translator.sh delete mode 100644 mozilla-export-scripts/make-translator-jar.sh delete mode 100644 mozilla-export-scripts/util.sh diff --git a/mozilla-export-scripts/README.txt b/mozilla-export-scripts/README.txt deleted file mode 100644 index 3567b846..00000000 --- a/mozilla-export-scripts/README.txt +++ /dev/null @@ -1,25 +0,0 @@ -These scripts export the Java-to-C++ translator and the java source files that -implement the HTML5 parser. The exported translator may be used (with no -external dependencies) to translate the exported java source files into Gecko- -compatible C++. - -Hacking the translator itself still requires a working copy of the Java HTML5 -parser repository, but hacking the parser (modifying the Java source files and -performing the translation) should now be possible using only files committed -to the mozilla source tree. - -Run any of these scripts without arguments to receive usage instructions. 
- - make-translator-jar.sh: compiles the Java-to-C++ translator into a .jar file - export-java-srcs.sh: exports minimal java source files implementing the - HTML5 parser - export-translator.sh: exports the compiled translator and javaparser.jar - export-all.sh: runs the previous two scripts - util.sh: provides various shell utility functions to the - scripts listed above (does nothing if run directly) - -All path arguments may be either absolute or relative. This includes the path -to the script itself ($0), so the directory from which you run these scripts -doesn't matter. - -Ben Newman (7 July 2009) diff --git a/mozilla-export-scripts/export-all.sh b/mozilla-export-scripts/export-all.sh deleted file mode 100644 index 9ae07d33..00000000 --- a/mozilla-export-scripts/export-all.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env sh - -SCRIPT_DIR=`dirname $0` -source $SCRIPT_DIR/util.sh -SCRIPT_DIR=`abs $SCRIPT_DIR` - -if [ $# -eq 1 ] -then - MOZ_PARSER_PATH=`abs $1` -else - echo - echo "Usage: sh `basename $0` /path/to/mozilla-central/parser/html" - echo "Note that relative paths will work just fine." - echo - exit 1 -fi - -$SCRIPT_DIR/export-translator.sh $MOZ_PARSER_PATH -$SCRIPT_DIR/export-java-srcs.sh $MOZ_PARSER_PATH - -echo -echo "Now go to $MOZ_PARSER_PATH and run" -echo " java -jar javalib/translator.jar javasrc . nsHtml5AtomList.h" -echo diff --git a/mozilla-export-scripts/export-java-srcs.sh b/mozilla-export-scripts/export-java-srcs.sh deleted file mode 100644 index 6d32b07d..00000000 --- a/mozilla-export-scripts/export-java-srcs.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/usr/bin/env sh - -SCRIPT_DIR=`dirname $0` -source $SCRIPT_DIR/util.sh -SCRIPT_DIR=`abs $SCRIPT_DIR` - -SRCDIR=`abs $SCRIPT_DIR/../src/nu/validator/htmlparser/impl` - -if [ $# -eq 1 ] -then - MOZ_PARSER_PATH=`abs $1` -else - echo - echo "Usage: sh `basename $0` /path/to/mozilla-central/parser/html" - echo "Note that relative paths will work just fine." 
- echo - exit 1 -fi - -SRCTARGET=$MOZ_PARSER_PATH/javasrc - -rm -rf $SRCTARGET -mkdir $SRCTARGET -# Avoid copying the .svn directory: -cp -rv $SRCDIR/*.java $SRCTARGET diff --git a/mozilla-export-scripts/export-translator.sh b/mozilla-export-scripts/export-translator.sh deleted file mode 100644 index d1f4f1c3..00000000 --- a/mozilla-export-scripts/export-translator.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env sh - -SCRIPT_DIR=`dirname $0` -source $SCRIPT_DIR/util.sh -SCRIPT_DIR=`abs $SCRIPT_DIR` - -LIBDIR=`abs $SCRIPT_DIR/../translator-lib` - -if [ $# -eq 1 ] -then - MOZ_PARSER_PATH=`abs $1` -else - echo - echo "Usage: sh `basename $0` /path/to/mozilla-central/parser/html" - echo "Note that relative paths will work just fine." - echo "Be sure that you have run `dirname $0`/make-translator-jar.sh before running this script." - echo - exit 1 -fi - -LIBTARGET=$MOZ_PARSER_PATH/javalib - -rm -rf $LIBTARGET -cp -rv $LIBDIR $LIBTARGET diff --git a/mozilla-export-scripts/make-translator-jar.sh b/mozilla-export-scripts/make-translator-jar.sh deleted file mode 100644 index 4f21ae66..00000000 --- a/mozilla-export-scripts/make-translator-jar.sh +++ /dev/null @@ -1,63 +0,0 @@ -#!/usr/bin/env sh - -SCRIPT_DIR=`dirname $0` -source $SCRIPT_DIR/util.sh -SCRIPT_DIR=`abs $SCRIPT_DIR` - -SRCDIR=`abs $SCRIPT_DIR/../translator-src` -BINDIR=`abs $SCRIPT_DIR/../translator-bin` -LIBDIR=`abs $SCRIPT_DIR/../translator-lib` - -if [ $# -eq 1 ] -then - JAVAPARSER_JAR_PATH=`abs $1` -else - echo - echo "Usage: sh `basename $0` /path/to/javaparser-1.0.7.jar" - echo "Note that relative paths will work just fine." 
- echo "Obtain javaparser-1.0.7.jar from http://code.google.com/p/javaparser" - echo - exit 1 -fi - -set_up() { - rm -rf $BINDIR; mkdir $BINDIR - rm -rf $LIBDIR; mkdir $LIBDIR - cp $JAVAPARSER_JAR_PATH $LIBDIR/javaparser.jar -} - -write_manifest() { - rm -f $LIBDIR/manifest - echo "Main-Class: nu.validator.htmlparser.cpptranslate.Main" > $LIBDIR/manifest - echo "Class-Path: javaparser.jar" >> $LIBDIR/manifest -} - -compile_translator() { - find $SRCDIR -name "*.java" | \ - xargs javac -cp $LIBDIR/javaparser.jar -g -d $BINDIR -} - -generate_jar() { - jar cvfm $LIBDIR/translator.jar $LIBDIR/manifest -C $BINDIR . -} - -clean_up() { - rm -f $LIBDIR/manifest -} - -success_message() { - echo - echo "Successfully generated directory \"$LIBDIR\" with contents:" - echo - ls -al $LIBDIR - echo - echo "Now run `dirname $0`/export-all.sh with no arguments and follow the usage instructions." - echo -} - -set_up && \ - compile_translator && \ - write_manifest && \ - generate_jar && \ - clean_up && \ - success_message diff --git a/mozilla-export-scripts/util.sh b/mozilla-export-scripts/util.sh deleted file mode 100644 index 348ca14f..00000000 --- a/mozilla-export-scripts/util.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/usr/bin/env sh - -abs() { - local rel - local p - if [ $# -ne 1 ] - then - rel=. 
- else - rel=$1 - fi - if [ -d $rel ] - then - pushd $rel > /dev/null - p=`pwd` - popd > /dev/null - else - pushd `dirname $rel` > /dev/null - p=`pwd`/`basename $rel` - popd > /dev/null - fi - echo $p -} From 0a18810fdec4b75bd4db53a6378b41f924449dcd Mon Sep 17 00:00:00 2001 From: Anthony Vanelverdinghe Date: Thu, 23 Apr 2020 19:58:19 +0200 Subject: [PATCH 06/30] Clean up: ruby-gcj --- ruby-gcj/DomUtils.java | 36 ------ ruby-gcj/README | 65 ----------- ruby-gcj/Rakefile | 77 ------------- ruby-gcj/extconf.rb | 45 -------- ruby-gcj/test/domencoding.rb | 5 - ruby-gcj/test/fonts.rb | 11 -- ruby-gcj/test/google.html | 10 -- ruby-gcj/test/greek.xml | 2 - ruby-gcj/validator.cpp | 210 ----------------------------------- 9 files changed, 461 deletions(-) delete mode 100644 ruby-gcj/DomUtils.java delete mode 100644 ruby-gcj/README delete mode 100644 ruby-gcj/Rakefile delete mode 100644 ruby-gcj/extconf.rb delete mode 100644 ruby-gcj/test/domencoding.rb delete mode 100644 ruby-gcj/test/fonts.rb delete mode 100644 ruby-gcj/test/google.html delete mode 100644 ruby-gcj/test/greek.xml delete mode 100644 ruby-gcj/validator.cpp diff --git a/ruby-gcj/DomUtils.java b/ruby-gcj/DomUtils.java deleted file mode 100644 index dc43da83..00000000 --- a/ruby-gcj/DomUtils.java +++ /dev/null @@ -1,36 +0,0 @@ -import java.util.HashSet; -import org.w3c.dom.Document; -import org.w3c.dom.Node; -import org.w3c.dom.Element; - -public class DomUtils { - - private static HashSet pinned_list = new HashSet(); - - public static synchronized void pin(Document d) { - pinned_list.add(d); - } - - public static synchronized void unpin(Document d) { - pinned_list.remove(d); - } - - // return all the text content contained by a single element - public static void getElementContent(Element e, StringBuffer b) { - for (Node n = e.getFirstChild(); n!=null; n=n.getNextSibling()) { - if (n.getNodeType() == n.TEXT_NODE) { - b.append(n.getNodeValue()); - } else if (n.getNodeType() == n.ELEMENT_NODE) { - 
getElementContent((Element) e, b); - } - } - } - - // replace all child nodes of a given element with a single text element - public static void setElementContent(Element e, String s) { - while (e.hasChildNodes()) { - e.removeChild(e.getFirstChild()); - } - e.appendChild(e.getOwnerDocument().createTextNode(s)); - } -} diff --git a/ruby-gcj/README b/ruby-gcj/README deleted file mode 100644 index b368437f..00000000 --- a/ruby-gcj/README +++ /dev/null @@ -1,65 +0,0 @@ -Disclaimer: - - This code is experimental. - - When some people say experimental, they mean "it may not do what it is - intended to do; in fact, it might even wipe out your hard drive". I mean - that too. But I mean something more than that. - - In this case, experimental means that I don't even know what it is intended - to do. I just have a vague vision, and I am trying out various things in - the hopes that one of them will work out. - -Vision: - - My vague vision is that I would like to see HTML 5 be a success. For me to - consider it to be a success, it needs to be a standard, be interoperable, - and be ubiquitous. - - I believe that the Validator.nu parser can be used to bootstrap that - process. It is written in Java. Has been compiled into JavaScript. Has - been translated into C++ based on the Mozilla libraries with the intent of - being included in Firefox. It very closely tracks to the standard. - - For the moment, the effort is on extending that to another language (Ruby) - on a single environment (i.e., Linux). Once that is complete, intent is to - evaluate the results, decide what needs to be changed, and what needs to be - done to support other languages and environments. - - The bar I'm setting for myself isn't just another SWIG generated low level - interface to a DOM, but rather a best of breed interface; which for Ruby - seems to be the one pioneered by Hpricot and adopted by Nokogiri. 
Success - will mean passing all of the tests from one of those two parsers as well as - all of the HTML5 tests. - -Build instructions: - - You'll need icu4j and chardet jars. If you checked out and ran dldeps you - are already all set: - - svn co http://svn.versiondude.net/whattf/build/trunk/ build - python build/build.py checkout dldeps - - Fedora 11: - - yum install ruby-devel rubygem-rake java-1.5.0-gcj-devel gcc-c++ - - Ubuntu 9.04: - - apt-get install ruby ruby1.8-dev rake gcj g++ - - Also at this time, you need to install a jdk (e.g. sun-java6-jdk), simply - because the javac that comes with gcj doesn't support -sourcepath, and - I haven't spent the time to find a replacement. - - Finally, make sure that libjaxp1.3-java is *not* installed. - - http://gcc.gnu.org/ml/java/2009-06/msg00055.html - - If this is done, you should be all set. - - cd htmlparser/ruby-gcj - rake test - - If things are successful, the last lines of the output will list the - font attributes and values found in the test/google.html file. 
diff --git a/ruby-gcj/Rakefile b/ruby-gcj/Rakefile deleted file mode 100644 index 7b518025..00000000 --- a/ruby-gcj/Rakefile +++ /dev/null @@ -1,77 +0,0 @@ -deps = ENV['deps'] || '../../dependencies' -icu4j = "#{deps}/icu4j-4_0.jar" -chardet = "#{deps}/mozilla/intl/chardet/java/dist/lib/chardet.jar" -libgcj = Dir['/usr/share/java/libgcj*.jar'].grep(/gcj[-\d.]*jar$/).sort.last - -task :default => %w(headers libs Makefile validator.so) - -# headers - -hdb = 'nu/validator/htmlparser/dom/HtmlDocumentBuilder' -task :headers => %W(headers/DomUtils.h headers/#{hdb}.h) - -file 'headers/DomUtils.h' => 'DomUtils.java' do |t| - mkdir_p %w(classes headers), :verbose => false - sh "javac -d classes #{t.prerequisites.first}" - sh "gcjh -force -o #{t.name} -cp #{libgcj}:classes DomUtils" -end - -file "headers/#{hdb}.h" => "../src/#{hdb}.java" do |t| - mkdir_p %w(classes headers), :verbose => false - sh "javac -cp #{icu4j}:#{chardet} -d classes -sourcepath ../src " + - t.prerequisites.first - sh "gcjh -force -cp classes -o #{t.name} -cp #{libgcj}:classes " + - hdb.gsub('/','.') -end - -# libs - -task :libs => %w(htmlparser chardet icu).map {|name| "lib/libnu-#{name}.so"} - -htmlparser = Dir['../src/**/*.java'].reject {|name| name.include? 
'/xom/'} -file 'lib/libnu-htmlparser.so' => htmlparser + ['DomUtils.java'] do |t| - mkdir_p 'lib', :verbose => false - sh "gcj -shared --classpath=#{icu4j}:#{chardet} -fPIC " + - "-o #{t.name} #{t.prerequisites.join(' ')}" -end - -file 'lib/libnu-chardet.so' => chardet do |t| - mkdir_p 'lib', :verbose => false - sh "gcj -shared -fPIC -o #{t.name} #{t.prerequisites.join(' ')}" -end - -file 'lib/libnu-icu.so' => icu4j do |t| - mkdir_p 'lib', :verbose => false - sh "gcj -shared -fPIC -o #{t.name} #{t.prerequisites.join(' ')}" -end - -# module - -file 'Makefile' do - sh "ruby extconf.rb --with-gcj=#{libgcj}" -end - -file 'validator.so' => %w(Makefile validator.cpp headers/DomUtils.h) do - system 'make' -end - -file 'nu/validator.so' do - mkdir_p 'nu', :verbose => false - system 'ln -s -t nu ../validator.so' -end - -# tasks - -task :test => [:default, 'nu/validator.so'] do - ENV['LD_LIBRARY_PATH']='lib' - sh 'ruby test/fonts.rb test/google.html' -end - -task :clean do - rm_rf %W(classes lib nu mkmf.log headers/DomUtils.h headers/#{hdb}.h) + - Dir['*.o'] + Dir['*.so'] -end - -task :clobber => :clean do - rm_rf %w(headers Makefile) -end diff --git a/ruby-gcj/extconf.rb b/ruby-gcj/extconf.rb deleted file mode 100644 index 415cf430..00000000 --- a/ruby-gcj/extconf.rb +++ /dev/null @@ -1,45 +0,0 @@ -require 'mkmf' - -# system dependencies -gcj = with_config('gcj', '/usr/share/java/libgcj.jar') - -# headers for JAXP -CONFIG['CC'] = 'g++' -with_cppflags('-xc++') do - - unless find_header('org/w3c/dom/Document.h', 'headers') - - `jar tf #{gcj}`.split.each do |file| - next unless file =~ /\.class$/ - next unless file =~ /^(javax|org)\/(w3c|xml)/ - next if file.include? '$' - - dest = 'headers/' + file.sub(/\.class$/,'.h') - name = file.sub(/\.class$/,'').gsub('/','.') - - next if File.exist? 
dest - - cmd = "gcjh -cp #{gcj} -o #{dest} #{name}" - puts cmd - break unless system cmd - system "ruby -pi -e '$_.sub!(/namespace namespace$/," + - "\"namespace namespace$\")' #{dest}" - system "ruby -pi -e '$_.sub!(/::namespace::/," + - "\"::namespace$::\")' #{dest}" - end - - exit unless find_header('org/w3c/dom/Document.h', 'headers') - end - - find_header 'nu/validator/htmlparser/dom/HtmlDocumentBuilder.h', 'headers' -end - -# Java libraries -Config::CONFIG['CC'] = 'g++ -shared' -dir_config('nu-htmlparser', nil, 'lib') -have_library 'nu-htmlparser' -have_library 'nu-icu' -have_library 'nu-chardet' - -# Ruby library -create_makefile 'nu/validator' diff --git a/ruby-gcj/test/domencoding.rb b/ruby-gcj/test/domencoding.rb deleted file mode 100644 index 1beb94c1..00000000 --- a/ruby-gcj/test/domencoding.rb +++ /dev/null @@ -1,5 +0,0 @@ -require 'nu/validator' - -ARGV.each do |arg| - puts Nu::Validator::parse(open(arg)).root.name -end diff --git a/ruby-gcj/test/fonts.rb b/ruby-gcj/test/fonts.rb deleted file mode 100644 index 595e3ae0..00000000 --- a/ruby-gcj/test/fonts.rb +++ /dev/null @@ -1,11 +0,0 @@ -require 'nu/validator' -require 'open-uri' - -ARGV.each do |arg| - doc = Nu::Validator::parse(open(arg)) - doc.xpath("//*[local-name()='font']").each do |font| - font.attributes.each do |name, attr| - puts "#{name} => #{attr.value}" - end - end -end diff --git a/ruby-gcj/test/google.html b/ruby-gcj/test/google.html deleted file mode 100644 index 8d2183b2..00000000 --- a/ruby-gcj/test/google.html +++ /dev/null @@ -1,10 +0,0 @@ -Google



     
      Advanced Search
      Preferences
      Language Tools

    Find an opportunity to volunteer in your community today.


    Advertising Programs - Business Solutions - About Google

    ©2009 - Privacy

    \ No newline at end of file diff --git a/ruby-gcj/test/greek.xml b/ruby-gcj/test/greek.xml deleted file mode 100644 index a14d23eb..00000000 --- a/ruby-gcj/test/greek.xml +++ /dev/null @@ -1,2 +0,0 @@ - - diff --git a/ruby-gcj/validator.cpp b/ruby-gcj/validator.cpp deleted file mode 100644 index aadd24ab..00000000 --- a/ruby-gcj/validator.cpp +++ /dev/null @@ -1,210 +0,0 @@ -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "nu/validator/htmlparser/dom/HtmlDocumentBuilder.h" - -#include "DomUtils.h" - -#include "ruby.h" - -using namespace java::io; -using namespace java::lang; -using namespace java::util; -using namespace javax::xml::parsers; -using namespace javax::xml::xpath; -using namespace nu::validator::htmlparser::dom; -using namespace org::w3c::dom; -using namespace org::xml::sax; - -static VALUE jaxp_Document; -static VALUE jaxp_Attr; -static VALUE jaxp_Element; -static ID ID_read; -static ID ID_doc; -static ID ID_element; - -// convert a Java string into a Ruby string -static VALUE j2r(String *string) { - if (string == NULL) return Qnil; - jint len = JvGetStringUTFLength(string); - char buf[len]; - JvGetStringUTFRegion(string, 0, len, buf); - return rb_str_new(buf, len); -} - -// convert a Ruby string into a Java string -static String *r2j(VALUE string) { - return JvNewStringUTF(RSTRING(string)->ptr); -} - -// release the Java Document associated with this Ruby Document -static void vnu_document_free(Document *doc) { - DomUtils::unpin(doc); -} - -// Nu::Validator::parse( string|file ) -static VALUE vnu_parse(VALUE self, VALUE input) { - HtmlDocumentBuilder *parser = new HtmlDocumentBuilder(); - - // read file-like objects into memory. 
TODO: buffer such objects - if (rb_respond_to(input, ID_read)) - input = rb_funcall(input, ID_read, 0); - - // convert input in to a ByteArrayInputStream - jbyteArray bytes = JvNewByteArray(RSTRING(input)->len); - memcpy(elements(bytes), RSTRING(input)->ptr, RSTRING(input)->len); - InputSource *source = new InputSource(new ByteArrayInputStream(bytes)); - - // parse, pin, and wrap - Document *doc = parser->parse(source); - DomUtils::pin(doc); - return Data_Wrap_Struct(jaxp_Document, NULL, vnu_document_free, doc); -} - -// Jaxp::parse( string|file ) -static VALUE jaxp_parse(VALUE self, VALUE input) { - DocumentBuilderFactory *factory = DocumentBuilderFactory::newInstance(); - DocumentBuilder *parser = factory->newDocumentBuilder(); - - // read file-like objects into memory. TODO: buffer such objects - if (rb_respond_to(input, ID_read)) - input = rb_funcall(input, ID_read, 0); - - try { - jbyteArray bytes = JvNewByteArray(RSTRING(input)->len); - memcpy(elements(bytes), RSTRING(input)->ptr, RSTRING(input)->len); - Document *doc = parser->parse(new ByteArrayInputStream(bytes)); - DomUtils::pin(doc); - return Data_Wrap_Struct(jaxp_Document, NULL, vnu_document_free, doc); - } catch (java::lang::Throwable *ex) { - ex->printStackTrace(); - return Qnil; - } -} - - -// Nu::Validator::Document#encoding -static VALUE jaxp_document_encoding(VALUE rdoc) { - Document *jdoc; - Data_Get_Struct(rdoc, Document, jdoc); - return j2r(jdoc->getXmlEncoding()); -} - -// Nu::Validator::Document#root -static VALUE jaxp_document_root(VALUE rdoc) { - Document *jdoc; - Data_Get_Struct(rdoc, Document, jdoc); - - Element *jelement = jdoc->getDocumentElement(); - if (jelement==NULL) return Qnil; - - VALUE relement = Data_Wrap_Struct(jaxp_Element, NULL, NULL, jelement); - rb_ivar_set(relement, ID_doc, rdoc); - return relement; -} - -// Nu::Validator::Document#xpath -static VALUE jaxp_document_xpath(VALUE rdoc, VALUE path) { - Document *jdoc; - Data_Get_Struct(rdoc, Document, jdoc); - - Element 
*jelement = jdoc->getDocumentElement(); - if (jelement==NULL) return Qnil; - - XPath *xpath = XPathFactory::newInstance()->newXPath(); - XPathExpression *expr = xpath->compile(r2j(path)); - NodeList *list = (NodeList*) expr->evaluate(jdoc, XPathConstants::NODESET); - - VALUE result = rb_ary_new(); - for (int i=0; igetLength(); i++) { - VALUE relement = Data_Wrap_Struct(jaxp_Element, NULL, NULL, list->item(i)); - rb_ivar_set(relement, ID_doc, rdoc); - rb_ary_push(result, relement); - } - return result; -} - -// Nu::Validator::Element#name -static VALUE jaxp_element_name(VALUE relement) { - Element *jelement; - Data_Get_Struct(relement, Element, jelement); - return j2r(jelement->getNodeName()); -} - -// Nu::Validator::Element#attributes -static VALUE jaxp_element_attributes(VALUE relement) { - Element *jelement; - Data_Get_Struct(relement, Element, jelement); - VALUE result = rb_hash_new(); - NamedNodeMap *map = jelement->getAttributes(); - for (int i=0; igetLength(); i++) { - Attr *jattr = (Attr *) map->item(i); - VALUE rattr = Data_Wrap_Struct(jaxp_Attr, NULL, NULL, jattr); - rb_ivar_set(rattr, ID_element, relement); - rb_hash_aset(result, j2r(jattr->getName()), rattr); - } - return result; -} - -// Nu::Validator::Attribute#value -static VALUE jaxp_attribute_value(VALUE rattribute) { - Attr *jattribute; - Data_Get_Struct(rattribute, Attr, jattribute); - return j2r(jattribute->getValue()); -} - -typedef VALUE (ruby_method)(...); - -// Nu::Validator module initialization -extern "C" void Init_validator() { - JvCreateJavaVM(NULL); - JvAttachCurrentThread(NULL, NULL); - JvInitClass(&DomUtils::class$); - JvInitClass(&XPathFactory::class$); - JvInitClass(&XPathConstants::class$); - - VALUE jaxp = rb_define_module("Jaxp"); - rb_define_singleton_method(jaxp, "parse", (ruby_method*)&jaxp_parse, 1); - - VALUE nu = rb_define_module("Nu"); - VALUE validator = rb_define_module_under(nu, "Validator"); - rb_define_singleton_method(validator, "parse", (ruby_method*)&vnu_parse, 1); 
- - jaxp_Document = rb_define_class_under(jaxp, "Document", rb_cObject); - rb_define_method(jaxp_Document, "encoding", - (ruby_method*)&jaxp_document_encoding, 0); - rb_define_method(jaxp_Document, "root", - (ruby_method*)&jaxp_document_root, 0); - rb_define_method(jaxp_Document, "xpath", - (ruby_method*)&jaxp_document_xpath, 1); - - jaxp_Element = rb_define_class_under(jaxp, "Element", rb_cObject); - rb_define_method(jaxp_Element, "name", - (ruby_method*)&jaxp_element_name, 0); - rb_define_method(jaxp_Element, "attributes", - (ruby_method*)&jaxp_element_attributes, 0); - - jaxp_Attr = rb_define_class_under(jaxp, "Attr", rb_cObject); - rb_define_method(jaxp_Attr, "value", - (ruby_method*)&jaxp_attribute_value, 0); - - ID_read = rb_intern("read"); - ID_doc = rb_intern("@doc"); - ID_element = rb_intern("@element"); -} From d6df8ad7dd90ad169d8a5a3d4dda3163e106fc20 Mon Sep 17 00:00:00 2001 From: Anthony Vanelverdinghe Date: Thu, 23 Apr 2020 19:58:23 +0200 Subject: [PATCH 07/30] Clean up: super --- .../translatable/java/io/IOException.java | 42 --- .../translatable/org/xml/sax/Attributes.java | 257 --------------- .../org/xml/sax/ErrorHandler.java | 139 -------- .../translatable/org/xml/sax/Locator.java | 136 -------- .../org/xml/sax/SAXException.java | 153 --------- .../org/xml/sax/SAXParseException.java | 269 ---------------- .../translatable/org/xml/sax/package.html | 297 ------------------ 7 files changed, 1293 deletions(-) delete mode 100644 super/nu/validator/htmlparser/translatable/java/io/IOException.java delete mode 100644 super/nu/validator/htmlparser/translatable/org/xml/sax/Attributes.java delete mode 100644 super/nu/validator/htmlparser/translatable/org/xml/sax/ErrorHandler.java delete mode 100644 super/nu/validator/htmlparser/translatable/org/xml/sax/Locator.java delete mode 100644 super/nu/validator/htmlparser/translatable/org/xml/sax/SAXException.java delete mode 100644 super/nu/validator/htmlparser/translatable/org/xml/sax/SAXParseException.java delete 
mode 100644 super/nu/validator/htmlparser/translatable/org/xml/sax/package.html diff --git a/super/nu/validator/htmlparser/translatable/java/io/IOException.java b/super/nu/validator/htmlparser/translatable/java/io/IOException.java deleted file mode 100644 index f323f1e3..00000000 --- a/super/nu/validator/htmlparser/translatable/java/io/IOException.java +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright (c) 2009 Mozilla Foundation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- */ - -package java.io; - -public class IOException extends Exception { - - public IOException() { - } - - public IOException(String arg0) { - super(arg0); - } - - public IOException(Throwable arg0) { - super(arg0); - } - - public IOException(String arg0, Throwable arg1) { - super(arg0, arg1); - } - -} diff --git a/super/nu/validator/htmlparser/translatable/org/xml/sax/Attributes.java b/super/nu/validator/htmlparser/translatable/org/xml/sax/Attributes.java deleted file mode 100644 index b25432d4..00000000 --- a/super/nu/validator/htmlparser/translatable/org/xml/sax/Attributes.java +++ /dev/null @@ -1,257 +0,0 @@ -// Attributes.java - attribute list with Namespace support -// http://www.saxproject.org -// Written by David Megginson -// NO WARRANTY! This class is in the public domain. -// $Id: Attributes.java,v 1.13 2004/03/18 12:28:05 dmegginson Exp $ - -package org.xml.sax; - - -/** - * Interface for a list of XML attributes. - * - *
    - * This module, both source code and documentation, is in the - * Public Domain, and comes with NO WARRANTY. - * See http://www.saxproject.org - * for further information. - *
    - * - *

    This interface allows access to a list of attributes in - * three different ways:

    - * - *
      - *
    1. by attribute index;
    2. - *
    3. by Namespace-qualified name; or
    4. - *
    5. by qualified (prefixed) name.
    6. - *
    - * - *

    The list will not contain attributes that were declared - * #IMPLIED but not specified in the start tag. It will also not - * contain attributes used as Namespace declarations (xmlns*) unless - * the http://xml.org/sax/features/namespace-prefixes - * feature is set to true (it is false by - * default). - * Because SAX2 conforms to the original "Namespaces in XML" - * recommendation, it normally does not - * give namespace declaration attributes a namespace URI. - *

    - * - *

    Some SAX2 parsers may support using an optional feature flag - * (http://xml.org/sax/features/xmlns-uris) to request - * that those attributes be given URIs, conforming to a later - * backwards-incompatible revision of that recommendation. (The - * attribute's "local name" will be the prefix, or "xmlns" when - * defining a default element namespace.) For portability, handler - * code should always resolve that conflict, rather than requiring - * parsers that can change the setting of that feature flag.

    - * - *

    If the namespace-prefixes feature (see above) is - * false, access by qualified name may not be available; if - * the http://xml.org/sax/features/namespaces feature is - * false, access by Namespace-qualified names may not be - * available.

    - * - *

    This interface replaces the now-deprecated SAX1 {@link - * org.xml.sax.AttributeList AttributeList} interface, which does not - * contain Namespace support. In addition to Namespace support, it - * adds the getIndex methods (below).

    - * - *

    The order of attributes in the list is unspecified, and will - * vary from implementation to implementation.

    - * - * @since SAX 2.0 - * @author David Megginson - * @version 2.0.1 (sax2r2) - * @see org.xml.sax.helpers.AttributesImpl - * @see org.xml.sax.ext.DeclHandler#attributeDecl - */ -public interface Attributes -{ - - - //////////////////////////////////////////////////////////////////// - // Indexed access. - //////////////////////////////////////////////////////////////////// - - - /** - * Return the number of attributes in the list. - * - *

    Once you know the number of attributes, you can iterate - * through the list.

    - * - * @return The number of attributes in the list. - * @see #getURI(int) - * @see #getLocalName(int) - * @see #getQName(int) - * @see #getType(int) - * @see #getValue(int) - */ - public abstract int getLength (); - - - /** - * Look up an attribute's Namespace URI by index. - * - * @param index The attribute index (zero-based). - * @return The Namespace URI, or the empty string if none - * is available, or null if the index is out of - * range. - * @see #getLength - */ - public abstract String getURI (int index); - - - /** - * Look up an attribute's local name by index. - * - * @param index The attribute index (zero-based). - * @return The local name, or the empty string if Namespace - * processing is not being performed, or null - * if the index is out of range. - * @see #getLength - */ - public abstract String getLocalName (int index); - - - /** - * Look up an attribute's XML qualified (prefixed) name by index. - * - * @param index The attribute index (zero-based). - * @return The XML qualified name, or the empty string - * if none is available, or null if the index - * is out of range. - * @see #getLength - */ - public abstract String getQName (int index); - - - /** - * Look up an attribute's type by index. - * - *

    The attribute type is one of the strings "CDATA", "ID", - * "IDREF", "IDREFS", "NMTOKEN", "NMTOKENS", "ENTITY", "ENTITIES", - * or "NOTATION" (always in upper case).

    - * - *

    If the parser has not read a declaration for the attribute, - * or if the parser does not report attribute types, then it must - * return the value "CDATA" as stated in the XML 1.0 Recommendation - * (clause 3.3.3, "Attribute-Value Normalization").

    - * - *

    For an enumerated attribute that is not a notation, the - * parser will report the type as "NMTOKEN".

    - * - * @param index The attribute index (zero-based). - * @return The attribute's type as a string, or null if the - * index is out of range. - * @see #getLength - */ - public abstract String getType (int index); - - - /** - * Look up an attribute's value by index. - * - *

    If the attribute value is a list of tokens (IDREFS, - * ENTITIES, or NMTOKENS), the tokens will be concatenated - * into a single string with each token separated by a - * single space.

    - * - * @param index The attribute index (zero-based). - * @return The attribute's value as a string, or null if the - * index is out of range. - * @see #getLength - */ - public abstract String getValue (int index); - - - - //////////////////////////////////////////////////////////////////// - // Name-based query. - //////////////////////////////////////////////////////////////////// - - - /** - * Look up the index of an attribute by Namespace name. - * - * @param uri The Namespace URI, or the empty string if - * the name has no Namespace URI. - * @param localName The attribute's local name. - * @return The index of the attribute, or -1 if it does not - * appear in the list. - */ - public int getIndex (String uri, String localName); - - - /** - * Look up the index of an attribute by XML qualified (prefixed) name. - * - * @param qName The qualified (prefixed) name. - * @return The index of the attribute, or -1 if it does not - * appear in the list. - */ - public int getIndex (String qName); - - - /** - * Look up an attribute's type by Namespace name. - * - *

    See {@link #getType(int) getType(int)} for a description - * of the possible types.

    - * - * @param uri The Namespace URI, or the empty String if the - * name has no Namespace URI. - * @param localName The local name of the attribute. - * @return The attribute type as a string, or null if the - * attribute is not in the list or if Namespace - * processing is not being performed. - */ - public abstract String getType (String uri, String localName); - - - /** - * Look up an attribute's type by XML qualified (prefixed) name. - * - *

    See {@link #getType(int) getType(int)} for a description - * of the possible types.

    - * - * @param qName The XML qualified name. - * @return The attribute type as a string, or null if the - * attribute is not in the list or if qualified names - * are not available. - */ - public abstract String getType (String qName); - - - /** - * Look up an attribute's value by Namespace name. - * - *

    See {@link #getValue(int) getValue(int)} for a description - * of the possible values.

    - * - * @param uri The Namespace URI, or the empty String if the - * name has no Namespace URI. - * @param localName The local name of the attribute. - * @return The attribute value as a string, or null if the - * attribute is not in the list. - */ - public abstract String getValue (String uri, String localName); - - - /** - * Look up an attribute's value by XML qualified (prefixed) name. - * - *

    See {@link #getValue(int) getValue(int)} for a description - * of the possible values.

    - * - * @param qName The XML qualified name. - * @return The attribute value as a string, or null if the - * attribute is not in the list or if qualified names - * are not available. - */ - public abstract String getValue (String qName); - -} - -// end of Attributes.java diff --git a/super/nu/validator/htmlparser/translatable/org/xml/sax/ErrorHandler.java b/super/nu/validator/htmlparser/translatable/org/xml/sax/ErrorHandler.java deleted file mode 100644 index 37d25014..00000000 --- a/super/nu/validator/htmlparser/translatable/org/xml/sax/ErrorHandler.java +++ /dev/null @@ -1,139 +0,0 @@ -// SAX error handler. -// http://www.saxproject.org -// No warranty; no copyright -- use this as you will. -// $Id: ErrorHandler.java,v 1.10 2004/03/08 13:01:00 dmegginson Exp $ - -package org.xml.sax; - - -/** - * Basic interface for SAX error handlers. - * - *
    - * This module, both source code and documentation, is in the - * Public Domain, and comes with NO WARRANTY. - * See http://www.saxproject.org - * for further information. - *
    - * - *

    If a SAX application needs to implement customized error - * handling, it must implement this interface and then register an - * instance with the XML reader using the - * {@link org.xml.sax.XMLReader#setErrorHandler setErrorHandler} - * method. The parser will then report all errors and warnings - * through this interface.

    - * - *

    WARNING: If an application does not - * register an ErrorHandler, XML parsing errors will go unreported, - * except that SAXParseExceptions will be thrown for fatal errors. - * In order to detect validity errors, an ErrorHandler that does something - * with {@link #error error()} calls must be registered.

    - * - *

    For XML processing errors, a SAX driver must use this interface - * in preference to throwing an exception: it is up to the application - * to decide whether to throw an exception for different types of - * errors and warnings. Note, however, that there is no requirement that - * the parser continue to report additional errors after a call to - * {@link #fatalError fatalError}. In other words, a SAX driver class - * may throw an exception after reporting any fatalError. - * Also parsers may throw appropriate exceptions for non-XML errors. - * For example, {@link XMLReader#parse XMLReader.parse()} would throw - * an IOException for errors accessing entities or the document.

    - * - * @since SAX 1.0 - * @author David Megginson - * @version 2.0.1+ (sax2r3pre1) - * @see org.xml.sax.XMLReader#setErrorHandler - * @see org.xml.sax.SAXParseException - */ -public interface ErrorHandler { - - - /** - * Receive notification of a warning. - * - *

    SAX parsers will use this method to report conditions that - * are not errors or fatal errors as defined by the XML - * recommendation. The default behaviour is to take no - * action.

    - * - *

    The SAX parser must continue to provide normal parsing events - * after invoking this method: it should still be possible for the - * application to process the document through to the end.

    - * - *

    Filters may use this method to report other, non-XML warnings - * as well.

    - * - * @param exception The warning information encapsulated in a - * SAX parse exception. - * @exception org.xml.sax.SAXException Any SAX exception, possibly - * wrapping another exception. - * @see org.xml.sax.SAXParseException - */ - public abstract void warning (SAXParseException exception) - throws SAXException; - - - /** - * Receive notification of a recoverable error. - * - *

    This corresponds to the definition of "error" in section 1.2 - * of the W3C XML 1.0 Recommendation. For example, a validating - * parser would use this callback to report the violation of a - * validity constraint. The default behaviour is to take no - * action.

    - * - *

    The SAX parser must continue to provide normal parsing - * events after invoking this method: it should still be possible - * for the application to process the document through to the end. - * If the application cannot do so, then the parser should report - * a fatal error even if the XML recommendation does not require - * it to do so.

    - * - *

    Filters may use this method to report other, non-XML errors - * as well.

    - * - * @param exception The error information encapsulated in a - * SAX parse exception. - * @exception org.xml.sax.SAXException Any SAX exception, possibly - * wrapping another exception. - * @see org.xml.sax.SAXParseException - */ - public abstract void error (SAXParseException exception) - throws SAXException; - - - /** - * Receive notification of a non-recoverable error. - * - *

    There is an apparent contradiction between the - * documentation for this method and the documentation for {@link - * org.xml.sax.ContentHandler#endDocument}. Until this ambiguity - * is resolved in a future major release, clients should make no - * assumptions about whether endDocument() will or will not be - * invoked when the parser has reported a fatalError() or thrown - * an exception.

    - * - *

    This corresponds to the definition of "fatal error" in - * section 1.2 of the W3C XML 1.0 Recommendation. For example, a - * parser would use this callback to report the violation of a - * well-formedness constraint.

    - * - *

    The application must assume that the document is unusable - * after the parser has invoked this method, and should continue - * (if at all) only for the sake of collecting additional error - * messages: in fact, SAX parsers are free to stop reporting any - * other events once this method has been invoked.

    - * - * @param exception The error information encapsulated in a - * SAX parse exception. - * @exception org.xml.sax.SAXException Any SAX exception, possibly - * wrapping another exception. - * @see org.xml.sax.SAXParseException - */ - public abstract void fatalError (SAXParseException exception) - throws SAXException; - -} - -// end of ErrorHandler.java diff --git a/super/nu/validator/htmlparser/translatable/org/xml/sax/Locator.java b/super/nu/validator/htmlparser/translatable/org/xml/sax/Locator.java deleted file mode 100644 index f8f3484c..00000000 --- a/super/nu/validator/htmlparser/translatable/org/xml/sax/Locator.java +++ /dev/null @@ -1,136 +0,0 @@ -// SAX locator interface for document events. -// http://www.saxproject.org -// No warranty; no copyright -- use this as you will. -// $Id: Locator.java,v 1.8 2002/01/30 21:13:47 dbrownell Exp $ - -package org.xml.sax; - - -/** - * Interface for associating a SAX event with a document location. - * - *
    - * This module, both source code and documentation, is in the - * Public Domain, and comes with NO WARRANTY. - * See http://www.saxproject.org - * for further information. - *
    - * - *

    If a SAX parser provides location information to the SAX - * application, it does so by implementing this interface and then - * passing an instance to the application using the content - * handler's {@link org.xml.sax.ContentHandler#setDocumentLocator - * setDocumentLocator} method. The application can use the - * object to obtain the location of any other SAX event - * in the XML source document.

    - * - *

    Note that the results returned by the object will be valid only - * during the scope of each callback method: the application - * will receive unpredictable results if it attempts to use the - * locator at any other time, or after parsing completes.

    - * - *

    SAX parsers are not required to supply a locator, but they are - * very strongly encouraged to do so. If the parser supplies a - * locator, it must do so before reporting any other document events. - * If no locator has been set by the time the application receives - * the {@link org.xml.sax.ContentHandler#startDocument startDocument} - * event, the application should assume that a locator is not - * available.

    - * - * @since SAX 1.0 - * @author David Megginson - * @version 2.0.1 (sax2r2) - * @see org.xml.sax.ContentHandler#setDocumentLocator - */ -public interface Locator { - - - /** - * Return the public identifier for the current document event. - * - *

    The return value is the public identifier of the document - * entity or of the external parsed entity in which the markup - * triggering the event appears.

    - * - * @return A string containing the public identifier, or - * null if none is available. - * @see #getSystemId - */ - public abstract String getPublicId (); - - - /** - * Return the system identifier for the current document event. - * - *

    The return value is the system identifier of the document - * entity or of the external parsed entity in which the markup - * triggering the event appears.

    - * - *

    If the system identifier is a URL, the parser must resolve it - * fully before passing it to the application. For example, a file - * name must always be provided as a file:... URL, and other - * kinds of relative URI are also resolved against their bases.

    - * - * @return A string containing the system identifier, or null - * if none is available. - * @see #getPublicId - */ - public abstract String getSystemId (); - - - /** - * Return the line number where the current document event ends. - * Lines are delimited by line ends, which are defined in - * the XML specification. - * - *

    Warning: The return value from the method - * is intended only as an approximation for the sake of diagnostics; - * it is not intended to provide sufficient information - * to edit the character content of the original XML document. - * In some cases, these "line" numbers match what would be displayed - * as columns, and in others they may not match the source text - * due to internal entity expansion.

    - * - *

    The return value is an approximation of the line number - * in the document entity or external parsed entity where the - * markup triggering the event appears.

    - * - *

    If possible, the SAX driver should provide the line position - * of the first character after the text associated with the document - * event. The first line is line 1.

    - * - * @return The line number, or -1 if none is available. - * @see #getColumnNumber - */ - public abstract int getLineNumber (); - - - /** - * Return the column number where the current document event ends. - * This is one-based number of Java char values since - * the last line end. - * - *

    Warning: The return value from the method - * is intended only as an approximation for the sake of diagnostics; - * it is not intended to provide sufficient information - * to edit the character content of the original XML document. - * For example, when lines contain combining character sequences, wide - * characters, surrogate pairs, or bi-directional text, the value may - * not correspond to the column in a text editor's display.

    - * - *

    The return value is an approximation of the column number - * in the document entity or external parsed entity where the - * markup triggering the event appears.

    - * - *

    If possible, the SAX driver should provide the line position - * of the first character after the text associated with the document - * event. The first column in each line is column 1.

    - * - * @return The column number, or -1 if none is available. - * @see #getLineNumber - */ - public abstract int getColumnNumber (); - -} - -// end of Locator.java diff --git a/super/nu/validator/htmlparser/translatable/org/xml/sax/SAXException.java b/super/nu/validator/htmlparser/translatable/org/xml/sax/SAXException.java deleted file mode 100644 index 256719ce..00000000 --- a/super/nu/validator/htmlparser/translatable/org/xml/sax/SAXException.java +++ /dev/null @@ -1,153 +0,0 @@ -// SAX exception class. -// http://www.saxproject.org -// No warranty; no copyright -- use this as you will. -// $Id: SAXException.java,v 1.7 2002/01/30 21:13:48 dbrownell Exp $ - -package org.xml.sax; - -/** - * Encapsulate a general SAX error or warning. - * - *
    - * This module, both source code and documentation, is in the - * Public Domain, and comes with NO WARRANTY. - * See http://www.saxproject.org - * for further information. - *
    - * - *

    This class can contain basic error or warning information from - * either the XML parser or the application: a parser writer or - * application writer can subclass it to provide additional - * functionality. SAX handlers may throw this exception or - * any exception subclassed from it.

    - * - *

    If the application needs to pass through other types of - * exceptions, it must wrap those exceptions in a SAXException - * or an exception derived from a SAXException.

    - * - *

    If the parser or application needs to include information about a - * specific location in an XML document, it should use the - * {@link org.xml.sax.SAXParseException SAXParseException} subclass.

    - * - * @since SAX 1.0 - * @author David Megginson - * @version 2.0.1 (sax2r2) - * @see org.xml.sax.SAXParseException - */ -public class SAXException extends Exception { - - - /** - * Create a new SAXException. - */ - public SAXException () - { - super(); - this.exception = null; - } - - - /** - * Create a new SAXException. - * - * @param message The error or warning message. - */ - public SAXException (String message) { - super(message); - this.exception = null; - } - - - /** - * Create a new SAXException wrapping an existing exception. - * - *

    The existing exception will be embedded in the new - * one, and its message will become the default message for - * the SAXException.

    - * - * @param e The exception to be wrapped in a SAXException. - */ - public SAXException (Exception e) - { - super(); - this.exception = e; - } - - - /** - * Create a new SAXException from an existing exception. - * - *

    The existing exception will be embedded in the new - * one, but the new exception will have its own message.

    - * - * @param message The detail message. - * @param e The exception to be wrapped in a SAXException. - */ - public SAXException (String message, Exception e) - { - super(message); - this.exception = e; - } - - - /** - * Return a detail message for this exception. - * - *

    If there is an embedded exception, and if the SAXException - * has no detail message of its own, this method will return - * the detail message from the embedded exception.

    - * - * @return The error or warning message. - */ - public String getMessage () - { - String message = super.getMessage(); - - if (message == null && exception != null) { - return exception.getMessage(); - } else { - return message; - } - } - - - /** - * Return the embedded exception, if any. - * - * @return The embedded exception, or null if there is none. - */ - public Exception getException () - { - return exception; - } - - - /** - * Override toString to pick up any embedded exception. - * - * @return A string representation of this exception. - */ - public String toString () - { - if (exception != null) { - return exception.toString(); - } else { - return super.toString(); - } - } - - - - ////////////////////////////////////////////////////////////////////// - // Internal state. - ////////////////////////////////////////////////////////////////////// - - - /** - * @serial The embedded exception if tunnelling, or null. - */ - private Exception exception; - -} - -// end of SAXException.java diff --git a/super/nu/validator/htmlparser/translatable/org/xml/sax/SAXParseException.java b/super/nu/validator/htmlparser/translatable/org/xml/sax/SAXParseException.java deleted file mode 100644 index 1df5e142..00000000 --- a/super/nu/validator/htmlparser/translatable/org/xml/sax/SAXParseException.java +++ /dev/null @@ -1,269 +0,0 @@ -// SAX exception class. -// http://www.saxproject.org -// No warranty; no copyright -- use this as you will. -// $Id: SAXParseException.java,v 1.11 2004/04/21 13:05:02 dmegginson Exp $ - -package org.xml.sax; - -/** - * Encapsulate an XML parse error or warning. - * - *
    - * This module, both source code and documentation, is in the - * Public Domain, and comes with NO WARRANTY. - * See http://www.saxproject.org - * for further information. - *
    - * - *

    This exception may include information for locating the error - * in the original XML document, as if it came from a {@link Locator} - * object. Note that although the application - * will receive a SAXParseException as the argument to the handlers - * in the {@link org.xml.sax.ErrorHandler ErrorHandler} interface, - * the application is not actually required to throw the exception; - * instead, it can simply read the information in it and take a - * different action.

    - * - *

    Since this exception is a subclass of {@link org.xml.sax.SAXException - * SAXException}, it inherits the ability to wrap another exception.

    - * - * @since SAX 1.0 - * @author David Megginson - * @version 2.0.1 (sax2r2) - * @see org.xml.sax.SAXException - * @see org.xml.sax.Locator - * @see org.xml.sax.ErrorHandler - */ -public class SAXParseException extends SAXException { - - - ////////////////////////////////////////////////////////////////////// - // Constructors. - ////////////////////////////////////////////////////////////////////// - - - /** - * Create a new SAXParseException from a message and a Locator. - * - *

    This constructor is especially useful when an application is - * creating its own exception from within a {@link org.xml.sax.ContentHandler - * ContentHandler} callback.

    - * - * @param message The error or warning message. - * @param locator The locator object for the error or warning (may be - * null). - * @see org.xml.sax.Locator - */ - public SAXParseException (String message, Locator locator) { - super(message); - if (locator != null) { - init(locator.getPublicId(), locator.getSystemId(), - locator.getLineNumber(), locator.getColumnNumber()); - } else { - init(null, null, -1, -1); - } - } - - - /** - * Wrap an existing exception in a SAXParseException. - * - *

    This constructor is especially useful when an application is - * creating its own exception from within a {@link org.xml.sax.ContentHandler - * ContentHandler} callback, and needs to wrap an existing exception that is not a - * subclass of {@link org.xml.sax.SAXException SAXException}.

    - * - * @param message The error or warning message, or null to - * use the message from the embedded exception. - * @param locator The locator object for the error or warning (may be - * null). - * @param e Any exception. - * @see org.xml.sax.Locator - */ - public SAXParseException (String message, Locator locator, - Exception e) { - super(message, e); - if (locator != null) { - init(locator.getPublicId(), locator.getSystemId(), - locator.getLineNumber(), locator.getColumnNumber()); - } else { - init(null, null, -1, -1); - } - } - - - /** - * Create a new SAXParseException. - * - *

    This constructor is most useful for parser writers.

    - * - *

    All parameters except the message are as if - * they were provided by a {@link Locator}. For example, if the - * system identifier is a URL (including relative filename), the - * caller must resolve it fully before creating the exception.

    - * - * - * @param message The error or warning message. - * @param publicId The public identifier of the entity that generated - * the error or warning. - * @param systemId The system identifier of the entity that generated - * the error or warning. - * @param lineNumber The line number of the end of the text that - * caused the error or warning. - * @param columnNumber The column number of the end of the text that - * cause the error or warning. - */ - public SAXParseException (String message, String publicId, String systemId, - int lineNumber, int columnNumber) - { - super(message); - init(publicId, systemId, lineNumber, columnNumber); - } - - - /** - * Create a new SAXParseException with an embedded exception. - * - *

    This constructor is most useful for parser writers who - * need to wrap an exception that is not a subclass of - * {@link org.xml.sax.SAXException SAXException}.

    - * - *

    All parameters except the message and exception are as if - * they were provided by a {@link Locator}. For example, if the - * system identifier is a URL (including relative filename), the - * caller must resolve it fully before creating the exception.

    - * - * @param message The error or warning message, or null to use - * the message from the embedded exception. - * @param publicId The public identifier of the entity that generated - * the error or warning. - * @param systemId The system identifier of the entity that generated - * the error or warning. - * @param lineNumber The line number of the end of the text that - * caused the error or warning. - * @param columnNumber The column number of the end of the text that - * cause the error or warning. - * @param e Another exception to embed in this one. - */ - public SAXParseException (String message, String publicId, String systemId, - int lineNumber, int columnNumber, Exception e) - { - super(message, e); - init(publicId, systemId, lineNumber, columnNumber); - } - - - /** - * Internal initialization method. - * - * @param publicId The public identifier of the entity which generated the exception, - * or null. - * @param systemId The system identifier of the entity which generated the exception, - * or null. - * @param lineNumber The line number of the error, or -1. - * @param columnNumber The column number of the error, or -1. - */ - private void init (String publicId, String systemId, - int lineNumber, int columnNumber) - { - this.publicId = publicId; - this.systemId = systemId; - this.lineNumber = lineNumber; - this.columnNumber = columnNumber; - } - - - /** - * Get the public identifier of the entity where the exception occurred. - * - * @return A string containing the public identifier, or null - * if none is available. - * @see org.xml.sax.Locator#getPublicId - */ - public String getPublicId () - { - return this.publicId; - } - - - /** - * Get the system identifier of the entity where the exception occurred. - * - *

    If the system identifier is a URL, it will have been resolved - * fully.

    - * - * @return A string containing the system identifier, or null - * if none is available. - * @see org.xml.sax.Locator#getSystemId - */ - public String getSystemId () - { - return this.systemId; - } - - - /** - * The line number of the end of the text where the exception occurred. - * - *

    The first line is line 1.

    - * - * @return An integer representing the line number, or -1 - * if none is available. - * @see org.xml.sax.Locator#getLineNumber - */ - public int getLineNumber () - { - return this.lineNumber; - } - - - /** - * The column number of the end of the text where the exception occurred. - * - *

    The first column in a line is position 1.

    - * - * @return An integer representing the column number, or -1 - * if none is available. - * @see org.xml.sax.Locator#getColumnNumber - */ - public int getColumnNumber () - { - return this.columnNumber; - } - - - ////////////////////////////////////////////////////////////////////// - // Internal state. - ////////////////////////////////////////////////////////////////////// - - - /** - * @serial The public identifier, or null. - * @see #getPublicId - */ - private String publicId; - - - /** - * @serial The system identifier, or null. - * @see #getSystemId - */ - private String systemId; - - - /** - * @serial The line number, or -1. - * @see #getLineNumber - */ - private int lineNumber; - - - /** - * @serial The column number, or -1. - * @see #getColumnNumber - */ - private int columnNumber; - -} - -// end of SAXParseException.java diff --git a/super/nu/validator/htmlparser/translatable/org/xml/sax/package.html b/super/nu/validator/htmlparser/translatable/org/xml/sax/package.html deleted file mode 100644 index dd7030e2..00000000 --- a/super/nu/validator/htmlparser/translatable/org/xml/sax/package.html +++ /dev/null @@ -1,297 +0,0 @@ - - - - -

    This package provides the core SAX APIs. -Some SAX1 APIs are deprecated to encourage integration of -namespace-awareness into designs of new applications -and into maintenance of existing infrastructure.

    - -

    See http://www.saxproject.org -for more information about SAX.

    - - -

    SAX2 Standard Feature Flags

    - -

    One of the essential characteristics of SAX2 is that it added -feature flags which can be used to examine and perhaps modify -parser modes, in particular modes such as validation. -Since features are identified by (absolute) URIs, anyone -can define such features. -Currently defined standard feature URIs have the prefix -http://xml.org/sax/features/ before an identifier such as -validation. Turn features on or off using -setFeature. Those standard identifiers are:

    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    Feature IDAccessDefaultDescription
    external-general-entitiesread/writeunspecified Reports whether this parser processes external - general entities; always true if validating. -
    external-parameter-entitiesread/writeunspecified Reports whether this parser processes external - parameter entities; always true if validating. -
    is-standalone(parsing) read-only, (not parsing) nonenot applicable May be examined only during a parse, after the - startDocument() callback has been completed; read-only. - The value is true if the document specified standalone="yes" in - its XML declaration, and otherwise is false. -
    lexical-handler/parameter-entitiesread/writeunspecified A value of "true" indicates that the LexicalHandler will report - the beginning and end of parameter entities. -
    namespacesread/writetrue A value of "true" indicates namespace URIs and unprefixed local names - for element and attribute names will be available. -
    namespace-prefixesread/writefalse A value of "true" indicates that XML qualified names (with prefixes) and - attributes (including xmlns* attributes) will be available. -
    resolve-dtd-urisread/writetrue A value of "true" indicates that system IDs in declarations will - be absolutized (relative to their base URIs) before reporting. - (That is the default behavior for all SAX2 XML parsers.) - A value of "false" indicates those IDs will not be absolutized; - parsers will provide the base URI from - Locator.getSystemId(). - This applies to system IDs passed in
      -
    • DTDHandler.notationDecl(), -
    • DTDHandler.unparsedEntityDecl(), and -
    • DeclHandler.externalEntityDecl(). -
    - It does not apply to EntityResolver.resolveEntity(), - which is not used to report declarations, or to - LexicalHandler.startDTD(), which already provides - the non-absolutized URI. -
    string-interningread/writeunspecified Has a value of "true" if all XML names (for elements, prefixes, - attributes, entities, notations, and local names), - as well as Namespace URIs, will have been interned - using java.lang.String.intern. This supports fast - testing of equality/inequality against string constants, - rather than forcing slower calls to String.equals(). -
    unicode-normalization-checkingread/writefalse Controls whether the parser reports Unicode normalization - errors as described in section 2.13 and Appendix B of the - XML 1.1 Recommendation. If true, Unicode normalization - errors are reported using the ErrorHandler.error() callback. - Such errors are not fatal in themselves (though, obviously, - other Unicode-related encoding errors may be). -
    use-attributes2read-onlynot applicable Returns "true" if the Attributes objects passed by - this parser in ContentHandler.startElement() - implement the org.xml.sax.ext.Attributes2 interface. - That interface exposes additional DTD-related information, - such as whether the attribute was specified in the - source text rather than defaulted. -
    use-locator2read-onlynot applicable Returns "true" if the Locator objects passed by - this parser in ContentHandler.setDocumentLocator() - implement the org.xml.sax.ext.Locator2 interface. - That interface exposes additional entity information, - such as the character encoding and XML version used. -
    use-entity-resolver2read/writetrue Returns "true" if, when setEntityResolver is given - an object implementing the org.xml.sax.ext.EntityResolver2 interface, - those new methods will be used. - Returns "false" to indicate that those methods will not be used. -
    validationread/writeunspecified Controls whether the parser is reporting all validity - errors; if true, all external entities will be read. -
    xmlns-urisread/writefalse Controls whether, when the namespace-prefixes feature - is set, the parser treats namespace declaration attributes as - being in the http://www.w3.org/2000/xmlns/ namespace. - By default, SAX2 conforms to the original "Namespaces in XML" - Recommendation, which explicitly states that such attributes are - not in any namespace. - Setting this optional flag to "true" makes the SAX2 events conform to - a later backwards-incompatible revision of that recommendation, - placing those attributes in a namespace. -
    xml-1.1read-onlynot applicable Returns "true" if the parser supports both XML 1.1 and XML 1.0. - Returns "false" if the parser supports only XML 1.0. -
    - -

    Support for the default values of the -namespaces and namespace-prefixes -properties is required. -Support for any other feature flags is entirely optional. -

    - -

    For default values not specified by SAX2, -each XMLReader implementation specifies its default, -or may choose not to expose the feature flag. -Unless otherwise specified here, -implementations may support changing current values -of these standard feature flags, but not while parsing. -

    - -

    SAX2 Standard Handler and Property IDs

    - -

    For parser interface characteristics that are described -as objects, a separate namespace is defined. The -objects in this namespace are again identified by URI, and -the standard property URIs have the prefix -http://xml.org/sax/properties/ before an identifier such as -lexical-handler or -dom-node. Manage those properties using -setProperty(). Those identifiers are:

    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    Property IDDescription
    declaration-handler Used to see most DTD declarations except those treated - as lexical ("document element name is ...") or which are - mandatory for all SAX parsers (DTDHandler). - The Object must implement org.xml.sax.ext.DeclHandler. -
    document-xml-version May be examined only during a parse, after the startDocument() - callback has been completed; read-only. This property is a - literal string describing the actual XML version of the document, - such as "1.0" or "1.1". -
    dom-node For "DOM Walker" style parsers, which ignore their - parser.parse() parameters, this is used to - specify the DOM (sub)tree being walked by the parser. - The Object must implement the - org.w3c.dom.Node interface. -
    lexical-handler Used to see some syntax events that are essential in some - applications: comments, CDATA delimiters, selected general - entity inclusions, and the start and end of the DTD - (and declaration of document element name). - The Object must implement org.xml.sax.ext.LexicalHandler. -
    xml-string Readable only during a parser callback, this exposes a TBS - chunk of characters responsible for the current event.
    - -

    All of these standard properties are optional; -XMLReader implementations need not support them. -

    - - \ No newline at end of file From d459ad0a41d533243a4ca6da7ef4ea237e3b74ab Mon Sep 17 00:00:00 2001 From: Anthony Vanelverdinghe Date: Thu, 23 Apr 2020 19:58:26 +0200 Subject: [PATCH 08/30] Clean up: tools --- .../validator/htmlparser/tools/HTML2HTML.java | 87 ------- .../validator/htmlparser/tools/HTML2XML.java | 86 ------- .../validator/htmlparser/tools/XML2HTML.java | 89 ------- .../validator/htmlparser/tools/XML2XML.java | 89 ------- .../htmlparser/tools/XSLT4HTML5.java | 237 ------------------ .../htmlparser/tools/XSLT4HTML5XOM.java | 162 ------------ .../htmlparser/tools/XmlnsDropper.java | 169 ------------- .../validator/htmlparser/tools/package.html | 29 --- 8 files changed, 948 deletions(-) delete mode 100644 test-src/nu/validator/htmlparser/tools/HTML2HTML.java delete mode 100644 test-src/nu/validator/htmlparser/tools/HTML2XML.java delete mode 100644 test-src/nu/validator/htmlparser/tools/XML2HTML.java delete mode 100644 test-src/nu/validator/htmlparser/tools/XML2XML.java delete mode 100644 test-src/nu/validator/htmlparser/tools/XSLT4HTML5.java delete mode 100644 test-src/nu/validator/htmlparser/tools/XSLT4HTML5XOM.java delete mode 100644 test-src/nu/validator/htmlparser/tools/XmlnsDropper.java delete mode 100644 test-src/nu/validator/htmlparser/tools/package.html diff --git a/test-src/nu/validator/htmlparser/tools/HTML2HTML.java b/test-src/nu/validator/htmlparser/tools/HTML2HTML.java deleted file mode 100644 index 5e2cf1f5..00000000 --- a/test-src/nu/validator/htmlparser/tools/HTML2HTML.java +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Copyright (c) 2008 Mozilla Foundation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the 
- * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -package nu.validator.htmlparser.tools; - -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.net.MalformedURLException; - -import javax.xml.parsers.ParserConfigurationException; -import javax.xml.transform.TransformerException; - -import nu.validator.htmlparser.common.XmlViolationPolicy; -import nu.validator.htmlparser.sax.HtmlParser; -import nu.validator.htmlparser.sax.HtmlSerializer; -import nu.validator.htmlparser.sax.XmlSerializer; -import nu.validator.htmlparser.test.SystemErrErrorHandler; - -import org.xml.sax.ContentHandler; -import org.xml.sax.InputSource; -import org.xml.sax.SAXException; - -public class HTML2HTML { - - /** - * @param args - */ - public static void main(String[] args) throws SAXException, - ParserConfigurationException, MalformedURLException, IOException, - TransformerException { - InputStream in; - OutputStream out; - - switch (args.length) { - case 0: - in = System.in; - out = System.out; - break; - case 1: - in = new FileInputStream(args[0]); - out = System.out; - break; - case 2: - in = new FileInputStream(args[0]); - out = new FileOutputStream(args[1]); - break; - default: - System.err.println("Too many arguments. 
No arguments to use stdin/stdout. One argument to reading from file and write to stdout. Two arguments to read from first file and write to second."); - System.exit(1); - return; - } - - ContentHandler serializer = new HtmlSerializer(out); - - HtmlParser parser = new HtmlParser(XmlViolationPolicy.ALLOW); - - parser.setErrorHandler(new SystemErrErrorHandler()); - parser.setContentHandler(serializer); - parser.setProperty("http://xml.org/sax/properties/lexical-handler", - serializer); - parser.parse(new InputSource(in)); - out.flush(); - out.close(); - } -} diff --git a/test-src/nu/validator/htmlparser/tools/HTML2XML.java b/test-src/nu/validator/htmlparser/tools/HTML2XML.java deleted file mode 100644 index 57666f93..00000000 --- a/test-src/nu/validator/htmlparser/tools/HTML2XML.java +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (c) 2008 Mozilla Foundation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- */ - -package nu.validator.htmlparser.tools; - -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.net.MalformedURLException; - -import javax.xml.parsers.ParserConfigurationException; -import javax.xml.transform.TransformerException; - -import nu.validator.htmlparser.common.XmlViolationPolicy; -import nu.validator.htmlparser.sax.HtmlParser; -import nu.validator.htmlparser.sax.XmlSerializer; -import nu.validator.htmlparser.test.SystemErrErrorHandler; - -import org.xml.sax.ContentHandler; -import org.xml.sax.InputSource; -import org.xml.sax.SAXException; - -public class HTML2XML { - - /** - * @param args - */ - public static void main(String[] args) throws SAXException, - ParserConfigurationException, MalformedURLException, IOException, - TransformerException { - InputStream in; - OutputStream out; - - switch (args.length) { - case 0: - in = System.in; - out = System.out; - break; - case 1: - in = new FileInputStream(args[0]); - out = System.out; - break; - case 2: - in = new FileInputStream(args[0]); - out = new FileOutputStream(args[1]); - break; - default: - System.err.println("Too many arguments. No arguments to use stdin/stdout. One argument to reading from file and write to stdout. 
Two arguments to read from first file and write to second."); - System.exit(1); - return; - } - - ContentHandler serializer = new XmlSerializer(out); - - HtmlParser parser = new HtmlParser(XmlViolationPolicy.ALTER_INFOSET); - - parser.setErrorHandler(new SystemErrErrorHandler()); - parser.setContentHandler(serializer); - parser.setProperty("http://xml.org/sax/properties/lexical-handler", - serializer); - parser.parse(new InputSource(in)); - out.flush(); - out.close(); - } -} diff --git a/test-src/nu/validator/htmlparser/tools/XML2HTML.java b/test-src/nu/validator/htmlparser/tools/XML2HTML.java deleted file mode 100644 index dad89a5b..00000000 --- a/test-src/nu/validator/htmlparser/tools/XML2HTML.java +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright (c) 2008 Mozilla Foundation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- */ - -package nu.validator.htmlparser.tools; - -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.net.MalformedURLException; - -import javax.xml.parsers.ParserConfigurationException; -import javax.xml.parsers.SAXParserFactory; -import javax.xml.transform.TransformerException; - -import nu.validator.htmlparser.sax.HtmlSerializer; -import nu.validator.htmlparser.sax.XmlSerializer; -import nu.validator.htmlparser.test.SystemErrErrorHandler; - -import org.xml.sax.ContentHandler; -import org.xml.sax.InputSource; -import org.xml.sax.SAXException; -import org.xml.sax.XMLReader; - -public class XML2HTML { - - /** - * @param args - */ - public static void main(String[] args) throws SAXException, - ParserConfigurationException, MalformedURLException, IOException, - TransformerException { - InputStream in; - OutputStream out; - - switch (args.length) { - case 0: - in = System.in; - out = System.out; - break; - case 1: - in = new FileInputStream(args[0]); - out = System.out; - break; - case 2: - in = new FileInputStream(args[0]); - out = new FileOutputStream(args[1]); - break; - default: - System.err.println("Too many arguments. No arguments to use stdin/stdout. One argument to reading from file and write to stdout. 
Two arguments to read from first file and write to second."); - System.exit(1); - return; - } - - ContentHandler serializer = new HtmlSerializer(out); - - SAXParserFactory factory = SAXParserFactory.newInstance(); - factory.setNamespaceAware(true); - factory.setValidating(false); - XMLReader parser = factory.newSAXParser().getXMLReader(); - parser.setErrorHandler(new SystemErrErrorHandler()); - parser.setContentHandler(serializer); - parser.setProperty("http://xml.org/sax/properties/lexical-handler", - serializer); - parser.parse(new InputSource(in)); - out.flush(); - out.close(); - } -} diff --git a/test-src/nu/validator/htmlparser/tools/XML2XML.java b/test-src/nu/validator/htmlparser/tools/XML2XML.java deleted file mode 100644 index 2f6aa24d..00000000 --- a/test-src/nu/validator/htmlparser/tools/XML2XML.java +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright (c) 2008 Mozilla Foundation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- */ - -package nu.validator.htmlparser.tools; - -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.net.MalformedURLException; - -import javax.xml.parsers.ParserConfigurationException; -import javax.xml.parsers.SAXParserFactory; -import javax.xml.transform.TransformerException; - -import nu.validator.htmlparser.sax.NameCheckingXmlSerializer; -import nu.validator.htmlparser.sax.XmlSerializer; -import nu.validator.htmlparser.test.SystemErrErrorHandler; - -import org.xml.sax.ContentHandler; -import org.xml.sax.InputSource; -import org.xml.sax.SAXException; -import org.xml.sax.XMLReader; - -public class XML2XML { - - /** - * @param args - */ - public static void main(String[] args) throws SAXException, - ParserConfigurationException, MalformedURLException, IOException, - TransformerException { - InputStream in; - OutputStream out; - - switch (args.length) { - case 0: - in = System.in; - out = System.out; - break; - case 1: - in = new FileInputStream(args[0]); - out = System.out; - break; - case 2: - in = new FileInputStream(args[0]); - out = new FileOutputStream(args[1]); - break; - default: - System.err.println("Too many arguments. No arguments to use stdin/stdout. One argument to reading from file and write to stdout. 
Two arguments to read from first file and write to second."); - System.exit(1); - return; - } - - ContentHandler serializer = new NameCheckingXmlSerializer(out); - - SAXParserFactory factory = SAXParserFactory.newInstance(); - factory.setNamespaceAware(true); - factory.setValidating(false); - XMLReader parser = factory.newSAXParser().getXMLReader(); - parser.setErrorHandler(new SystemErrErrorHandler()); - parser.setContentHandler(serializer); - parser.setProperty("http://xml.org/sax/properties/lexical-handler", - serializer); - parser.parse(new InputSource(in)); - out.flush(); - out.close(); - } -} diff --git a/test-src/nu/validator/htmlparser/tools/XSLT4HTML5.java b/test-src/nu/validator/htmlparser/tools/XSLT4HTML5.java deleted file mode 100644 index 05d8193c..00000000 --- a/test-src/nu/validator/htmlparser/tools/XSLT4HTML5.java +++ /dev/null @@ -1,237 +0,0 @@ -/* - * Copyright (c) 2007 Henri Sivonen - * Copyright (c) 2007 Mozilla Foundation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -package nu.validator.htmlparser.tools; - -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; -import java.net.MalformedURLException; - -import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; -import javax.xml.parsers.ParserConfigurationException; -import javax.xml.parsers.SAXParserFactory; -import javax.xml.transform.Templates; -import javax.xml.transform.Transformer; -import javax.xml.transform.TransformerException; -import javax.xml.transform.TransformerFactory; -import javax.xml.transform.dom.DOMSource; -import javax.xml.transform.sax.SAXResult; -import javax.xml.transform.sax.SAXTransformerFactory; -import javax.xml.transform.sax.TemplatesHandler; -import javax.xml.transform.sax.TransformerHandler; - -import nu.validator.htmlparser.common.XmlViolationPolicy; -import nu.validator.htmlparser.dom.HtmlDocumentBuilder; -import nu.validator.htmlparser.sax.HtmlParser; -import nu.validator.htmlparser.sax.HtmlSerializer; -import nu.validator.htmlparser.sax.XmlSerializer; -import nu.validator.htmlparser.test.SystemErrErrorHandler; - -import org.w3c.dom.Document; -import org.xml.sax.ContentHandler; -import org.xml.sax.SAXException; -import org.xml.sax.XMLReader; -import org.xml.sax.ext.LexicalHandler; - -public class XSLT4HTML5 { - - private enum Mode { - STREAMING_SAX, BUFFERED_SAX, DOM, - } - - private static final String TEMPLATE = "--template="; - - private static final String INPUT_HTML = "--input-html="; - - private static final String INPUT_XML = "--input-xml="; - - private static final String OUTPUT_HTML = "--output-html="; - - private static final String OUTPUT_XML = "--output-xml="; - - private static final String 
MODE = "--mode="; - - /** - * @param args - * @throws ParserConfigurationException - * @throws SAXException - * @throws IOException - * @throws MalformedURLException - * @throws TransformerException - */ - public static void main(String[] args) throws SAXException, - ParserConfigurationException, MalformedURLException, IOException, TransformerException { - if (args.length == 0) { - System.out.println("--template=file --input-[html|xml]=file --output-[html|xml]=file --mode=[sax-streaming|sax-buffered|dom]"); - System.exit(0); - } - String template = null; - String input = null; - boolean inputHtml = false; - String output = null; - boolean outputHtml = false; - Mode mode = null; - for (int i = 0; i < args.length; i++) { - String arg = args[i]; - if (arg.startsWith(TEMPLATE)) { - if (template == null) { - template = arg.substring(TEMPLATE.length()); - } else { - System.err.println("Tried to set template twice."); - System.exit(1); - } - } else if (arg.startsWith(INPUT_HTML)) { - if (input == null) { - input = arg.substring(INPUT_HTML.length()); - inputHtml = true; - } else { - System.err.println("Tried to set input twice."); - System.exit(2); - } - } else if (arg.startsWith(INPUT_XML)) { - if (input == null) { - input = arg.substring(INPUT_XML.length()); - inputHtml = false; - } else { - System.err.println("Tried to set input twice."); - System.exit(2); - } - } else if (arg.startsWith(OUTPUT_HTML)) { - if (output == null) { - output = arg.substring(OUTPUT_HTML.length()); - outputHtml = true; - } else { - System.err.println("Tried to set output twice."); - System.exit(3); - } - } else if (arg.startsWith(OUTPUT_XML)) { - if (output == null) { - output = arg.substring(OUTPUT_XML.length()); - outputHtml = false; - } else { - System.err.println("Tried to set output twice."); - System.exit(3); - } - } else if (arg.startsWith(MODE)) { - if (mode == null) { - String modeStr = arg.substring(MODE.length()); - if ("dom".equals(modeStr)) { - mode = Mode.DOM; - } else if 
("sax-buffered".equals(modeStr)) { - mode = Mode.BUFFERED_SAX; - } else if ("sax-streaming".equals(modeStr)) { - mode = Mode.STREAMING_SAX; - } else { - System.err.println("Unrecognized mode."); - System.exit(5); - } - } else { - System.err.println("Tried to set mode twice."); - System.exit(4); - } - } - } - - if (template == null) { - System.err.println("No template specified."); - System.exit(6); - } - if (input == null) { - System.err.println("No input specified."); - System.exit(7); - } - if (output == null) { - System.err.println("No output specified."); - System.exit(8); - } - if (mode == null) { - mode = Mode.BUFFERED_SAX; - } - - SystemErrErrorHandler errorHandler = new SystemErrErrorHandler(); - - SAXParserFactory factory = SAXParserFactory.newInstance(); - factory.setNamespaceAware(true); - factory.setValidating(false); - XMLReader reader = factory.newSAXParser().getXMLReader(); - reader.setErrorHandler(errorHandler); - - SAXTransformerFactory transformerFactory = (SAXTransformerFactory) TransformerFactory.newInstance(); - transformerFactory.setErrorListener(errorHandler); - TemplatesHandler templatesHandler = transformerFactory.newTemplatesHandler(); - reader.setContentHandler(templatesHandler); - reader.parse(new File(template).toURI().toASCIIString()); - - Templates templates = templatesHandler.getTemplates(); - - FileOutputStream outputStream = new FileOutputStream(output); - ContentHandler serializer; - if (outputHtml) { - serializer = new HtmlSerializer(outputStream); - } else { - serializer = new XmlSerializer(outputStream); - } - SAXResult result = new SAXResult(new XmlnsDropper(serializer)); - result.setLexicalHandler((LexicalHandler) serializer); - - if (mode == Mode.DOM) { - Document inputDoc; - DocumentBuilder builder; - if (inputHtml) { - builder = new HtmlDocumentBuilder(XmlViolationPolicy.ALTER_INFOSET); - } else { - DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance(); - factory.setNamespaceAware(true); - try { - 
builder = builderFactory.newDocumentBuilder(); - } catch (ParserConfigurationException e) { - throw new RuntimeException(e); - } - } - inputDoc = builder.parse(new File(input)); - DOMSource inputSource = new DOMSource(inputDoc, - new File(input).toURI().toASCIIString()); - Transformer transformer = templates.newTransformer(); - transformer.setErrorListener(errorHandler); - transformer.transform(inputSource, result); - } else { - if (inputHtml) { - reader = new HtmlParser(XmlViolationPolicy.ALTER_INFOSET); - if (mode == Mode.STREAMING_SAX) { - reader.setProperty("http://validator.nu/properties/streamability-violation-policy", XmlViolationPolicy.FATAL); - } - } - TransformerHandler transformerHandler = transformerFactory.newTransformerHandler(templates); - transformerHandler.setResult(result); - reader.setErrorHandler(errorHandler); - reader.setContentHandler(transformerHandler); - reader.setProperty("http://xml.org/sax/properties/lexical-handler", transformerHandler); - reader.parse(new File(input).toURI().toASCIIString()); - } - outputStream.flush(); - outputStream.close(); - } - -} diff --git a/test-src/nu/validator/htmlparser/tools/XSLT4HTML5XOM.java b/test-src/nu/validator/htmlparser/tools/XSLT4HTML5XOM.java deleted file mode 100644 index b364cc52..00000000 --- a/test-src/nu/validator/htmlparser/tools/XSLT4HTML5XOM.java +++ /dev/null @@ -1,162 +0,0 @@ -/* - * Copyright (c) 2007 Henri Sivonen - * Copyright (c) 2007 Mozilla Foundation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be 
included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -package nu.validator.htmlparser.tools; - -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; - -import nu.validator.htmlparser.common.XmlViolationPolicy; -import nu.validator.htmlparser.sax.HtmlSerializer; -import nu.validator.htmlparser.xom.HtmlBuilder; -import nu.xom.Builder; -import nu.xom.Document; -import nu.xom.Element; -import nu.xom.Nodes; -import nu.xom.ParsingException; -import nu.xom.Serializer; -import nu.xom.ValidityException; -import nu.xom.converters.SAXConverter; -import nu.xom.xslt.XSLException; -import nu.xom.xslt.XSLTransform; - -import org.xml.sax.SAXException; - -public class XSLT4HTML5XOM { - - private static final String TEMPLATE = "--template="; - - private static final String INPUT_HTML = "--input-html="; - - private static final String INPUT_XML = "--input-xml="; - - private static final String OUTPUT_HTML = "--output-html="; - - private static final String OUTPUT_XML = "--output-xml="; - - /** - * @param args - * @throws IOException - * @throws ParsingException - * @throws ValidityException - * @throws XSLException - * @throws SAXException - */ - public static void main(String[] args) throws ValidityException, - ParsingException, IOException, XSLException, SAXException { - if (args.length == 0) { - System.out.println("--template=file --input-[html|xml]=file --output-[html|xml]=file --mode=[sax-streaming|sax-buffered|dom]"); - System.exit(0); - } - 
String template = null; - String input = null; - boolean inputHtml = false; - String output = null; - boolean outputHtml = false; - for (int i = 0; i < args.length; i++) { - String arg = args[i]; - if (arg.startsWith(TEMPLATE)) { - if (template == null) { - template = arg.substring(TEMPLATE.length()); - } else { - System.err.println("Tried to set template twice."); - System.exit(1); - } - } else if (arg.startsWith(INPUT_HTML)) { - if (input == null) { - input = arg.substring(INPUT_HTML.length()); - inputHtml = true; - } else { - System.err.println("Tried to set input twice."); - System.exit(2); - } - } else if (arg.startsWith(INPUT_XML)) { - if (input == null) { - input = arg.substring(INPUT_XML.length()); - inputHtml = false; - } else { - System.err.println("Tried to set input twice."); - System.exit(2); - } - } else if (arg.startsWith(OUTPUT_HTML)) { - if (output == null) { - output = arg.substring(OUTPUT_HTML.length()); - outputHtml = true; - } else { - System.err.println("Tried to set output twice."); - System.exit(3); - } - } else if (arg.startsWith(OUTPUT_XML)) { - if (output == null) { - output = arg.substring(OUTPUT_XML.length()); - outputHtml = false; - } else { - System.err.println("Tried to set output twice."); - System.exit(3); - } - } - } - - if (template == null) { - System.err.println("No template specified."); - System.exit(6); - } - if (input == null) { - System.err.println("No input specified."); - System.exit(7); - } - if (output == null) { - System.err.println("No output specified."); - System.exit(8); - } - - Builder builder = new Builder(); - - Document transformationDoc = builder.build(new File(template)); - - XSLTransform transform = new XSLTransform(transformationDoc); - - FileOutputStream outputStream = new FileOutputStream(output); - - Document inputDoc; - if (inputHtml) { - builder = new HtmlBuilder(XmlViolationPolicy.ALTER_INFOSET); - } - inputDoc = builder.build(new File(input)); - Nodes result = transform.transform(inputDoc); - 
Document outputDoc = new Document((Element) result.get(0)); - if (outputHtml) { - HtmlSerializer htmlSerializer = new HtmlSerializer(outputStream); - SAXConverter converter = new SAXConverter(htmlSerializer); - converter.setLexicalHandler(htmlSerializer); - converter.convert(outputDoc); - } else { - Serializer serializer = new Serializer(outputStream); - serializer.write(outputDoc); - } - outputStream.flush(); - outputStream.close(); - } - -} diff --git a/test-src/nu/validator/htmlparser/tools/XmlnsDropper.java b/test-src/nu/validator/htmlparser/tools/XmlnsDropper.java deleted file mode 100644 index 0e6d4b1c..00000000 --- a/test-src/nu/validator/htmlparser/tools/XmlnsDropper.java +++ /dev/null @@ -1,169 +0,0 @@ -/* - * Copyright (c) 2007 Henri Sivonen - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- */ - -package nu.validator.htmlparser.tools; - -import org.xml.sax.Attributes; -import org.xml.sax.ContentHandler; -import org.xml.sax.Locator; -import org.xml.sax.SAXException; -import org.xml.sax.helpers.AttributesImpl; - -/** - * Quick and dirty hack to work around Xalan xmlns weirdness. - * - * @version $Id$ - * @author hsivonen - */ -class XmlnsDropper implements ContentHandler { - - private final ContentHandler delegate; - - /** - * @param delegate - */ - public XmlnsDropper(final ContentHandler delegate) { - this.delegate = delegate; - } - - /** - * @param ch - * @param start - * @param length - * @throws SAXException - * @see org.xml.sax.ContentHandler#characters(char[], int, int) - */ - public void characters(char[] ch, int start, int length) throws SAXException { - delegate.characters(ch, start, length); - } - - /** - * @throws SAXException - * @see org.xml.sax.ContentHandler#endDocument() - */ - public void endDocument() throws SAXException { - delegate.endDocument(); - } - - /** - * @param uri - * @param localName - * @param qName - * @throws SAXException - * @see org.xml.sax.ContentHandler#endElement(java.lang.String, java.lang.String, java.lang.String) - */ - public void endElement(String uri, String localName, String qName) throws SAXException { - delegate.endElement(uri, localName, qName); - } - - /** - * @param prefix - * @throws SAXException - * @see org.xml.sax.ContentHandler#endPrefixMapping(java.lang.String) - */ - public void endPrefixMapping(String prefix) throws SAXException { - delegate.endPrefixMapping(prefix); - } - - /** - * @param ch - * @param start - * @param length - * @throws SAXException - * @see org.xml.sax.ContentHandler#ignorableWhitespace(char[], int, int) - */ - public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException { - delegate.ignorableWhitespace(ch, start, length); - } - - /** - * @param target - * @param data - * @throws SAXException - * @see 
org.xml.sax.ContentHandler#processingInstruction(java.lang.String, java.lang.String) - */ - public void processingInstruction(String target, String data) throws SAXException { - delegate.processingInstruction(target, data); - } - - /** - * @param locator - * @see org.xml.sax.ContentHandler#setDocumentLocator(org.xml.sax.Locator) - */ - public void setDocumentLocator(Locator locator) { - delegate.setDocumentLocator(locator); - } - - /** - * @param name - * @throws SAXException - * @see org.xml.sax.ContentHandler#skippedEntity(java.lang.String) - */ - public void skippedEntity(String name) throws SAXException { - delegate.skippedEntity(name); - } - - /** - * @throws SAXException - * @see org.xml.sax.ContentHandler#startDocument() - */ - public void startDocument() throws SAXException { - delegate.startDocument(); - } - - /** - * @param uri - * @param localName - * @param qName - * @param atts - * @throws SAXException - * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes) - */ - public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException { - AttributesImpl ai = new AttributesImpl(); - for (int i = 0; i < atts.getLength(); i++) { - String u = atts.getURI(i); - String t = atts.getType(i); - String v = atts.getValue(i); - String n = atts.getLocalName(i); - String q = atts.getQName(i); - if (q != null) { - if ("xmlns".equals(q) || q.startsWith("xmlns:")) { - continue; - } - } - ai.addAttribute(u, n, q, t, v); - } - delegate.startElement(uri, localName, qName, ai); - } - - /** - * @param prefix - * @param uri - * @throws SAXException - * @see org.xml.sax.ContentHandler#startPrefixMapping(java.lang.String, java.lang.String) - */ - public void startPrefixMapping(String prefix, String uri) throws SAXException { - delegate.startPrefixMapping(prefix, uri); - } - -} diff --git a/test-src/nu/validator/htmlparser/tools/package.html 
b/test-src/nu/validator/htmlparser/tools/package.html deleted file mode 100644 index a04bf3cd..00000000 --- a/test-src/nu/validator/htmlparser/tools/package.html +++ /dev/null @@ -1,29 +0,0 @@ - - -Package Overview - - - -

    Demo apps.

    - - \ No newline at end of file From 429bfe2fb5d75cd8271cb618f5b3394b2133559e Mon Sep 17 00:00:00 2001 From: Anthony Vanelverdinghe Date: Thu, 23 Apr 2020 19:58:29 +0200 Subject: [PATCH 09/30] Clean up: cpptranslate et al. --- .../cpptranslate/AnnotationHelperVisitor.java | 159 -- .../cpptranslate/CppOnlyInputStream.java | 70 - .../htmlparser/cpptranslate/CppTypes.java | 493 ---- .../htmlparser/cpptranslate/CppVisitor.java | 2446 ----------------- .../htmlparser/cpptranslate/GkAtomParser.java | 72 - .../htmlparser/cpptranslate/HVisitor.java | 291 -- .../htmlparser/cpptranslate/LabelVisitor.java | 84 - .../cpptranslate/LicenseExtractor.java | 75 - .../cpptranslate/LocalSymbolTable.java | 89 - .../htmlparser/cpptranslate/Main.java | 145 - .../cpptranslate/NoCppInputStream.java | 86 - .../cpptranslate/StringLiteralParser.java | 70 - .../htmlparser/cpptranslate/StringPair.java | 73 - .../htmlparser/cpptranslate/SymbolTable.java | 93 - .../cpptranslate/SymbolTableVisitor.java | 71 - .../cpptranslate/TranslatorUtils.java | 81 - .../htmlparser/cpptranslate/Type.java | 99 - .../generator/GenerateNamedCharactersCpp.java | 579 ---- 18 files changed, 5076 deletions(-) delete mode 100644 translator-src/nu/validator/htmlparser/cpptranslate/AnnotationHelperVisitor.java delete mode 100644 translator-src/nu/validator/htmlparser/cpptranslate/CppOnlyInputStream.java delete mode 100644 translator-src/nu/validator/htmlparser/cpptranslate/CppTypes.java delete mode 100755 translator-src/nu/validator/htmlparser/cpptranslate/CppVisitor.java delete mode 100644 translator-src/nu/validator/htmlparser/cpptranslate/GkAtomParser.java delete mode 100644 translator-src/nu/validator/htmlparser/cpptranslate/HVisitor.java delete mode 100644 translator-src/nu/validator/htmlparser/cpptranslate/LabelVisitor.java delete mode 100644 translator-src/nu/validator/htmlparser/cpptranslate/LicenseExtractor.java delete mode 100644 translator-src/nu/validator/htmlparser/cpptranslate/LocalSymbolTable.java 
delete mode 100644 translator-src/nu/validator/htmlparser/cpptranslate/Main.java delete mode 100644 translator-src/nu/validator/htmlparser/cpptranslate/NoCppInputStream.java delete mode 100644 translator-src/nu/validator/htmlparser/cpptranslate/StringLiteralParser.java delete mode 100644 translator-src/nu/validator/htmlparser/cpptranslate/StringPair.java delete mode 100644 translator-src/nu/validator/htmlparser/cpptranslate/SymbolTable.java delete mode 100644 translator-src/nu/validator/htmlparser/cpptranslate/SymbolTableVisitor.java delete mode 100644 translator-src/nu/validator/htmlparser/cpptranslate/TranslatorUtils.java delete mode 100644 translator-src/nu/validator/htmlparser/cpptranslate/Type.java delete mode 100644 translator-src/nu/validator/htmlparser/generator/GenerateNamedCharactersCpp.java diff --git a/translator-src/nu/validator/htmlparser/cpptranslate/AnnotationHelperVisitor.java b/translator-src/nu/validator/htmlparser/cpptranslate/AnnotationHelperVisitor.java deleted file mode 100644 index 98a0226e..00000000 --- a/translator-src/nu/validator/htmlparser/cpptranslate/AnnotationHelperVisitor.java +++ /dev/null @@ -1,159 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is HTML Parser C++ Translator code. - * - * The Initial Developer of the Original Code is - * Mozilla Foundation. - * Portions created by the Initial Developer are Copyright (C) 2009 - * the Initial Developer. All Rights Reserved. 
- * - * Contributor(s): - * Henri Sivonen - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -package nu.validator.htmlparser.cpptranslate; - -import java.util.List; - -import japa.parser.ast.expr.AnnotationExpr; -import japa.parser.ast.expr.MarkerAnnotationExpr; -import japa.parser.ast.type.ReferenceType; -import japa.parser.ast.visitor.VoidVisitorAdapter; - -public class AnnotationHelperVisitor extends VoidVisitorAdapter { - - protected List currentAnnotations; - - protected boolean nsUri() { - return hasAnnotation("NsUri"); - } - - protected boolean prefix() { - return hasAnnotation("Prefix"); - } - - protected boolean local() { - return hasAnnotation("Local"); - } - - protected boolean literal() { - return hasAnnotation("Literal"); - } - - protected boolean inline() { - return hasAnnotation("Inline"); - } - - protected boolean noLength() { - return hasAnnotation("NoLength"); - } - - protected boolean unsigned() { - return hasAnnotation("Unsigned"); - } - - protected boolean auto() { - return hasAnnotation("Auto"); - } - - protected boolean virtual() { - return hasAnnotation("Virtual"); - } - - protected boolean override() { - return 
hasAnnotation("Override"); - } - - protected boolean isConst() { - return hasAnnotation("Const"); - } - - protected boolean characterName() { - return hasAnnotation("CharacterName"); - } - - protected boolean creator() { - return hasAnnotation("Creator"); - } - - protected boolean htmlCreator() { - return hasAnnotation("HtmlCreator"); - } - - protected boolean svgCreator() { - return hasAnnotation("SvgCreator"); - } - - private boolean hasAnnotation(String anno) { - if (currentAnnotations == null) { - return false; - } - for (AnnotationExpr ann : currentAnnotations) { - if (ann instanceof MarkerAnnotationExpr) { - MarkerAnnotationExpr marker = (MarkerAnnotationExpr) ann; - if (marker.getName().getName().equals(anno)) { - return true; - } - } - } - return false; - } - - protected Type convertType(japa.parser.ast.type.Type type, int modifiers) { - if (type instanceof ReferenceType) { - ReferenceType referenceType = (ReferenceType) type; - return new Type(convertTypeName(referenceType.getType().toString()), referenceType.getArrayCount(), noLength(), modifiers); - } else { - return new Type(convertTypeName(type.toString()), 0, false, modifiers); - } - } - - private String convertTypeName(String name) { - if ("String".equals(name)) { - if (local()) { - return "@Local"; - } - if (nsUri()) { - return "@NsUri"; - } - if (prefix()) { - return "@Prefix"; - } - if (literal()) { - return "@Literal"; - } - if (auto()) { - return "@Auto"; - } - if (characterName()) { - return "@CharacterName"; - } - } - return name; - } - -} diff --git a/translator-src/nu/validator/htmlparser/cpptranslate/CppOnlyInputStream.java b/translator-src/nu/validator/htmlparser/cpptranslate/CppOnlyInputStream.java deleted file mode 100644 index 587b8160..00000000 --- a/translator-src/nu/validator/htmlparser/cpptranslate/CppOnlyInputStream.java +++ /dev/null @@ -1,70 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the 
Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is HTML Parser C++ Translator code. - * - * The Initial Developer of the Original Code is - * Mozilla Foundation. - * Portions created by the Initial Developer are Copyright (C) 2010 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * Henri Sivonen - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. 
- * - * ***** END LICENSE BLOCK ***** */ - -package nu.validator.htmlparser.cpptranslate; - -import java.io.BufferedInputStream; -import java.io.IOException; -import java.io.InputStream; - -public class CppOnlyInputStream extends InputStream { - - private static final String DROP = "// CPPONLY:"; - - private final InputStream delegate; - - public CppOnlyInputStream(InputStream delegate) { - this.delegate = new BufferedInputStream(delegate); - } - - @Override public int read() throws IOException { - int c = delegate.read(); - if (c == DROP.charAt(0)) { - delegate.mark(DROP.length()); - for (int i = 1; i < DROP.length(); ++i) { - int d = delegate.read(); - if (d != DROP.charAt(i)) { - delegate.reset(); - return c; - } - } - return delegate.read(); - } - return c; - } - -} diff --git a/translator-src/nu/validator/htmlparser/cpptranslate/CppTypes.java b/translator-src/nu/validator/htmlparser/cpptranslate/CppTypes.java deleted file mode 100644 index d75f8fe0..00000000 --- a/translator-src/nu/validator/htmlparser/cpptranslate/CppTypes.java +++ /dev/null @@ -1,493 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is HTML Parser C++ Translator code. - * - * The Initial Developer of the Original Code is - * Mozilla Foundation. - * Portions created by the Initial Developer are Copyright (C) 2008-2009 - * the Initial Developer. All Rights Reserved. 
- * - * Contributor(s): - * Henri Sivonen - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -package nu.validator.htmlparser.cpptranslate; - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.InputStreamReader; -import java.io.OutputStreamWriter; -import java.io.Writer; -import java.util.Arrays; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Map; -import java.util.Set; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -public class CppTypes { - - /* Please note we aren't looking for the following Atom definitions: - PseudoElementAtom or NonInheritingAnonBoxAtom or InheritingAnonBoxAtom */ - private static final Pattern ATOM_DEF = Pattern.compile("^\\s*Atom\\(\"([^,]+)\",\\s*\"([^\"]*)\"\\).*$"); - - private static Set reservedWords = new HashSet(); - - static { - reservedWords.add("small"); - reservedWords.add("for"); - reservedWords.add("false"); - reservedWords.add("true"); - reservedWords.add("default"); - reservedWords.add("class"); - reservedWords.add("switch"); - 
reservedWords.add("union"); - reservedWords.add("template"); - reservedWords.add("int"); - reservedWords.add("char"); - reservedWords.add("operator"); - reservedWords.add("or"); - reservedWords.add("and"); - reservedWords.add("not"); - reservedWords.add("xor"); - reservedWords.add("unicode"); - } - - private static final String[] TREE_BUILDER_INCLUDES = { "nsContentUtils", "nsAtom", "nsHtml5AtomTable", - "nsHtml5String", "nsNameSpaceManager", "nsIContent", "nsTraceRefcnt", "jArray", "nsHtml5DocumentMode", - "nsHtml5ArrayCopy", "nsHtml5Parser", "nsGkAtoms", "nsHtml5TreeOperation", "nsHtml5StateSnapshot", - "nsHtml5StackNode", "nsHtml5TreeOpExecutor", "nsHtml5StreamParser", "nsAHtml5TreeBuilderState", - "nsHtml5Highlighter", "nsHtml5PlainTextUtils", "nsHtml5ViewSourceUtils", "mozilla/ImportScanner", - "mozilla/Likely", "nsIContentHandle", "nsHtml5OplessBuilder", }; - - private static final String[] TOKENIZER_INCLUDES = { "nsAtom", - "nsHtml5AtomTable", "nsHtml5String", "nsIContent", "nsTraceRefcnt", - "jArray", "nsHtml5DocumentMode", "nsHtml5ArrayCopy", - "nsHtml5NamedCharacters", "nsHtml5NamedCharactersAccel", - "nsGkAtoms", "nsAHtml5TreeBuilderState", "nsHtml5Macros", - "nsHtml5Highlighter", "nsHtml5TokenizerLoopPolicies" }; - - private static final String[] INCLUDES = { "nsAtom", "nsHtml5AtomTable", - "nsHtml5String", "nsNameSpaceManager", "nsIContent", - "nsTraceRefcnt", "jArray", "nsHtml5ArrayCopy", - "nsAHtml5TreeBuilderState", "nsGkAtoms", "nsHtml5ByteReadable", - "nsHtml5Macros", "nsIContentHandle", "nsHtml5Portability", - "nsHtml5ContentCreatorFunction"}; - - private static final String[] OTHER_DECLATIONS = {}; - - private static final String[] TREE_BUILDER_OTHER_DECLATIONS = {}; - - private static final String[] NAMED_CHARACTERS_INCLUDES = { "jArray", - "nscore", "nsDebug", "mozilla/Logging", "nsMemory" }; - - private static final String[] FORWARD_DECLARATIONS = { "nsHtml5StreamParser" }; - - private static final String[] CLASSES_THAT_NEED_SUPPLEMENT = { - 
"MetaScanner", "Tokenizer", "TreeBuilder", "UTF16Buffer", }; - - private static final String[] STATE_LOOP_POLICIES = { - "nsHtml5ViewSourcePolicy", "nsHtml5SilentPolicy" }; - - private final Map atomMap = new HashMap(); - - private final Writer atomWriter; - - public CppTypes(File atomList, File generatedAtomFile) { - if (atomList == null) { - atomWriter = null; - } else { - try { - ingestAtoms(atomList); - atomWriter = new OutputStreamWriter(new FileOutputStream( - generatedAtomFile), "utf-8"); - this.start(); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - } - - private void ingestAtoms(File atomList) throws IOException { - // This doesn't need to be efficient, so let's make it easy to write. - BufferedReader atomReader = new BufferedReader( - new InputStreamReader(new FileInputStream(atomList), "utf-8")); - try { - String line; - boolean startedParsing = false; - while ((line = atomReader.readLine()) != null) { - // only start parsing lines after this comment - if (line.trim().startsWith("# START ATOMS")) { - startedParsing = true; - } else if (!startedParsing) { - continue; - } - // stop parsing lines after this comment - if (line.trim().startsWith("# END ATOMS")) { - return; - } - if (!line.trim().startsWith("Atom")) { - continue; - } - Matcher m = ATOM_DEF.matcher(line); - if (!m.matches()) { - throw new RuntimeException("Malformed atom definition: " + line); - } - atomMap.put(m.group(2), m.group(1)); - } - throw new RuntimeException( - "Atom list did not have a marker for generated section."); - } finally { - atomReader.close(); - } - } - - public void start() { - try { - - if (atomWriter != null) { - atomWriter.write("# THIS FILE IS GENERATED BY THE HTML PARSER TRANSLATOR AND WILL BE OVERWRITTEN!\n"); - atomWriter.write("from Atom import Atom\n\n"); - atomWriter.write("HTML_PARSER_ATOMS = [\n"); - } - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - public void finished() { - try { - if (atomWriter != null) { - 
atomWriter.write("]\n"); - atomWriter.flush(); - atomWriter.close(); - } - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - public String classPrefix() { - return "nsHtml5"; - } - - public String booleanType() { - return "bool"; - } - - public String byteType() { - return "int8_t"; - } - - public String charType() { - return "char16_t"; - } - - /** - * Only used for named characters. - * - * @return - */ - public String unsignedShortType() { - return "uint16_t"; - } - - public String intType() { - return "int32_t"; - } - - public String unsignedIntType() { - return "uint32_t"; - } - - public String stringType() { - return "nsHtml5String"; - } - - public String weakLocalType() { - return "nsAtom*"; - } - - public String localType() { - return "RefPtr"; - } - - public String prefixType() { - return "nsStaticAtom*"; - } - - public String nsUriType() { - return "int32_t"; - } - - public String falseLiteral() { - return "false"; - } - - public String trueLiteral() { - return "true"; - } - - public String nullLiteral() { - return "nullptr"; - } - - public String encodingDeclarationHandlerType() { - return "nsHtml5StreamParser*"; - } - - public String nodeType() { - return "nsIContentHandle*"; - } - - public String htmlCreatorType() { - return "mozilla::dom::HTMLContentCreatorFunction"; - } - - public String svgCreatorType() { - return "mozilla::dom::SVGContentCreatorFunction"; - } - - public String creatorType() { - return "nsHtml5ContentCreatorFunction"; - } - - public String xhtmlNamespaceLiteral() { - return "kNameSpaceID_XHTML"; - } - - public String svgNamespaceLiteral() { - return "kNameSpaceID_SVG"; - } - - public String xmlnsNamespaceLiteral() { - return "kNameSpaceID_XMLNS"; - } - - public String xmlNamespaceLiteral() { - return "kNameSpaceID_XML"; - } - - public String noNamespaceLiteral() { - return "kNameSpaceID_None"; - } - - public String xlinkNamespaceLiteral() { - return "kNameSpaceID_XLink"; - } - - public String 
mathmlNamespaceLiteral() { - return "kNameSpaceID_MathML"; - } - - public String arrayTemplate() { - return "jArray"; - } - - public String autoArrayTemplate() { - return "autoJArray"; - } - - public String localForLiteral(String literal) { - String atom = atomMap.get(literal); - if (atom == null) { - atom = createAtomName(literal); - atomMap.put(literal, atom); - if (atomWriter != null) { - try { - atomWriter.write(" # ATOM GENERATED BY HTML PARSER TRANSLATOR (WILL BE AUTOMATICALLY OVERWRITTEN):\n Atom(\"" + atom + "\", \"" + literal - + "\"),\n"); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - } - return "nsGkAtoms::" + atom; - } - - private String createAtomName(String literal) { - String candidate = literal.replaceAll("[^a-zA-Z0-9_]", "_"); - if ("".equals(candidate)) { - candidate = "emptystring"; - } - while (atomMap.values().contains(candidate) - || reservedWords.contains(candidate)) { - candidate = candidate + '_'; - } - return candidate; - } - - public String stringForLiteral(String literal) { - return '"' + literal + '"'; - } - - public String staticArrayTemplate() { - return "staticJArray"; - } - - public String newArrayCreator() { - return "newJArray"; - } - - public String[] boilerplateIncludes(String javaClass) { - if ("TreeBuilder".equals(javaClass)) { - return TREE_BUILDER_INCLUDES; - } else if ("Tokenizer".equals(javaClass)) { - return TOKENIZER_INCLUDES; - } else { - return INCLUDES; - } - } - - public String[] boilerplateDeclarations(String javaClass) { - if ("TreeBuilder".equals(javaClass)) { - return TREE_BUILDER_OTHER_DECLATIONS; - } else { - return OTHER_DECLATIONS; - } - } - - public String[] namedCharactersIncludes() { - return NAMED_CHARACTERS_INCLUDES; - } - - public String[] boilerplateForwardDeclarations() { - return FORWARD_DECLARATIONS; - } - - public String documentModeHandlerType() { - return "nsHtml5TreeBuilder*"; - } - - public String documentModeType() { - return "nsHtml5DocumentMode"; - } - - public 
String arrayCopy() { - return "nsHtml5ArrayCopy::arraycopy"; - } - - public String maxInteger() { - return "INT32_MAX"; - } - - public String constructorBoilerplate(String className) { - return "MOZ_COUNT_CTOR(" + className + ");"; - } - - public String destructorBoilerplate(String className) { - return "MOZ_COUNT_DTOR(" + className + ");"; - } - - public String literalType() { - return "const char*"; - } - - public boolean hasSupplement(String javaClass) { - return Arrays.binarySearch(CLASSES_THAT_NEED_SUPPLEMENT, javaClass) > -1; - } - - public String internerType() { - return "nsHtml5AtomTable*"; - } - - public String treeBuilderStateInterface() { - return "nsAHtml5TreeBuilderState"; - } - - public String treeBuilderStateType() { - return "nsAHtml5TreeBuilderState*"; - } - - public String arrayLengthMacro() { - return "MOZ_ARRAY_LENGTH"; - } - - public String staticAssert() { - return "static_assert"; - } - - public String continueMacro() { - return "NS_HTML5_CONTINUE"; - } - - public String breakMacro() { - return "NS_HTML5_BREAK"; - } - - public String characterNameType() { - return "nsHtml5CharacterName&"; - } - - public String characterNameTypeDeclaration() { - return "nsHtml5CharacterName"; - } - - public String transition() { - return "P::transition"; - } - - public String tokenizerErrorCondition() { - return "P::reportErrors"; - } - - public String firstTransitionArg() { - return "mViewSource.get()"; - } - - public String errorHandler() { - return this.unlikely() + "(mViewSource)"; - } - - public String unlikely() { - return "MOZ_UNLIKELY"; - } - - public String completedCharacterReference() { - return "P::completedNamedCharacterReference(mViewSource.get())"; - } - - public String[] stateLoopPolicies() { - return STATE_LOOP_POLICIES; - } - - public String assertionMacro() { - return "MOZ_ASSERT"; - } - - public String releaseAssertionMacro() { - return "MOZ_RELEASE_ASSERT"; - } - - public String crashMacro() { - return "MOZ_CRASH"; - } -} diff --git 
a/translator-src/nu/validator/htmlparser/cpptranslate/CppVisitor.java b/translator-src/nu/validator/htmlparser/cpptranslate/CppVisitor.java deleted file mode 100755 index e832e3bb..00000000 --- a/translator-src/nu/validator/htmlparser/cpptranslate/CppVisitor.java +++ /dev/null @@ -1,2446 +0,0 @@ -/* - * Copyright (C) 2007 Júlio Vilmar Gesser. - * Copyright (C) 2008 Mozilla Foundation - * - * This file is part of HTML Parser C++ Translator. It was derived from DumpVisitor - * which was part of Java 1.5 parser and Abstract Syntax Tree and came with the following notice: - * - * Java 1.5 parser and Abstract Syntax Tree is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Java 1.5 parser and Abstract Syntax Tree is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Java 1.5 parser and Abstract Syntax Tree. If not, see . 
- */ -/* - * Created on 05/10/2006 - */ -package nu.validator.htmlparser.cpptranslate; - -import java.util.Arrays; -import java.util.HashSet; -import java.util.Iterator; -import java.util.LinkedList; -import java.util.List; -import java.util.Set; - -import japa.parser.ast.BlockComment; -import japa.parser.ast.CompilationUnit; -import japa.parser.ast.ImportDeclaration; -import japa.parser.ast.LineComment; -import japa.parser.ast.Node; -import japa.parser.ast.PackageDeclaration; -import japa.parser.ast.TypeParameter; -import japa.parser.ast.body.AnnotationDeclaration; -import japa.parser.ast.body.AnnotationMemberDeclaration; -import japa.parser.ast.body.BodyDeclaration; -import japa.parser.ast.body.ClassOrInterfaceDeclaration; -import japa.parser.ast.body.ConstructorDeclaration; -import japa.parser.ast.body.EmptyMemberDeclaration; -import japa.parser.ast.body.EmptyTypeDeclaration; -import japa.parser.ast.body.EnumConstantDeclaration; -import japa.parser.ast.body.EnumDeclaration; -import japa.parser.ast.body.FieldDeclaration; -import japa.parser.ast.body.InitializerDeclaration; -import japa.parser.ast.body.JavadocComment; -import japa.parser.ast.body.MethodDeclaration; -import japa.parser.ast.body.ModifierSet; -import japa.parser.ast.body.Parameter; -import japa.parser.ast.body.TypeDeclaration; -import japa.parser.ast.body.VariableDeclarator; -import japa.parser.ast.body.VariableDeclaratorId; -import japa.parser.ast.expr.ArrayAccessExpr; -import japa.parser.ast.expr.ArrayCreationExpr; -import japa.parser.ast.expr.ArrayInitializerExpr; -import japa.parser.ast.expr.AssignExpr; -import japa.parser.ast.expr.BinaryExpr; -import japa.parser.ast.expr.BooleanLiteralExpr; -import japa.parser.ast.expr.CastExpr; -import japa.parser.ast.expr.CharLiteralExpr; -import japa.parser.ast.expr.ClassExpr; -import japa.parser.ast.expr.ConditionalExpr; -import japa.parser.ast.expr.DoubleLiteralExpr; -import japa.parser.ast.expr.EnclosedExpr; -import japa.parser.ast.expr.Expression; -import 
japa.parser.ast.expr.FieldAccessExpr; -import japa.parser.ast.expr.InstanceOfExpr; -import japa.parser.ast.expr.IntegerLiteralExpr; -import japa.parser.ast.expr.IntegerLiteralMinValueExpr; -import japa.parser.ast.expr.LongLiteralExpr; -import japa.parser.ast.expr.LongLiteralMinValueExpr; -import japa.parser.ast.expr.MarkerAnnotationExpr; -import japa.parser.ast.expr.MemberValuePair; -import japa.parser.ast.expr.MethodCallExpr; -import japa.parser.ast.expr.NameExpr; -import japa.parser.ast.expr.NormalAnnotationExpr; -import japa.parser.ast.expr.NullLiteralExpr; -import japa.parser.ast.expr.ObjectCreationExpr; -import japa.parser.ast.expr.QualifiedNameExpr; -import japa.parser.ast.expr.SingleMemberAnnotationExpr; -import japa.parser.ast.expr.StringLiteralExpr; -import japa.parser.ast.expr.SuperExpr; -import japa.parser.ast.expr.ThisExpr; -import japa.parser.ast.expr.UnaryExpr; -import japa.parser.ast.expr.VariableDeclarationExpr; -import japa.parser.ast.stmt.AssertStmt; -import japa.parser.ast.stmt.BlockStmt; -import japa.parser.ast.stmt.BreakStmt; -import japa.parser.ast.stmt.CatchClause; -import japa.parser.ast.stmt.ContinueStmt; -import japa.parser.ast.stmt.DoStmt; -import japa.parser.ast.stmt.EmptyStmt; -import japa.parser.ast.stmt.ExplicitConstructorInvocationStmt; -import japa.parser.ast.stmt.ExpressionStmt; -import japa.parser.ast.stmt.ForStmt; -import japa.parser.ast.stmt.ForeachStmt; -import japa.parser.ast.stmt.IfStmt; -import japa.parser.ast.stmt.LabeledStmt; -import japa.parser.ast.stmt.ReturnStmt; -import japa.parser.ast.stmt.Statement; -import japa.parser.ast.stmt.SwitchEntryStmt; -import japa.parser.ast.stmt.SwitchStmt; -import japa.parser.ast.stmt.SynchronizedStmt; -import japa.parser.ast.stmt.ThrowStmt; -import japa.parser.ast.stmt.TryStmt; -import japa.parser.ast.stmt.TypeDeclarationStmt; -import japa.parser.ast.stmt.WhileStmt; -import japa.parser.ast.type.ClassOrInterfaceType; -import japa.parser.ast.type.PrimitiveType; -import 
japa.parser.ast.type.ReferenceType; -import japa.parser.ast.type.Type; -import japa.parser.ast.type.VoidType; -import japa.parser.ast.type.WildcardType; - -/** - * @author Julio Vilmar Gesser - * @author Henri Sivonen - */ - -public class CppVisitor extends AnnotationHelperVisitor { - - private static final String[] CLASS_NAMES = { "AttributeName", - "ElementName", "HtmlAttributes", "LocatorImpl", "MetaScanner", - "NamedCharacters", "NamedCharactersAccel", "Portability", - "StackNode", "Tokenizer", "TreeBuilder", "UTF16Buffer" }; - - private static final String[] METHODS_WITH_UNLIKELY_CONDITIONS = { - "appendStrBuf" }; - - public class SourcePrinter { - - private int level = 0; - - private boolean indented = false; - - private final StringBuilder buf = new StringBuilder(); - - public void indent() { - level++; - } - - public void unindent() { - level--; - } - - private void makeIndent() { - for (int i = 0; i < level; i++) { - buf.append(" "); - } - } - - public void printWithoutIndent(String arg) { - indented = false; - buf.append(arg); - } - - public void print(String arg) { - if (!indented) { - makeIndent(); - indented = true; - } - buf.append(arg); - } - - public void printLn(String arg) { - print(arg); - printLn(); - } - - public void printLn() { - buf.append("\n"); - indented = false; - } - - public String getSource() { - return buf.toString(); - } - - @Override public String toString() { - return getSource(); - } - } - - private boolean supportErrorReporting = true; - - protected SourcePrinter printer = new SourcePrinter(); - - private SourcePrinter staticInitializerPrinter = new SourcePrinter(); - - private SourcePrinter tempPrinterHolder; - - protected final CppTypes cppTypes; - - protected String className = ""; - - protected int currentArrayCount; - - protected Set forLoopsWithCondition = new HashSet(); - - protected boolean inPrimitiveNoLengthFieldDeclarator = false; - - protected boolean inField = false; - - protected boolean inArray = false; - - 
protected final SymbolTable symbolTable; - - protected String definePrefix; - - protected String javaClassName; - - protected boolean suppressPointer = false; - - private final List staticReleases = new LinkedList(); - - private boolean inConstructorBody = false; - - private String currentMethod = null; - - private Set labels = null; - - private boolean destructor; - - protected boolean inStatic = false; - - private boolean reportTransitions = false; - - private int stateLoopCallCount = 0; - - /** - * @param cppTypes - */ - public CppVisitor(CppTypes cppTypes, SymbolTable symbolTable) { - this.cppTypes = cppTypes; - this.symbolTable = symbolTable; - staticInitializerPrinter.indent(); - } - - public String getSource() { - return printer.getSource(); - } - - private String classNameFromExpression(Expression e) { - if (e instanceof NameExpr) { - NameExpr nameExpr = (NameExpr) e; - String name = nameExpr.getName(); - if (Arrays.binarySearch(CLASS_NAMES, name) > -1) { - return name; - } - } - return null; - } - - protected void printModifiers(int modifiers) { - } - - private void printMembers(List members, - LocalSymbolTable arg) { - for (BodyDeclaration member : members) { - if ("Tokenizer".equals(javaClassName) - && member instanceof MethodDeclaration - && "stateLoop".equals(((MethodDeclaration) member).getName())) { - reportTransitions = true; - } - member.accept(this, arg); - reportTransitions = false; - } - } - - private void printTypeArgs(List args, LocalSymbolTable arg) { - // if (args != null) { - // printer.print("<"); - // for (Iterator i = args.iterator(); i.hasNext();) { - // Type t = i.next(); - // t.accept(this, arg); - // if (i.hasNext()) { - // printer.print(", "); - // } - // } - // printer.print(">"); - // } - } - - private void printTypeParameters(List args, - LocalSymbolTable arg) { - // if (args != null) { - // printer.print("<"); - // for (Iterator i = args.iterator(); i.hasNext();) { - // TypeParameter t = i.next(); - // t.accept(this, arg); - // 
if (i.hasNext()) { - // printer.print(", "); - // } - // } - // printer.print(">"); - // } - } - - public void visit(Node n, LocalSymbolTable arg) { - throw new IllegalStateException(n.getClass().getName()); - } - - public void visit(CompilationUnit n, LocalSymbolTable arg) { - if (n.getTypes() != null) { - for (Iterator i = n.getTypes().iterator(); i.hasNext();) { - i.next().accept(this, arg); - printer.printLn(); - if (i.hasNext()) { - printer.printLn(); - } - } - } - } - - public void visit(PackageDeclaration n, LocalSymbolTable arg) { - throw new IllegalStateException(n.getClass().getName()); - } - - public void visit(NameExpr n, LocalSymbolTable arg) { - if ("mappingLangToXmlLang".equals(n.getName())) { - printer.print("0"); - } else if ("LANG_NS".equals(n.getName())) { - printer.print("ALL_NO_NS"); - } else if ("LANG_PREFIX".equals(n.getName())) { - printer.print("ALL_NO_PREFIX"); - } else if ("HTML_LOCAL".equals(n.getName())) { - printer.print(cppTypes.localForLiteral("html")); - } else if ("documentModeHandler".equals(n.getName())) { - printer.print("this"); - } else if ("errorHandler".equals(n.getName())) { - printer.print(cppTypes.errorHandler()); - } else if ("MOZ_FALLTHROUGH".equals(n.getName())) { - printer.print("[[fallthrough]]"); - } else { - printer.print(n.getName()); - } - } - - public void visit(QualifiedNameExpr n, LocalSymbolTable arg) { - n.getQualifier().accept(this, arg); - printer.print("."); - printer.print(n.getName()); - } - - public void visit(ImportDeclaration n, LocalSymbolTable arg) { - throw new IllegalStateException(n.getClass().getName()); - } - - public void visit(ClassOrInterfaceDeclaration n, LocalSymbolTable arg) { - javaClassName = n.getName(); - className = cppTypes.classPrefix() + javaClassName; - definePrefix = makeDefinePrefix(className); - - startClassDeclaration(); - - if (n.getMembers() != null) { - printMembers(n.getMembers(), arg); - } - - endClassDeclaration(); - } - - private String makeDefinePrefix(String name) { 
- StringBuilder sb = new StringBuilder(); - boolean prevWasLowerCase = true; - for (int i = 0; i < name.length(); i++) { - char c = name.charAt(i); - if (c >= 'a' && c <= 'z') { - sb.append((char) (c - 0x20)); - prevWasLowerCase = true; - } else if (c >= 'A' && c <= 'Z') { - if (prevWasLowerCase) { - sb.append('_'); - } - sb.append(c); - prevWasLowerCase = false; - } else if (c >= '0' && c <= '9') { - sb.append(c); - prevWasLowerCase = false; - } - } - sb.append('_'); - return sb.toString(); - } - - protected void endClassDeclaration() { - printer.printLn("void"); - printer.print(className); - printer.printLn("::initializeStatics()"); - printer.printLn("{"); - printer.print(staticInitializerPrinter.getSource()); - printer.printLn("}"); - printer.printLn(); - - printer.printLn("void"); - printer.print(className); - printer.printLn("::releaseStatics()"); - printer.printLn("{"); - printer.indent(); - for (String del : staticReleases) { - printer.print(del); - printer.printLn(";"); - } - printer.unindent(); - printer.printLn("}"); - printer.printLn(); - - if (cppTypes.hasSupplement(javaClassName)) { - printer.printLn(); - printer.print("#include \""); - printer.print(className); - printer.printLn("CppSupplement.h\""); - } - } - - protected void startClassDeclaration() { - printer.print("#define "); - printer.print(className); - printer.printLn("_cpp__"); - printer.printLn(); - - String[] incs = cppTypes.boilerplateIncludes(javaClassName); - for (int i = 0; i < incs.length; i++) { - String inc = incs[i]; - printer.print("#include \""); - printer.print(inc); - printer.printLn(".h\""); - } - - printer.printLn(); - - for (int i = 0; i < Main.H_LIST.length; i++) { - String klazz = Main.H_LIST[i]; - if (!klazz.equals(javaClassName)) { - printer.print("#include \""); - printer.print(cppTypes.classPrefix()); - printer.print(klazz); - printer.printLn(".h\""); - } - } - - printer.printLn(); - printer.print("#include \""); - printer.print(className); - printer.printLn(".h\""); - 
printer.printLn(); - } - - public void visit(EmptyTypeDeclaration n, LocalSymbolTable arg) { - if (n.getJavaDoc() != null) { - n.getJavaDoc().accept(this, arg); - } - printer.print(";"); - } - - public void visit(JavadocComment n, LocalSymbolTable arg) { - printer.print("/**"); - printer.print(n.getContent()); - printer.printLn("*/"); - } - - public void visit(ClassOrInterfaceType n, LocalSymbolTable arg) { - if (n.getScope() != null) { - n.getScope().accept(this, arg); - printer.print("."); - throw new IllegalStateException("Can't translate nested classes."); - } - String name = n.getName(); - if ("String".equals(name)) { - if (local()) { - name = inField || inArray ? cppTypes.localType() : cppTypes.weakLocalType(); - } else if (prefix()) { - name = cppTypes.prefixType(); - } else if (nsUri()) { - name = cppTypes.nsUriType(); - } else if (literal()) { - name = cppTypes.literalType(); - } else if (characterName()) { - name = cppTypes.characterNameType(); - } else { - name = cppTypes.stringType(); - } - } else if ("T".equals(name) || "Object".equals(name)) { - if (htmlCreator()) { - name = cppTypes.htmlCreatorType(); - } else if (svgCreator()) { - name = cppTypes.svgCreatorType(); - } else if (creator()) { - name = cppTypes.creatorType(); - } else { - name = cppTypes.nodeType(); - } - } else if ("TokenHandler".equals(name)) { - name = cppTypes.classPrefix() + "TreeBuilder*"; - } else if ("EncodingDeclarationHandler".equals(name)) { - name = cppTypes.encodingDeclarationHandlerType(); - } else if ("Interner".equals(name)) { - name = cppTypes.internerType(); - } else if ("TreeBuilderState".equals(name)) { - name = cppTypes.treeBuilderStateType(); - } else if ("DocumentModeHandler".equals(name)) { - name = cppTypes.documentModeHandlerType(); - } else if ("DocumentMode".equals(name)) { - name = cppTypes.documentModeType(); - } else { - name = cppTypes.classPrefix() + name + (suppressPointer ? 
"" : "*"); - } - printer.print(name); - printTypeArgs(n.getTypeArgs(), arg); - } - - protected boolean inHeader() { - return false; - } - - public void visit(TypeParameter n, LocalSymbolTable arg) { - printer.print(n.getName()); - if (n.getTypeBound() != null) { - printer.print(" extends "); - for (Iterator i = n.getTypeBound().iterator(); i.hasNext();) { - ClassOrInterfaceType c = i.next(); - c.accept(this, arg); - if (i.hasNext()) { - printer.print(" & "); - } - } - } - } - - public void visit(PrimitiveType n, LocalSymbolTable arg) { - switch (n.getType()) { - case Boolean: - printer.print(cppTypes.booleanType()); - break; - case Byte: - printer.print(cppTypes.byteType()); - break; - case Char: - printer.print(cppTypes.charType()); - break; - case Double: - throw new IllegalStateException("Unsupported primitive."); - case Float: - throw new IllegalStateException("Unsupported primitive."); - case Int: - if (unsigned()) { - printer.print(cppTypes.unsignedIntType()); - } else { - printer.print(cppTypes.intType()); - } - break; - case Long: - throw new IllegalStateException("Unsupported primitive."); - case Short: - throw new IllegalStateException("Unsupported primitive."); - } - } - - public void visit(ReferenceType n, LocalSymbolTable arg) { - if (isConst()) { - printer.print("const "); - } - boolean wasInArray = inArray; - if (n.getArrayCount() > 0) { - inArray = true; - } - if (noLength()) { - n.getType().accept(this, arg); - for (int i = 0; i < n.getArrayCount(); i++) { - if (!inPrimitiveNoLengthFieldDeclarator) { - printer.print("*"); - } - } - } else { - for (int i = 0; i < n.getArrayCount(); i++) { - if (inStatic) { - printer.print(cppTypes.staticArrayTemplate()); - } else { - if (auto()) { - printer.print(cppTypes.autoArrayTemplate()); - } else { - printer.print(cppTypes.arrayTemplate()); - } - } - printer.print("<"); - } - n.getType().accept(this, arg); - for (int i = 0; i < n.getArrayCount(); i++) { - printer.print(", "); - 
printer.print(cppTypes.intType()); - printer.print(">"); - } - } - if (n.getArrayCount() > 0) { - inArray = wasInArray; - } - } - - public void visit(WildcardType n, LocalSymbolTable arg) { - printer.print("?"); - if (n.getExtends() != null) { - printer.print(" extends "); - n.getExtends().accept(this, arg); - } - if (n.getSuper() != null) { - printer.print(" super "); - n.getSuper().accept(this, arg); - } - } - - public void visit(FieldDeclaration n, LocalSymbolTable arg) { - currentAnnotations = n.getAnnotations(); - fieldDeclaration(n, arg); - currentAnnotations = null; - } - - protected boolean isNonToCharArrayMethodCall(Expression exp) { - if (exp instanceof MethodCallExpr) { - MethodCallExpr mce = (MethodCallExpr) exp; - return !"toCharArray".equals(mce.getName()); - } else { - return false; - } - } - - protected void fieldDeclaration(FieldDeclaration n, LocalSymbolTable arg) { - inField = true; - tempPrinterHolder = printer; - printer = staticInitializerPrinter; - int modifiers = n.getModifiers(); - List variables = n.getVariables(); - VariableDeclarator declarator = variables.get(0); - if (ModifierSet.isStatic(modifiers) && ModifierSet.isFinal(modifiers) - && !(n.getType() instanceof PrimitiveType) - && declarator.getInit() != null) { - if (n.getType() instanceof ReferenceType) { - ReferenceType rt = (ReferenceType) n.getType(); - currentArrayCount = rt.getArrayCount(); - if (currentArrayCount > 0) { - if (currentArrayCount != 1) { - throw new IllegalStateException( - "Multidimensional arrays not supported. 
" + n); - } - if (noLength()) { - if (rt.getType() instanceof PrimitiveType) { - inPrimitiveNoLengthFieldDeclarator = true; - printer = tempPrinterHolder; - n.getType().accept(this, arg); - printer.print(" "); - printer.print(className); - printer.print("::"); - declarator.getId().accept(this, arg); - - printer.print(" = "); - - declarator.getInit().accept(this, arg); - - printer.printLn(";"); - printer = staticInitializerPrinter; - } else { - printer = tempPrinterHolder; - n.getType().accept(this, arg); - printer.print(" "); - printer.print(className); - printer.print("::"); - declarator.getId().accept(this, arg); - - printer.printLn(" = 0;"); - printer = staticInitializerPrinter; - - staticReleases.add("delete[] " - + declarator.getId().getName()); - - ArrayInitializerExpr aie = (ArrayInitializerExpr) declarator.getInit(); - - declarator.getId().accept(this, arg); - printer.print(" = new "); - // suppressPointer = true; - rt.getType().accept(this, arg); - // suppressPointer = false; - printer.print("["); - printer.print("" + aie.getValues().size()); - printer.printLn("];"); - - printArrayInit(declarator.getId(), aie.getValues(), - arg); - } - } else if ((rt.getType() instanceof PrimitiveType) || "String".equals(rt.getType().toString())) { - printer = tempPrinterHolder; - printer.print("static "); - rt.getType().accept(this, arg); - printer.print(" const "); - declarator.getId().accept(this, arg); - printer.print("_DATA[] = "); - declarator.getInit().accept(this, arg); - printer.printLn(";"); - printer.print(cppTypes.staticArrayTemplate()); - printer.print("<"); - suppressPointer = true; - rt.getType().accept(this, arg); - suppressPointer = false; - printer.print(", "); - printer.print(cppTypes.intType()); - printer.print("> "); - printer.print(className); - printer.print("::"); - declarator.getId().accept(this, arg); - printer.print(" = { "); - declarator.getId().accept(this, arg); - printer.print("_DATA, "); - printer.print(cppTypes.arrayLengthMacro()); - 
printer.print("("); - declarator.getId().accept(this, arg); - printer.printLn("_DATA) };"); - printer = staticInitializerPrinter; - } else if (isNonToCharArrayMethodCall(declarator.getInit())) { - staticReleases.add(declarator.getId().getName() - + ".release()"); - declarator.getId().accept(this, arg); - printer.print(" = "); - if (declarator.getInit() instanceof ArrayInitializerExpr) { - - ArrayInitializerExpr aie = (ArrayInitializerExpr) declarator.getInit(); - printer.print(cppTypes.arrayTemplate()); - printer.print("<"); - suppressPointer = true; - rt.getType().accept(this, arg); - suppressPointer = false; - printer.print(", "); - printer.print(cppTypes.intType()); - printer.print(">::"); - printer.print(cppTypes.newArrayCreator()); - printer.print("("); - printer.print("" + aie.getValues().size()); - printer.printLn(");"); - printArrayInit(declarator.getId(), aie.getValues(), - arg); - } else { - declarator.getInit().accept(this, arg); - printer.printLn(";"); - } - } - } else { - if (ModifierSet.isStatic(modifiers)) { - printer = tempPrinterHolder; - n.getType().accept(this, arg); - printer.print(" "); - printer.print(className); - printer.print("::"); - String clazzName = n.getType().toString(); - String field = declarator.getId().toString(); - if (symbolTable.isAttributeOrElementName(clazzName, field)) { - if ("AttributeName".equals(clazzName)) { - printer.print("ATTR_"); - } else if ("ElementName".equals(clazzName)) { - printer.print("ELT_"); - } - } - declarator.getId().accept(this, arg); - printer.print(" = "); - printer.print(cppTypes.nullLiteral()); - printer.printLn(";"); - printer = staticInitializerPrinter; - } - - if ("AttributeName".equals(n.getType().toString())) { - printer.print("ATTR_"); - staticReleases.add("delete ATTR_" - + declarator.getId().getName()); - } else if ("ElementName".equals(n.getType().toString())) { - printer.print("ELT_"); - staticReleases.add("delete ELT_" - + declarator.getId().getName()); - } else { - 
staticReleases.add("delete " - + declarator.getId().getName()); - } - declarator.accept(this, arg); - printer.printLn(";"); - } - } else { - throw new IllegalStateException( - "Non-reference, non-primitive fields not supported."); - } - } - currentArrayCount = 0; - printer = tempPrinterHolder; - inPrimitiveNoLengthFieldDeclarator = false; - inField = false; - } - - private void printArrayInit(VariableDeclaratorId variableDeclaratorId, - List values, LocalSymbolTable arg) { - for (int i = 0; i < values.size(); i++) { - Expression exp = values.get(i); - variableDeclaratorId.accept(this, arg); - printer.print("["); - printer.print("" + i); - printer.print("] = "); - if (exp instanceof NameExpr) { - if ("AttributeName".equals(javaClassName)) { - printer.print("ATTR_"); - } else if ("ElementName".equals(javaClassName)) { - printer.print("ELT_"); - } - } - exp.accept(this, arg); - printer.printLn(";"); - } - } - - public void visit(VariableDeclarator n, LocalSymbolTable arg) { - n.getId().accept(this, arg); - - if (n.getInit() != null) { - printer.print(" = "); - n.getInit().accept(this, arg); - } - } - - public void visit(VariableDeclaratorId n, LocalSymbolTable arg) { - printer.print(n.getName()); - if (noLength()) { - for (int i = 0; i < currentArrayCount; i++) { - if (inPrimitiveNoLengthFieldDeclarator) { - printer.print("[]"); - } - } - } - for (int i = 0; i < n.getArrayCount(); i++) { - printer.print("[]"); - } - } - - public void visit(ArrayInitializerExpr n, LocalSymbolTable arg) { - printer.print("{"); - if (n.getValues() != null) { - printer.print(" "); - for (Iterator i = n.getValues().iterator(); i.hasNext();) { - Expression expr = i.next(); - expr.accept(this, arg); - if (i.hasNext()) { - printer.print(", "); - } - } - printer.print(" "); - } - printer.print("}"); - } - - public void visit(VoidType n, LocalSymbolTable arg) { - printer.print("void"); - } - - public void visit(ArrayAccessExpr n, LocalSymbolTable arg) { - n.getName().accept(this, arg); - 
printer.print("["); - n.getIndex().accept(this, arg); - printer.print("]"); - } - - public void visit(ArrayCreationExpr n, LocalSymbolTable arg) { - // printer.print("new "); - // n.getType().accept(this, arg); - // printTypeArgs(n.getTypeArgs(), arg); - - inArray = true; - if (n.getDimensions() != null) { - if (noLength()) { - for (Expression dim : n.getDimensions()) { - printer.print("new "); - n.getType().accept(this, arg); - printer.print("["); - dim.accept(this, arg); - printer.print("]"); - } - } else { - for (Expression dim : n.getDimensions()) { - printer.print(cppTypes.arrayTemplate()); - printer.print("<"); - n.getType().accept(this, arg); - printer.print(", "); - printer.print(cppTypes.intType()); - printer.print(">::"); - printer.print(cppTypes.newArrayCreator()); - printer.print("("); - dim.accept(this, arg); - printer.print(")"); - } - } - if (n.getArrayCount() > 0) { - throw new IllegalStateException( - "Nested array allocation not supported. " - + n.toString()); - } - } else { - throw new IllegalStateException( - "Array initializer as part of array creation not supported. 
" - + n.toString()); - } - inArray = false; - } - - public void visit(AssignExpr n, LocalSymbolTable arg) { - if (inConstructorBody) { - n.getTarget().accept(this, arg); - printer.print("("); - n.getValue().accept(this, arg); - printer.print(")"); - } else { - n.getTarget().accept(this, arg); - printer.print(" "); - switch (n.getOperator()) { - case assign: - printer.print("="); - break; - case and: - printer.print("&="); - break; - case or: - printer.print("|="); - break; - case xor: - printer.print("^="); - break; - case plus: - printer.print("+="); - break; - case minus: - printer.print("-="); - break; - case rem: - printer.print("%="); - break; - case slash: - printer.print("/="); - break; - case star: - printer.print("*="); - break; - case lShift: - printer.print("<<="); - break; - case rSignedShift: - printer.print(">>="); - break; - case rUnsignedShift: - printer.print(">>>="); - break; - } - printer.print(" "); - n.getValue().accept(this, arg); - } - } - - public void visit(BinaryExpr n, LocalSymbolTable arg) { - Expression right = n.getRight(); - switch (n.getOperator()) { - case notEquals: - if (right instanceof NullLiteralExpr) { - printer.print("!!"); - n.getLeft().accept(this, arg); - return; - } else if (right instanceof IntegerLiteralExpr) { - IntegerLiteralExpr ile = (IntegerLiteralExpr) right; - if ("0".equals(ile.getValue())) { - n.getLeft().accept(this, arg); - return; - } - } - case equals: - if (right instanceof NullLiteralExpr) { - printer.print("!"); - n.getLeft().accept(this, arg); - return; - } else if (right instanceof IntegerLiteralExpr) { - IntegerLiteralExpr ile = (IntegerLiteralExpr) right; - if ("0".equals(ile.getValue())) { - printer.print("!"); - n.getLeft().accept(this, arg); - return; - } - } - default: - // fall thru - } - - n.getLeft().accept(this, arg); - printer.print(" "); - switch (n.getOperator()) { - case or: - printer.print("||"); - break; - case and: - printer.print("&&"); - break; - case binOr: - printer.print("|"); - 
break; - case binAnd: - printer.print("&"); - break; - case xor: - printer.print("^"); - break; - case equals: - printer.print("=="); - break; - case notEquals: - printer.print("!="); - break; - case less: - printer.print("<"); - break; - case greater: - printer.print(">"); - break; - case lessEquals: - printer.print("<="); - break; - case greaterEquals: - printer.print(">="); - break; - case lShift: - printer.print("<<"); - break; - case rSignedShift: - printer.print(">>"); - break; - case rUnsignedShift: - printer.print(">>>"); - break; - case plus: - printer.print("+"); - break; - case minus: - printer.print("-"); - break; - case times: - printer.print("*"); - break; - case divide: - printer.print("/"); - break; - case remainder: - printer.print("%"); - break; - } - printer.print(" "); - n.getRight().accept(this, arg); - } - - public void visit(CastExpr n, LocalSymbolTable arg) { - printer.print("("); - n.getType().accept(this, arg); - printer.print(") "); - n.getExpr().accept(this, arg); - } - - public void visit(ClassExpr n, LocalSymbolTable arg) { - n.getType().accept(this, arg); - printer.print(".class"); - } - - public void visit(ConditionalExpr n, LocalSymbolTable arg) { - n.getCondition().accept(this, arg); - printer.print(" ? 
"); - n.getThenExpr().accept(this, arg); - printer.print(" : "); - n.getElseExpr().accept(this, arg); - } - - public void visit(EnclosedExpr n, LocalSymbolTable arg) { - printer.print("("); - n.getInner().accept(this, arg); - printer.print(")"); - } - - public void visit(FieldAccessExpr n, LocalSymbolTable arg) { - Expression scope = n.getScope(); - String field = n.getField(); - if (inConstructorBody && (scope instanceof ThisExpr)) { - printer.print(field); - } else if ("length".equals(field) && !(scope instanceof ThisExpr)) { - scope.accept(this, arg); - printer.print(".length"); - } else if ("MAX_VALUE".equals(field) - && "Integer".equals(scope.toString())) { - printer.print(cppTypes.maxInteger()); - } else { - String clazzName = classNameFromExpression(scope); - if (clazzName == null) { - if ("DocumentMode".equals(scope.toString())) { - // printer.print(cppTypes.documentModeType()); - // printer.print("."); - } else if ("creator".equals(scope.toString()) || "this.creator".equals(scope.toString())) { - scope.accept(this, arg); - printer.print("."); - } else { - scope.accept(this, arg); - printer.print("->"); - } - } else { - printer.print(cppTypes.classPrefix()); - printer.print(clazzName); - printer.print("::"); - if (symbolTable.isAttributeOrElementName(clazzName, field)) { - if ("AttributeName".equals(clazzName)) { - printer.print("ATTR_"); - } else if ("ElementName".equals(clazzName)) { - printer.print("ELT_"); - } - } - } - printer.print(field); - } - } - - public void visit(InstanceOfExpr n, LocalSymbolTable arg) { - n.getExpr().accept(this, arg); - printer.print(" instanceof "); - n.getType().accept(this, arg); - } - - public void visit(CharLiteralExpr n, LocalSymbolTable arg) { - printCharLiteral(n.getValue()); - } - - private void printCharLiteral(String val) { - if (val.length() != 1) { - printer.print("'"); - printer.print(val); - printer.print("'"); - return; - } - char c = val.charAt(0); - switch (c) { - case 0: - printer.print("'\\0'"); - break; - 
case '\n': - printer.print("'\\n'"); - break; - case '\t': - printer.print("'\\t'"); - break; - case 0xB: - printer.print("'\\v'"); - break; - case '\b': - printer.print("'\\b'"); - break; - case '\r': - printer.print("'\\r'"); - break; - case 0xC: - printer.print("'\\f'"); - break; - case 0x7: - printer.print("'\\a'"); - break; - case '\\': - printer.print("'\\\\'"); - break; - case '?': - printer.print("'\\?'"); - break; - case '\'': - printer.print("'\\''"); - break; - case '"': - printer.print("'\\\"'"); - break; - default: - if (c >= 0x20 && c <= 0x7F) { - printer.print("'" + c); - printer.print("'"); - } else { - printer.print("0x"); - printer.print(Integer.toHexString(c)); - } - break; - } - } - - public void visit(DoubleLiteralExpr n, LocalSymbolTable arg) { - printer.print(n.getValue()); - } - - public void visit(IntegerLiteralExpr n, LocalSymbolTable arg) { - printer.print(n.getValue()); - } - - public void visit(LongLiteralExpr n, LocalSymbolTable arg) { - printer.print(n.getValue()); - } - - public void visit(IntegerLiteralMinValueExpr n, LocalSymbolTable arg) { - printer.print(n.getValue()); - } - - public void visit(LongLiteralMinValueExpr n, LocalSymbolTable arg) { - printer.print(n.getValue()); - } - - public void visit(StringLiteralExpr n, LocalSymbolTable arg) { - String val = n.getValue(); - if ("http://www.w3.org/1999/xhtml".equals(val)) { - printer.print(cppTypes.xhtmlNamespaceLiteral()); - } else if ("http://www.w3.org/2000/svg".equals(val)) { - printer.print(cppTypes.svgNamespaceLiteral()); - } else if ("http://www.w3.org/2000/xmlns/".equals(val)) { - printer.print(cppTypes.xmlnsNamespaceLiteral()); - } else if ("http://www.w3.org/XML/1998/namespace".equals(val)) { - printer.print(cppTypes.xmlNamespaceLiteral()); - } else if ("http://www.w3.org/1999/xlink".equals(val)) { - printer.print(cppTypes.xlinkNamespaceLiteral()); - } else if ("http://www.w3.org/1998/Math/MathML".equals(val)) { - printer.print(cppTypes.mathmlNamespaceLiteral()); - } 
else if ("".equals(val) && "AttributeName".equals(javaClassName)) { - printer.print(cppTypes.noNamespaceLiteral()); - } else if (val.startsWith("-/") || val.startsWith("+//") - || val.startsWith("http://") || val.startsWith("XSLT")) { - printer.print(cppTypes.stringForLiteral(val)); - } else if (("hidden".equals(val) || "isindex".equals(val) - || "text/html".equals(val) - || "application/xhtml+xml".equals(val) || "content-type".equals(val)) - && "TreeBuilder".equals(javaClassName)) { - printer.print(cppTypes.stringForLiteral(val)); - } else if ("isQuirky".equals(currentMethod) && "html".equals(val)) { - printer.print(cppTypes.stringForLiteral(val)); - } else { - printer.print(cppTypes.localForLiteral(val)); - } - } - - public void visit(BooleanLiteralExpr n, LocalSymbolTable arg) { - if (n.getValue()) { - printer.print(cppTypes.trueLiteral()); - } else { - printer.print(cppTypes.falseLiteral()); - } - } - - public void visit(NullLiteralExpr n, LocalSymbolTable arg) { - printer.print(cppTypes.nullLiteral()); - } - - public void visit(ThisExpr n, LocalSymbolTable arg) { - if (n.getClassExpr() != null) { - n.getClassExpr().accept(this, arg); - printer.print("."); - } - printer.print("this"); - } - - public void visit(SuperExpr n, LocalSymbolTable arg) { - if (n.getClassExpr() != null) { - n.getClassExpr().accept(this, arg); - printer.print("."); - } - printer.print("super"); - } - - public void visit(MethodCallExpr n, LocalSymbolTable arg) { - if ("releaseArray".equals(n.getName()) - && "Portability".equals(n.getScope().toString())) { - n.getArgs().get(0).accept(this, arg); - printer.print(".release()"); - } else if ("releaseString".equals(n.getName()) - && "Portability".equals(n.getScope().toString())) { - n.getArgs().get(0).accept(this, arg); - printer.print(".Release()"); - } else if ("deleteArray".equals(n.getName()) - && "Portability".equals(n.getScope().toString())) { - printer.print("delete[] "); - n.getArgs().get(0).accept(this, arg); - } else if 
("delete".equals(n.getName()) - && "Portability".equals(n.getScope().toString())) { - printer.print("delete "); - n.getArgs().get(0).accept(this, arg); - } else if (("retainElement".equals(n.getName()) || "releaseElement".equals(n.getName())) - && "Portability".equals(n.getScope().toString())) { - // ignore for now - } else if ("transition".equals(n.getName()) - && n.getScope() == null) { - visitTransition(n, arg); - } else if ("arraycopy".equals(n.getName()) - && "System".equals(n.getScope().toString())) { - printer.print(cppTypes.arrayCopy()); - printer.print("("); - if (n.getArgs().get(0).toString().equals( - n.getArgs().get(2).toString())) { - n.getArgs().get(0).accept(this, arg); - printer.print(", "); - n.getArgs().get(1).accept(this, arg); - printer.print(", "); - n.getArgs().get(3).accept(this, arg); - printer.print(", "); - n.getArgs().get(4).accept(this, arg); - } else if (n.getArgs().get(1).toString().equals("0") - && n.getArgs().get(3).toString().equals("0")) { - n.getArgs().get(0).accept(this, arg); - printer.print(", "); - n.getArgs().get(2).accept(this, arg); - printer.print(", "); - n.getArgs().get(4).accept(this, arg); - } else { - for (Iterator i = n.getArgs().iterator(); i.hasNext();) { - Expression e = i.next(); - e.accept(this, arg); - if (i.hasNext()) { - printer.print(", "); - } - } - } - printer.print(")"); - } else if ("binarySearch".equals(n.getName()) - && "Arrays".equals(n.getScope().toString())) { - n.getArgs().get(0).accept(this, arg); - printer.print(".binarySearch("); - n.getArgs().get(1).accept(this, arg); - printer.print(")"); - } else { - Expression scope = n.getScope(); - if (scope != null) { - if (scope instanceof StringLiteralExpr) { - StringLiteralExpr strLit = (StringLiteralExpr) scope; - String str = strLit.getValue(); - if (!"toCharArray".equals(n.getName())) { - throw new IllegalStateException( - "Unsupported method call on string literal: " - + n.getName()); - } - printer.print("{ "); - for (int i = 0; i < str.length(); 
i++) { - char c = str.charAt(i); - if (i != 0) { - printer.print(", "); - } - printCharLiteral("" + c); - } - printer.print(" }"); - return; - } else { - String clazzName = classNameFromExpression(scope); - if (clazzName == null) { - scope.accept(this, arg); - if ("length".equals(n.getName()) - || "charAt".equals(n.getName()) - || "creator".equals(scope.toString())) { - printer.print("."); - } else { - printer.print("->"); - } - } else { - printer.print(cppTypes.classPrefix()); - printer.print(clazzName); - printer.print("::"); - } - } - } - printTypeArgs(n.getTypeArgs(), arg); - printer.print(n.getName()); - if ("stateLoop".equals(n.getName()) - && "Tokenizer".equals(javaClassName) - && cppTypes.stateLoopPolicies().length > 0) { - printer.print("<"); - printer.print(cppTypes.stateLoopPolicies()[stateLoopCallCount]); - printer.print(">"); - stateLoopCallCount++; - } - printer.print("("); - if (n.getArgs() != null) { - for (Iterator i = n.getArgs().iterator(); i.hasNext();) { - Expression e = i.next(); - e.accept(this, arg); - if (i.hasNext()) { - printer.print(", "); - } - } - } - printer.print(")"); - } - } - - public void visit(ObjectCreationExpr n, LocalSymbolTable arg) { - if (n.getScope() != null) { - n.getScope().accept(this, arg); - printer.print("."); - } - - printer.print("new "); - - suppressPointer = true; - printTypeArgs(n.getTypeArgs(), arg); - n.getType().accept(this, arg); - suppressPointer = false; - - if ("AttributeName".equals(n.getType().getName())) { - List args = n.getArgs(); - while (args != null && args.size() > 3) { - args.remove(3); - } - } - - printer.print("("); - if (n.getArgs() != null) { - for (Iterator i = n.getArgs().iterator(); i.hasNext();) { - Expression e = i.next(); - e.accept(this, arg); - if (i.hasNext()) { - printer.print(", "); - } - } - } - printer.print(")"); - - if (n.getAnonymousClassBody() != null) { - printer.printLn(" {"); - printer.indent(); - printMembers(n.getAnonymousClassBody(), arg); - printer.unindent(); - 
printer.print("}"); - } - } - - public void visit(UnaryExpr n, LocalSymbolTable arg) { - switch (n.getOperator()) { - case positive: - printer.print("+"); - break; - case negative: - printer.print("-"); - break; - case inverse: - printer.print("~"); - break; - case not: - printer.print("!"); - break; - case preIncrement: - printer.print("++"); - break; - case preDecrement: - printer.print("--"); - break; - } - - n.getExpr().accept(this, arg); - - switch (n.getOperator()) { - case posIncrement: - printer.print("++"); - break; - case posDecrement: - printer.print("--"); - break; - } - } - - public void visit(ConstructorDeclaration n, LocalSymbolTable arg) { - if ("TreeBuilder".equals(javaClassName)) { - return; - } - - arg = new LocalSymbolTable(javaClassName, symbolTable); - - // if (n.getJavaDoc() != null) { - // n.getJavaDoc().accept(this, arg); - // } - currentAnnotations = n.getAnnotations(); - - printModifiers(n.getModifiers()); - - printMethodNamespace(); - printConstructorExplicit(n.getParameters()); - printer.print(className); - currentAnnotations = null; - - printer.print("("); - if (n.getParameters() != null) { - for (Iterator i = n.getParameters().iterator(); i.hasNext();) { - Parameter p = i.next(); - p.accept(this, arg); - if (i.hasNext()) { - printer.print(", "); - } - } - } - printer.print(")"); - - printConstructorBody(n.getBlock(), arg); - } - - protected void printConstructorExplicit(List params) { - } - - protected void printConstructorBody(BlockStmt block, LocalSymbolTable arg) { - inConstructorBody = true; - List statements = block.getStmts(); - List nonAssigns = new LinkedList(); - int i = 0; - boolean needOutdent = false; - for (Statement statement : statements) { - if (statement instanceof ExpressionStmt - && ((ExpressionStmt) statement).getExpression() instanceof AssignExpr) { - printer.printLn(); - if (i == 0) { - // : firstMember(arg) - printer.indent(); - printer.print(": "); - needOutdent = true; - } else { - // , secondMember(arg) - 
printer.print(", "); - } - statement.accept(this, arg); - i++; - } else { - nonAssigns.add(statement); - } - } - if (needOutdent) { - printer.unindent(); - } - inConstructorBody = false; - printer.printLn(); - printer.printLn("{"); - printer.indent(); - String boilerplate = cppTypes.constructorBoilerplate(className); - if (boilerplate != null) { - printer.printLn(boilerplate); - } - for (Statement statement : nonAssigns) { - statement.accept(this, arg); - printer.printLn(); - } - printer.unindent(); - printer.printLn("}"); - printer.printLn(); - } - - public void visit(MethodDeclaration n, LocalSymbolTable arg) { - arg = new LocalSymbolTable(javaClassName, symbolTable); - if (isPrintableMethod(n.getModifiers()) - && !(n.getName().equals("endCoalescing") || n.getName().equals( - "startCoalescing"))) { - printMethodDeclaration(n, arg); - } - } - - private boolean isPrintableMethod(int modifiers) { - return !(ModifierSet.isAbstract(modifiers) || (ModifierSet.isProtected(modifiers) && !(ModifierSet.isFinal(modifiers) || "Tokenizer".equals(javaClassName)))); - } - - protected void printMethodDeclaration(MethodDeclaration n, - LocalSymbolTable arg) { - if (n.getName().startsWith("fatal") || n.getName().startsWith("err") - || n.getName().startsWith("warn") - || n.getName().startsWith("maybeErr") - || n.getName().startsWith("maybeWarn") - || n.getName().startsWith("note") - || "releaseArray".equals(n.getName()) - || "releaseString".equals(n.getName()) - || "deleteArray".equals(n.getName()) - || "delete".equals(n.getName())) { - return; - } - - currentMethod = n.getName(); - - destructor = "destructor".equals(currentMethod); - - // if (n.getJavaDoc() != null) { - // n.getJavaDoc().accept(this, arg); - // } - currentAnnotations = n.getAnnotations(); - boolean isInline = inline(); - if (isInline && !inHeader()) { - return; - } - - if (destructor) { - printModifiers(ModifierSet.PUBLIC); - } else { - printModifiers(n.getModifiers()); - } - - if 
("stateLoop".equals(currentMethod) - && "Tokenizer".equals(javaClassName) - && cppTypes.stateLoopPolicies().length > 0) { - printer.print("template"); - if (inHeader()) { - printer.print(" "); - } else { - printer.printLn(); - } - } - - printTypeParameters(n.getTypeParameters(), arg); - if (n.getTypeParameters() != null) { - printer.print(" "); - } - if (!destructor) { - n.getType().accept(this, arg); - printer.print(" "); - } - printMethodNamespace(); - if (destructor) { - printer.print("~"); - printer.print(className); - } else { - printer.print(n.getName()); - } - - printer.print("("); - if (n.getParameters() != null) { - for (Iterator i = n.getParameters().iterator(); i.hasNext();) { - Parameter p = i.next(); - p.accept(this, arg); - if (i.hasNext()) { - printer.print(", "); - } - } - } - printer.print(")"); - - for (int i = 0; i < n.getArrayCount(); i++) { - printer.print("[]"); - } - - if (override() && inHeader()) { - printer.print(" override"); - } - - currentAnnotations = null; - - if (inHeader() == isInline) { - printMethodBody(n.getBody(), arg); - } else { - printer.printLn(";"); - } - } - - private void printMethodBody(BlockStmt n, LocalSymbolTable arg) { - if (n == null) { - printer.print(";"); - } else { - printer.printLn(); - printer.printLn("{"); - printer.indent(); - if (destructor) { - String boilerplate = cppTypes.destructorBoilerplate(className); - if (boilerplate != null) { - printer.printLn(boilerplate); - } - } - if (n.getStmts() != null) { - for (Statement s : n.getStmts()) { - s.accept(this, arg); - printer.printLn(); - } - } - printer.unindent(); - printer.print("}"); - } - printer.printLn(); - printer.printLn(); - } - - protected void printMethodNamespace() { - printer.printLn(); - printer.print(className); - printer.print("::"); - } - - public void visit(Parameter n, LocalSymbolTable arg) { - currentAnnotations = n.getAnnotations(); - - arg.putLocalType(n.getId().getName(), convertType(n.getType(), - n.getModifiers())); - - 
n.getType().accept(this, arg); - if (n.isVarArgs()) { - printer.print("..."); - } - printer.print(" "); - n.getId().accept(this, arg); - currentAnnotations = null; - } - - public void visit(ExplicitConstructorInvocationStmt n, LocalSymbolTable arg) { - if (n.isThis()) { - printTypeArgs(n.getTypeArgs(), arg); - printer.print("this"); - } else { - if (n.getExpr() != null) { - n.getExpr().accept(this, arg); - printer.print("."); - } - printTypeArgs(n.getTypeArgs(), arg); - printer.print("super"); - } - printer.print("("); - if (n.getArgs() != null) { - for (Iterator i = n.getArgs().iterator(); i.hasNext();) { - Expression e = i.next(); - e.accept(this, arg); - if (i.hasNext()) { - printer.print(", "); - } - } - } - printer.print(");"); - } - - public void visit(VariableDeclarationExpr n, LocalSymbolTable arg) { - currentAnnotations = n.getAnnotations(); - - arg.putLocalType(n.getVars().get(0).toString(), convertType( - n.getType(), n.getModifiers())); - - n.getType().accept(this, arg); - printer.print(" "); - - for (Iterator i = n.getVars().iterator(); i.hasNext();) { - VariableDeclarator v = i.next(); - v.accept(this, arg); - if (i.hasNext()) { - printer.print(", "); - } - } - currentAnnotations = null; - } - - public void visit(TypeDeclarationStmt n, LocalSymbolTable arg) { - n.getTypeDeclaration().accept(this, arg); - } - - public void visit(AssertStmt n, LocalSymbolTable arg) { - String message = null; - Expression msg = n.getMessage(); - boolean hasCheck = true; - if (msg != null) { - if (msg instanceof StringLiteralExpr) { - StringLiteralExpr sle = (StringLiteralExpr) msg; - message = sle.getValue(); - } else { - throw new RuntimeException("Bad assertion message."); - } - } - String macro = cppTypes.assertionMacro(); - if (message != null && message.startsWith("RELEASE: ")) { - message = message.substring("RELEASE: ".length()); - macro = cppTypes.releaseAssertionMacro(); - Expression check = n.getCheck(); - if (check instanceof BooleanLiteralExpr) { - 
BooleanLiteralExpr expr = (BooleanLiteralExpr) check; - if (!expr.getValue()) { - hasCheck = false; - macro = cppTypes.crashMacro(); - } - } - } - if (macro != null) { - printer.print(macro); - printer.print("("); - if (hasCheck) { - n.getCheck().accept(this, arg); - } - if (message != null) { - if (hasCheck) { - printer.print(", "); - } - printer.print("\""); - for (int i = 0; i < message.length(); i++) { - char c = message.charAt(i); - if (c == '"') { - printer.print("\""); - } else if (c >= ' ' && c <= '~') { - printer.print("" + c); - } else { - throw new RuntimeException("Bad assertion message string."); - } - } - printer.print("\""); - } - printer.print(");"); - } - } - - public void visit(BlockStmt n, LocalSymbolTable arg) { - printer.printLn("{"); - if (n.getStmts() != null) { - printer.indent(); - for (Statement s : n.getStmts()) { - s.accept(this, arg); - printer.printLn(); - } - printer.unindent(); - } - printer.print("}"); - - } - - public void visit(LabeledStmt n, LocalSymbolTable arg) { - // Only conditionless for loops are needed and supported - // Not implementing general Java continue semantics in order - // to keep the generated C++ more readable. - Statement stmt = n.getStmt(); - if (stmt instanceof ForStmt) { - ForStmt forLoop = (ForStmt) stmt; - if (!(forLoop.getInit() == null && forLoop.getCompare() == null && forLoop.getUpdate() == null)) { - forLoopsWithCondition.add(n.getLabel()); - } - } else { - throw new IllegalStateException( - "Only for loop supported as labeled statement. 
Line: " - + n.getBeginLine()); - } - String label = n.getLabel(); - if (labels.contains(label)) { - printer.unindent(); - printer.print(label); - printer.indent(); - printer.printLn(":"); - } - stmt.accept(this, arg); - printer.printLn(); - label += "_end"; - if (labels.contains(label)) { - printer.unindent(); - printer.print(label); - printer.indent(); - printer.print(":;"); - } - } - - public void visit(EmptyStmt n, LocalSymbolTable arg) { - printer.print(";"); - } - - public void visit(ExpressionStmt n, LocalSymbolTable arg) { - Expression e = n.getExpression(); - if (isCompletedCharacterReference(e)) { - printer.print(cppTypes.completedCharacterReference()); - printer.print(";"); - return; - } - boolean needsCondition = isTokenizerErrorReportingExpression(e); - if (!needsCondition && isDroppedExpression(e)) { - return; - } - if (needsCondition) { - printer.print("if ("); - printer.print(cppTypes.tokenizerErrorCondition()); - printer.printLn(") {"); - printer.indent(); - } - e.accept(this, arg); - if (!inConstructorBody) { - printer.print(";"); - } - if (needsCondition) { - printer.printLn(); - printer.unindent(); - printer.print("}"); - } - } - - private void visitTransition(MethodCallExpr call, LocalSymbolTable arg) { - List args = call.getArgs(); - if (reportTransitions) { - printer.print(cppTypes.transition()); - printer.print("("); - printer.print(cppTypes.firstTransitionArg()); - printer.print(", "); - args.get(1).accept(this, arg); - printer.print(", "); - args.get(2).accept(this, arg); - printer.print(", "); - args.get(3).accept(this, arg); - printer.print(")"); - } else { - args.get(1).accept(this, arg); - } - } - - private boolean isTokenizerErrorReportingExpression(Expression e) { - if (!reportTransitions) { - return false; - } - if (e instanceof MethodCallExpr) { - MethodCallExpr methodCallExpr = (MethodCallExpr) e; - String name = methodCallExpr.getName(); - if (supportErrorReporting && !name.startsWith("errHtml4") - && 
("stateLoop".equals(currentMethod)) - && (name.startsWith("err") || name.startsWith("maybeErr"))) { - return true; - } - } - return false; - } - - private boolean isCompletedCharacterReference(Expression e) { - if (!reportTransitions) { - return false; - } - if (e instanceof MethodCallExpr) { - MethodCallExpr methodCallExpr = (MethodCallExpr) e; - String name = methodCallExpr.getName(); - if (name.equals("completedNamedCharacterReference")) { - return true; - } - } - return false; - } - - private boolean isDroppedExpression(Expression e) { - if (e instanceof MethodCallExpr) { - MethodCallExpr methodCallExpr = (MethodCallExpr) e; - String name = methodCallExpr.getName(); - if (name.startsWith("fatal") || name.startsWith("note") - || name.startsWith("errHtml4") || name.startsWith("warn") - || name.startsWith("maybeWarn")) { - return true; - } - if (supportErrorReporting - && ("stateLoop".equals(currentMethod) && !reportTransitions) - && (name.startsWith("err") || name.startsWith("maybeErr"))) { - return true; - } - if (name.equals("completedNamedCharacterReference") - && !reportTransitions) { - return true; - } - } - return false; - } - - public void visit(SwitchStmt n, LocalSymbolTable arg) { - printer.print("switch ("); - n.getSelector().accept(this, arg); - printer.printLn(") {"); - if (n.getEntries() != null) { - printer.indent(); - for (SwitchEntryStmt e : n.getEntries()) { - e.accept(this, arg); - } - printer.unindent(); - } - printer.print("}"); - - } - - public void visit(SwitchEntryStmt n, LocalSymbolTable arg) { - if (n.getLabel() != null) { - boolean isMenuitem = n.getLabel().toString().equals("MENUITEM"); - if (isMenuitem) { - printer.printWithoutIndent("#ifdef ENABLE_VOID_MENUITEM\n"); - } - printer.print("case "); - n.getLabel().accept(this, arg); - printer.print(":"); - if (isMenuitem) { - printer.printWithoutIndent("\n#endif"); - } - } else { - printer.print("default:"); - } - if (isNoStatement(n.getStmts())) { - printer.printLn(); - printer.indent(); 
- if (n.getLabel() == null) { - printer.printLn("; // fall through"); - } - printer.unindent(); - } else { - printer.printLn(" {"); - printer.indent(); - for (Statement s : n.getStmts()) { - s.accept(this, arg); - printer.printLn(); - } - printer.unindent(); - printer.printLn("}"); - } - } - - private boolean isNoStatement(List stmts) { - if (stmts == null) { - return true; - } - for (Statement statement : stmts) { - if (!isDroppableStatement(statement)) { - return false; - } - } - return true; - } - - private boolean isDroppableStatement(Statement statement) { - if (statement instanceof AssertStmt) { - return true; - } else if (statement instanceof ExpressionStmt) { - ExpressionStmt es = (ExpressionStmt) statement; - if (isDroppedExpression(es.getExpression())) { - return true; - } - } - return false; - } - - public void visit(BreakStmt n, LocalSymbolTable arg) { - if (n.getId() != null) { - printer.print(cppTypes.breakMacro()); - printer.print("("); - printer.print(n.getId()); - printer.print(")"); - } else { - printer.print("break"); - } - printer.print(";"); - } - - public void visit(ReturnStmt n, LocalSymbolTable arg) { - printer.print("return"); - if (n.getExpr() != null) { - printer.print(" "); - n.getExpr().accept(this, arg); - } - printer.print(";"); - } - - public void visit(EnumDeclaration n, LocalSymbolTable arg) { - if (n.getJavaDoc() != null) { - n.getJavaDoc().accept(this, arg); - } - currentAnnotations = n.getAnnotations(); - // if (annotations != null) { - // for (AnnotationExpr a : annotations) { - // a.accept(this, arg); - // printer.printLn(); - // } - // } - printModifiers(n.getModifiers()); - - printer.print("enum "); - printer.print(n.getName()); - - currentAnnotations = null; - - if (n.getImplements() != null) { - printer.print(" implements "); - for (Iterator i = n.getImplements().iterator(); i.hasNext();) { - ClassOrInterfaceType c = i.next(); - c.accept(this, arg); - if (i.hasNext()) { - printer.print(", "); - } - } - } - - 
printer.printLn(" {"); - printer.indent(); - if (n.getEntries() != null) { - printer.printLn(); - for (Iterator i = n.getEntries().iterator(); i.hasNext();) { - EnumConstantDeclaration e = i.next(); - e.accept(this, arg); - if (i.hasNext()) { - printer.print(", "); - } - } - } - if (n.getMembers() != null) { - printer.printLn(";"); - printMembers(n.getMembers(), arg); - } else { - if (n.getEntries() != null) { - printer.printLn(); - } - } - printer.unindent(); - printer.print("}"); - } - - public void visit(EnumConstantDeclaration n, LocalSymbolTable arg) { - if (n.getJavaDoc() != null) { - n.getJavaDoc().accept(this, arg); - } - currentAnnotations = n.getAnnotations(); - // if (annotations != null) { - // for (AnnotationExpr a : annotations) { - // a.accept(this, arg); - // printer.printLn(); - // } - // } - printer.print(n.getName()); - - currentAnnotations = null; - - if (n.getArgs() != null) { - printer.print("("); - for (Iterator i = n.getArgs().iterator(); i.hasNext();) { - Expression e = i.next(); - e.accept(this, arg); - if (i.hasNext()) { - printer.print(", "); - } - } - printer.print(")"); - } - - if (n.getClassBody() != null) { - printer.printLn(" {"); - printer.indent(); - printMembers(n.getClassBody(), arg); - printer.unindent(); - printer.printLn("}"); - } - } - - public void visit(EmptyMemberDeclaration n, LocalSymbolTable arg) { - if (n.getJavaDoc() != null) { - n.getJavaDoc().accept(this, arg); - } - printer.print(";"); - } - - public void visit(InitializerDeclaration n, LocalSymbolTable arg) { - if (n.getJavaDoc() != null) { - n.getJavaDoc().accept(this, arg); - } - if (n.isStatic()) { - printer.print("static "); - } - n.getBlock().accept(this, arg); - } - - public void visit(IfStmt n, LocalSymbolTable arg) { - if (TranslatorUtils.isDocumentModeHandlerNullCheck(n.getCondition())) { - Statement then = n.getThenStmt(); - if (then instanceof BlockStmt) { - BlockStmt block = (BlockStmt) then; - List statements = block.getStmts(); - if (statements != 
null && statements.size() == 1) { - statements.get(0).accept(this, arg); - } else { - then.accept(this, arg); - } - } else { - then.accept(this, arg); - } - } else if (!TranslatorUtils.isErrorHandlerIf(n.getCondition(), supportErrorReporting)) { - if (TranslatorUtils.isErrorOnlyBlock(n.getThenStmt(), supportErrorReporting)) { - if (n.getElseStmt() != null - && !TranslatorUtils.isErrorOnlyBlock(n.getElseStmt(), supportErrorReporting)) { - printer.print("if ("); - if (n.getCondition() instanceof BinaryExpr) { - BinaryExpr binExpr = (BinaryExpr) n.getCondition(); - switch (binExpr.getOperator()) { - case equals: - binExpr.getLeft().accept(this, arg); - printer.print(" != "); - binExpr.getRight().accept(this, arg); - break; - case notEquals: - binExpr.getLeft().accept(this, arg); - printer.print(" == "); - binExpr.getRight().accept(this, arg); - break; - default: - printer.print("!("); - formatCondition(n.getCondition(), arg); - printer.print(")"); - break; - } - } else { - printer.print("!("); - formatCondition(n.getCondition(), arg); - printer.print(")"); - } - printer.print(") "); - n.getElseStmt().accept(this, arg); - } - } else { - boolean unlikely = (currentMethod != null) - && (Arrays.binarySearch( - METHODS_WITH_UNLIKELY_CONDITIONS, - currentMethod) >= 0); - printer.print("if ("); - if (unlikely) { - printer.print(cppTypes.unlikely()); - printer.print("("); - } - formatCondition(n.getCondition(), arg); - if (unlikely) { - printer.print(")"); - } - printer.print(") "); - n.getThenStmt().accept(this, arg); - if (n.getElseStmt() != null - && !TranslatorUtils.isErrorOnlyBlock(n.getElseStmt(), supportErrorReporting)) { - printer.print(" else "); - n.getElseStmt().accept(this, arg); - } - } - } - } - - private void formatCondition(Expression expr, LocalSymbolTable arg) { - if (expr instanceof BinaryExpr) { - BinaryExpr binExpr = (BinaryExpr) expr; - switch (binExpr.getOperator()) { - case notEquals: - if (binExpr.getRight() instanceof NullLiteralExpr) { - 
binExpr.getLeft().accept(this, arg); - return; - } - break; - default: - break; - } - } - expr.accept(this, arg); - } - - - public void visit(WhileStmt n, LocalSymbolTable arg) { - printer.print("while ("); - n.getCondition().accept(this, arg); - printer.print(") "); - n.getBody().accept(this, arg); - } - - public void visit(ContinueStmt n, LocalSymbolTable arg) { - // Not supporting the general Java continue semantics. - // Instead, making the generated code more readable for the - // case at hand. - if (n.getId() != null) { - printer.print(cppTypes.continueMacro()); - printer.print("("); - printer.print(n.getId()); - printer.print(")"); - if (forLoopsWithCondition.contains(n.getId())) { - throw new IllegalStateException( - "Continue attempted with a loop that has a condition. " - + className + " " + n.getId()); - } - } else { - printer.print("continue"); - } - printer.print(";"); - } - - public void visit(DoStmt n, LocalSymbolTable arg) { - printer.print("do "); - n.getBody().accept(this, arg); - printer.print(" while ("); - n.getCondition().accept(this, arg); - printer.print(");"); - } - - public void visit(ForeachStmt n, LocalSymbolTable arg) { - printer.print("for ("); - n.getVariable().accept(this, arg); - printer.print(" : "); - n.getIterable().accept(this, arg); - printer.print(") "); - n.getBody().accept(this, arg); - } - - public void visit(ForStmt n, LocalSymbolTable arg) { - printer.print("for ("); - if (n.getInit() != null) { - for (Iterator i = n.getInit().iterator(); i.hasNext();) { - Expression e = i.next(); - e.accept(this, arg); - if (i.hasNext()) { - printer.print(", "); - } - } - } - printer.print(";"); - if (n.getCompare() != null) { - printer.print(" "); - n.getCompare().accept(this, arg); - } - printer.print(";"); - if (n.getUpdate() != null) { - printer.print(" "); - for (Iterator i = n.getUpdate().iterator(); i.hasNext();) { - Expression e = i.next(); - e.accept(this, arg); - if (i.hasNext()) { - printer.print(", "); - } - } - } - 
printer.print(") "); - n.getBody().accept(this, arg); - } - - public void visit(ThrowStmt n, LocalSymbolTable arg) { - printer.print("throw "); - n.getExpr().accept(this, arg); - printer.print(";"); - } - - public void visit(SynchronizedStmt n, LocalSymbolTable arg) { - printer.print("synchronized ("); - n.getExpr().accept(this, arg); - printer.print(") "); - n.getBlock().accept(this, arg); - } - - public void visit(TryStmt n, LocalSymbolTable arg) { - printer.print("try "); - n.getTryBlock().accept(this, arg); - if (n.getCatchs() != null) { - for (CatchClause c : n.getCatchs()) { - c.accept(this, arg); - } - } - if (n.getFinallyBlock() != null) { - printer.print(" finally "); - n.getFinallyBlock().accept(this, arg); - } - } - - public void visit(CatchClause n, LocalSymbolTable arg) { - printer.print(" catch ("); - n.getExcept().accept(this, arg); - printer.print(") "); - n.getCatchBlock().accept(this, arg); - - } - - public void visit(AnnotationDeclaration n, LocalSymbolTable arg) { - if (n.getJavaDoc() != null) { - n.getJavaDoc().accept(this, arg); - } - currentAnnotations = n.getAnnotations(); - // if (annotations != null) { - // for (AnnotationExpr a : annotations) { - // a.accept(this, arg); - // printer.printLn(); - // } - // } - printModifiers(n.getModifiers()); - - printer.print("@interface "); - printer.print(n.getName()); - currentAnnotations = null; - printer.printLn(" {"); - printer.indent(); - if (n.getMembers() != null) { - printMembers(n.getMembers(), arg); - } - printer.unindent(); - printer.print("}"); - } - - public void visit(AnnotationMemberDeclaration n, LocalSymbolTable arg) { - if (n.getJavaDoc() != null) { - n.getJavaDoc().accept(this, arg); - } - currentAnnotations = n.getAnnotations(); - // if (annotations != null) { - // for (AnnotationExpr a : annotations) { - // a.accept(this, arg); - // printer.printLn(); - // } - // } - printModifiers(n.getModifiers()); - - n.getType().accept(this, arg); - printer.print(" "); - 
printer.print(n.getName()); - currentAnnotations = null; - printer.print("()"); - if (n.getDefaultValue() != null) { - printer.print(" default "); - n.getDefaultValue().accept(this, arg); - } - printer.print(";"); - } - - public void visit(MarkerAnnotationExpr n, LocalSymbolTable arg) { - printer.print("@"); - n.getName().accept(this, arg); - } - - public void visit(SingleMemberAnnotationExpr n, LocalSymbolTable arg) { - printer.print("@"); - n.getName().accept(this, arg); - printer.print("("); - n.getMemberValue().accept(this, arg); - printer.print(")"); - } - - public void visit(NormalAnnotationExpr n, LocalSymbolTable arg) { - printer.print("@"); - n.getName().accept(this, arg); - printer.print("("); - if (n.getPairs() != null) { - for (Iterator i = n.getPairs().iterator(); i.hasNext();) { - MemberValuePair m = i.next(); - m.accept(this, arg); - if (i.hasNext()) { - printer.print(", "); - } - } - } - printer.print(")"); - } - - public void visit(MemberValuePair n, LocalSymbolTable arg) { - printer.print(n.getName()); - printer.print(" = "); - n.getValue().accept(this, arg); - } - - public void visit(LineComment n, LocalSymbolTable arg) { - printer.print("//"); - printer.printLn(n.getContent()); - } - - public void visit(BlockComment n, LocalSymbolTable arg) { - printer.print("/*"); - printer.print(n.getContent()); - printer.printLn("*/"); - } - - public void setLabels(Set labels) { - this.labels = labels; - } - -} diff --git a/translator-src/nu/validator/htmlparser/cpptranslate/GkAtomParser.java b/translator-src/nu/validator/htmlparser/cpptranslate/GkAtomParser.java deleted file mode 100644 index 475a793b..00000000 --- a/translator-src/nu/validator/htmlparser/cpptranslate/GkAtomParser.java +++ /dev/null @@ -1,72 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the 
License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is HTML Parser C++ Translator code. - * - * The Initial Developer of the Original Code is - * Mozilla Foundation. - * Portions created by the Initial Developer are Copyright (C) 2008 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * Henri Sivonen - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. 
- * - * ***** END LICENSE BLOCK ***** */ - -package nu.validator.htmlparser.cpptranslate; - -import java.io.BufferedReader; -import java.io.IOException; -import java.io.Reader; -import java.util.HashMap; -import java.util.Map; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -public class GkAtomParser { - - /* Please note we aren't looking for the following Atom definitions: - PseudoElementAtom or NonInheritingAnonBoxAtom or InheritingAnonBoxAtom */ - private static final Pattern ATOM = Pattern.compile("^Atom\\(\"([^,]+)\",\\s*\"([^\"]*)\"\\).*$"); - - private final BufferedReader reader; - - public GkAtomParser(Reader reader) { - this.reader = new BufferedReader(reader); - } - - public Map parse() throws IOException { - Map map = new HashMap(); - String line; - while((line = reader.readLine()) != null) { - Matcher m = ATOM.matcher(line.trim()); - if (m.matches()) { - map.put(m.group(2), m.group(1)); - } - } - return map; - } - -} diff --git a/translator-src/nu/validator/htmlparser/cpptranslate/HVisitor.java b/translator-src/nu/validator/htmlparser/cpptranslate/HVisitor.java deleted file mode 100644 index 6161746d..00000000 --- a/translator-src/nu/validator/htmlparser/cpptranslate/HVisitor.java +++ /dev/null @@ -1,291 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is HTML Parser C++ Translator code. - * - * The Initial Developer of the Original Code is - * Mozilla Foundation. 
- * Portions created by the Initial Developer are Copyright (C) 2008 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * Henri Sivonen - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -package nu.validator.htmlparser.cpptranslate; - -import java.util.List; - -import japa.parser.ast.body.FieldDeclaration; -import japa.parser.ast.body.MethodDeclaration; -import japa.parser.ast.body.ModifierSet; -import japa.parser.ast.body.Parameter; -import japa.parser.ast.body.VariableDeclarator; -import japa.parser.ast.stmt.BlockStmt; -import japa.parser.ast.type.PrimitiveType; -import japa.parser.ast.type.ReferenceType; - -public class HVisitor extends CppVisitor { - - private enum Visibility { - NONE, PRIVATE, PUBLIC, PROTECTED, - } - - private Visibility previousVisibility = Visibility.NONE; - - /** - * @see nu.validator.htmlparser.cpptranslate.CppVisitor#printMethodNamespace() - */ - @Override protected void printMethodNamespace() { - } - - public HVisitor(CppTypes cppTypes, SymbolTable symbolTable) { - super(cppTypes, symbolTable); - } - - /** - * @see nu.validator.htmlparser.cpptranslate.CppVisitor#startClassDeclaration() - */ - 
@Override protected void startClassDeclaration() { - printer.print("#ifndef "); - printer.print(className); - printer.printLn("_h"); - printer.print("#define "); - printer.print(className); - printer.printLn("_h"); - - printer.printLn(); - - String[] incs = cppTypes.boilerplateIncludes(javaClassName); - for (int i = 0; i < incs.length; i++) { - String inc = incs[i]; - if (className.equals(inc)) { - continue; - } - printer.print("#include \""); - printer.print(inc); - printer.printLn(".h\""); - } - - printer.printLn(); - - String[] forwDecls = cppTypes.boilerplateForwardDeclarations(); - for (int i = 0; i < forwDecls.length; i++) { - String decl = forwDecls[i]; - printer.print("class "); - printer.print(decl); - printer.printLn(";"); - } - - printer.printLn(); - - for (int i = 0; i < Main.H_LIST.length; i++) { - String klazz = Main.H_LIST[i]; - if (!(klazz.equals(javaClassName) || klazz.equals("StackNode"))) { - printer.print("class "); - printer.print(cppTypes.classPrefix()); - printer.print(klazz); - printer.printLn(";"); - } - } - - printer.printLn(); - - String[] otherDecls = cppTypes.boilerplateDeclarations(javaClassName); - for (int i = 0; i < otherDecls.length; i++) { - String decl = otherDecls[i]; - printer.printLn(decl); - } - - printer.printLn(); - - printer.print("class "); - printer.print(className); - if ("StateSnapshot".equals(javaClassName) || "TreeBuilder".equals(javaClassName)) { - printer.print(" : public "); - printer.print(cppTypes.treeBuilderStateInterface()); - } - printer.printLn(); - printer.printLn("{"); - printer.indent(); - } - - /** - * @see nu.validator.htmlparser.cpptranslate.CppVisitor#endClassDeclaration() - */ - @Override protected void endClassDeclaration() { - printModifiers(ModifierSet.PUBLIC | ModifierSet.STATIC); - printer.printLn("void initializeStatics();"); - printModifiers(ModifierSet.PUBLIC | ModifierSet.STATIC); - printer.printLn("void releaseStatics();"); - - printer.unindent(); - - if 
(cppTypes.hasSupplement(javaClassName)) { - printer.printLn(); - printer.print("#include \""); - printer.print(className); - printer.printLn("HSupplement.h\""); - } - - printer.printLn("};"); - printer.printLn(); - printer.print("#endif"); - } - - /** - * @see nu.validator.htmlparser.cpptranslate.CppVisitor#printModifiers(int) - */ - @Override protected void printModifiers(int modifiers) { - if (ModifierSet.isPrivate(modifiers)) { - if (previousVisibility != Visibility.PRIVATE) { - printer.unindent(); - printer.printLn("private:"); - printer.indent(); - previousVisibility = Visibility.PRIVATE; - } - } else if (ModifierSet.isProtected(modifiers)) { - if (previousVisibility != Visibility.PROTECTED) { - printer.unindent(); - printer.printLn("protected:"); - printer.indent(); - previousVisibility = Visibility.PROTECTED; - } - } else { - if (previousVisibility != Visibility.PUBLIC) { - printer.unindent(); - printer.printLn("public:"); - printer.indent(); - previousVisibility = Visibility.PUBLIC; - } - } - if (inline()) { - printer.print("inline "); - } - if (virtual()) { - printer.print("virtual "); - } - if (ModifierSet.isStatic(modifiers)) { - printer.print("static "); - } - } - - /** - * @see nu.validator.htmlparser.cpptranslate.CppVisitor#fieldDeclaration(japa.parser.ast.body.FieldDeclaration, java.lang.LocalSymbolTable) - */ - @Override protected void fieldDeclaration(FieldDeclaration n, LocalSymbolTable arg) { - inField = true; - int modifiers = n.getModifiers(); - List variables = n.getVariables(); - VariableDeclarator declarator = variables.get(0); - if (ModifierSet.isStatic(modifiers) && ModifierSet.isFinal(modifiers) - && n.getType() instanceof PrimitiveType) { - PrimitiveType type = (PrimitiveType) n.getType(); - if (type.getType() != PrimitiveType.Primitive.Int) { - throw new IllegalStateException( - "Only int constant #defines supported."); - } - if (variables.size() != 1) { - throw new IllegalStateException( - "More than one variable declared by one 
declarator."); - } - printModifiers(modifiers); - printer.print("const "); - n.getType().accept(this, arg); - printer.print(" "); - declarator.getId().accept(this, arg); - printer.print(" = "); - declarator.getInit().accept(this, arg); - printer.printLn(";"); - printer.printLn(); - symbolTable.addPrimitiveConstant(javaClassName, declarator.getId().toString()); - } else { - if (n.getType() instanceof ReferenceType) { - ReferenceType rt = (ReferenceType) n.getType(); - currentArrayCount = rt.getArrayCount(); - if (currentArrayCount > 0 - && (rt.getType() instanceof PrimitiveType) && declarator.getInit() != null) { - if (!ModifierSet.isStatic(modifiers)) { - throw new IllegalStateException( - "Non-static array case not supported here." + declarator); - } - if (noLength()) { - inPrimitiveNoLengthFieldDeclarator = true; - } - } - } - printModifiers(modifiers); - inStatic = ModifierSet.isStatic(modifiers); - n.getType().accept(this, arg); - printer.print(" "); - if (ModifierSet.isStatic(modifiers)) { - if ("AttributeName".equals(n.getType().toString())) { - printer.print("ATTR_"); - } else if ("ElementName".equals(n.getType().toString())) { - printer.print("ELT_"); - } - } - declarator.getId().accept(this, arg); - printer.printLn(";"); - currentArrayCount = 0; - inStatic = false; - inPrimitiveNoLengthFieldDeclarator = false; - } - inField = false; - } - - /** - * @see nu.validator.htmlparser.cpptranslate.CppVisitor#printConstructorExplicit(java.util.List) - */ - @Override protected void printConstructorExplicit(List params) { - if (params != null && params.size() == 1) { - printer.print("explicit "); - } - } - - /** - * @see nu.validator.htmlparser.cpptranslate.CppVisitor#printConstructorBody(japa.parser.ast.stmt.BlockStmt, java.lang.LocalSymbolTable) - */ - @Override protected void printConstructorBody(BlockStmt block, LocalSymbolTable arg) { - printer.printLn(";"); - } - - /** - * @see 
nu.validator.htmlparser.cpptranslate.CppVisitor#visit(japa.parser.ast.body.MethodDeclaration, java.lang.LocalSymbolTable) - */ - @Override public void visit(MethodDeclaration n, LocalSymbolTable arg) { - arg = new LocalSymbolTable(javaClassName, symbolTable); - printMethodDeclaration(n, arg); - } - - /** - * @see nu.validator.htmlparser.cpptranslate.CppVisitor#inHeader() - */ - @Override protected boolean inHeader() { - return true; - } - -} diff --git a/translator-src/nu/validator/htmlparser/cpptranslate/LabelVisitor.java b/translator-src/nu/validator/htmlparser/cpptranslate/LabelVisitor.java deleted file mode 100644 index f27d465a..00000000 --- a/translator-src/nu/validator/htmlparser/cpptranslate/LabelVisitor.java +++ /dev/null @@ -1,84 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is HTML Parser C++ Translator code. - * - * The Initial Developer of the Original Code is - * Mozilla Foundation. - * Portions created by the Initial Developer are Copyright (C) 2008 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * Henri Sivonen - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. 
If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -package nu.validator.htmlparser.cpptranslate; - -import japa.parser.ast.stmt.BreakStmt; -import japa.parser.ast.stmt.ContinueStmt; -import japa.parser.ast.visitor.VoidVisitorAdapter; - -import java.util.HashSet; -import java.util.Set; - -public class LabelVisitor extends VoidVisitorAdapter { - - private final Set labels = new HashSet(); - - public LabelVisitor() { - } - - /** - * @see japa.parser.ast.visitor.VoidVisitorAdapter#visit(japa.parser.ast.stmt.BreakStmt, java.lang.Object) - */ - @Override - public void visit(BreakStmt n, Object arg) { - String label = n.getId(); - if (label != null) { - labels.add(label + "_end"); - } - } - - /** - * @see japa.parser.ast.visitor.VoidVisitorAdapter#visit(japa.parser.ast.stmt.ContinueStmt, java.lang.Object) - */ - @Override - public void visit(ContinueStmt n, Object arg) { - String label = n.getId(); - if (label != null) { - labels.add(label); - } - } - - /** - * Returns the labels. 
- * - * @return the labels - */ - public Set getLabels() { - return labels; - } -} diff --git a/translator-src/nu/validator/htmlparser/cpptranslate/LicenseExtractor.java b/translator-src/nu/validator/htmlparser/cpptranslate/LicenseExtractor.java deleted file mode 100644 index e4030f43..00000000 --- a/translator-src/nu/validator/htmlparser/cpptranslate/LicenseExtractor.java +++ /dev/null @@ -1,75 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is HTML Parser C++ Translator code. - * - * The Initial Developer of the Original Code is - * Mozilla Foundation. - * Portions created by the Initial Developer are Copyright (C) 2008 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * Henri Sivonen - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. 
If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -package nu.validator.htmlparser.cpptranslate; - -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStreamReader; -import java.io.Reader; - -public class LicenseExtractor { - - private final Reader reader; - - public LicenseExtractor(File file) throws IOException { - this.reader = new InputStreamReader(new FileInputStream(file), "utf-8"); - } - - public String extract() throws IOException { - boolean prevWasAsterisk = false; - StringBuilder sb = new StringBuilder(); - int c; - while ((c = reader.read()) != -1) { - sb.append((char)c); - switch (c) { - case '*': - prevWasAsterisk = true; - continue; - case '/': - if (prevWasAsterisk) { - return sb.toString(); - } - default: - prevWasAsterisk = false; - continue; - } - } - return ""; - } -} diff --git a/translator-src/nu/validator/htmlparser/cpptranslate/LocalSymbolTable.java b/translator-src/nu/validator/htmlparser/cpptranslate/LocalSymbolTable.java deleted file mode 100644 index a9375e88..00000000 --- a/translator-src/nu/validator/htmlparser/cpptranslate/LocalSymbolTable.java +++ /dev/null @@ -1,89 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is HTML Parser C++ Translator code. 
- * - * The Initial Developer of the Original Code is - * Mozilla Foundation. - * Portions created by the Initial Developer are Copyright (C) 2009 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * Henri Sivonen - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. 
- * - * ***** END LICENSE BLOCK ***** */ - -package nu.validator.htmlparser.cpptranslate; - -import java.util.HashMap; -import java.util.Map; - -public class LocalSymbolTable { - - private final Map locals = new HashMap(); - - private final String javaClassName; - - private final SymbolTable delegate; - - /** - * @param javaClassName - * @param delegate - */ - public LocalSymbolTable(String javaClassName, SymbolTable delegate) { - this.javaClassName = javaClassName; - this.delegate = delegate; - } - - public void putLocalType(String name, Type type) { - locals.put(name, type); - } - - /** - * @param klazz - * @param variable - * @return - * @see nu.validator.htmlparser.cpptranslate.SymbolTable#getFieldType(java.lang.String, java.lang.String) - */ - public Type getVariableType(String klazz, String variable) { - if (klazz == null) { - Type type = locals.get(variable); - if (type != null) { - return type; - } - } - return delegate.getFieldType(((klazz == null || "this".equals(klazz)) ? javaClassName : klazz), variable); - } - - /** - * @param klazz may be null or "this" - * @param method - * @return - * @see nu.validator.htmlparser.cpptranslate.SymbolTable#getMethodReturnType(java.lang.String, java.lang.String) - */ - public Type getMethodReturnType(String klazz, String method) { - return delegate.getMethodReturnType(((klazz == null || "this".equals(klazz)) ? javaClassName : klazz), method); - } -} diff --git a/translator-src/nu/validator/htmlparser/cpptranslate/Main.java b/translator-src/nu/validator/htmlparser/cpptranslate/Main.java deleted file mode 100644 index 741b7419..00000000 --- a/translator-src/nu/validator/htmlparser/cpptranslate/Main.java +++ /dev/null @@ -1,145 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is HTML Parser C++ Translator code. - * - * The Initial Developer of the Original Code is - * Mozilla Foundation. - * Portions created by the Initial Developer are Copyright (C) 2008 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * Henri Sivonen - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. 
- * - * ***** END LICENSE BLOCK ***** */ - -package nu.validator.htmlparser.cpptranslate; - -import java.io.File; -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.OutputStreamWriter; -import java.io.UnsupportedEncodingException; - -import japa.parser.JavaParser; -import japa.parser.ParseException; -import japa.parser.ast.CompilationUnit; - -public class Main { - - static final String[] H_LIST = { - "AttributeName", - "ElementName", - "Tokenizer", - "TreeBuilder", - "MetaScanner", - "StackNode", - "UTF16Buffer", - "StateSnapshot", - "Portability", - }; - - private static final String[] CPP_LIST = { - "AttributeName", - "ElementName", - "Tokenizer", - "TreeBuilder", - "MetaScanner", - "StackNode", - "UTF16Buffer", - "StateSnapshot", - }; - - /** - * @param args - * @throws ParseException - * @throws IOException - */ - public static void main(String[] args) throws ParseException, IOException { - CppTypes cppTypes = new CppTypes(new File(args[2]), new File(args[3])); - SymbolTable symbolTable = new SymbolTable(); - - File javaDirectory = new File(args[0]); - File targetDirectory = new File(args[1]); - File cppDirectory = targetDirectory; - File javaCopyDirectory = new File(targetDirectory, "javasrc"); - - for (int i = 0; i < H_LIST.length; i++) { - parseFile(cppTypes, javaDirectory, cppDirectory, H_LIST[i], ".h", new HVisitor(cppTypes, symbolTable)); - copyFile(new File(javaDirectory, H_LIST[i] + ".java"), new File(javaCopyDirectory, H_LIST[i] + ".java")); - } - for (int i = 0; i < CPP_LIST.length; i++) { - parseFile(cppTypes, javaDirectory, cppDirectory, CPP_LIST[i], ".cpp", new CppVisitor(cppTypes, symbolTable)); - } - cppTypes.finished(); - } - - private static void copyFile(File input, File output) throws IOException { - if (input.getCanonicalFile().equals(output.getCanonicalFile())) { - return; // files are the same! 
- } - // This is horribly inefficient, but perf is not really much of a concern here. - FileInputStream in = new FileInputStream(input); - FileOutputStream out = new FileOutputStream(output); - int b; - while ((b = in.read()) != -1) { - out.write(b); - } - out.flush(); - out.close(); - in.close(); - } - - private static void parseFile(CppTypes cppTypes, File javaDirectory, - File cppDirectory, String className, String fne, CppVisitor visitor) - throws FileNotFoundException, UnsupportedEncodingException, - IOException { - File file = null; - try { - file = new File(javaDirectory, className + ".java"); - String license = new LicenseExtractor(file).extract(); - CompilationUnit cu = JavaParser.parse(new NoCppInputStream( - new CppOnlyInputStream(new FileInputStream(file))), "utf-8"); - LabelVisitor labelVisitor = new LabelVisitor(); - cu.accept(labelVisitor, null); - visitor.setLabels(labelVisitor.getLabels()); - cu.accept(visitor, null); - FileOutputStream out = new FileOutputStream(new File(cppDirectory, - cppTypes.classPrefix() + className + fne)); - OutputStreamWriter w = new OutputStreamWriter(out, "utf-8"); - w.write(license); - w.write("\n\n/*\n * THIS IS A GENERATED FILE. 
PLEASE DO NOT EDIT.\n * Please edit " - + className + ".java instead and regenerate.\n */\n\n"); - w.write(visitor.getSource()); - w.close(); - } catch (ParseException e) { - System.err.println(file); - e.printStackTrace(); - } - } - -} diff --git a/translator-src/nu/validator/htmlparser/cpptranslate/NoCppInputStream.java b/translator-src/nu/validator/htmlparser/cpptranslate/NoCppInputStream.java deleted file mode 100644 index 86f9ae7f..00000000 --- a/translator-src/nu/validator/htmlparser/cpptranslate/NoCppInputStream.java +++ /dev/null @@ -1,86 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is HTML Parser C++ Translator code. - * - * The Initial Developer of the Original Code is - * Mozilla Foundation. - * Portions created by the Initial Developer are Copyright (C) 2008 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * Henri Sivonen - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. 
If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -package nu.validator.htmlparser.cpptranslate; - -import java.io.IOException; -import java.io.InputStream; - -public class NoCppInputStream extends InputStream { - - private final static char[] START = "[NOCPP[".toCharArray(); - - private final static char[] END = "]NOCPP]".toCharArray(); - - private int state; - - private final InputStream delegate; - - - - /** - * @param delegate - */ - public NoCppInputStream(InputStream delegate) { - this.delegate = delegate; - this.state = 0; - } - - @Override public int read() throws IOException { - int c; - if (state == START.length) { - int endState = 0; - while (endState != END.length) { - c = delegate.read(); - if (END[endState] == c) { - endState++; - } else { - endState = 0; - } - } - state = 0; - } - c = delegate.read(); - if (START[state] == c) { - state++; - } else { - state = 0; - } - return c; - } - -} diff --git a/translator-src/nu/validator/htmlparser/cpptranslate/StringLiteralParser.java b/translator-src/nu/validator/htmlparser/cpptranslate/StringLiteralParser.java deleted file mode 100644 index 305f516a..00000000 --- a/translator-src/nu/validator/htmlparser/cpptranslate/StringLiteralParser.java +++ /dev/null @@ -1,70 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is HTML Parser C++ Translator code. - * - * The Initial Developer of the Original Code is - * Mozilla Foundation. - * Portions created by the Initial Developer are Copyright (C) 2008 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * Henri Sivonen - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. 
- * - * ***** END LICENSE BLOCK ***** */ - -package nu.validator.htmlparser.cpptranslate; - -import java.io.BufferedReader; -import java.io.IOException; -import java.io.Reader; -import java.util.HashMap; -import java.util.Map; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -public class StringLiteralParser { - - private static final Pattern STRING_DECL = Pattern.compile("^.*\\(([^ ]+) = new nsString\\(\\)\\)->Assign\\(NS_LITERAL_STRING\\(\"([^\"]*)\"\\)\\);.*$"); - - private final BufferedReader reader; - - public StringLiteralParser(Reader reader) { - this.reader = new BufferedReader(reader); - } - - public Map parse() throws IOException { - Map map = new HashMap(); - String line; - while((line = reader.readLine()) != null) { - Matcher m = STRING_DECL.matcher(line); - if (m.matches()) { - map.put(m.group(2), m.group(1)); - } - } - return map; - } - -} diff --git a/translator-src/nu/validator/htmlparser/cpptranslate/StringPair.java b/translator-src/nu/validator/htmlparser/cpptranslate/StringPair.java deleted file mode 100644 index e24247f7..00000000 --- a/translator-src/nu/validator/htmlparser/cpptranslate/StringPair.java +++ /dev/null @@ -1,73 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is HTML Parser C++ Translator code. - * - * The Initial Developer of the Original Code is - * Mozilla Foundation. - * Portions created by the Initial Developer are Copyright (C) 2009 - * the Initial Developer. 
All Rights Reserved. - * - * Contributor(s): - * Henri Sivonen - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -package nu.validator.htmlparser.cpptranslate; - -public class StringPair { - - /** - * @param first - * @param second - */ - public StringPair(String first, String second) { - this.first = first; - this.second = second; - } - - private final String first; - - private final String second; - - /** - * @see java.lang.Object#equals(java.lang.Object) - */ - @Override public boolean equals(Object o) { - if (o instanceof StringPair) { - StringPair other = (StringPair) o; - return first.equals(other.first) && second.equals(other.second); - } - return false; - } - - /** - * @see java.lang.Object#hashCode() - */ - @Override public int hashCode() { - return first.hashCode() ^ second.hashCode(); - } - -} diff --git a/translator-src/nu/validator/htmlparser/cpptranslate/SymbolTable.java b/translator-src/nu/validator/htmlparser/cpptranslate/SymbolTable.java deleted file mode 100644 index 09ba5a00..00000000 --- a/translator-src/nu/validator/htmlparser/cpptranslate/SymbolTable.java +++ /dev/null @@ -1,93 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * 
Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is HTML Parser C++ Translator code. - * - * The Initial Developer of the Original Code is - * Mozilla Foundation. - * Portions created by the Initial Developer are Copyright (C) 2008 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * Henri Sivonen - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. 
- * - * ***** END LICENSE BLOCK ***** */ - -package nu.validator.htmlparser.cpptranslate; - -import java.util.HashMap; -import java.util.HashSet; -import java.util.Map; -import java.util.Set; - -public class SymbolTable { - - private final Set primitiveConstants = new HashSet(); - - private final Map fields = new HashMap(); - - private final Map methodReturns = new HashMap(); - - /** - * This is a sad hack to work around the fact the there's no real symbol - * table yet. - * - * @param field - * @return - */ - public boolean isAttributeOrElementName(String klazz, String field) { - if (isPrimitiveConstant(klazz, field)) { - return false; - } - return !("ATTRIBUTE_HASHES".equals(field) - || "ATTRIBUTE_NAMES".equals(field) - || "ELEMENT_HASHES".equals(field) - || "ELEMENT_NAMES".equals(field) || "ALL_NO_NS".equals(field)); - } - - public void addPrimitiveConstant(String klazz, String field) { - primitiveConstants.add(new StringPair(klazz, field)); - } - - public void putFieldType(String klazz, String field, Type type) { - fields.put(new StringPair(klazz, field), type); - } - - public void putMethodReturnType(String klazz, String method, Type type) { - methodReturns.put(new StringPair(klazz, method), type); - } - - public boolean isPrimitiveConstant(String klazz, String field) { - return primitiveConstants.contains(new StringPair(klazz, field)); - } - - public Type getFieldType(String klazz, String field) { - return fields.get(new StringPair(klazz, field)); - } - - public Type getMethodReturnType(String klazz, String method) { - return methodReturns.get(new StringPair(klazz, method)); - } -} diff --git a/translator-src/nu/validator/htmlparser/cpptranslate/SymbolTableVisitor.java b/translator-src/nu/validator/htmlparser/cpptranslate/SymbolTableVisitor.java deleted file mode 100644 index 00f7c574..00000000 --- a/translator-src/nu/validator/htmlparser/cpptranslate/SymbolTableVisitor.java +++ /dev/null @@ -1,71 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 
1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is HTML Parser C++ Translator code. - * - * The Initial Developer of the Original Code is - * Mozilla Foundation. - * Portions created by the Initial Developer are Copyright (C) 2009 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * Henri Sivonen - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. 
- * - * ***** END LICENSE BLOCK ***** */ - -package nu.validator.htmlparser.cpptranslate; - -import japa.parser.ast.body.ClassOrInterfaceDeclaration; -import japa.parser.ast.body.FieldDeclaration; -import japa.parser.ast.body.MethodDeclaration; - -public class SymbolTableVisitor extends AnnotationHelperVisitor { - - private String javaClassName; - - /** - * @see japa.parser.ast.visitor.VoidVisitorAdapter#visit(japa.parser.ast.body.FieldDeclaration, java.lang.Object) - */ - @Override public void visit(FieldDeclaration n, SymbolTable arg) { - currentAnnotations = n.getAnnotations(); - arg.putFieldType(javaClassName, n.getVariables().get(0).getId().getName(), convertType(n.getType(), n.getModifiers())); - } - - /** - * @see japa.parser.ast.visitor.VoidVisitorAdapter#visit(japa.parser.ast.body.MethodDeclaration, java.lang.Object) - */ - @Override public void visit(MethodDeclaration n, SymbolTable arg) { - currentAnnotations = n.getAnnotations(); - arg.putMethodReturnType(javaClassName, n.getName(), convertType(n.getType(), n.getModifiers())); - } - - /** - * @see japa.parser.ast.visitor.VoidVisitorAdapter#visit(japa.parser.ast.body.ClassOrInterfaceDeclaration, java.lang.Object) - */ - @Override public void visit(ClassOrInterfaceDeclaration n, SymbolTable arg) { - javaClassName = n.getName(); - } - -} diff --git a/translator-src/nu/validator/htmlparser/cpptranslate/TranslatorUtils.java b/translator-src/nu/validator/htmlparser/cpptranslate/TranslatorUtils.java deleted file mode 100644 index 866db093..00000000 --- a/translator-src/nu/validator/htmlparser/cpptranslate/TranslatorUtils.java +++ /dev/null @@ -1,81 +0,0 @@ -package nu.validator.htmlparser.cpptranslate; - -import japa.parser.ast.expr.BinaryExpr; -import japa.parser.ast.expr.BinaryExpr.Operator; -import japa.parser.ast.expr.Expression; -import japa.parser.ast.expr.MethodCallExpr; -import japa.parser.ast.expr.NameExpr; -import japa.parser.ast.expr.NullLiteralExpr; -import japa.parser.ast.stmt.BlockStmt; -import 
japa.parser.ast.stmt.ExpressionStmt; -import japa.parser.ast.stmt.Statement; - -import java.util.List; - -public class TranslatorUtils { - public static boolean isErrorOnlyBlock(Statement elseStmt, boolean supportErrorReporting) { - if (supportErrorReporting) { - return false; - } - if (elseStmt instanceof BlockStmt) { - BlockStmt block = (BlockStmt) elseStmt; - List statements = block.getStmts(); - if (statements == null) { - return false; - } - if (statements.size() != 1) { - return false; - } - Statement statement = statements.get(0); - if (statement instanceof ExpressionStmt) { - ExpressionStmt exprStmt = (ExpressionStmt) statement; - Expression expr = exprStmt.getExpression(); - if (expr instanceof MethodCallExpr) { - MethodCallExpr call = (MethodCallExpr) expr; - if (call.getName().startsWith("err")) { - return true; - } - } - } - } - return false; - } - - public static boolean isErrorHandlerIf(Expression condition, boolean supportErrorReporting) { - if (supportErrorReporting) { - return false; - } - while (condition instanceof BinaryExpr) { - BinaryExpr binex = (BinaryExpr) condition; - condition = binex.getLeft(); - if (condition instanceof NameExpr) { - NameExpr name = (NameExpr) condition; - if ("errorHandler".equals(name.getName())) { - return true; - } - } - } - return false; - } - - public static boolean isDocumentModeHandlerNullCheck(Expression condition) { - if (condition instanceof BinaryExpr) { - BinaryExpr binex = (BinaryExpr) condition; - if (binex.getOperator() != Operator.notEquals) { - return false; - } - if (!(binex.getRight() instanceof NullLiteralExpr)) { - return false; - } - Expression left = binex.getLeft(); - if (left instanceof NameExpr) { - NameExpr name = (NameExpr) left; - if ("documentModeHandler".equals(name.getName())) { - return true; - } - } - } - return false; - } - -} diff --git a/translator-src/nu/validator/htmlparser/cpptranslate/Type.java b/translator-src/nu/validator/htmlparser/cpptranslate/Type.java deleted file mode 
100644 index 783a3bbd..00000000 --- a/translator-src/nu/validator/htmlparser/cpptranslate/Type.java +++ /dev/null @@ -1,99 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is HTML Parser C++ Translator code. - * - * The Initial Developer of the Original Code is - * Mozilla Foundation. - * Portions created by the Initial Developer are Copyright (C) 2009 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * Henri Sivonen - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. 
- * - * ***** END LICENSE BLOCK ***** */ - -package nu.validator.htmlparser.cpptranslate; - -public class Type { - - /** - * @param type - * @param arrayCount - * @param noLength - * @param modifiers - */ - public Type(String type, int arrayCount, boolean noLength, int modifiers) { - this.type = type; - this.arrayCount = arrayCount; - this.noLength = noLength; - this.modifiers = modifiers; - } - - private final String type; - - private final int arrayCount; - - private final boolean noLength; - - private final int modifiers; - - /** - * Returns the type. - * - * @return the type - */ - public String getType() { - return type; - } - - /** - * Returns the arrayCount. - * - * @return the arrayCount - */ - public int getArrayCount() { - return arrayCount; - } - - /** - * Returns the noLength. - * - * @return the noLength - */ - public boolean isNoLength() { - return noLength; - } - - /** - * Returns the modifiers. - * - * @return the modifiers - */ - public int getModifiers() { - return modifiers; - } - -} diff --git a/translator-src/nu/validator/htmlparser/generator/GenerateNamedCharactersCpp.java b/translator-src/nu/validator/htmlparser/generator/GenerateNamedCharactersCpp.java deleted file mode 100644 index ea9d47b1..00000000 --- a/translator-src/nu/validator/htmlparser/generator/GenerateNamedCharactersCpp.java +++ /dev/null @@ -1,579 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is HTML Parser C++ Translator code. 
- * - * The Initial Developer of the Original Code is - * Mozilla Foundation. - * Portions created by the Initial Developer are Copyright (C) 2008 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * Henri Sivonen - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -package nu.validator.htmlparser.generator; - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.InputStreamReader; -import java.io.OutputStreamWriter; -import java.io.Writer; -import java.util.Map; -import java.util.TreeMap; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -import nu.validator.htmlparser.cpptranslate.CppTypes; - -public class GenerateNamedCharactersCpp { - - /** - * The license for the output of this program except for data files. 
- */ - private static final String OUTPUT_LICENSE = "/*\n" - + " * Copyright (c) 2008-2010 Mozilla Foundation\n" - + " *\n" - + " * Permission is hereby granted, free of charge, to any person obtaining a \n" - + " * copy of this software and associated documentation files (the \"Software\"), \n" - + " * to deal in the Software without restriction, including without limitation \n" - + " * the rights to use, copy, modify, merge, publish, distribute, sublicense, \n" - + " * and/or sell copies of the Software, and to permit persons to whom the \n" - + " * Software is furnished to do so, subject to the following conditions:\n" - + " *\n" - + " * The above copyright notice and this permission notice shall be included in \n" - + " * all copies or substantial portions of the Software.\n" - + " *\n" - + " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR \n" - + " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, \n" - + " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL \n" - + " * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER \n" - + " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING \n" - + " * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER \n" - + " * DEALINGS IN THE SOFTWARE.\n" + " */\n\n"; - - /** - * The license for the generated data files. 
- */ - private static final String DATA_LICENSE = "/*\n" - + " * Copyright 2004-2010 Apple Computer, Inc., Mozilla Foundation, and Opera \n" - + " * Software ASA.\n" - + " * \n" - + " * You are granted a license to use, reproduce and create derivative works of \n" - + " * this document.\n" + " */\n\n"; - - private static final int LEAD_OFFSET = 0xD800 - (0x10000 >> 10); - - private static final Pattern LINE_PATTERN = Pattern.compile(" ([^<]*) U\\+(\\S*) (?:U\\+(\\S*) )?"); - - private static String toHexString(int c) { - String hexString = Integer.toHexString(c); - switch (hexString.length()) { - case 1: - return "0x000" + hexString; - case 2: - return "0x00" + hexString; - case 3: - return "0x0" + hexString; - case 4: - return "0x" + hexString; - default: - throw new RuntimeException("Unreachable."); - } - } - - /** - * @param args - * @throws IOException - */ - public static void main(String[] args) throws IOException { - TreeMap entities = new TreeMap(); - BufferedReader reader = new BufferedReader(new InputStreamReader( - new FileInputStream(args[0]), "utf-8")); - String line; - while ((line = reader.readLine()) != null) { - Matcher m = LINE_PATTERN.matcher(line); - while (m.find()) { - String value; - if (m.group(3) != null) { - // two BMP chars - int firstIntVal = Integer.parseInt(m.group(2), 16); - int secondIntVal = Integer.parseInt(m.group(3), 16); - value = ("" + (char)firstIntVal) + (char)secondIntVal; - } else { - // one code point - int intVal = Integer.parseInt(m.group(2), 16); - if (intVal <= 0xFFFF) { - value = "" + (char)intVal; - } else { - int high = (LEAD_OFFSET + (intVal >> 10)); - int low = (0xDC00 + (intVal & 0x3FF)); - value = ("" + (char)high) + (char)low; - } - } - entities.put(m.group(1), value); - } - } - - CppTypes cppTypes = new CppTypes(null, null); - File targetDirectory = new File(args[1]); - - generateH(targetDirectory, cppTypes, entities); - generateInclude(targetDirectory, cppTypes, entities); - generateCpp(targetDirectory, 
cppTypes, entities); - generateAccelH(targetDirectory, cppTypes, entities); - generateAccelCpp(targetDirectory, cppTypes, entities); - } - - private static void generateAccelCpp(File targetDirectory, - CppTypes cppTypes, TreeMap entities) throws IOException { - String includeFile = cppTypes.classPrefix() - + "NamedCharactersInclude.h"; - File cppFile = new File(targetDirectory, cppTypes.classPrefix() - + "NamedCharactersAccel.cpp"); - Writer out = new OutputStreamWriter(new FileOutputStream(cppFile), - "utf-8"); - - out.write(DATA_LICENSE); - out.write('\n'); - out.write("#include \"" + cppTypes.classPrefix() - + "NamedCharactersAccel.h\"\n"); - out.write("\n"); - - // Java initializes arrays to zero. Zero is our magic value for no hilo - // value. - int[][] hiLoTable = new int['z' + 1]['Z' - 'A' + 1 + 'z' - 'a' + 1]; - - String firstName = entities.entrySet().iterator().next().getKey(); - int firstKey = charToIndex(firstName.charAt(0)); - int secondKey = firstName.charAt(1); - int row = 0; - int lo = 0; - - for (Map.Entry entity : entities.entrySet()) { - String name = entity.getKey(); - int newFirst = charToIndex(name.charAt(0)); - int newSecond = name.charAt(1); - assert !(newFirst == 0 && newSecond == 0) : "Not prepared for name starting with AA"; - if (firstKey != newFirst || secondKey != newSecond) { - hiLoTable[secondKey][firstKey] = ((row - 1) << 16) | lo; - lo = row; - firstKey = newFirst; - secondKey = newSecond; - } - row++; - } - - hiLoTable[secondKey][firstKey] = ((entities.size() - 1) << 16) | lo; - - for (int i = 0; i < hiLoTable.length; i++) { - if (!allZero(hiLoTable[i])) { - out.write("static " + cppTypes.intType() + " const HILO_ACCEL_" - + i + "[] = {\n"); - for (int j = 0; j < hiLoTable[i].length; j++) { - if (j != 0) { - out.write(", "); - } - out.write("" + hiLoTable[i][j]); - } - out.write("\n};\n\n"); - } - } - - out.write("const int32_t* const " + cppTypes.classPrefix() - + "NamedCharactersAccel::HILO_ACCEL[] = {\n"); - for (int i = 0; i < 
hiLoTable.length; i++) { - if (i != 0) { - out.write(",\n"); - } - if (allZero(hiLoTable[i])) { - out.write(" 0"); - } else { - out.write(" HILO_ACCEL_" + i); - } - } - out.write("\n};\n\n"); - - out.flush(); - out.close(); - } - - private static void generateAccelH(File targetDirectory, CppTypes cppTypes, - TreeMap entities) throws IOException { - File hFile = new File(targetDirectory, cppTypes.classPrefix() - + "NamedCharactersAccel.h"); - Writer out = new OutputStreamWriter(new FileOutputStream(hFile), - "utf-8"); - out.write(DATA_LICENSE); - out.write("#ifndef " + cppTypes.classPrefix() + "NamedCharactersAccel_h\n"); - out.write("#define " + cppTypes.classPrefix() + "NamedCharactersAccel_h\n"); - out.write('\n'); - - String[] includes = cppTypes.namedCharactersIncludes(); - for (int i = 0; i < includes.length; i++) { - String include = includes[i]; - out.write("#include \"" + include + ".h\"\n"); - } - - out.write('\n'); - - out.write("class " + cppTypes.classPrefix() + "NamedCharactersAccel\n"); - out.write("{\n"); - out.write(" public:\n"); - out.write(" static const " + cppTypes.intType() - + "* const HILO_ACCEL[];\n"); - out.write("};\n"); - - out.write("\n#endif // " + cppTypes.classPrefix() - + "NamedCharactersAccel_h\n"); - out.flush(); - out.close(); - } - - private static void generateH(File targetDirectory, CppTypes cppTypes, - Map entities) throws IOException { - File hFile = new File(targetDirectory, cppTypes.classPrefix() - + "NamedCharacters.h"); - Writer out = new OutputStreamWriter(new FileOutputStream(hFile), - "utf-8"); - out.write(OUTPUT_LICENSE); - out.write("#ifndef " + cppTypes.classPrefix() + "NamedCharacters_h\n"); - out.write("#define " + cppTypes.classPrefix() + "NamedCharacters_h\n"); - out.write('\n'); - - String[] includes = cppTypes.namedCharactersIncludes(); - for (int i = 0; i < includes.length; i++) { - String include = includes[i]; - out.write("#include \"" + include + ".h\"\n"); - } - - out.write("\nstruct "); - 
out.write(cppTypes.characterNameTypeDeclaration()); - out.write(" {\n "); - out.write(cppTypes.unsignedShortType()); - out.write(" nameStart;\n "); - out.write(cppTypes.unsignedShortType()); - out.write(" nameLen;\n #ifdef DEBUG\n "); - out.write(cppTypes.intType()); - out.write(" n;\n #endif\n "); - out.write(cppTypes.intType()); - out.write(" length() const;\n "); - out.write(cppTypes.charType()); - out.write(" charAt("); - out.write(cppTypes.intType()); - out.write(" index) const;\n};\n\n"); - - out.write("class " + cppTypes.classPrefix() + "NamedCharacters\n"); - out.write("{\n"); - out.write(" public:\n"); - out.write(" static const " + cppTypes.characterNameTypeDeclaration() + " NAMES[];\n"); - out.write(" static const " + cppTypes.charType() + " VALUES[][2];\n"); - out.write(" static " + cppTypes.charType() + "** WINDOWS_1252;\n"); - out.write(" static void initializeStatics();\n"); - out.write(" static void releaseStatics();\n"); - out.write("};\n"); - - out.write("\n#endif // " + cppTypes.classPrefix() - + "NamedCharacters_h\n"); - out.flush(); - out.close(); - } - - private static void generateInclude(File targetDirectory, - CppTypes cppTypes, Map entities) throws IOException { - File includeFile = new File(targetDirectory, cppTypes.classPrefix() - + "NamedCharactersInclude.h"); - Writer out = new OutputStreamWriter(new FileOutputStream(includeFile), - "utf-8"); - - out.write(DATA_LICENSE); - out.write("/* Data generated from the table of named character references found at\n"); - out.write(" *\n"); - out.write(" * http://www.whatwg.org/specs/web-apps/current-work/multipage/named-character-references.html#named-character-references\n"); - out.write(" *\n"); - out.write(" * Files that #include this file must #define NAMED_CHARACTER_REFERENCE as a\n"); - out.write(" * macro of four parameters:\n"); - out.write(" *\n"); - out.write(" * 1. a unique integer N identifying the Nth [0,1,..] macro expansion in this file,\n"); - out.write(" * 2. 
a comma-separated sequence of characters comprising the character name,\n"); - out.write(" * without the first two letters or 0 if the sequence would be empty. \n"); - out.write(" * See Tokenizer.java.\n"); - out.write(" * 3. the length of this sequence of characters,\n"); - out.write(" * 4. placeholder flag (0 if argument #is not a placeholder and 1 if it is),\n"); - out.write(" * 5. a comma-separated sequence of char16_t literals corresponding\n"); - out.write(" * to the code-point(s) of the named character.\n"); - out.write(" *\n"); - out.write(" * The macro expansion doesn't have to refer to all or any of these parameters,\n"); - out.write(" * but common sense dictates that it should involve at least one of them.\n"); - out.write(" */\n"); - out.write("\n"); - out.write("// This #define allows the NAMED_CHARACTER_REFERENCE macro to accept comma-\n"); - out.write("// separated sequences as single macro arguments. Using commas directly would\n"); - out.write("// split the sequence into multiple macro arguments.\n"); - out.write("#define _ ,\n"); - out.write("\n"); - - int i = 0; - for (Map.Entry entity : entities.entrySet()) { - out.write("NAMED_CHARACTER_REFERENCE(" + i++ + ", "); - String name = entity.getKey(); - writeNameInitializer(out, name, " _ "); - out.write(", " + (name.length() - 2) + ", "); - out.write((name.length() == 2 ? 
"1" : "0") + ", "); - writeValueInitializer(out, entity.getValue(), " _ "); - out.write(")\n"); - } - - out.write("\n"); - out.write("#undef _\n"); - - out.flush(); - out.close(); - } - - private static void writeNameInitializer(Writer out, - String name, String separator) - throws IOException { - out.write("/* " + name.charAt(0) + " " + name.charAt(1) + " */ "); - if (name.length() == 2) { - out.write("0"); - } else { - for (int i = 2; i < name.length(); i++) { - out.write("'" + name.charAt(i) + "'"); - if (i < name.length() - 1) - out.write(separator); - } - } - } - - private static void writeValueInitializer(Writer out, - String value, String separator) - throws IOException { - if (value.length() == 1) { - out.write(toHexString(value.charAt(0))); - out.write(separator); - out.write("0"); - } else { - out.write(toHexString(value.charAt(0))); - out.write(separator); - out.write(toHexString(value.charAt(1))); - } - } - - private static void defineMacroAndInclude(Writer out, String expansion, - String includeFile) throws IOException { - out.write("#define NAMED_CHARACTER_REFERENCE(N, CHARS, LEN, FLAG, VALUE) \\\n" - + expansion + "\n"); - out.write("#include \"" + includeFile + "\"\n"); - out.write("#undef NAMED_CHARACTER_REFERENCE\n"); - } - - private static void defineMacroAndInclude(Writer out, String expansion, - String debugExpansion, String includeFile) throws IOException { - out.write("#ifdef DEBUG\n"); - out.write(" #define NAMED_CHARACTER_REFERENCE(N, CHARS, LEN, FLAG, VALUE) \\\n" - + debugExpansion + "\n"); - out.write("#else\n"); - out.write(" #define NAMED_CHARACTER_REFERENCE(N, CHARS, LEN, FLAG, VALUE) \\\n" - + expansion + "\n"); - out.write("#endif\n"); - out.write("#include \"" + includeFile + "\"\n"); - out.write("#undef NAMED_CHARACTER_REFERENCE\n"); - } - - private static void writeStaticMemberDeclaration(Writer out, - CppTypes cppTypes, String type, String name) throws IOException { - out.write(type + " " + cppTypes.classPrefix() + 
"NamedCharacters::" - + name + ";\n"); - } - - private static int charToIndex(char c) { - if (c >= 'a' && c <= 'z') { - return c - 'a' + 26; - } else if (c >= 'A' && c <= 'Z') { - return c - 'A'; - } - throw new IllegalArgumentException("Bad char in named character name: " - + c); - } - - private static boolean allZero(int[] arr) { - for (int i = 0; i < arr.length; i++) { - if (arr[i] != 0) { - return false; - } - } - return true; - } - - private static void generateCpp(File targetDirectory, CppTypes cppTypes, - Map entities) throws IOException { - String includeFile = cppTypes.classPrefix() - + "NamedCharactersInclude.h"; - File cppFile = new File(targetDirectory, cppTypes.classPrefix() - + "NamedCharacters.cpp"); - Writer out = new OutputStreamWriter(new FileOutputStream(cppFile), - "utf-8"); - - out.write(OUTPUT_LICENSE); - out.write("#define " + cppTypes.classPrefix() - + "NamedCharacters_cpp_\n"); - - String[] includes = cppTypes.namedCharactersIncludes(); - for (int i = 0; i < includes.length; i++) { - String include = includes[i]; - out.write("#include \"" + include + ".h\"\n"); - } - - out.write('\n'); - out.write("#include \"" + cppTypes.classPrefix() - + "NamedCharacters.h\"\n"); - out.write("\n"); - - out.write("const " + cppTypes.charType() + " " + cppTypes.classPrefix() - + "NamedCharacters::VALUES[][2] = {\n"); - defineMacroAndInclude(out, "{ VALUE },", includeFile); - // The useless terminator entry makes the above macro simpler with - // compilers that whine about a comma after the last item - out.write("{0, 0} };\n\n"); - - String staticMemberType = cppTypes.charType() + "**"; - writeStaticMemberDeclaration(out, cppTypes, staticMemberType, - "WINDOWS_1252"); - - out.write("static " + cppTypes.charType() - + " const WINDOWS_1252_DATA[] = {\n"); - out.write(" 0x20AC,\n"); - out.write(" 0x0081,\n"); - out.write(" 0x201A,\n"); - out.write(" 0x0192,\n"); - out.write(" 0x201E,\n"); - out.write(" 0x2026,\n"); - out.write(" 0x2020,\n"); - out.write(" 
0x2021,\n"); - out.write(" 0x02C6,\n"); - out.write(" 0x2030,\n"); - out.write(" 0x0160,\n"); - out.write(" 0x2039,\n"); - out.write(" 0x0152,\n"); - out.write(" 0x008D,\n"); - out.write(" 0x017D,\n"); - out.write(" 0x008F,\n"); - out.write(" 0x0090,\n"); - out.write(" 0x2018,\n"); - out.write(" 0x2019,\n"); - out.write(" 0x201C,\n"); - out.write(" 0x201D,\n"); - out.write(" 0x2022,\n"); - out.write(" 0x2013,\n"); - out.write(" 0x2014,\n"); - out.write(" 0x02DC,\n"); - out.write(" 0x2122,\n"); - out.write(" 0x0161,\n"); - out.write(" 0x203A,\n"); - out.write(" 0x0153,\n"); - out.write(" 0x009D,\n"); - out.write(" 0x017E,\n"); - out.write(" 0x0178\n"); - out.write("};\n\n"); - - out.write("/**\n"); - out.write(" * To avoid having lots of pointers in the |charData| array, below,\n"); - out.write(" * which would cause us to have to do lots of relocations at library\n"); - out.write(" * load time, store all the string data for the names in one big array.\n"); - out.write(" * Then use tricks with enums to help us build an array that contains\n"); - out.write(" * the positions of each within the big arrays.\n"); - out.write(" */\n\n"); - - out.write("static const " + cppTypes.byteType() + " ALL_NAMES[] = {\n"); - - defineMacroAndInclude(out, "CHARS ,", includeFile); - - out.write("};\n\n"); - - out.write("enum NamePositions {\n"); - out.write(" DUMMY_INITIAL_NAME_POSITION = 0,\n"); - - out.write("/* enums don't take up space, so generate _START and _END */\n"); - defineMacroAndInclude(out, - "NAME_##N##_DUMMY, /* automatically one higher than previous */ \\\n" - + "NAME_##N##_START = NAME_##N##_DUMMY - 1, \\\n" - + "NAME_##N##_END = NAME_##N##_START + LEN + FLAG,", - includeFile); - - out.write(" DUMMY_FINAL_NAME_VALUE\n"); - out.write("};\n\n"); - - String arrayLengthMacro = cppTypes.arrayLengthMacro(); - String staticAssert = cppTypes.staticAssert(); - if (staticAssert != null && arrayLengthMacro != null) { - out.write(staticAssert + "(" + arrayLengthMacro - + 
"(ALL_NAMES) < 0x10000, \"Start positions should fit in 16 bits\");\n\n"); - } - - out.write("const " + cppTypes.characterNameTypeDeclaration() + " " + cppTypes.classPrefix() - + "NamedCharacters::NAMES[] = {\n"); - defineMacroAndInclude(out, "{ NAME_##N##_START, LEN, },", "{ NAME_##N##_START, LEN, N },", includeFile); - out.write("};\n\n"); - - out.write(cppTypes.intType()); - out.write("\n"); - out.write(cppTypes.characterNameTypeDeclaration()); - out.write("::length() const\n{\n return nameLen;\n}\n\n"); - out.write(cppTypes.charType()); - out.write("\n"); - out.write(cppTypes.characterNameTypeDeclaration()); - out.write("::charAt("); - out.write("int32_t"); - out.write(" index) const\n{\n return static_cast<"); - out.write(cppTypes.charType()); - out.write("> (ALL_NAMES[nameStart + index]);\n}\n\n"); - - out.write("void\n"); - out.write(cppTypes.classPrefix() - + "NamedCharacters::initializeStatics()\n"); - out.write("{\n"); - out.write(" WINDOWS_1252 = new " + cppTypes.charType() + "*[32];\n"); - out.write(" for (" + cppTypes.intType() + " i = 0; i < 32; ++i) {\n"); - out.write(" WINDOWS_1252[i] = (" + cppTypes.charType() - + "*)&(WINDOWS_1252_DATA[i]);\n"); - out.write(" }\n"); - out.write("}\n"); - out.write("\n"); - - out.write("void\n"); - out.write(cppTypes.classPrefix() - + "NamedCharacters::releaseStatics()\n"); - out.write("{\n"); - out.write(" delete[] WINDOWS_1252;\n"); - out.write("}\n"); - out.flush(); - out.close(); - } -} From 6f9916f8a93e3b5916dfeea1d7fadc6b4b641b97 Mon Sep 17 00:00:00 2001 From: Anthony Vanelverdinghe Date: Thu, 23 Apr 2020 19:58:32 +0200 Subject: [PATCH 10/30] Clean up: GenerateNamedCharacters.java --- .../generator/GenerateNamedCharacters.java | 182 ------------------ 1 file changed, 182 deletions(-) delete mode 100644 translator-src/nu/validator/htmlparser/generator/GenerateNamedCharacters.java diff --git a/translator-src/nu/validator/htmlparser/generator/GenerateNamedCharacters.java 
b/translator-src/nu/validator/htmlparser/generator/GenerateNamedCharacters.java deleted file mode 100644 index 69ddb318..00000000 --- a/translator-src/nu/validator/htmlparser/generator/GenerateNamedCharacters.java +++ /dev/null @@ -1,182 +0,0 @@ -/* - * Copyright (c) 2008-2009 Mozilla Foundation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- */ - -package nu.validator.htmlparser.generator; - -import java.io.BufferedReader; -import java.io.IOException; -import java.io.InputStreamReader; -import java.util.Map; -import java.util.TreeMap; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -public class GenerateNamedCharacters { - - private static final int LEAD_OFFSET = 0xD800 - (0x10000 >> 10); - - private static final Pattern LINE_PATTERN = Pattern.compile(" ([^<]*) U\\+(\\S*) (?:U\\+(\\S*) )?"); - - private static String toUString(int c) { - String hexString = Integer.toHexString(c); - switch (hexString.length()) { - case 1: - return "\\u000" + hexString; - case 2: - return "\\u00" + hexString; - case 3: - return "\\u0" + hexString; - case 4: - return "\\u" + hexString; - default: - throw new RuntimeException("Unreachable."); - } - } - - private static int charToIndex(char c) { - if (c >= 'a' && c <= 'z') { - return c - 'a' + 26; - } else if (c >= 'A' && c <= 'Z') { - return c - 'A'; - } - throw new IllegalArgumentException("Bad char in named character name: " - + c); - } - - private static boolean allZero(int[] arr) { - for (int i = 0; i < arr.length; i++) { - if (arr[i] != 0) { - return false; - } - } - return true; - } - - /** - * @param args - * @throws IOException - */ - public static void main(String[] args) throws IOException { - TreeMap entities = new TreeMap(); - BufferedReader reader = new BufferedReader(new InputStreamReader( - System.in, "utf-8")); - String line; - while ((line = reader.readLine()) != null) { - Matcher m = LINE_PATTERN.matcher(line); - while (m.find()) { - String value; - if (m.group(3) != null) { - // two BMP chars - int firstIntVal = Integer.parseInt(m.group(2), 16); - int secondIntVal = Integer.parseInt(m.group(3), 16); - value = ("" + (char)firstIntVal) + (char)secondIntVal; - } else { - // one code point - int intVal = Integer.parseInt(m.group(2), 16); - if (intVal <= 0xFFFF) { - value = "" + (char)intVal; - } else { - int high = (LEAD_OFFSET + 
(intVal >> 10)); - int low = (0xDC00 + (intVal & 0x3FF)); - value = ("" + (char)high) + (char)low; - } - } - entities.put(m.group(1), value); - } - } - - // Java initializes arrays to zero. Zero is our magic value for no hilo - // value. - int[][] hiLoTable = new int['z' + 1]['Z' - 'A' + 1 + 'z' - 'a' + 1]; - - String firstName = entities.entrySet().iterator().next().getKey(); - int firstKey = charToIndex(firstName.charAt(0)); - int secondKey = firstName.charAt(1); - int row = 0; - int lo = 0; - - System.out.print("static final @NoLength @CharacterName String[] NAMES = {\n"); - for (Map.Entry entity : entities.entrySet()) { - String name = entity.getKey(); - int newFirst = charToIndex(name.charAt(0)); - int newSecond = name.charAt(1); - assert !(newFirst == 0 && newSecond == 0) : "Not prepared for name starting with AA"; - if (firstKey != newFirst || secondKey != newSecond) { - hiLoTable[secondKey][firstKey] = ((row - 1) << 16) | lo; - lo = row; - firstKey = newFirst; - secondKey = newSecond; - } - System.out.print("\""); - System.out.print(name.substring(2)); - System.out.print("\",\n"); - row++; - } - System.out.print("};\n"); - - hiLoTable[secondKey][firstKey] = ((entities.size() - 1) << 16) | lo; - - System.out.print("static final @NoLength char[][] VALUES = {\n"); - for (Map.Entry entity : entities.entrySet()) { - String value = entity.getValue(); - System.out.print("{"); - if (value.length() == 1) { - char c = value.charAt(0); - if (c == '\'') { - System.out.print("\'\\\'\'"); - } else if (c == '\n') { - System.out.print("\'\\n\'"); - } else if (c == '\\') { - System.out.print("\'\\\\\'"); - } else if (c <= 0xFFFF) { - System.out.print("\'"); - System.out.print(toUString(c)); - System.out.print("\'"); - } - } else { - System.out.print("\'"); - System.out.print(toUString(value.charAt(0))); - System.out.print("\', \'"); - System.out.print(toUString(value.charAt(1))); - System.out.print("\'"); - } - System.out.print("},\n"); - } - System.out.print("};\n"); - - 
System.out.print("static final @NoLength int[][] HILO_ACCEL = {\n"); - for (int i = 0; i < hiLoTable.length; i++) { - if (allZero(hiLoTable[i])) { - System.out.print("null,\n"); - } else { - System.out.print("{"); - for (int j = 0; j < hiLoTable[i].length; j++) { - System.out.print(hiLoTable[i][j]); - System.out.print(", "); - } - System.out.print("},\n"); - } - } - System.out.print("};\n"); - } - -} From 6d0005505914a4fa5b45a257523e44d149939d85 Mon Sep 17 00:00:00 2001 From: Anthony Vanelverdinghe Date: Sat, 25 Apr 2020 11:05:13 +0200 Subject: [PATCH 11/30] Remove RPM package support --- pom.xml | 41 ----------------------------------------- 1 file changed, 41 deletions(-) diff --git a/pom.xml b/pom.xml index 41f46725..68a8aebf 100644 --- a/pom.xml +++ b/pom.xml @@ -162,45 +162,6 @@ - - org.codehaus.mojo - rpm-maven-plugin - - 1 - The MIT License - Development/Java - /var/tmp/${project.build.finalName} - - _javadir ${rpm.java.dir} - _javadocdir ${rpm.javadoc.dir} - - - - ${rpm.java.dir} - 644 - root - root - - - ${project.build.directory}/${project.build.finalName}.jar - - - - - ${rpm.javadoc.dir}/${project.build.finalName} - 644 - root - root - - - ${project.build.directory}/apidocs - - - - - %__ln_s ${project.build.finalName}.jar %{buildroot}%{_javadir}/${project.name}.jar - - @@ -233,8 +194,6 @@ - /usr/share/java - /usr/share/javadoc UTF-8 From 1f70f9bbf1cb0ad7c99cf697c80cf74fcdd4f361 Mon Sep 17 00:00:00 2001 From: Anthony Vanelverdinghe Date: Sat, 25 Apr 2020 11:06:01 +0200 Subject: [PATCH 12/30] Remove OSGi bundle support --- pom.xml | 27 +++------------------------ 1 file changed, 3 insertions(+), 24 deletions(-) diff --git a/pom.xml b/pom.xml index 68a8aebf..558b4580 100644 --- a/pom.xml +++ b/pom.xml @@ -24,7 +24,6 @@ 4.0.0 nu.validator.htmlparser htmlparser - bundle 1.4 htmlparser http://about.validator.nu/htmlparser/ @@ -33,14 +32,12 @@ Usage notes for this POM: To build without signing, run: - mvn clean source:jar javadoc:jar repository:bundle-create - 
(enter 0 when prompted) + mvn clean source:jar javadoc:jar To build and sign, run: - mvn clean source:jar javadoc:jar package gpg:sign repository:bundle-create - (enter 0 when prompted) + mvn clean source:jar javadoc:jar package gpg:sign - This POM file is used for creating the bundle for distribution via the + This POM file is used for creating the JAR for distribution via the Maven Central Repository. It is not used as part of the normal development process of the parser and the maintainer of the parser (Henri Sivonen) isn't experienced in POM tweaking. If you need this POM to do something @@ -144,24 +141,6 @@ true - - org.apache.felix - maven-bundle-plugin - 2.3.7 - true - - - false - - - ${project.name} - nu.validator.htmlparser - ${project.version} - J2SE-1.5 - <_removeheaders>Built-By,Bnd-LastModified - - - From 1c484d4a348b0f1fe254a6898260ed446694b788 Mon Sep 17 00:00:00 2001 From: Anthony Vanelverdinghe Date: Wed, 12 Aug 2020 08:20:26 +0200 Subject: [PATCH 13/30] Update URLs --- pom.xml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/pom.xml b/pom.xml index 558b4580..4c471738 100644 --- a/pom.xml +++ b/pom.xml @@ -20,13 +20,13 @@ * DEALINGS IN THE SOFTWARE. --> + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd"> 4.0.0 nu.validator.htmlparser htmlparser 1.4 htmlparser - http://about.validator.nu/htmlparser/ + https://about.validator.nu/htmlparser/ The Validator.nu HTML Parser is an implementation of the HTML5 parsing algorithm in Java for applications. The parser is designed to work as a drop-in replacement for the XML parser in applications that already support XHTML 1.x content with an XML parser and use SAX, DOM or XOM to interface with the parser. 
hsivonen Henri Sivonen hsivonen@iki.fi - http://hsivonen.iki.fi/ + https://hsivonen.fi/ The MIT License - http://www.opensource.org/licenses/mit-license.php + https://opensource.org/licenses/mit-license.php repo The (New) BSD License - http://www.opensource.org/licenses/bsd-license.php + https://opensource.org/licenses/bsd-license.php repo - scm:hg:http://hg.mozilla.org/projects/htmlparser/ - http://hg.mozilla.org/projects/htmlparser/ + scm:git:https://github.com/validator/htmlparser.git + https://github.com/validator/htmlparser ${project.build.directory}/src From 31888b3a85e86f51ad3862467f7d8522c66bb785 Mon Sep 17 00:00:00 2001 From: Anthony Vanelverdinghe Date: Wed, 12 Aug 2020 08:27:34 +0200 Subject: [PATCH 14/30] Format POM --- pom.xml | 99 ++++++++++++++++++++++++++++++--------------------------- 1 file changed, 53 insertions(+), 46 deletions(-) diff --git a/pom.xml b/pom.xml index 4c471738..65031dc3 100644 --- a/pom.xml +++ b/pom.xml @@ -22,12 +22,6 @@ 4.0.0 - nu.validator.htmlparser - htmlparser - 1.4 - htmlparser - https://about.validator.nu/htmlparser/ - The Validator.nu HTML Parser is an implementation of the HTML5 parsing algorithm in Java for applications. The parser is designed to work as a drop-in replacement for the XML parser in applications that already support XHTML 1.x content with an XML parser and use SAX, DOM or XOM to interface with the parser. - - - hsivonen - Henri Sivonen - hsivonen@iki.fi - https://hsivonen.fi/ - - + + nu.validator.htmlparser + htmlparser + 1.4 + + htmlparser + The Validator.nu HTML Parser is an implementation of the HTML5 parsing algorithm in Java for applications. The parser is designed to work as a drop-in replacement for the XML parser in applications that already support XHTML 1.x content with an XML parser and use SAX, DOM or XOM to interface with the parser. 
+ https://about.validator.nu/htmlparser/ The MIT License @@ -65,10 +59,55 @@ repo + + + + hsivonen + Henri Sivonen + hsivonen@iki.fi + https://hsivonen.fi/ + + + scm:git:https://github.com/validator/htmlparser.git https://github.com/validator/htmlparser + + + UTF-8 + + + + + com.ibm.icu + icu4j + 4.0.1 + compile + true + + + xom + xom + 1.1 + compile + true + + + net.sourceforge.jchardet + jchardet + 1.0 + compile + true + + + com.sdicons.jsontools + jsontools-core + 1.4 + test + + + ${project.build.directory}/src ${basedir}/test-src @@ -143,36 +182,4 @@ - - - com.ibm.icu - icu4j - 4.0.1 - compile - true - - - xom - xom - 1.1 - compile - true - - - net.sourceforge.jchardet - jchardet - 1.0 - compile - true - - - com.sdicons.jsontools - jsontools-core - 1.4 - test - - - - UTF-8 - From ea606388d7e5275eb3e056961aed4b860fba8ed1 Mon Sep 17 00:00:00 2001 From: Anthony Vanelverdinghe Date: Thu, 23 Apr 2020 19:58:42 +0200 Subject: [PATCH 15/30] Adopt Maven directory layout --- .../nu/validator/htmlparser/generator/ApplyHotSpotWorkaround.java | 0 .../htmlparser/impl => hotspot/resources}/HotSpotWorkaround.txt | 0 src/{ => main/java}/nu/validator/htmlparser/annotation/Auto.java | 0 .../java}/nu/validator/htmlparser/annotation/CharacterName.java | 0 src/{ => main/java}/nu/validator/htmlparser/annotation/Const.java | 0 .../java}/nu/validator/htmlparser/annotation/Creator.java | 0 .../java}/nu/validator/htmlparser/annotation/HtmlCreator.java | 0 .../java}/nu/validator/htmlparser/annotation/IdType.java | 0 .../java}/nu/validator/htmlparser/annotation/Inline.java | 0 .../java}/nu/validator/htmlparser/annotation/Literal.java | 0 src/{ => main/java}/nu/validator/htmlparser/annotation/Local.java | 0 .../java}/nu/validator/htmlparser/annotation/NoLength.java | 0 src/{ => main/java}/nu/validator/htmlparser/annotation/NsUri.java | 0 .../java}/nu/validator/htmlparser/annotation/Prefix.java | 0 src/{ => main/java}/nu/validator/htmlparser/annotation/QName.java | 0 
.../java}/nu/validator/htmlparser/annotation/SvgCreator.java | 0 .../java}/nu/validator/htmlparser/annotation/Unsigned.java | 0 .../java}/nu/validator/htmlparser/annotation/Virtual.java | 0 .../java}/nu/validator/htmlparser/annotation/package.html | 0 .../java}/nu/validator/htmlparser/common/ByteReadable.java | 0 .../java}/nu/validator/htmlparser/common/CharacterHandler.java | 0 .../java}/nu/validator/htmlparser/common/DocumentMode.java | 0 .../java}/nu/validator/htmlparser/common/DocumentModeHandler.java | 0 .../validator/htmlparser/common/EncodingDeclarationHandler.java | 0 .../java}/nu/validator/htmlparser/common/Heuristics.java | 0 src/{ => main/java}/nu/validator/htmlparser/common/Interner.java | 0 .../java}/nu/validator/htmlparser/common/TokenHandler.java | 0 .../java}/nu/validator/htmlparser/common/TransitionHandler.java | 0 .../java}/nu/validator/htmlparser/common/XmlViolationPolicy.java | 0 src/{ => main/java}/nu/validator/htmlparser/common/package.html | 0 .../java}/nu/validator/htmlparser/dom/DOMTreeBuilder.java | 0 src/{ => main/java}/nu/validator/htmlparser/dom/Dom2Sax.java | 0 .../java}/nu/validator/htmlparser/dom/HtmlDocumentBuilder.java | 0 src/{ => main/java}/nu/validator/htmlparser/dom/package.html | 0 .../java}/nu/validator/htmlparser/extra/ChardetSniffer.java | 0 .../java}/nu/validator/htmlparser/extra/IcuDetectorSniffer.java | 0 .../java}/nu/validator/htmlparser/extra/NormalizationChecker.java | 0 .../java}/nu/validator/htmlparser/impl/AttributeName.java | 0 .../java}/nu/validator/htmlparser/impl/CoalescingTreeBuilder.java | 0 src/{ => main/java}/nu/validator/htmlparser/impl/ElementName.java | 0 .../nu/validator/htmlparser/impl/ErrorReportingTokenizer.java | 0 .../java}/nu/validator/htmlparser/impl/HtmlAttributes.java | 0 src/{ => main/java}/nu/validator/htmlparser/impl/LocatorImpl.java | 0 src/{ => main/java}/nu/validator/htmlparser/impl/MetaScanner.java | 0 src/{ => main/java}/nu/validator/htmlparser/impl/NCName.java | 0 
.../java}/nu/validator/htmlparser/impl/NamedCharacters.java | 0 .../java}/nu/validator/htmlparser/impl/NamedCharactersAccel.java | 0 src/{ => main/java}/nu/validator/htmlparser/impl/Portability.java | 0 .../java}/nu/validator/htmlparser/impl/PushedLocation.java | 0 src/{ => main/java}/nu/validator/htmlparser/impl/StackNode.java | 0 .../java}/nu/validator/htmlparser/impl/StateSnapshot.java | 0 .../java}/nu/validator/htmlparser/impl/TaintableLocatorImpl.java | 0 src/{ => main/java}/nu/validator/htmlparser/impl/Tokenizer.java | 0 src/{ => main/java}/nu/validator/htmlparser/impl/TreeBuilder.java | 0 .../java}/nu/validator/htmlparser/impl/TreeBuilderState.java | 0 src/{ => main/java}/nu/validator/htmlparser/impl/UTF16Buffer.java | 0 src/{ => main/java}/nu/validator/htmlparser/impl/package.html | 0 src/{ => main/java}/nu/validator/htmlparser/io/BomSniffer.java | 0 src/{ => main/java}/nu/validator/htmlparser/io/Confidence.java | 0 src/{ => main/java}/nu/validator/htmlparser/io/Driver.java | 0 src/{ => main/java}/nu/validator/htmlparser/io/Encoding.java | 0 .../java}/nu/validator/htmlparser/io/HtmlInputStreamReader.java | 0 src/{ => main/java}/nu/validator/htmlparser/io/MetaSniffer.java | 0 .../java}/nu/validator/htmlparser/rewindable/Rewindable.java | 0 .../nu/validator/htmlparser/rewindable/RewindableInputStream.java | 0 src/{ => main/java}/nu/validator/htmlparser/sax/HtmlParser.java | 0 .../java}/nu/validator/htmlparser/sax/HtmlSerializer.java | 0 .../nu/validator/htmlparser/sax/InfosetCoercingHtmlParser.java | 0 .../nu/validator/htmlparser/sax/NameCheckingXmlSerializer.java | 0 src/{ => main/java}/nu/validator/htmlparser/sax/SAXStreamer.java | 0 .../java}/nu/validator/htmlparser/sax/SAXTreeBuilder.java | 0 .../java}/nu/validator/htmlparser/sax/XmlSerializer.java | 0 src/{ => main/java}/nu/validator/htmlparser/sax/package.html | 0 src/{ => main/java}/nu/validator/htmlparser/xom/FormPointer.java | 0 .../java}/nu/validator/htmlparser/xom/FormPtrElement.java | 0 src/{ => 
main/java}/nu/validator/htmlparser/xom/HtmlBuilder.java | 0 .../java}/nu/validator/htmlparser/xom/ModalDocument.java | 0 src/{ => main/java}/nu/validator/htmlparser/xom/Mode.java | 0 .../java}/nu/validator/htmlparser/xom/SimpleNodeFactory.java | 0 .../java}/nu/validator/htmlparser/xom/XOMTreeBuilder.java | 0 src/{ => main/java}/nu/validator/htmlparser/xom/package.html | 0 src/{ => main/java}/nu/validator/saxtree/CDATA.java | 0 src/{ => main/java}/nu/validator/saxtree/CharBufferNode.java | 0 src/{ => main/java}/nu/validator/saxtree/Characters.java | 0 src/{ => main/java}/nu/validator/saxtree/Comment.java | 0 src/{ => main/java}/nu/validator/saxtree/DTD.java | 0 src/{ => main/java}/nu/validator/saxtree/Document.java | 0 src/{ => main/java}/nu/validator/saxtree/DocumentFragment.java | 0 src/{ => main/java}/nu/validator/saxtree/Element.java | 0 src/{ => main/java}/nu/validator/saxtree/Entity.java | 0 src/{ => main/java}/nu/validator/saxtree/IgnorableWhitespace.java | 0 src/{ => main/java}/nu/validator/saxtree/LocatorImpl.java | 0 src/{ => main/java}/nu/validator/saxtree/Node.java | 0 src/{ => main/java}/nu/validator/saxtree/NodeType.java | 0 src/{ => main/java}/nu/validator/saxtree/NullLexicalHandler.java | 0 src/{ => main/java}/nu/validator/saxtree/ParentNode.java | 0 src/{ => main/java}/nu/validator/saxtree/PrefixMapping.java | 0 .../java}/nu/validator/saxtree/ProcessingInstruction.java | 0 src/{ => main/java}/nu/validator/saxtree/SkippedEntity.java | 0 src/{ => main/java}/nu/validator/saxtree/TreeBuilder.java | 0 src/{ => main/java}/nu/validator/saxtree/TreeParser.java | 0 src/{ => main/java}/nu/validator/saxtree/package.html | 0 .../java}/nu/validator/htmlparser/test/DecoderLoopTester.java | 0 .../test/java}/nu/validator/htmlparser/test/DomIdTester.java | 0 .../test/java}/nu/validator/htmlparser/test/DomTest.java | 0 .../test/java}/nu/validator/htmlparser/test/EncodingTester.java | 0 .../java}/nu/validator/htmlparser/test/JSONArrayTokenHandler.java | 0 
.../test/java}/nu/validator/htmlparser/test/ListErrorHandler.java | 0 .../java}/nu/validator/htmlparser/test/SystemErrErrorHandler.java | 0 .../test/java}/nu/validator/htmlparser/test/TokenPrinter.java | 0 .../test/java}/nu/validator/htmlparser/test/TokenizerTester.java | 0 .../nu/validator/htmlparser/test/TreeDumpContentHandler.java | 0 .../test/java}/nu/validator/htmlparser/test/TreePrinter.java | 0 .../test/java}/nu/validator/htmlparser/test/TreeTester.java | 0 .../java}/nu/validator/htmlparser/test/UntilHashInputStream.java | 0 .../java}/nu/validator/htmlparser/test/XmlSerializerTester.java | 0 .../test/java}/nu/validator/htmlparser/test/XomTest.java | 0 .../test/java}/nu/validator/htmlparser/test/package.html | 0 .../test/java}/nu/validator/saxtree/test/PassThruPrinter.java | 0 .../test/java}/nu/validator/saxtree/test/package.html | 0 120 files changed, 0 insertions(+), 0 deletions(-) rename {translator-src => src/hotspot/java}/nu/validator/htmlparser/generator/ApplyHotSpotWorkaround.java (100%) rename src/{nu/validator/htmlparser/impl => hotspot/resources}/HotSpotWorkaround.txt (100%) rename src/{ => main/java}/nu/validator/htmlparser/annotation/Auto.java (100%) rename src/{ => main/java}/nu/validator/htmlparser/annotation/CharacterName.java (100%) rename src/{ => main/java}/nu/validator/htmlparser/annotation/Const.java (100%) rename src/{ => main/java}/nu/validator/htmlparser/annotation/Creator.java (100%) rename src/{ => main/java}/nu/validator/htmlparser/annotation/HtmlCreator.java (100%) rename src/{ => main/java}/nu/validator/htmlparser/annotation/IdType.java (100%) rename src/{ => main/java}/nu/validator/htmlparser/annotation/Inline.java (100%) rename src/{ => main/java}/nu/validator/htmlparser/annotation/Literal.java (100%) rename src/{ => main/java}/nu/validator/htmlparser/annotation/Local.java (100%) rename src/{ => main/java}/nu/validator/htmlparser/annotation/NoLength.java (100%) rename src/{ => 
main/java}/nu/validator/htmlparser/annotation/NsUri.java (100%) rename src/{ => main/java}/nu/validator/htmlparser/annotation/Prefix.java (100%) rename src/{ => main/java}/nu/validator/htmlparser/annotation/QName.java (100%) rename src/{ => main/java}/nu/validator/htmlparser/annotation/SvgCreator.java (100%) rename src/{ => main/java}/nu/validator/htmlparser/annotation/Unsigned.java (100%) rename src/{ => main/java}/nu/validator/htmlparser/annotation/Virtual.java (100%) rename src/{ => main/java}/nu/validator/htmlparser/annotation/package.html (100%) rename src/{ => main/java}/nu/validator/htmlparser/common/ByteReadable.java (100%) mode change 100755 => 100644 rename src/{ => main/java}/nu/validator/htmlparser/common/CharacterHandler.java (100%) rename src/{ => main/java}/nu/validator/htmlparser/common/DocumentMode.java (100%) rename src/{ => main/java}/nu/validator/htmlparser/common/DocumentModeHandler.java (100%) rename src/{ => main/java}/nu/validator/htmlparser/common/EncodingDeclarationHandler.java (100%) rename src/{ => main/java}/nu/validator/htmlparser/common/Heuristics.java (100%) rename src/{ => main/java}/nu/validator/htmlparser/common/Interner.java (100%) rename src/{ => main/java}/nu/validator/htmlparser/common/TokenHandler.java (100%) mode change 100755 => 100644 rename src/{ => main/java}/nu/validator/htmlparser/common/TransitionHandler.java (100%) rename src/{ => main/java}/nu/validator/htmlparser/common/XmlViolationPolicy.java (100%) rename src/{ => main/java}/nu/validator/htmlparser/common/package.html (100%) rename src/{ => main/java}/nu/validator/htmlparser/dom/DOMTreeBuilder.java (100%) rename src/{ => main/java}/nu/validator/htmlparser/dom/Dom2Sax.java (100%) rename src/{ => main/java}/nu/validator/htmlparser/dom/HtmlDocumentBuilder.java (100%) rename src/{ => main/java}/nu/validator/htmlparser/dom/package.html (100%) rename src/{ => main/java}/nu/validator/htmlparser/extra/ChardetSniffer.java (100%) rename src/{ => 
main/java}/nu/validator/htmlparser/extra/IcuDetectorSniffer.java (100%) rename src/{ => main/java}/nu/validator/htmlparser/extra/NormalizationChecker.java (100%) rename src/{ => main/java}/nu/validator/htmlparser/impl/AttributeName.java (100%) rename src/{ => main/java}/nu/validator/htmlparser/impl/CoalescingTreeBuilder.java (100%) rename src/{ => main/java}/nu/validator/htmlparser/impl/ElementName.java (100%) rename src/{ => main/java}/nu/validator/htmlparser/impl/ErrorReportingTokenizer.java (100%) rename src/{ => main/java}/nu/validator/htmlparser/impl/HtmlAttributes.java (100%) rename src/{ => main/java}/nu/validator/htmlparser/impl/LocatorImpl.java (100%) rename src/{ => main/java}/nu/validator/htmlparser/impl/MetaScanner.java (100%) mode change 100755 => 100644 rename src/{ => main/java}/nu/validator/htmlparser/impl/NCName.java (100%) rename src/{ => main/java}/nu/validator/htmlparser/impl/NamedCharacters.java (100%) mode change 100755 => 100644 rename src/{ => main/java}/nu/validator/htmlparser/impl/NamedCharactersAccel.java (100%) rename src/{ => main/java}/nu/validator/htmlparser/impl/Portability.java (100%) rename src/{ => main/java}/nu/validator/htmlparser/impl/PushedLocation.java (100%) rename src/{ => main/java}/nu/validator/htmlparser/impl/StackNode.java (100%) rename src/{ => main/java}/nu/validator/htmlparser/impl/StateSnapshot.java (100%) rename src/{ => main/java}/nu/validator/htmlparser/impl/TaintableLocatorImpl.java (100%) rename src/{ => main/java}/nu/validator/htmlparser/impl/Tokenizer.java (100%) mode change 100755 => 100644 rename src/{ => main/java}/nu/validator/htmlparser/impl/TreeBuilder.java (100%) rename src/{ => main/java}/nu/validator/htmlparser/impl/TreeBuilderState.java (100%) rename src/{ => main/java}/nu/validator/htmlparser/impl/UTF16Buffer.java (100%) rename src/{ => main/java}/nu/validator/htmlparser/impl/package.html (100%) rename src/{ => main/java}/nu/validator/htmlparser/io/BomSniffer.java (100%) mode change 100755 => 
100644 rename src/{ => main/java}/nu/validator/htmlparser/io/Confidence.java (100%) rename src/{ => main/java}/nu/validator/htmlparser/io/Driver.java (100%) rename src/{ => main/java}/nu/validator/htmlparser/io/Encoding.java (100%) rename src/{ => main/java}/nu/validator/htmlparser/io/HtmlInputStreamReader.java (100%) mode change 100755 => 100644 rename src/{ => main/java}/nu/validator/htmlparser/io/MetaSniffer.java (100%) mode change 100755 => 100644 rename src/{ => main/java}/nu/validator/htmlparser/rewindable/Rewindable.java (100%) mode change 100755 => 100644 rename src/{ => main/java}/nu/validator/htmlparser/rewindable/RewindableInputStream.java (100%) mode change 100755 => 100644 rename src/{ => main/java}/nu/validator/htmlparser/sax/HtmlParser.java (100%) rename src/{ => main/java}/nu/validator/htmlparser/sax/HtmlSerializer.java (100%) rename src/{ => main/java}/nu/validator/htmlparser/sax/InfosetCoercingHtmlParser.java (100%) rename src/{ => main/java}/nu/validator/htmlparser/sax/NameCheckingXmlSerializer.java (100%) rename src/{ => main/java}/nu/validator/htmlparser/sax/SAXStreamer.java (100%) rename src/{ => main/java}/nu/validator/htmlparser/sax/SAXTreeBuilder.java (100%) rename src/{ => main/java}/nu/validator/htmlparser/sax/XmlSerializer.java (100%) rename src/{ => main/java}/nu/validator/htmlparser/sax/package.html (100%) rename src/{ => main/java}/nu/validator/htmlparser/xom/FormPointer.java (100%) rename src/{ => main/java}/nu/validator/htmlparser/xom/FormPtrElement.java (100%) rename src/{ => main/java}/nu/validator/htmlparser/xom/HtmlBuilder.java (100%) rename src/{ => main/java}/nu/validator/htmlparser/xom/ModalDocument.java (100%) rename src/{ => main/java}/nu/validator/htmlparser/xom/Mode.java (100%) rename src/{ => main/java}/nu/validator/htmlparser/xom/SimpleNodeFactory.java (100%) rename src/{ => main/java}/nu/validator/htmlparser/xom/XOMTreeBuilder.java (100%) rename src/{ => main/java}/nu/validator/htmlparser/xom/package.html (100%) rename 
src/{ => main/java}/nu/validator/saxtree/CDATA.java (100%) rename src/{ => main/java}/nu/validator/saxtree/CharBufferNode.java (100%) rename src/{ => main/java}/nu/validator/saxtree/Characters.java (100%) rename src/{ => main/java}/nu/validator/saxtree/Comment.java (100%) rename src/{ => main/java}/nu/validator/saxtree/DTD.java (100%) rename src/{ => main/java}/nu/validator/saxtree/Document.java (100%) rename src/{ => main/java}/nu/validator/saxtree/DocumentFragment.java (100%) rename src/{ => main/java}/nu/validator/saxtree/Element.java (100%) rename src/{ => main/java}/nu/validator/saxtree/Entity.java (100%) rename src/{ => main/java}/nu/validator/saxtree/IgnorableWhitespace.java (100%) rename src/{ => main/java}/nu/validator/saxtree/LocatorImpl.java (100%) rename src/{ => main/java}/nu/validator/saxtree/Node.java (100%) rename src/{ => main/java}/nu/validator/saxtree/NodeType.java (100%) rename src/{ => main/java}/nu/validator/saxtree/NullLexicalHandler.java (100%) rename src/{ => main/java}/nu/validator/saxtree/ParentNode.java (100%) rename src/{ => main/java}/nu/validator/saxtree/PrefixMapping.java (100%) rename src/{ => main/java}/nu/validator/saxtree/ProcessingInstruction.java (100%) rename src/{ => main/java}/nu/validator/saxtree/SkippedEntity.java (100%) rename src/{ => main/java}/nu/validator/saxtree/TreeBuilder.java (100%) rename src/{ => main/java}/nu/validator/saxtree/TreeParser.java (100%) rename src/{ => main/java}/nu/validator/saxtree/package.html (100%) rename {test-src => src/test/java}/nu/validator/htmlparser/test/DecoderLoopTester.java (100%) rename {test-src => src/test/java}/nu/validator/htmlparser/test/DomIdTester.java (100%) rename {test-src => src/test/java}/nu/validator/htmlparser/test/DomTest.java (100%) rename {test-src => src/test/java}/nu/validator/htmlparser/test/EncodingTester.java (100%) rename {test-src => src/test/java}/nu/validator/htmlparser/test/JSONArrayTokenHandler.java (100%) rename {test-src => 
src/test/java}/nu/validator/htmlparser/test/ListErrorHandler.java (100%) rename {test-src => src/test/java}/nu/validator/htmlparser/test/SystemErrErrorHandler.java (100%) rename {test-src => src/test/java}/nu/validator/htmlparser/test/TokenPrinter.java (100%) mode change 100755 => 100644 rename {test-src => src/test/java}/nu/validator/htmlparser/test/TokenizerTester.java (100%) rename {test-src => src/test/java}/nu/validator/htmlparser/test/TreeDumpContentHandler.java (100%) rename {test-src => src/test/java}/nu/validator/htmlparser/test/TreePrinter.java (100%) rename {test-src => src/test/java}/nu/validator/htmlparser/test/TreeTester.java (100%) rename {test-src => src/test/java}/nu/validator/htmlparser/test/UntilHashInputStream.java (100%) rename {test-src => src/test/java}/nu/validator/htmlparser/test/XmlSerializerTester.java (100%) rename {test-src => src/test/java}/nu/validator/htmlparser/test/XomTest.java (100%) rename {test-src => src/test/java}/nu/validator/htmlparser/test/package.html (100%) rename {test-src => src/test/java}/nu/validator/saxtree/test/PassThruPrinter.java (100%) rename {test-src => src/test/java}/nu/validator/saxtree/test/package.html (100%) diff --git a/translator-src/nu/validator/htmlparser/generator/ApplyHotSpotWorkaround.java b/src/hotspot/java/nu/validator/htmlparser/generator/ApplyHotSpotWorkaround.java similarity index 100% rename from translator-src/nu/validator/htmlparser/generator/ApplyHotSpotWorkaround.java rename to src/hotspot/java/nu/validator/htmlparser/generator/ApplyHotSpotWorkaround.java diff --git a/src/nu/validator/htmlparser/impl/HotSpotWorkaround.txt b/src/hotspot/resources/HotSpotWorkaround.txt similarity index 100% rename from src/nu/validator/htmlparser/impl/HotSpotWorkaround.txt rename to src/hotspot/resources/HotSpotWorkaround.txt diff --git a/src/nu/validator/htmlparser/annotation/Auto.java b/src/main/java/nu/validator/htmlparser/annotation/Auto.java similarity index 100% rename from 
src/nu/validator/htmlparser/annotation/Auto.java rename to src/main/java/nu/validator/htmlparser/annotation/Auto.java diff --git a/src/nu/validator/htmlparser/annotation/CharacterName.java b/src/main/java/nu/validator/htmlparser/annotation/CharacterName.java similarity index 100% rename from src/nu/validator/htmlparser/annotation/CharacterName.java rename to src/main/java/nu/validator/htmlparser/annotation/CharacterName.java diff --git a/src/nu/validator/htmlparser/annotation/Const.java b/src/main/java/nu/validator/htmlparser/annotation/Const.java similarity index 100% rename from src/nu/validator/htmlparser/annotation/Const.java rename to src/main/java/nu/validator/htmlparser/annotation/Const.java diff --git a/src/nu/validator/htmlparser/annotation/Creator.java b/src/main/java/nu/validator/htmlparser/annotation/Creator.java similarity index 100% rename from src/nu/validator/htmlparser/annotation/Creator.java rename to src/main/java/nu/validator/htmlparser/annotation/Creator.java diff --git a/src/nu/validator/htmlparser/annotation/HtmlCreator.java b/src/main/java/nu/validator/htmlparser/annotation/HtmlCreator.java similarity index 100% rename from src/nu/validator/htmlparser/annotation/HtmlCreator.java rename to src/main/java/nu/validator/htmlparser/annotation/HtmlCreator.java diff --git a/src/nu/validator/htmlparser/annotation/IdType.java b/src/main/java/nu/validator/htmlparser/annotation/IdType.java similarity index 100% rename from src/nu/validator/htmlparser/annotation/IdType.java rename to src/main/java/nu/validator/htmlparser/annotation/IdType.java diff --git a/src/nu/validator/htmlparser/annotation/Inline.java b/src/main/java/nu/validator/htmlparser/annotation/Inline.java similarity index 100% rename from src/nu/validator/htmlparser/annotation/Inline.java rename to src/main/java/nu/validator/htmlparser/annotation/Inline.java diff --git a/src/nu/validator/htmlparser/annotation/Literal.java b/src/main/java/nu/validator/htmlparser/annotation/Literal.java 
similarity index 100% rename from src/nu/validator/htmlparser/annotation/Literal.java rename to src/main/java/nu/validator/htmlparser/annotation/Literal.java diff --git a/src/nu/validator/htmlparser/annotation/Local.java b/src/main/java/nu/validator/htmlparser/annotation/Local.java similarity index 100% rename from src/nu/validator/htmlparser/annotation/Local.java rename to src/main/java/nu/validator/htmlparser/annotation/Local.java diff --git a/src/nu/validator/htmlparser/annotation/NoLength.java b/src/main/java/nu/validator/htmlparser/annotation/NoLength.java similarity index 100% rename from src/nu/validator/htmlparser/annotation/NoLength.java rename to src/main/java/nu/validator/htmlparser/annotation/NoLength.java diff --git a/src/nu/validator/htmlparser/annotation/NsUri.java b/src/main/java/nu/validator/htmlparser/annotation/NsUri.java similarity index 100% rename from src/nu/validator/htmlparser/annotation/NsUri.java rename to src/main/java/nu/validator/htmlparser/annotation/NsUri.java diff --git a/src/nu/validator/htmlparser/annotation/Prefix.java b/src/main/java/nu/validator/htmlparser/annotation/Prefix.java similarity index 100% rename from src/nu/validator/htmlparser/annotation/Prefix.java rename to src/main/java/nu/validator/htmlparser/annotation/Prefix.java diff --git a/src/nu/validator/htmlparser/annotation/QName.java b/src/main/java/nu/validator/htmlparser/annotation/QName.java similarity index 100% rename from src/nu/validator/htmlparser/annotation/QName.java rename to src/main/java/nu/validator/htmlparser/annotation/QName.java diff --git a/src/nu/validator/htmlparser/annotation/SvgCreator.java b/src/main/java/nu/validator/htmlparser/annotation/SvgCreator.java similarity index 100% rename from src/nu/validator/htmlparser/annotation/SvgCreator.java rename to src/main/java/nu/validator/htmlparser/annotation/SvgCreator.java diff --git a/src/nu/validator/htmlparser/annotation/Unsigned.java b/src/main/java/nu/validator/htmlparser/annotation/Unsigned.java 
similarity index 100% rename from src/nu/validator/htmlparser/annotation/Unsigned.java rename to src/main/java/nu/validator/htmlparser/annotation/Unsigned.java diff --git a/src/nu/validator/htmlparser/annotation/Virtual.java b/src/main/java/nu/validator/htmlparser/annotation/Virtual.java similarity index 100% rename from src/nu/validator/htmlparser/annotation/Virtual.java rename to src/main/java/nu/validator/htmlparser/annotation/Virtual.java diff --git a/src/nu/validator/htmlparser/annotation/package.html b/src/main/java/nu/validator/htmlparser/annotation/package.html similarity index 100% rename from src/nu/validator/htmlparser/annotation/package.html rename to src/main/java/nu/validator/htmlparser/annotation/package.html diff --git a/src/nu/validator/htmlparser/common/ByteReadable.java b/src/main/java/nu/validator/htmlparser/common/ByteReadable.java old mode 100755 new mode 100644 similarity index 100% rename from src/nu/validator/htmlparser/common/ByteReadable.java rename to src/main/java/nu/validator/htmlparser/common/ByteReadable.java diff --git a/src/nu/validator/htmlparser/common/CharacterHandler.java b/src/main/java/nu/validator/htmlparser/common/CharacterHandler.java similarity index 100% rename from src/nu/validator/htmlparser/common/CharacterHandler.java rename to src/main/java/nu/validator/htmlparser/common/CharacterHandler.java diff --git a/src/nu/validator/htmlparser/common/DocumentMode.java b/src/main/java/nu/validator/htmlparser/common/DocumentMode.java similarity index 100% rename from src/nu/validator/htmlparser/common/DocumentMode.java rename to src/main/java/nu/validator/htmlparser/common/DocumentMode.java diff --git a/src/nu/validator/htmlparser/common/DocumentModeHandler.java b/src/main/java/nu/validator/htmlparser/common/DocumentModeHandler.java similarity index 100% rename from src/nu/validator/htmlparser/common/DocumentModeHandler.java rename to src/main/java/nu/validator/htmlparser/common/DocumentModeHandler.java diff --git 
a/src/nu/validator/htmlparser/common/EncodingDeclarationHandler.java b/src/main/java/nu/validator/htmlparser/common/EncodingDeclarationHandler.java similarity index 100% rename from src/nu/validator/htmlparser/common/EncodingDeclarationHandler.java rename to src/main/java/nu/validator/htmlparser/common/EncodingDeclarationHandler.java diff --git a/src/nu/validator/htmlparser/common/Heuristics.java b/src/main/java/nu/validator/htmlparser/common/Heuristics.java similarity index 100% rename from src/nu/validator/htmlparser/common/Heuristics.java rename to src/main/java/nu/validator/htmlparser/common/Heuristics.java diff --git a/src/nu/validator/htmlparser/common/Interner.java b/src/main/java/nu/validator/htmlparser/common/Interner.java similarity index 100% rename from src/nu/validator/htmlparser/common/Interner.java rename to src/main/java/nu/validator/htmlparser/common/Interner.java diff --git a/src/nu/validator/htmlparser/common/TokenHandler.java b/src/main/java/nu/validator/htmlparser/common/TokenHandler.java old mode 100755 new mode 100644 similarity index 100% rename from src/nu/validator/htmlparser/common/TokenHandler.java rename to src/main/java/nu/validator/htmlparser/common/TokenHandler.java diff --git a/src/nu/validator/htmlparser/common/TransitionHandler.java b/src/main/java/nu/validator/htmlparser/common/TransitionHandler.java similarity index 100% rename from src/nu/validator/htmlparser/common/TransitionHandler.java rename to src/main/java/nu/validator/htmlparser/common/TransitionHandler.java diff --git a/src/nu/validator/htmlparser/common/XmlViolationPolicy.java b/src/main/java/nu/validator/htmlparser/common/XmlViolationPolicy.java similarity index 100% rename from src/nu/validator/htmlparser/common/XmlViolationPolicy.java rename to src/main/java/nu/validator/htmlparser/common/XmlViolationPolicy.java diff --git a/src/nu/validator/htmlparser/common/package.html b/src/main/java/nu/validator/htmlparser/common/package.html similarity index 100% rename from 
src/nu/validator/htmlparser/common/package.html rename to src/main/java/nu/validator/htmlparser/common/package.html diff --git a/src/nu/validator/htmlparser/dom/DOMTreeBuilder.java b/src/main/java/nu/validator/htmlparser/dom/DOMTreeBuilder.java similarity index 100% rename from src/nu/validator/htmlparser/dom/DOMTreeBuilder.java rename to src/main/java/nu/validator/htmlparser/dom/DOMTreeBuilder.java diff --git a/src/nu/validator/htmlparser/dom/Dom2Sax.java b/src/main/java/nu/validator/htmlparser/dom/Dom2Sax.java similarity index 100% rename from src/nu/validator/htmlparser/dom/Dom2Sax.java rename to src/main/java/nu/validator/htmlparser/dom/Dom2Sax.java diff --git a/src/nu/validator/htmlparser/dom/HtmlDocumentBuilder.java b/src/main/java/nu/validator/htmlparser/dom/HtmlDocumentBuilder.java similarity index 100% rename from src/nu/validator/htmlparser/dom/HtmlDocumentBuilder.java rename to src/main/java/nu/validator/htmlparser/dom/HtmlDocumentBuilder.java diff --git a/src/nu/validator/htmlparser/dom/package.html b/src/main/java/nu/validator/htmlparser/dom/package.html similarity index 100% rename from src/nu/validator/htmlparser/dom/package.html rename to src/main/java/nu/validator/htmlparser/dom/package.html diff --git a/src/nu/validator/htmlparser/extra/ChardetSniffer.java b/src/main/java/nu/validator/htmlparser/extra/ChardetSniffer.java similarity index 100% rename from src/nu/validator/htmlparser/extra/ChardetSniffer.java rename to src/main/java/nu/validator/htmlparser/extra/ChardetSniffer.java diff --git a/src/nu/validator/htmlparser/extra/IcuDetectorSniffer.java b/src/main/java/nu/validator/htmlparser/extra/IcuDetectorSniffer.java similarity index 100% rename from src/nu/validator/htmlparser/extra/IcuDetectorSniffer.java rename to src/main/java/nu/validator/htmlparser/extra/IcuDetectorSniffer.java diff --git a/src/nu/validator/htmlparser/extra/NormalizationChecker.java b/src/main/java/nu/validator/htmlparser/extra/NormalizationChecker.java similarity index 
100% rename from src/nu/validator/htmlparser/extra/NormalizationChecker.java rename to src/main/java/nu/validator/htmlparser/extra/NormalizationChecker.java diff --git a/src/nu/validator/htmlparser/impl/AttributeName.java b/src/main/java/nu/validator/htmlparser/impl/AttributeName.java similarity index 100% rename from src/nu/validator/htmlparser/impl/AttributeName.java rename to src/main/java/nu/validator/htmlparser/impl/AttributeName.java diff --git a/src/nu/validator/htmlparser/impl/CoalescingTreeBuilder.java b/src/main/java/nu/validator/htmlparser/impl/CoalescingTreeBuilder.java similarity index 100% rename from src/nu/validator/htmlparser/impl/CoalescingTreeBuilder.java rename to src/main/java/nu/validator/htmlparser/impl/CoalescingTreeBuilder.java diff --git a/src/nu/validator/htmlparser/impl/ElementName.java b/src/main/java/nu/validator/htmlparser/impl/ElementName.java similarity index 100% rename from src/nu/validator/htmlparser/impl/ElementName.java rename to src/main/java/nu/validator/htmlparser/impl/ElementName.java diff --git a/src/nu/validator/htmlparser/impl/ErrorReportingTokenizer.java b/src/main/java/nu/validator/htmlparser/impl/ErrorReportingTokenizer.java similarity index 100% rename from src/nu/validator/htmlparser/impl/ErrorReportingTokenizer.java rename to src/main/java/nu/validator/htmlparser/impl/ErrorReportingTokenizer.java diff --git a/src/nu/validator/htmlparser/impl/HtmlAttributes.java b/src/main/java/nu/validator/htmlparser/impl/HtmlAttributes.java similarity index 100% rename from src/nu/validator/htmlparser/impl/HtmlAttributes.java rename to src/main/java/nu/validator/htmlparser/impl/HtmlAttributes.java diff --git a/src/nu/validator/htmlparser/impl/LocatorImpl.java b/src/main/java/nu/validator/htmlparser/impl/LocatorImpl.java similarity index 100% rename from src/nu/validator/htmlparser/impl/LocatorImpl.java rename to src/main/java/nu/validator/htmlparser/impl/LocatorImpl.java diff --git 
a/src/nu/validator/htmlparser/impl/MetaScanner.java b/src/main/java/nu/validator/htmlparser/impl/MetaScanner.java old mode 100755 new mode 100644 similarity index 100% rename from src/nu/validator/htmlparser/impl/MetaScanner.java rename to src/main/java/nu/validator/htmlparser/impl/MetaScanner.java diff --git a/src/nu/validator/htmlparser/impl/NCName.java b/src/main/java/nu/validator/htmlparser/impl/NCName.java similarity index 100% rename from src/nu/validator/htmlparser/impl/NCName.java rename to src/main/java/nu/validator/htmlparser/impl/NCName.java diff --git a/src/nu/validator/htmlparser/impl/NamedCharacters.java b/src/main/java/nu/validator/htmlparser/impl/NamedCharacters.java old mode 100755 new mode 100644 similarity index 100% rename from src/nu/validator/htmlparser/impl/NamedCharacters.java rename to src/main/java/nu/validator/htmlparser/impl/NamedCharacters.java diff --git a/src/nu/validator/htmlparser/impl/NamedCharactersAccel.java b/src/main/java/nu/validator/htmlparser/impl/NamedCharactersAccel.java similarity index 100% rename from src/nu/validator/htmlparser/impl/NamedCharactersAccel.java rename to src/main/java/nu/validator/htmlparser/impl/NamedCharactersAccel.java diff --git a/src/nu/validator/htmlparser/impl/Portability.java b/src/main/java/nu/validator/htmlparser/impl/Portability.java similarity index 100% rename from src/nu/validator/htmlparser/impl/Portability.java rename to src/main/java/nu/validator/htmlparser/impl/Portability.java diff --git a/src/nu/validator/htmlparser/impl/PushedLocation.java b/src/main/java/nu/validator/htmlparser/impl/PushedLocation.java similarity index 100% rename from src/nu/validator/htmlparser/impl/PushedLocation.java rename to src/main/java/nu/validator/htmlparser/impl/PushedLocation.java diff --git a/src/nu/validator/htmlparser/impl/StackNode.java b/src/main/java/nu/validator/htmlparser/impl/StackNode.java similarity index 100% rename from src/nu/validator/htmlparser/impl/StackNode.java rename to 
src/main/java/nu/validator/htmlparser/impl/StackNode.java diff --git a/src/nu/validator/htmlparser/impl/StateSnapshot.java b/src/main/java/nu/validator/htmlparser/impl/StateSnapshot.java similarity index 100% rename from src/nu/validator/htmlparser/impl/StateSnapshot.java rename to src/main/java/nu/validator/htmlparser/impl/StateSnapshot.java diff --git a/src/nu/validator/htmlparser/impl/TaintableLocatorImpl.java b/src/main/java/nu/validator/htmlparser/impl/TaintableLocatorImpl.java similarity index 100% rename from src/nu/validator/htmlparser/impl/TaintableLocatorImpl.java rename to src/main/java/nu/validator/htmlparser/impl/TaintableLocatorImpl.java diff --git a/src/nu/validator/htmlparser/impl/Tokenizer.java b/src/main/java/nu/validator/htmlparser/impl/Tokenizer.java old mode 100755 new mode 100644 similarity index 100% rename from src/nu/validator/htmlparser/impl/Tokenizer.java rename to src/main/java/nu/validator/htmlparser/impl/Tokenizer.java diff --git a/src/nu/validator/htmlparser/impl/TreeBuilder.java b/src/main/java/nu/validator/htmlparser/impl/TreeBuilder.java similarity index 100% rename from src/nu/validator/htmlparser/impl/TreeBuilder.java rename to src/main/java/nu/validator/htmlparser/impl/TreeBuilder.java diff --git a/src/nu/validator/htmlparser/impl/TreeBuilderState.java b/src/main/java/nu/validator/htmlparser/impl/TreeBuilderState.java similarity index 100% rename from src/nu/validator/htmlparser/impl/TreeBuilderState.java rename to src/main/java/nu/validator/htmlparser/impl/TreeBuilderState.java diff --git a/src/nu/validator/htmlparser/impl/UTF16Buffer.java b/src/main/java/nu/validator/htmlparser/impl/UTF16Buffer.java similarity index 100% rename from src/nu/validator/htmlparser/impl/UTF16Buffer.java rename to src/main/java/nu/validator/htmlparser/impl/UTF16Buffer.java diff --git a/src/nu/validator/htmlparser/impl/package.html b/src/main/java/nu/validator/htmlparser/impl/package.html similarity index 100% rename from 
src/nu/validator/htmlparser/impl/package.html rename to src/main/java/nu/validator/htmlparser/impl/package.html diff --git a/src/nu/validator/htmlparser/io/BomSniffer.java b/src/main/java/nu/validator/htmlparser/io/BomSniffer.java old mode 100755 new mode 100644 similarity index 100% rename from src/nu/validator/htmlparser/io/BomSniffer.java rename to src/main/java/nu/validator/htmlparser/io/BomSniffer.java diff --git a/src/nu/validator/htmlparser/io/Confidence.java b/src/main/java/nu/validator/htmlparser/io/Confidence.java similarity index 100% rename from src/nu/validator/htmlparser/io/Confidence.java rename to src/main/java/nu/validator/htmlparser/io/Confidence.java diff --git a/src/nu/validator/htmlparser/io/Driver.java b/src/main/java/nu/validator/htmlparser/io/Driver.java similarity index 100% rename from src/nu/validator/htmlparser/io/Driver.java rename to src/main/java/nu/validator/htmlparser/io/Driver.java diff --git a/src/nu/validator/htmlparser/io/Encoding.java b/src/main/java/nu/validator/htmlparser/io/Encoding.java similarity index 100% rename from src/nu/validator/htmlparser/io/Encoding.java rename to src/main/java/nu/validator/htmlparser/io/Encoding.java diff --git a/src/nu/validator/htmlparser/io/HtmlInputStreamReader.java b/src/main/java/nu/validator/htmlparser/io/HtmlInputStreamReader.java old mode 100755 new mode 100644 similarity index 100% rename from src/nu/validator/htmlparser/io/HtmlInputStreamReader.java rename to src/main/java/nu/validator/htmlparser/io/HtmlInputStreamReader.java diff --git a/src/nu/validator/htmlparser/io/MetaSniffer.java b/src/main/java/nu/validator/htmlparser/io/MetaSniffer.java old mode 100755 new mode 100644 similarity index 100% rename from src/nu/validator/htmlparser/io/MetaSniffer.java rename to src/main/java/nu/validator/htmlparser/io/MetaSniffer.java diff --git a/src/nu/validator/htmlparser/rewindable/Rewindable.java b/src/main/java/nu/validator/htmlparser/rewindable/Rewindable.java old mode 100755 new mode 
100644 similarity index 100% rename from src/nu/validator/htmlparser/rewindable/Rewindable.java rename to src/main/java/nu/validator/htmlparser/rewindable/Rewindable.java diff --git a/src/nu/validator/htmlparser/rewindable/RewindableInputStream.java b/src/main/java/nu/validator/htmlparser/rewindable/RewindableInputStream.java old mode 100755 new mode 100644 similarity index 100% rename from src/nu/validator/htmlparser/rewindable/RewindableInputStream.java rename to src/main/java/nu/validator/htmlparser/rewindable/RewindableInputStream.java diff --git a/src/nu/validator/htmlparser/sax/HtmlParser.java b/src/main/java/nu/validator/htmlparser/sax/HtmlParser.java similarity index 100% rename from src/nu/validator/htmlparser/sax/HtmlParser.java rename to src/main/java/nu/validator/htmlparser/sax/HtmlParser.java diff --git a/src/nu/validator/htmlparser/sax/HtmlSerializer.java b/src/main/java/nu/validator/htmlparser/sax/HtmlSerializer.java similarity index 100% rename from src/nu/validator/htmlparser/sax/HtmlSerializer.java rename to src/main/java/nu/validator/htmlparser/sax/HtmlSerializer.java diff --git a/src/nu/validator/htmlparser/sax/InfosetCoercingHtmlParser.java b/src/main/java/nu/validator/htmlparser/sax/InfosetCoercingHtmlParser.java similarity index 100% rename from src/nu/validator/htmlparser/sax/InfosetCoercingHtmlParser.java rename to src/main/java/nu/validator/htmlparser/sax/InfosetCoercingHtmlParser.java diff --git a/src/nu/validator/htmlparser/sax/NameCheckingXmlSerializer.java b/src/main/java/nu/validator/htmlparser/sax/NameCheckingXmlSerializer.java similarity index 100% rename from src/nu/validator/htmlparser/sax/NameCheckingXmlSerializer.java rename to src/main/java/nu/validator/htmlparser/sax/NameCheckingXmlSerializer.java diff --git a/src/nu/validator/htmlparser/sax/SAXStreamer.java b/src/main/java/nu/validator/htmlparser/sax/SAXStreamer.java similarity index 100% rename from src/nu/validator/htmlparser/sax/SAXStreamer.java rename to 
src/main/java/nu/validator/htmlparser/sax/SAXStreamer.java diff --git a/src/nu/validator/htmlparser/sax/SAXTreeBuilder.java b/src/main/java/nu/validator/htmlparser/sax/SAXTreeBuilder.java similarity index 100% rename from src/nu/validator/htmlparser/sax/SAXTreeBuilder.java rename to src/main/java/nu/validator/htmlparser/sax/SAXTreeBuilder.java diff --git a/src/nu/validator/htmlparser/sax/XmlSerializer.java b/src/main/java/nu/validator/htmlparser/sax/XmlSerializer.java similarity index 100% rename from src/nu/validator/htmlparser/sax/XmlSerializer.java rename to src/main/java/nu/validator/htmlparser/sax/XmlSerializer.java diff --git a/src/nu/validator/htmlparser/sax/package.html b/src/main/java/nu/validator/htmlparser/sax/package.html similarity index 100% rename from src/nu/validator/htmlparser/sax/package.html rename to src/main/java/nu/validator/htmlparser/sax/package.html diff --git a/src/nu/validator/htmlparser/xom/FormPointer.java b/src/main/java/nu/validator/htmlparser/xom/FormPointer.java similarity index 100% rename from src/nu/validator/htmlparser/xom/FormPointer.java rename to src/main/java/nu/validator/htmlparser/xom/FormPointer.java diff --git a/src/nu/validator/htmlparser/xom/FormPtrElement.java b/src/main/java/nu/validator/htmlparser/xom/FormPtrElement.java similarity index 100% rename from src/nu/validator/htmlparser/xom/FormPtrElement.java rename to src/main/java/nu/validator/htmlparser/xom/FormPtrElement.java diff --git a/src/nu/validator/htmlparser/xom/HtmlBuilder.java b/src/main/java/nu/validator/htmlparser/xom/HtmlBuilder.java similarity index 100% rename from src/nu/validator/htmlparser/xom/HtmlBuilder.java rename to src/main/java/nu/validator/htmlparser/xom/HtmlBuilder.java diff --git a/src/nu/validator/htmlparser/xom/ModalDocument.java b/src/main/java/nu/validator/htmlparser/xom/ModalDocument.java similarity index 100% rename from src/nu/validator/htmlparser/xom/ModalDocument.java rename to 
src/main/java/nu/validator/htmlparser/xom/ModalDocument.java diff --git a/src/nu/validator/htmlparser/xom/Mode.java b/src/main/java/nu/validator/htmlparser/xom/Mode.java similarity index 100% rename from src/nu/validator/htmlparser/xom/Mode.java rename to src/main/java/nu/validator/htmlparser/xom/Mode.java diff --git a/src/nu/validator/htmlparser/xom/SimpleNodeFactory.java b/src/main/java/nu/validator/htmlparser/xom/SimpleNodeFactory.java similarity index 100% rename from src/nu/validator/htmlparser/xom/SimpleNodeFactory.java rename to src/main/java/nu/validator/htmlparser/xom/SimpleNodeFactory.java diff --git a/src/nu/validator/htmlparser/xom/XOMTreeBuilder.java b/src/main/java/nu/validator/htmlparser/xom/XOMTreeBuilder.java similarity index 100% rename from src/nu/validator/htmlparser/xom/XOMTreeBuilder.java rename to src/main/java/nu/validator/htmlparser/xom/XOMTreeBuilder.java diff --git a/src/nu/validator/htmlparser/xom/package.html b/src/main/java/nu/validator/htmlparser/xom/package.html similarity index 100% rename from src/nu/validator/htmlparser/xom/package.html rename to src/main/java/nu/validator/htmlparser/xom/package.html diff --git a/src/nu/validator/saxtree/CDATA.java b/src/main/java/nu/validator/saxtree/CDATA.java similarity index 100% rename from src/nu/validator/saxtree/CDATA.java rename to src/main/java/nu/validator/saxtree/CDATA.java diff --git a/src/nu/validator/saxtree/CharBufferNode.java b/src/main/java/nu/validator/saxtree/CharBufferNode.java similarity index 100% rename from src/nu/validator/saxtree/CharBufferNode.java rename to src/main/java/nu/validator/saxtree/CharBufferNode.java diff --git a/src/nu/validator/saxtree/Characters.java b/src/main/java/nu/validator/saxtree/Characters.java similarity index 100% rename from src/nu/validator/saxtree/Characters.java rename to src/main/java/nu/validator/saxtree/Characters.java diff --git a/src/nu/validator/saxtree/Comment.java b/src/main/java/nu/validator/saxtree/Comment.java similarity index 
100% rename from src/nu/validator/saxtree/Comment.java rename to src/main/java/nu/validator/saxtree/Comment.java diff --git a/src/nu/validator/saxtree/DTD.java b/src/main/java/nu/validator/saxtree/DTD.java similarity index 100% rename from src/nu/validator/saxtree/DTD.java rename to src/main/java/nu/validator/saxtree/DTD.java diff --git a/src/nu/validator/saxtree/Document.java b/src/main/java/nu/validator/saxtree/Document.java similarity index 100% rename from src/nu/validator/saxtree/Document.java rename to src/main/java/nu/validator/saxtree/Document.java diff --git a/src/nu/validator/saxtree/DocumentFragment.java b/src/main/java/nu/validator/saxtree/DocumentFragment.java similarity index 100% rename from src/nu/validator/saxtree/DocumentFragment.java rename to src/main/java/nu/validator/saxtree/DocumentFragment.java diff --git a/src/nu/validator/saxtree/Element.java b/src/main/java/nu/validator/saxtree/Element.java similarity index 100% rename from src/nu/validator/saxtree/Element.java rename to src/main/java/nu/validator/saxtree/Element.java diff --git a/src/nu/validator/saxtree/Entity.java b/src/main/java/nu/validator/saxtree/Entity.java similarity index 100% rename from src/nu/validator/saxtree/Entity.java rename to src/main/java/nu/validator/saxtree/Entity.java diff --git a/src/nu/validator/saxtree/IgnorableWhitespace.java b/src/main/java/nu/validator/saxtree/IgnorableWhitespace.java similarity index 100% rename from src/nu/validator/saxtree/IgnorableWhitespace.java rename to src/main/java/nu/validator/saxtree/IgnorableWhitespace.java diff --git a/src/nu/validator/saxtree/LocatorImpl.java b/src/main/java/nu/validator/saxtree/LocatorImpl.java similarity index 100% rename from src/nu/validator/saxtree/LocatorImpl.java rename to src/main/java/nu/validator/saxtree/LocatorImpl.java diff --git a/src/nu/validator/saxtree/Node.java b/src/main/java/nu/validator/saxtree/Node.java similarity index 100% rename from src/nu/validator/saxtree/Node.java rename to 
src/main/java/nu/validator/saxtree/Node.java diff --git a/src/nu/validator/saxtree/NodeType.java b/src/main/java/nu/validator/saxtree/NodeType.java similarity index 100% rename from src/nu/validator/saxtree/NodeType.java rename to src/main/java/nu/validator/saxtree/NodeType.java diff --git a/src/nu/validator/saxtree/NullLexicalHandler.java b/src/main/java/nu/validator/saxtree/NullLexicalHandler.java similarity index 100% rename from src/nu/validator/saxtree/NullLexicalHandler.java rename to src/main/java/nu/validator/saxtree/NullLexicalHandler.java diff --git a/src/nu/validator/saxtree/ParentNode.java b/src/main/java/nu/validator/saxtree/ParentNode.java similarity index 100% rename from src/nu/validator/saxtree/ParentNode.java rename to src/main/java/nu/validator/saxtree/ParentNode.java diff --git a/src/nu/validator/saxtree/PrefixMapping.java b/src/main/java/nu/validator/saxtree/PrefixMapping.java similarity index 100% rename from src/nu/validator/saxtree/PrefixMapping.java rename to src/main/java/nu/validator/saxtree/PrefixMapping.java diff --git a/src/nu/validator/saxtree/ProcessingInstruction.java b/src/main/java/nu/validator/saxtree/ProcessingInstruction.java similarity index 100% rename from src/nu/validator/saxtree/ProcessingInstruction.java rename to src/main/java/nu/validator/saxtree/ProcessingInstruction.java diff --git a/src/nu/validator/saxtree/SkippedEntity.java b/src/main/java/nu/validator/saxtree/SkippedEntity.java similarity index 100% rename from src/nu/validator/saxtree/SkippedEntity.java rename to src/main/java/nu/validator/saxtree/SkippedEntity.java diff --git a/src/nu/validator/saxtree/TreeBuilder.java b/src/main/java/nu/validator/saxtree/TreeBuilder.java similarity index 100% rename from src/nu/validator/saxtree/TreeBuilder.java rename to src/main/java/nu/validator/saxtree/TreeBuilder.java diff --git a/src/nu/validator/saxtree/TreeParser.java b/src/main/java/nu/validator/saxtree/TreeParser.java similarity index 100% rename from 
src/nu/validator/saxtree/TreeParser.java rename to src/main/java/nu/validator/saxtree/TreeParser.java diff --git a/src/nu/validator/saxtree/package.html b/src/main/java/nu/validator/saxtree/package.html similarity index 100% rename from src/nu/validator/saxtree/package.html rename to src/main/java/nu/validator/saxtree/package.html diff --git a/test-src/nu/validator/htmlparser/test/DecoderLoopTester.java b/src/test/java/nu/validator/htmlparser/test/DecoderLoopTester.java similarity index 100% rename from test-src/nu/validator/htmlparser/test/DecoderLoopTester.java rename to src/test/java/nu/validator/htmlparser/test/DecoderLoopTester.java diff --git a/test-src/nu/validator/htmlparser/test/DomIdTester.java b/src/test/java/nu/validator/htmlparser/test/DomIdTester.java similarity index 100% rename from test-src/nu/validator/htmlparser/test/DomIdTester.java rename to src/test/java/nu/validator/htmlparser/test/DomIdTester.java diff --git a/test-src/nu/validator/htmlparser/test/DomTest.java b/src/test/java/nu/validator/htmlparser/test/DomTest.java similarity index 100% rename from test-src/nu/validator/htmlparser/test/DomTest.java rename to src/test/java/nu/validator/htmlparser/test/DomTest.java diff --git a/test-src/nu/validator/htmlparser/test/EncodingTester.java b/src/test/java/nu/validator/htmlparser/test/EncodingTester.java similarity index 100% rename from test-src/nu/validator/htmlparser/test/EncodingTester.java rename to src/test/java/nu/validator/htmlparser/test/EncodingTester.java diff --git a/test-src/nu/validator/htmlparser/test/JSONArrayTokenHandler.java b/src/test/java/nu/validator/htmlparser/test/JSONArrayTokenHandler.java similarity index 100% rename from test-src/nu/validator/htmlparser/test/JSONArrayTokenHandler.java rename to src/test/java/nu/validator/htmlparser/test/JSONArrayTokenHandler.java diff --git a/test-src/nu/validator/htmlparser/test/ListErrorHandler.java b/src/test/java/nu/validator/htmlparser/test/ListErrorHandler.java similarity index 100% 
rename from test-src/nu/validator/htmlparser/test/ListErrorHandler.java rename to src/test/java/nu/validator/htmlparser/test/ListErrorHandler.java diff --git a/test-src/nu/validator/htmlparser/test/SystemErrErrorHandler.java b/src/test/java/nu/validator/htmlparser/test/SystemErrErrorHandler.java similarity index 100% rename from test-src/nu/validator/htmlparser/test/SystemErrErrorHandler.java rename to src/test/java/nu/validator/htmlparser/test/SystemErrErrorHandler.java diff --git a/test-src/nu/validator/htmlparser/test/TokenPrinter.java b/src/test/java/nu/validator/htmlparser/test/TokenPrinter.java old mode 100755 new mode 100644 similarity index 100% rename from test-src/nu/validator/htmlparser/test/TokenPrinter.java rename to src/test/java/nu/validator/htmlparser/test/TokenPrinter.java diff --git a/test-src/nu/validator/htmlparser/test/TokenizerTester.java b/src/test/java/nu/validator/htmlparser/test/TokenizerTester.java similarity index 100% rename from test-src/nu/validator/htmlparser/test/TokenizerTester.java rename to src/test/java/nu/validator/htmlparser/test/TokenizerTester.java diff --git a/test-src/nu/validator/htmlparser/test/TreeDumpContentHandler.java b/src/test/java/nu/validator/htmlparser/test/TreeDumpContentHandler.java similarity index 100% rename from test-src/nu/validator/htmlparser/test/TreeDumpContentHandler.java rename to src/test/java/nu/validator/htmlparser/test/TreeDumpContentHandler.java diff --git a/test-src/nu/validator/htmlparser/test/TreePrinter.java b/src/test/java/nu/validator/htmlparser/test/TreePrinter.java similarity index 100% rename from test-src/nu/validator/htmlparser/test/TreePrinter.java rename to src/test/java/nu/validator/htmlparser/test/TreePrinter.java diff --git a/test-src/nu/validator/htmlparser/test/TreeTester.java b/src/test/java/nu/validator/htmlparser/test/TreeTester.java similarity index 100% rename from test-src/nu/validator/htmlparser/test/TreeTester.java rename to 
src/test/java/nu/validator/htmlparser/test/TreeTester.java diff --git a/test-src/nu/validator/htmlparser/test/UntilHashInputStream.java b/src/test/java/nu/validator/htmlparser/test/UntilHashInputStream.java similarity index 100% rename from test-src/nu/validator/htmlparser/test/UntilHashInputStream.java rename to src/test/java/nu/validator/htmlparser/test/UntilHashInputStream.java diff --git a/test-src/nu/validator/htmlparser/test/XmlSerializerTester.java b/src/test/java/nu/validator/htmlparser/test/XmlSerializerTester.java similarity index 100% rename from test-src/nu/validator/htmlparser/test/XmlSerializerTester.java rename to src/test/java/nu/validator/htmlparser/test/XmlSerializerTester.java diff --git a/test-src/nu/validator/htmlparser/test/XomTest.java b/src/test/java/nu/validator/htmlparser/test/XomTest.java similarity index 100% rename from test-src/nu/validator/htmlparser/test/XomTest.java rename to src/test/java/nu/validator/htmlparser/test/XomTest.java diff --git a/test-src/nu/validator/htmlparser/test/package.html b/src/test/java/nu/validator/htmlparser/test/package.html similarity index 100% rename from test-src/nu/validator/htmlparser/test/package.html rename to src/test/java/nu/validator/htmlparser/test/package.html diff --git a/test-src/nu/validator/saxtree/test/PassThruPrinter.java b/src/test/java/nu/validator/saxtree/test/PassThruPrinter.java similarity index 100% rename from test-src/nu/validator/saxtree/test/PassThruPrinter.java rename to src/test/java/nu/validator/saxtree/test/PassThruPrinter.java diff --git a/test-src/nu/validator/saxtree/test/package.html b/src/test/java/nu/validator/saxtree/test/package.html similarity index 100% rename from test-src/nu/validator/saxtree/test/package.html rename to src/test/java/nu/validator/saxtree/test/package.html From 44b25b68c3c87add80751b197acadb7374459611 Mon Sep 17 00:00:00 2001 From: Anthony Vanelverdinghe Date: Wed, 12 Aug 2020 09:03:55 +0200 Subject: [PATCH 16/30] Fix Maven build --- pom.xml | 149 
+++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 98 insertions(+), 51 deletions(-) diff --git a/pom.xml b/pom.xml index 65031dc3..03a6d589 100644 --- a/pom.xml +++ b/pom.xml @@ -75,6 +75,9 @@ + 1.5 + 1.5 + true UTF-8 @@ -109,77 +112,121 @@ - ${project.build.directory}/src - ${basedir}/test-src + + + + maven-clean-plugin + 3.1.0 + + + maven-compiler-plugin + 3.8.1 + + + maven-javadoc-plugin + 3.2.0 + + + maven-resources-plugin + 3.2.0 + + + maven-source-plugin + 3.2.1 + + + maven-surefire-plugin + 2.22.2 + + + org.codehaus.mojo + exec-maven-plugin + 3.0.0 + + + - org.apache.maven.plugins - maven-compiler-plugin - - 1.5 - 1.5 - + maven-resources-plugin + + + backup-tokenizer + process-sources + + copy-resources + + + true + ${project.build.directory}/hotspot-backup + + + src/main/java/nu/validator/htmlparser/impl + +
  • Tokenizer.java
  • +
    +
    +
    +
    +
    + + restore-tokenizer + process-classes + + copy-resources + + + true + src/main/java/nu/validator/htmlparser/impl + + + ${project.build.directory}/hotspot-backup + +
  • Tokenizer.java
  • +
    +
    +
    +
    +
    +
    - maven-antrun-plugin - 1.7 - - - com.sun - tools - 1.5.0 - system - ${java.home}/../lib/tools.jar - - + org.codehaus.mojo + exec-maven-plugin - intitialize-sources - initialize + tokenizer-hotspot-workaround-javac + process-sources - run + exec - - - - - - - + javac + + ${project.basedir}/src/hotspot/java/nu/validator/htmlparser/generator/ApplyHotSpotWorkaround.java + -d + ${project.build.directory}/hotspot-classes + - tokenizer-hotspot-workaround + tokenizer-hotspot-workaround-java process-sources - run + exec - - - - - - - - - - - - - + java + + -cp + ${project.build.directory}/hotspot-classes + nu.validator.htmlparser.generator.ApplyHotSpotWorkaround + ${project.build.sourceDirectory}/nu/validator/htmlparser/impl/Tokenizer.java + ${project.basedir}/src/hotspot/resources/HotSpotWorkaround.txt + - - org.apache.maven.plugins - maven-surefire-plugin - - true - -
    From c7c30d7385de92cc8776793b4352c264ccedd504 Mon Sep 17 00:00:00 2001 From: Anthony Vanelverdinghe Date: Thu, 23 Apr 2020 19:58:49 +0200 Subject: [PATCH 17/30] Split into separate Maven modules Note that the saxtree test is in the htmlparser module due to its dependence on XmlSerializer. --- .../nu/validator/htmlparser/generator/ApplyHotSpotWorkaround.java | 0 {src => htmlparser/src}/hotspot/resources/HotSpotWorkaround.txt | 0 .../src}/main/java/nu/validator/htmlparser/annotation/Auto.java | 0 .../java/nu/validator/htmlparser/annotation/CharacterName.java | 0 .../src}/main/java/nu/validator/htmlparser/annotation/Const.java | 0 .../main/java/nu/validator/htmlparser/annotation/Creator.java | 0 .../main/java/nu/validator/htmlparser/annotation/HtmlCreator.java | 0 .../src}/main/java/nu/validator/htmlparser/annotation/IdType.java | 0 .../src}/main/java/nu/validator/htmlparser/annotation/Inline.java | 0 .../main/java/nu/validator/htmlparser/annotation/Literal.java | 0 .../src}/main/java/nu/validator/htmlparser/annotation/Local.java | 0 .../main/java/nu/validator/htmlparser/annotation/NoLength.java | 0 .../src}/main/java/nu/validator/htmlparser/annotation/NsUri.java | 0 .../src}/main/java/nu/validator/htmlparser/annotation/Prefix.java | 0 .../src}/main/java/nu/validator/htmlparser/annotation/QName.java | 0 .../main/java/nu/validator/htmlparser/annotation/SvgCreator.java | 0 .../main/java/nu/validator/htmlparser/annotation/Unsigned.java | 0 .../main/java/nu/validator/htmlparser/annotation/Virtual.java | 0 .../main/java/nu/validator/htmlparser/annotation/package.html | 0 .../main/java/nu/validator/htmlparser/common/ByteReadable.java | 0 .../java/nu/validator/htmlparser/common/CharacterHandler.java | 0 .../main/java/nu/validator/htmlparser/common/DocumentMode.java | 0 .../java/nu/validator/htmlparser/common/DocumentModeHandler.java | 0 .../validator/htmlparser/common/EncodingDeclarationHandler.java | 0 .../src}/main/java/nu/validator/htmlparser/common/Heuristics.java | 
0 .../src}/main/java/nu/validator/htmlparser/common/Interner.java | 0 .../main/java/nu/validator/htmlparser/common/TokenHandler.java | 0 .../java/nu/validator/htmlparser/common/TransitionHandler.java | 0 .../java/nu/validator/htmlparser/common/XmlViolationPolicy.java | 0 .../src}/main/java/nu/validator/htmlparser/common/package.html | 0 .../main/java/nu/validator/htmlparser/dom/DOMTreeBuilder.java | 0 .../src}/main/java/nu/validator/htmlparser/dom/Dom2Sax.java | 0 .../java/nu/validator/htmlparser/dom/HtmlDocumentBuilder.java | 0 .../src}/main/java/nu/validator/htmlparser/dom/package.html | 0 .../main/java/nu/validator/htmlparser/extra/ChardetSniffer.java | 0 .../java/nu/validator/htmlparser/extra/IcuDetectorSniffer.java | 0 .../java/nu/validator/htmlparser/extra/NormalizationChecker.java | 0 .../main/java/nu/validator/htmlparser/impl/AttributeName.java | 0 .../java/nu/validator/htmlparser/impl/CoalescingTreeBuilder.java | 0 .../src}/main/java/nu/validator/htmlparser/impl/ElementName.java | 0 .../nu/validator/htmlparser/impl/ErrorReportingTokenizer.java | 0 .../main/java/nu/validator/htmlparser/impl/HtmlAttributes.java | 0 .../src}/main/java/nu/validator/htmlparser/impl/LocatorImpl.java | 0 .../src}/main/java/nu/validator/htmlparser/impl/MetaScanner.java | 0 .../src}/main/java/nu/validator/htmlparser/impl/NCName.java | 0 .../main/java/nu/validator/htmlparser/impl/NamedCharacters.java | 0 .../java/nu/validator/htmlparser/impl/NamedCharactersAccel.java | 0 .../src}/main/java/nu/validator/htmlparser/impl/Portability.java | 0 .../main/java/nu/validator/htmlparser/impl/PushedLocation.java | 0 .../src}/main/java/nu/validator/htmlparser/impl/StackNode.java | 0 .../main/java/nu/validator/htmlparser/impl/StateSnapshot.java | 0 .../java/nu/validator/htmlparser/impl/TaintableLocatorImpl.java | 0 .../src}/main/java/nu/validator/htmlparser/impl/Tokenizer.java | 0 .../src}/main/java/nu/validator/htmlparser/impl/TreeBuilder.java | 0 
.../main/java/nu/validator/htmlparser/impl/TreeBuilderState.java | 0 .../src}/main/java/nu/validator/htmlparser/impl/UTF16Buffer.java | 0 .../src}/main/java/nu/validator/htmlparser/impl/package.html | 0 .../src}/main/java/nu/validator/htmlparser/io/BomSniffer.java | 0 .../src}/main/java/nu/validator/htmlparser/io/Confidence.java | 0 .../src}/main/java/nu/validator/htmlparser/io/Driver.java | 0 .../src}/main/java/nu/validator/htmlparser/io/Encoding.java | 0 .../java/nu/validator/htmlparser/io/HtmlInputStreamReader.java | 0 .../src}/main/java/nu/validator/htmlparser/io/MetaSniffer.java | 0 .../main/java/nu/validator/htmlparser/rewindable/Rewindable.java | 0 .../nu/validator/htmlparser/rewindable/RewindableInputStream.java | 0 .../src}/main/java/nu/validator/htmlparser/sax/HtmlParser.java | 0 .../main/java/nu/validator/htmlparser/sax/HtmlSerializer.java | 0 .../nu/validator/htmlparser/sax/InfosetCoercingHtmlParser.java | 0 .../nu/validator/htmlparser/sax/NameCheckingXmlSerializer.java | 0 .../src}/main/java/nu/validator/htmlparser/sax/SAXStreamer.java | 0 .../main/java/nu/validator/htmlparser/sax/SAXTreeBuilder.java | 0 .../src}/main/java/nu/validator/htmlparser/sax/XmlSerializer.java | 0 .../src}/main/java/nu/validator/htmlparser/sax/package.html | 0 .../test/java/nu/validator/htmlparser/test/DecoderLoopTester.java | 0 .../src}/test/java/nu/validator/htmlparser/test/DomIdTester.java | 0 .../src}/test/java/nu/validator/htmlparser/test/DomTest.java | 0 .../test/java/nu/validator/htmlparser/test/EncodingTester.java | 0 .../java/nu/validator/htmlparser/test/JSONArrayTokenHandler.java | 0 .../test/java/nu/validator/htmlparser/test/ListErrorHandler.java | 0 .../java/nu/validator/htmlparser/test/SystemErrErrorHandler.java | 0 .../src}/test/java/nu/validator/htmlparser/test/TokenPrinter.java | 0 .../test/java/nu/validator/htmlparser/test/TokenizerTester.java | 0 .../java/nu/validator/htmlparser/test/TreeDumpContentHandler.java | 0 
.../src}/test/java/nu/validator/htmlparser/test/TreePrinter.java | 0 .../src}/test/java/nu/validator/htmlparser/test/TreeTester.java | 0 .../java/nu/validator/htmlparser/test/UntilHashInputStream.java | 0 .../java/nu/validator/htmlparser/test/XmlSerializerTester.java | 0 .../src}/test/java/nu/validator/htmlparser/test/package.html | 0 .../src}/test/java/nu/validator/saxtree/test/PassThruPrinter.java | 0 .../src}/test/java/nu/validator/saxtree/test/package.html | 0 {src => saxtree/src}/main/java/nu/validator/saxtree/CDATA.java | 0 .../src}/main/java/nu/validator/saxtree/CharBufferNode.java | 0 .../src}/main/java/nu/validator/saxtree/Characters.java | 0 {src => saxtree/src}/main/java/nu/validator/saxtree/Comment.java | 0 {src => saxtree/src}/main/java/nu/validator/saxtree/DTD.java | 0 {src => saxtree/src}/main/java/nu/validator/saxtree/Document.java | 0 .../src}/main/java/nu/validator/saxtree/DocumentFragment.java | 0 {src => saxtree/src}/main/java/nu/validator/saxtree/Element.java | 0 {src => saxtree/src}/main/java/nu/validator/saxtree/Entity.java | 0 .../src}/main/java/nu/validator/saxtree/IgnorableWhitespace.java | 0 .../src}/main/java/nu/validator/saxtree/LocatorImpl.java | 0 {src => saxtree/src}/main/java/nu/validator/saxtree/Node.java | 0 {src => saxtree/src}/main/java/nu/validator/saxtree/NodeType.java | 0 .../src}/main/java/nu/validator/saxtree/NullLexicalHandler.java | 0 .../src}/main/java/nu/validator/saxtree/ParentNode.java | 0 .../src}/main/java/nu/validator/saxtree/PrefixMapping.java | 0 .../main/java/nu/validator/saxtree/ProcessingInstruction.java | 0 .../src}/main/java/nu/validator/saxtree/SkippedEntity.java | 0 .../src}/main/java/nu/validator/saxtree/TreeBuilder.java | 0 .../src}/main/java/nu/validator/saxtree/TreeParser.java | 0 {src => saxtree/src}/main/java/nu/validator/saxtree/package.html | 0 .../src}/main/java/nu/validator/htmlparser/xom/FormPointer.java | 0 .../main/java/nu/validator/htmlparser/xom/FormPtrElement.java | 0 
.../src}/main/java/nu/validator/htmlparser/xom/HtmlBuilder.java | 0 .../src}/main/java/nu/validator/htmlparser/xom/ModalDocument.java | 0 {src => xom/src}/main/java/nu/validator/htmlparser/xom/Mode.java | 0 .../main/java/nu/validator/htmlparser/xom/SimpleNodeFactory.java | 0 .../main/java/nu/validator/htmlparser/xom/XOMTreeBuilder.java | 0 .../src}/main/java/nu/validator/htmlparser/xom/package.html | 0 .../src}/test/java/nu/validator/htmlparser/test/XomTest.java | 0 120 files changed, 0 insertions(+), 0 deletions(-) rename {src => htmlparser/src}/hotspot/java/nu/validator/htmlparser/generator/ApplyHotSpotWorkaround.java (100%) rename {src => htmlparser/src}/hotspot/resources/HotSpotWorkaround.txt (100%) rename {src => htmlparser/src}/main/java/nu/validator/htmlparser/annotation/Auto.java (100%) rename {src => htmlparser/src}/main/java/nu/validator/htmlparser/annotation/CharacterName.java (100%) rename {src => htmlparser/src}/main/java/nu/validator/htmlparser/annotation/Const.java (100%) rename {src => htmlparser/src}/main/java/nu/validator/htmlparser/annotation/Creator.java (100%) rename {src => htmlparser/src}/main/java/nu/validator/htmlparser/annotation/HtmlCreator.java (100%) rename {src => htmlparser/src}/main/java/nu/validator/htmlparser/annotation/IdType.java (100%) rename {src => htmlparser/src}/main/java/nu/validator/htmlparser/annotation/Inline.java (100%) rename {src => htmlparser/src}/main/java/nu/validator/htmlparser/annotation/Literal.java (100%) rename {src => htmlparser/src}/main/java/nu/validator/htmlparser/annotation/Local.java (100%) rename {src => htmlparser/src}/main/java/nu/validator/htmlparser/annotation/NoLength.java (100%) rename {src => htmlparser/src}/main/java/nu/validator/htmlparser/annotation/NsUri.java (100%) rename {src => htmlparser/src}/main/java/nu/validator/htmlparser/annotation/Prefix.java (100%) rename {src => htmlparser/src}/main/java/nu/validator/htmlparser/annotation/QName.java (100%) rename {src => 
htmlparser/src}/main/java/nu/validator/htmlparser/annotation/SvgCreator.java (100%) rename {src => htmlparser/src}/main/java/nu/validator/htmlparser/annotation/Unsigned.java (100%) rename {src => htmlparser/src}/main/java/nu/validator/htmlparser/annotation/Virtual.java (100%) rename {src => htmlparser/src}/main/java/nu/validator/htmlparser/annotation/package.html (100%) rename {src => htmlparser/src}/main/java/nu/validator/htmlparser/common/ByteReadable.java (100%) rename {src => htmlparser/src}/main/java/nu/validator/htmlparser/common/CharacterHandler.java (100%) rename {src => htmlparser/src}/main/java/nu/validator/htmlparser/common/DocumentMode.java (100%) rename {src => htmlparser/src}/main/java/nu/validator/htmlparser/common/DocumentModeHandler.java (100%) rename {src => htmlparser/src}/main/java/nu/validator/htmlparser/common/EncodingDeclarationHandler.java (100%) rename {src => htmlparser/src}/main/java/nu/validator/htmlparser/common/Heuristics.java (100%) rename {src => htmlparser/src}/main/java/nu/validator/htmlparser/common/Interner.java (100%) rename {src => htmlparser/src}/main/java/nu/validator/htmlparser/common/TokenHandler.java (100%) rename {src => htmlparser/src}/main/java/nu/validator/htmlparser/common/TransitionHandler.java (100%) rename {src => htmlparser/src}/main/java/nu/validator/htmlparser/common/XmlViolationPolicy.java (100%) rename {src => htmlparser/src}/main/java/nu/validator/htmlparser/common/package.html (100%) rename {src => htmlparser/src}/main/java/nu/validator/htmlparser/dom/DOMTreeBuilder.java (100%) rename {src => htmlparser/src}/main/java/nu/validator/htmlparser/dom/Dom2Sax.java (100%) rename {src => htmlparser/src}/main/java/nu/validator/htmlparser/dom/HtmlDocumentBuilder.java (100%) rename {src => htmlparser/src}/main/java/nu/validator/htmlparser/dom/package.html (100%) rename {src => htmlparser/src}/main/java/nu/validator/htmlparser/extra/ChardetSniffer.java (100%) rename {src => 
htmlparser/src}/main/java/nu/validator/htmlparser/extra/IcuDetectorSniffer.java (100%) rename {src => htmlparser/src}/main/java/nu/validator/htmlparser/extra/NormalizationChecker.java (100%) rename {src => htmlparser/src}/main/java/nu/validator/htmlparser/impl/AttributeName.java (100%) rename {src => htmlparser/src}/main/java/nu/validator/htmlparser/impl/CoalescingTreeBuilder.java (100%) rename {src => htmlparser/src}/main/java/nu/validator/htmlparser/impl/ElementName.java (100%) rename {src => htmlparser/src}/main/java/nu/validator/htmlparser/impl/ErrorReportingTokenizer.java (100%) rename {src => htmlparser/src}/main/java/nu/validator/htmlparser/impl/HtmlAttributes.java (100%) rename {src => htmlparser/src}/main/java/nu/validator/htmlparser/impl/LocatorImpl.java (100%) rename {src => htmlparser/src}/main/java/nu/validator/htmlparser/impl/MetaScanner.java (100%) rename {src => htmlparser/src}/main/java/nu/validator/htmlparser/impl/NCName.java (100%) rename {src => htmlparser/src}/main/java/nu/validator/htmlparser/impl/NamedCharacters.java (100%) rename {src => htmlparser/src}/main/java/nu/validator/htmlparser/impl/NamedCharactersAccel.java (100%) rename {src => htmlparser/src}/main/java/nu/validator/htmlparser/impl/Portability.java (100%) rename {src => htmlparser/src}/main/java/nu/validator/htmlparser/impl/PushedLocation.java (100%) rename {src => htmlparser/src}/main/java/nu/validator/htmlparser/impl/StackNode.java (100%) rename {src => htmlparser/src}/main/java/nu/validator/htmlparser/impl/StateSnapshot.java (100%) rename {src => htmlparser/src}/main/java/nu/validator/htmlparser/impl/TaintableLocatorImpl.java (100%) rename {src => htmlparser/src}/main/java/nu/validator/htmlparser/impl/Tokenizer.java (100%) rename {src => htmlparser/src}/main/java/nu/validator/htmlparser/impl/TreeBuilder.java (100%) rename {src => htmlparser/src}/main/java/nu/validator/htmlparser/impl/TreeBuilderState.java (100%) rename {src => 
htmlparser/src}/main/java/nu/validator/htmlparser/impl/UTF16Buffer.java (100%) rename {src => htmlparser/src}/main/java/nu/validator/htmlparser/impl/package.html (100%) rename {src => htmlparser/src}/main/java/nu/validator/htmlparser/io/BomSniffer.java (100%) rename {src => htmlparser/src}/main/java/nu/validator/htmlparser/io/Confidence.java (100%) rename {src => htmlparser/src}/main/java/nu/validator/htmlparser/io/Driver.java (100%) rename {src => htmlparser/src}/main/java/nu/validator/htmlparser/io/Encoding.java (100%) rename {src => htmlparser/src}/main/java/nu/validator/htmlparser/io/HtmlInputStreamReader.java (100%) rename {src => htmlparser/src}/main/java/nu/validator/htmlparser/io/MetaSniffer.java (100%) rename {src => htmlparser/src}/main/java/nu/validator/htmlparser/rewindable/Rewindable.java (100%) rename {src => htmlparser/src}/main/java/nu/validator/htmlparser/rewindable/RewindableInputStream.java (100%) rename {src => htmlparser/src}/main/java/nu/validator/htmlparser/sax/HtmlParser.java (100%) rename {src => htmlparser/src}/main/java/nu/validator/htmlparser/sax/HtmlSerializer.java (100%) rename {src => htmlparser/src}/main/java/nu/validator/htmlparser/sax/InfosetCoercingHtmlParser.java (100%) rename {src => htmlparser/src}/main/java/nu/validator/htmlparser/sax/NameCheckingXmlSerializer.java (100%) rename {src => htmlparser/src}/main/java/nu/validator/htmlparser/sax/SAXStreamer.java (100%) rename {src => htmlparser/src}/main/java/nu/validator/htmlparser/sax/SAXTreeBuilder.java (100%) rename {src => htmlparser/src}/main/java/nu/validator/htmlparser/sax/XmlSerializer.java (100%) rename {src => htmlparser/src}/main/java/nu/validator/htmlparser/sax/package.html (100%) rename {src => htmlparser/src}/test/java/nu/validator/htmlparser/test/DecoderLoopTester.java (100%) rename {src => htmlparser/src}/test/java/nu/validator/htmlparser/test/DomIdTester.java (100%) rename {src => htmlparser/src}/test/java/nu/validator/htmlparser/test/DomTest.java (100%) rename 
{src => htmlparser/src}/test/java/nu/validator/htmlparser/test/EncodingTester.java (100%) rename {src => htmlparser/src}/test/java/nu/validator/htmlparser/test/JSONArrayTokenHandler.java (100%) rename {src => htmlparser/src}/test/java/nu/validator/htmlparser/test/ListErrorHandler.java (100%) rename {src => htmlparser/src}/test/java/nu/validator/htmlparser/test/SystemErrErrorHandler.java (100%) rename {src => htmlparser/src}/test/java/nu/validator/htmlparser/test/TokenPrinter.java (100%) rename {src => htmlparser/src}/test/java/nu/validator/htmlparser/test/TokenizerTester.java (100%) rename {src => htmlparser/src}/test/java/nu/validator/htmlparser/test/TreeDumpContentHandler.java (100%) rename {src => htmlparser/src}/test/java/nu/validator/htmlparser/test/TreePrinter.java (100%) rename {src => htmlparser/src}/test/java/nu/validator/htmlparser/test/TreeTester.java (100%) rename {src => htmlparser/src}/test/java/nu/validator/htmlparser/test/UntilHashInputStream.java (100%) rename {src => htmlparser/src}/test/java/nu/validator/htmlparser/test/XmlSerializerTester.java (100%) rename {src => htmlparser/src}/test/java/nu/validator/htmlparser/test/package.html (100%) rename {src => htmlparser/src}/test/java/nu/validator/saxtree/test/PassThruPrinter.java (100%) rename {src => htmlparser/src}/test/java/nu/validator/saxtree/test/package.html (100%) rename {src => saxtree/src}/main/java/nu/validator/saxtree/CDATA.java (100%) rename {src => saxtree/src}/main/java/nu/validator/saxtree/CharBufferNode.java (100%) rename {src => saxtree/src}/main/java/nu/validator/saxtree/Characters.java (100%) rename {src => saxtree/src}/main/java/nu/validator/saxtree/Comment.java (100%) rename {src => saxtree/src}/main/java/nu/validator/saxtree/DTD.java (100%) rename {src => saxtree/src}/main/java/nu/validator/saxtree/Document.java (100%) rename {src => saxtree/src}/main/java/nu/validator/saxtree/DocumentFragment.java (100%) rename {src => saxtree/src}/main/java/nu/validator/saxtree/Element.java 
(100%) rename {src => saxtree/src}/main/java/nu/validator/saxtree/Entity.java (100%) rename {src => saxtree/src}/main/java/nu/validator/saxtree/IgnorableWhitespace.java (100%) rename {src => saxtree/src}/main/java/nu/validator/saxtree/LocatorImpl.java (100%) rename {src => saxtree/src}/main/java/nu/validator/saxtree/Node.java (100%) rename {src => saxtree/src}/main/java/nu/validator/saxtree/NodeType.java (100%) rename {src => saxtree/src}/main/java/nu/validator/saxtree/NullLexicalHandler.java (100%) rename {src => saxtree/src}/main/java/nu/validator/saxtree/ParentNode.java (100%) rename {src => saxtree/src}/main/java/nu/validator/saxtree/PrefixMapping.java (100%) rename {src => saxtree/src}/main/java/nu/validator/saxtree/ProcessingInstruction.java (100%) rename {src => saxtree/src}/main/java/nu/validator/saxtree/SkippedEntity.java (100%) rename {src => saxtree/src}/main/java/nu/validator/saxtree/TreeBuilder.java (100%) rename {src => saxtree/src}/main/java/nu/validator/saxtree/TreeParser.java (100%) rename {src => saxtree/src}/main/java/nu/validator/saxtree/package.html (100%) rename {src => xom/src}/main/java/nu/validator/htmlparser/xom/FormPointer.java (100%) rename {src => xom/src}/main/java/nu/validator/htmlparser/xom/FormPtrElement.java (100%) rename {src => xom/src}/main/java/nu/validator/htmlparser/xom/HtmlBuilder.java (100%) rename {src => xom/src}/main/java/nu/validator/htmlparser/xom/ModalDocument.java (100%) rename {src => xom/src}/main/java/nu/validator/htmlparser/xom/Mode.java (100%) rename {src => xom/src}/main/java/nu/validator/htmlparser/xom/SimpleNodeFactory.java (100%) rename {src => xom/src}/main/java/nu/validator/htmlparser/xom/XOMTreeBuilder.java (100%) rename {src => xom/src}/main/java/nu/validator/htmlparser/xom/package.html (100%) rename {src => xom/src}/test/java/nu/validator/htmlparser/test/XomTest.java (100%) diff --git a/src/hotspot/java/nu/validator/htmlparser/generator/ApplyHotSpotWorkaround.java 
b/htmlparser/src/hotspot/java/nu/validator/htmlparser/generator/ApplyHotSpotWorkaround.java similarity index 100% rename from src/hotspot/java/nu/validator/htmlparser/generator/ApplyHotSpotWorkaround.java rename to htmlparser/src/hotspot/java/nu/validator/htmlparser/generator/ApplyHotSpotWorkaround.java diff --git a/src/hotspot/resources/HotSpotWorkaround.txt b/htmlparser/src/hotspot/resources/HotSpotWorkaround.txt similarity index 100% rename from src/hotspot/resources/HotSpotWorkaround.txt rename to htmlparser/src/hotspot/resources/HotSpotWorkaround.txt diff --git a/src/main/java/nu/validator/htmlparser/annotation/Auto.java b/htmlparser/src/main/java/nu/validator/htmlparser/annotation/Auto.java similarity index 100% rename from src/main/java/nu/validator/htmlparser/annotation/Auto.java rename to htmlparser/src/main/java/nu/validator/htmlparser/annotation/Auto.java diff --git a/src/main/java/nu/validator/htmlparser/annotation/CharacterName.java b/htmlparser/src/main/java/nu/validator/htmlparser/annotation/CharacterName.java similarity index 100% rename from src/main/java/nu/validator/htmlparser/annotation/CharacterName.java rename to htmlparser/src/main/java/nu/validator/htmlparser/annotation/CharacterName.java diff --git a/src/main/java/nu/validator/htmlparser/annotation/Const.java b/htmlparser/src/main/java/nu/validator/htmlparser/annotation/Const.java similarity index 100% rename from src/main/java/nu/validator/htmlparser/annotation/Const.java rename to htmlparser/src/main/java/nu/validator/htmlparser/annotation/Const.java diff --git a/src/main/java/nu/validator/htmlparser/annotation/Creator.java b/htmlparser/src/main/java/nu/validator/htmlparser/annotation/Creator.java similarity index 100% rename from src/main/java/nu/validator/htmlparser/annotation/Creator.java rename to htmlparser/src/main/java/nu/validator/htmlparser/annotation/Creator.java diff --git a/src/main/java/nu/validator/htmlparser/annotation/HtmlCreator.java 
b/htmlparser/src/main/java/nu/validator/htmlparser/annotation/HtmlCreator.java similarity index 100% rename from src/main/java/nu/validator/htmlparser/annotation/HtmlCreator.java rename to htmlparser/src/main/java/nu/validator/htmlparser/annotation/HtmlCreator.java diff --git a/src/main/java/nu/validator/htmlparser/annotation/IdType.java b/htmlparser/src/main/java/nu/validator/htmlparser/annotation/IdType.java similarity index 100% rename from src/main/java/nu/validator/htmlparser/annotation/IdType.java rename to htmlparser/src/main/java/nu/validator/htmlparser/annotation/IdType.java diff --git a/src/main/java/nu/validator/htmlparser/annotation/Inline.java b/htmlparser/src/main/java/nu/validator/htmlparser/annotation/Inline.java similarity index 100% rename from src/main/java/nu/validator/htmlparser/annotation/Inline.java rename to htmlparser/src/main/java/nu/validator/htmlparser/annotation/Inline.java diff --git a/src/main/java/nu/validator/htmlparser/annotation/Literal.java b/htmlparser/src/main/java/nu/validator/htmlparser/annotation/Literal.java similarity index 100% rename from src/main/java/nu/validator/htmlparser/annotation/Literal.java rename to htmlparser/src/main/java/nu/validator/htmlparser/annotation/Literal.java diff --git a/src/main/java/nu/validator/htmlparser/annotation/Local.java b/htmlparser/src/main/java/nu/validator/htmlparser/annotation/Local.java similarity index 100% rename from src/main/java/nu/validator/htmlparser/annotation/Local.java rename to htmlparser/src/main/java/nu/validator/htmlparser/annotation/Local.java diff --git a/src/main/java/nu/validator/htmlparser/annotation/NoLength.java b/htmlparser/src/main/java/nu/validator/htmlparser/annotation/NoLength.java similarity index 100% rename from src/main/java/nu/validator/htmlparser/annotation/NoLength.java rename to htmlparser/src/main/java/nu/validator/htmlparser/annotation/NoLength.java diff --git a/src/main/java/nu/validator/htmlparser/annotation/NsUri.java 
b/htmlparser/src/main/java/nu/validator/htmlparser/annotation/NsUri.java similarity index 100% rename from src/main/java/nu/validator/htmlparser/annotation/NsUri.java rename to htmlparser/src/main/java/nu/validator/htmlparser/annotation/NsUri.java diff --git a/src/main/java/nu/validator/htmlparser/annotation/Prefix.java b/htmlparser/src/main/java/nu/validator/htmlparser/annotation/Prefix.java similarity index 100% rename from src/main/java/nu/validator/htmlparser/annotation/Prefix.java rename to htmlparser/src/main/java/nu/validator/htmlparser/annotation/Prefix.java diff --git a/src/main/java/nu/validator/htmlparser/annotation/QName.java b/htmlparser/src/main/java/nu/validator/htmlparser/annotation/QName.java similarity index 100% rename from src/main/java/nu/validator/htmlparser/annotation/QName.java rename to htmlparser/src/main/java/nu/validator/htmlparser/annotation/QName.java diff --git a/src/main/java/nu/validator/htmlparser/annotation/SvgCreator.java b/htmlparser/src/main/java/nu/validator/htmlparser/annotation/SvgCreator.java similarity index 100% rename from src/main/java/nu/validator/htmlparser/annotation/SvgCreator.java rename to htmlparser/src/main/java/nu/validator/htmlparser/annotation/SvgCreator.java diff --git a/src/main/java/nu/validator/htmlparser/annotation/Unsigned.java b/htmlparser/src/main/java/nu/validator/htmlparser/annotation/Unsigned.java similarity index 100% rename from src/main/java/nu/validator/htmlparser/annotation/Unsigned.java rename to htmlparser/src/main/java/nu/validator/htmlparser/annotation/Unsigned.java diff --git a/src/main/java/nu/validator/htmlparser/annotation/Virtual.java b/htmlparser/src/main/java/nu/validator/htmlparser/annotation/Virtual.java similarity index 100% rename from src/main/java/nu/validator/htmlparser/annotation/Virtual.java rename to htmlparser/src/main/java/nu/validator/htmlparser/annotation/Virtual.java diff --git a/src/main/java/nu/validator/htmlparser/annotation/package.html 
b/htmlparser/src/main/java/nu/validator/htmlparser/annotation/package.html similarity index 100% rename from src/main/java/nu/validator/htmlparser/annotation/package.html rename to htmlparser/src/main/java/nu/validator/htmlparser/annotation/package.html diff --git a/src/main/java/nu/validator/htmlparser/common/ByteReadable.java b/htmlparser/src/main/java/nu/validator/htmlparser/common/ByteReadable.java similarity index 100% rename from src/main/java/nu/validator/htmlparser/common/ByteReadable.java rename to htmlparser/src/main/java/nu/validator/htmlparser/common/ByteReadable.java diff --git a/src/main/java/nu/validator/htmlparser/common/CharacterHandler.java b/htmlparser/src/main/java/nu/validator/htmlparser/common/CharacterHandler.java similarity index 100% rename from src/main/java/nu/validator/htmlparser/common/CharacterHandler.java rename to htmlparser/src/main/java/nu/validator/htmlparser/common/CharacterHandler.java diff --git a/src/main/java/nu/validator/htmlparser/common/DocumentMode.java b/htmlparser/src/main/java/nu/validator/htmlparser/common/DocumentMode.java similarity index 100% rename from src/main/java/nu/validator/htmlparser/common/DocumentMode.java rename to htmlparser/src/main/java/nu/validator/htmlparser/common/DocumentMode.java diff --git a/src/main/java/nu/validator/htmlparser/common/DocumentModeHandler.java b/htmlparser/src/main/java/nu/validator/htmlparser/common/DocumentModeHandler.java similarity index 100% rename from src/main/java/nu/validator/htmlparser/common/DocumentModeHandler.java rename to htmlparser/src/main/java/nu/validator/htmlparser/common/DocumentModeHandler.java diff --git a/src/main/java/nu/validator/htmlparser/common/EncodingDeclarationHandler.java b/htmlparser/src/main/java/nu/validator/htmlparser/common/EncodingDeclarationHandler.java similarity index 100% rename from src/main/java/nu/validator/htmlparser/common/EncodingDeclarationHandler.java rename to 
htmlparser/src/main/java/nu/validator/htmlparser/common/EncodingDeclarationHandler.java diff --git a/src/main/java/nu/validator/htmlparser/common/Heuristics.java b/htmlparser/src/main/java/nu/validator/htmlparser/common/Heuristics.java similarity index 100% rename from src/main/java/nu/validator/htmlparser/common/Heuristics.java rename to htmlparser/src/main/java/nu/validator/htmlparser/common/Heuristics.java diff --git a/src/main/java/nu/validator/htmlparser/common/Interner.java b/htmlparser/src/main/java/nu/validator/htmlparser/common/Interner.java similarity index 100% rename from src/main/java/nu/validator/htmlparser/common/Interner.java rename to htmlparser/src/main/java/nu/validator/htmlparser/common/Interner.java diff --git a/src/main/java/nu/validator/htmlparser/common/TokenHandler.java b/htmlparser/src/main/java/nu/validator/htmlparser/common/TokenHandler.java similarity index 100% rename from src/main/java/nu/validator/htmlparser/common/TokenHandler.java rename to htmlparser/src/main/java/nu/validator/htmlparser/common/TokenHandler.java diff --git a/src/main/java/nu/validator/htmlparser/common/TransitionHandler.java b/htmlparser/src/main/java/nu/validator/htmlparser/common/TransitionHandler.java similarity index 100% rename from src/main/java/nu/validator/htmlparser/common/TransitionHandler.java rename to htmlparser/src/main/java/nu/validator/htmlparser/common/TransitionHandler.java diff --git a/src/main/java/nu/validator/htmlparser/common/XmlViolationPolicy.java b/htmlparser/src/main/java/nu/validator/htmlparser/common/XmlViolationPolicy.java similarity index 100% rename from src/main/java/nu/validator/htmlparser/common/XmlViolationPolicy.java rename to htmlparser/src/main/java/nu/validator/htmlparser/common/XmlViolationPolicy.java diff --git a/src/main/java/nu/validator/htmlparser/common/package.html b/htmlparser/src/main/java/nu/validator/htmlparser/common/package.html similarity index 100% rename from 
src/main/java/nu/validator/htmlparser/common/package.html rename to htmlparser/src/main/java/nu/validator/htmlparser/common/package.html diff --git a/src/main/java/nu/validator/htmlparser/dom/DOMTreeBuilder.java b/htmlparser/src/main/java/nu/validator/htmlparser/dom/DOMTreeBuilder.java similarity index 100% rename from src/main/java/nu/validator/htmlparser/dom/DOMTreeBuilder.java rename to htmlparser/src/main/java/nu/validator/htmlparser/dom/DOMTreeBuilder.java diff --git a/src/main/java/nu/validator/htmlparser/dom/Dom2Sax.java b/htmlparser/src/main/java/nu/validator/htmlparser/dom/Dom2Sax.java similarity index 100% rename from src/main/java/nu/validator/htmlparser/dom/Dom2Sax.java rename to htmlparser/src/main/java/nu/validator/htmlparser/dom/Dom2Sax.java diff --git a/src/main/java/nu/validator/htmlparser/dom/HtmlDocumentBuilder.java b/htmlparser/src/main/java/nu/validator/htmlparser/dom/HtmlDocumentBuilder.java similarity index 100% rename from src/main/java/nu/validator/htmlparser/dom/HtmlDocumentBuilder.java rename to htmlparser/src/main/java/nu/validator/htmlparser/dom/HtmlDocumentBuilder.java diff --git a/src/main/java/nu/validator/htmlparser/dom/package.html b/htmlparser/src/main/java/nu/validator/htmlparser/dom/package.html similarity index 100% rename from src/main/java/nu/validator/htmlparser/dom/package.html rename to htmlparser/src/main/java/nu/validator/htmlparser/dom/package.html diff --git a/src/main/java/nu/validator/htmlparser/extra/ChardetSniffer.java b/htmlparser/src/main/java/nu/validator/htmlparser/extra/ChardetSniffer.java similarity index 100% rename from src/main/java/nu/validator/htmlparser/extra/ChardetSniffer.java rename to htmlparser/src/main/java/nu/validator/htmlparser/extra/ChardetSniffer.java diff --git a/src/main/java/nu/validator/htmlparser/extra/IcuDetectorSniffer.java b/htmlparser/src/main/java/nu/validator/htmlparser/extra/IcuDetectorSniffer.java similarity index 100% rename from 
src/main/java/nu/validator/htmlparser/extra/IcuDetectorSniffer.java rename to htmlparser/src/main/java/nu/validator/htmlparser/extra/IcuDetectorSniffer.java diff --git a/src/main/java/nu/validator/htmlparser/extra/NormalizationChecker.java b/htmlparser/src/main/java/nu/validator/htmlparser/extra/NormalizationChecker.java similarity index 100% rename from src/main/java/nu/validator/htmlparser/extra/NormalizationChecker.java rename to htmlparser/src/main/java/nu/validator/htmlparser/extra/NormalizationChecker.java diff --git a/src/main/java/nu/validator/htmlparser/impl/AttributeName.java b/htmlparser/src/main/java/nu/validator/htmlparser/impl/AttributeName.java similarity index 100% rename from src/main/java/nu/validator/htmlparser/impl/AttributeName.java rename to htmlparser/src/main/java/nu/validator/htmlparser/impl/AttributeName.java diff --git a/src/main/java/nu/validator/htmlparser/impl/CoalescingTreeBuilder.java b/htmlparser/src/main/java/nu/validator/htmlparser/impl/CoalescingTreeBuilder.java similarity index 100% rename from src/main/java/nu/validator/htmlparser/impl/CoalescingTreeBuilder.java rename to htmlparser/src/main/java/nu/validator/htmlparser/impl/CoalescingTreeBuilder.java diff --git a/src/main/java/nu/validator/htmlparser/impl/ElementName.java b/htmlparser/src/main/java/nu/validator/htmlparser/impl/ElementName.java similarity index 100% rename from src/main/java/nu/validator/htmlparser/impl/ElementName.java rename to htmlparser/src/main/java/nu/validator/htmlparser/impl/ElementName.java diff --git a/src/main/java/nu/validator/htmlparser/impl/ErrorReportingTokenizer.java b/htmlparser/src/main/java/nu/validator/htmlparser/impl/ErrorReportingTokenizer.java similarity index 100% rename from src/main/java/nu/validator/htmlparser/impl/ErrorReportingTokenizer.java rename to htmlparser/src/main/java/nu/validator/htmlparser/impl/ErrorReportingTokenizer.java diff --git a/src/main/java/nu/validator/htmlparser/impl/HtmlAttributes.java 
b/htmlparser/src/main/java/nu/validator/htmlparser/impl/HtmlAttributes.java similarity index 100% rename from src/main/java/nu/validator/htmlparser/impl/HtmlAttributes.java rename to htmlparser/src/main/java/nu/validator/htmlparser/impl/HtmlAttributes.java diff --git a/src/main/java/nu/validator/htmlparser/impl/LocatorImpl.java b/htmlparser/src/main/java/nu/validator/htmlparser/impl/LocatorImpl.java similarity index 100% rename from src/main/java/nu/validator/htmlparser/impl/LocatorImpl.java rename to htmlparser/src/main/java/nu/validator/htmlparser/impl/LocatorImpl.java diff --git a/src/main/java/nu/validator/htmlparser/impl/MetaScanner.java b/htmlparser/src/main/java/nu/validator/htmlparser/impl/MetaScanner.java similarity index 100% rename from src/main/java/nu/validator/htmlparser/impl/MetaScanner.java rename to htmlparser/src/main/java/nu/validator/htmlparser/impl/MetaScanner.java diff --git a/src/main/java/nu/validator/htmlparser/impl/NCName.java b/htmlparser/src/main/java/nu/validator/htmlparser/impl/NCName.java similarity index 100% rename from src/main/java/nu/validator/htmlparser/impl/NCName.java rename to htmlparser/src/main/java/nu/validator/htmlparser/impl/NCName.java diff --git a/src/main/java/nu/validator/htmlparser/impl/NamedCharacters.java b/htmlparser/src/main/java/nu/validator/htmlparser/impl/NamedCharacters.java similarity index 100% rename from src/main/java/nu/validator/htmlparser/impl/NamedCharacters.java rename to htmlparser/src/main/java/nu/validator/htmlparser/impl/NamedCharacters.java diff --git a/src/main/java/nu/validator/htmlparser/impl/NamedCharactersAccel.java b/htmlparser/src/main/java/nu/validator/htmlparser/impl/NamedCharactersAccel.java similarity index 100% rename from src/main/java/nu/validator/htmlparser/impl/NamedCharactersAccel.java rename to htmlparser/src/main/java/nu/validator/htmlparser/impl/NamedCharactersAccel.java diff --git a/src/main/java/nu/validator/htmlparser/impl/Portability.java 
b/htmlparser/src/main/java/nu/validator/htmlparser/impl/Portability.java similarity index 100% rename from src/main/java/nu/validator/htmlparser/impl/Portability.java rename to htmlparser/src/main/java/nu/validator/htmlparser/impl/Portability.java diff --git a/src/main/java/nu/validator/htmlparser/impl/PushedLocation.java b/htmlparser/src/main/java/nu/validator/htmlparser/impl/PushedLocation.java similarity index 100% rename from src/main/java/nu/validator/htmlparser/impl/PushedLocation.java rename to htmlparser/src/main/java/nu/validator/htmlparser/impl/PushedLocation.java diff --git a/src/main/java/nu/validator/htmlparser/impl/StackNode.java b/htmlparser/src/main/java/nu/validator/htmlparser/impl/StackNode.java similarity index 100% rename from src/main/java/nu/validator/htmlparser/impl/StackNode.java rename to htmlparser/src/main/java/nu/validator/htmlparser/impl/StackNode.java diff --git a/src/main/java/nu/validator/htmlparser/impl/StateSnapshot.java b/htmlparser/src/main/java/nu/validator/htmlparser/impl/StateSnapshot.java similarity index 100% rename from src/main/java/nu/validator/htmlparser/impl/StateSnapshot.java rename to htmlparser/src/main/java/nu/validator/htmlparser/impl/StateSnapshot.java diff --git a/src/main/java/nu/validator/htmlparser/impl/TaintableLocatorImpl.java b/htmlparser/src/main/java/nu/validator/htmlparser/impl/TaintableLocatorImpl.java similarity index 100% rename from src/main/java/nu/validator/htmlparser/impl/TaintableLocatorImpl.java rename to htmlparser/src/main/java/nu/validator/htmlparser/impl/TaintableLocatorImpl.java diff --git a/src/main/java/nu/validator/htmlparser/impl/Tokenizer.java b/htmlparser/src/main/java/nu/validator/htmlparser/impl/Tokenizer.java similarity index 100% rename from src/main/java/nu/validator/htmlparser/impl/Tokenizer.java rename to htmlparser/src/main/java/nu/validator/htmlparser/impl/Tokenizer.java diff --git a/src/main/java/nu/validator/htmlparser/impl/TreeBuilder.java 
b/htmlparser/src/main/java/nu/validator/htmlparser/impl/TreeBuilder.java similarity index 100% rename from src/main/java/nu/validator/htmlparser/impl/TreeBuilder.java rename to htmlparser/src/main/java/nu/validator/htmlparser/impl/TreeBuilder.java diff --git a/src/main/java/nu/validator/htmlparser/impl/TreeBuilderState.java b/htmlparser/src/main/java/nu/validator/htmlparser/impl/TreeBuilderState.java similarity index 100% rename from src/main/java/nu/validator/htmlparser/impl/TreeBuilderState.java rename to htmlparser/src/main/java/nu/validator/htmlparser/impl/TreeBuilderState.java diff --git a/src/main/java/nu/validator/htmlparser/impl/UTF16Buffer.java b/htmlparser/src/main/java/nu/validator/htmlparser/impl/UTF16Buffer.java similarity index 100% rename from src/main/java/nu/validator/htmlparser/impl/UTF16Buffer.java rename to htmlparser/src/main/java/nu/validator/htmlparser/impl/UTF16Buffer.java diff --git a/src/main/java/nu/validator/htmlparser/impl/package.html b/htmlparser/src/main/java/nu/validator/htmlparser/impl/package.html similarity index 100% rename from src/main/java/nu/validator/htmlparser/impl/package.html rename to htmlparser/src/main/java/nu/validator/htmlparser/impl/package.html diff --git a/src/main/java/nu/validator/htmlparser/io/BomSniffer.java b/htmlparser/src/main/java/nu/validator/htmlparser/io/BomSniffer.java similarity index 100% rename from src/main/java/nu/validator/htmlparser/io/BomSniffer.java rename to htmlparser/src/main/java/nu/validator/htmlparser/io/BomSniffer.java diff --git a/src/main/java/nu/validator/htmlparser/io/Confidence.java b/htmlparser/src/main/java/nu/validator/htmlparser/io/Confidence.java similarity index 100% rename from src/main/java/nu/validator/htmlparser/io/Confidence.java rename to htmlparser/src/main/java/nu/validator/htmlparser/io/Confidence.java diff --git a/src/main/java/nu/validator/htmlparser/io/Driver.java b/htmlparser/src/main/java/nu/validator/htmlparser/io/Driver.java similarity index 100% rename from 
src/main/java/nu/validator/htmlparser/io/Driver.java rename to htmlparser/src/main/java/nu/validator/htmlparser/io/Driver.java diff --git a/src/main/java/nu/validator/htmlparser/io/Encoding.java b/htmlparser/src/main/java/nu/validator/htmlparser/io/Encoding.java similarity index 100% rename from src/main/java/nu/validator/htmlparser/io/Encoding.java rename to htmlparser/src/main/java/nu/validator/htmlparser/io/Encoding.java diff --git a/src/main/java/nu/validator/htmlparser/io/HtmlInputStreamReader.java b/htmlparser/src/main/java/nu/validator/htmlparser/io/HtmlInputStreamReader.java similarity index 100% rename from src/main/java/nu/validator/htmlparser/io/HtmlInputStreamReader.java rename to htmlparser/src/main/java/nu/validator/htmlparser/io/HtmlInputStreamReader.java diff --git a/src/main/java/nu/validator/htmlparser/io/MetaSniffer.java b/htmlparser/src/main/java/nu/validator/htmlparser/io/MetaSniffer.java similarity index 100% rename from src/main/java/nu/validator/htmlparser/io/MetaSniffer.java rename to htmlparser/src/main/java/nu/validator/htmlparser/io/MetaSniffer.java diff --git a/src/main/java/nu/validator/htmlparser/rewindable/Rewindable.java b/htmlparser/src/main/java/nu/validator/htmlparser/rewindable/Rewindable.java similarity index 100% rename from src/main/java/nu/validator/htmlparser/rewindable/Rewindable.java rename to htmlparser/src/main/java/nu/validator/htmlparser/rewindable/Rewindable.java diff --git a/src/main/java/nu/validator/htmlparser/rewindable/RewindableInputStream.java b/htmlparser/src/main/java/nu/validator/htmlparser/rewindable/RewindableInputStream.java similarity index 100% rename from src/main/java/nu/validator/htmlparser/rewindable/RewindableInputStream.java rename to htmlparser/src/main/java/nu/validator/htmlparser/rewindable/RewindableInputStream.java diff --git a/src/main/java/nu/validator/htmlparser/sax/HtmlParser.java b/htmlparser/src/main/java/nu/validator/htmlparser/sax/HtmlParser.java similarity index 100% rename from 
src/main/java/nu/validator/htmlparser/sax/HtmlParser.java rename to htmlparser/src/main/java/nu/validator/htmlparser/sax/HtmlParser.java diff --git a/src/main/java/nu/validator/htmlparser/sax/HtmlSerializer.java b/htmlparser/src/main/java/nu/validator/htmlparser/sax/HtmlSerializer.java similarity index 100% rename from src/main/java/nu/validator/htmlparser/sax/HtmlSerializer.java rename to htmlparser/src/main/java/nu/validator/htmlparser/sax/HtmlSerializer.java diff --git a/src/main/java/nu/validator/htmlparser/sax/InfosetCoercingHtmlParser.java b/htmlparser/src/main/java/nu/validator/htmlparser/sax/InfosetCoercingHtmlParser.java similarity index 100% rename from src/main/java/nu/validator/htmlparser/sax/InfosetCoercingHtmlParser.java rename to htmlparser/src/main/java/nu/validator/htmlparser/sax/InfosetCoercingHtmlParser.java diff --git a/src/main/java/nu/validator/htmlparser/sax/NameCheckingXmlSerializer.java b/htmlparser/src/main/java/nu/validator/htmlparser/sax/NameCheckingXmlSerializer.java similarity index 100% rename from src/main/java/nu/validator/htmlparser/sax/NameCheckingXmlSerializer.java rename to htmlparser/src/main/java/nu/validator/htmlparser/sax/NameCheckingXmlSerializer.java diff --git a/src/main/java/nu/validator/htmlparser/sax/SAXStreamer.java b/htmlparser/src/main/java/nu/validator/htmlparser/sax/SAXStreamer.java similarity index 100% rename from src/main/java/nu/validator/htmlparser/sax/SAXStreamer.java rename to htmlparser/src/main/java/nu/validator/htmlparser/sax/SAXStreamer.java diff --git a/src/main/java/nu/validator/htmlparser/sax/SAXTreeBuilder.java b/htmlparser/src/main/java/nu/validator/htmlparser/sax/SAXTreeBuilder.java similarity index 100% rename from src/main/java/nu/validator/htmlparser/sax/SAXTreeBuilder.java rename to htmlparser/src/main/java/nu/validator/htmlparser/sax/SAXTreeBuilder.java diff --git a/src/main/java/nu/validator/htmlparser/sax/XmlSerializer.java 
b/htmlparser/src/main/java/nu/validator/htmlparser/sax/XmlSerializer.java similarity index 100% rename from src/main/java/nu/validator/htmlparser/sax/XmlSerializer.java rename to htmlparser/src/main/java/nu/validator/htmlparser/sax/XmlSerializer.java diff --git a/src/main/java/nu/validator/htmlparser/sax/package.html b/htmlparser/src/main/java/nu/validator/htmlparser/sax/package.html similarity index 100% rename from src/main/java/nu/validator/htmlparser/sax/package.html rename to htmlparser/src/main/java/nu/validator/htmlparser/sax/package.html diff --git a/src/test/java/nu/validator/htmlparser/test/DecoderLoopTester.java b/htmlparser/src/test/java/nu/validator/htmlparser/test/DecoderLoopTester.java similarity index 100% rename from src/test/java/nu/validator/htmlparser/test/DecoderLoopTester.java rename to htmlparser/src/test/java/nu/validator/htmlparser/test/DecoderLoopTester.java diff --git a/src/test/java/nu/validator/htmlparser/test/DomIdTester.java b/htmlparser/src/test/java/nu/validator/htmlparser/test/DomIdTester.java similarity index 100% rename from src/test/java/nu/validator/htmlparser/test/DomIdTester.java rename to htmlparser/src/test/java/nu/validator/htmlparser/test/DomIdTester.java diff --git a/src/test/java/nu/validator/htmlparser/test/DomTest.java b/htmlparser/src/test/java/nu/validator/htmlparser/test/DomTest.java similarity index 100% rename from src/test/java/nu/validator/htmlparser/test/DomTest.java rename to htmlparser/src/test/java/nu/validator/htmlparser/test/DomTest.java diff --git a/src/test/java/nu/validator/htmlparser/test/EncodingTester.java b/htmlparser/src/test/java/nu/validator/htmlparser/test/EncodingTester.java similarity index 100% rename from src/test/java/nu/validator/htmlparser/test/EncodingTester.java rename to htmlparser/src/test/java/nu/validator/htmlparser/test/EncodingTester.java diff --git a/src/test/java/nu/validator/htmlparser/test/JSONArrayTokenHandler.java 
b/htmlparser/src/test/java/nu/validator/htmlparser/test/JSONArrayTokenHandler.java similarity index 100% rename from src/test/java/nu/validator/htmlparser/test/JSONArrayTokenHandler.java rename to htmlparser/src/test/java/nu/validator/htmlparser/test/JSONArrayTokenHandler.java diff --git a/src/test/java/nu/validator/htmlparser/test/ListErrorHandler.java b/htmlparser/src/test/java/nu/validator/htmlparser/test/ListErrorHandler.java similarity index 100% rename from src/test/java/nu/validator/htmlparser/test/ListErrorHandler.java rename to htmlparser/src/test/java/nu/validator/htmlparser/test/ListErrorHandler.java diff --git a/src/test/java/nu/validator/htmlparser/test/SystemErrErrorHandler.java b/htmlparser/src/test/java/nu/validator/htmlparser/test/SystemErrErrorHandler.java similarity index 100% rename from src/test/java/nu/validator/htmlparser/test/SystemErrErrorHandler.java rename to htmlparser/src/test/java/nu/validator/htmlparser/test/SystemErrErrorHandler.java diff --git a/src/test/java/nu/validator/htmlparser/test/TokenPrinter.java b/htmlparser/src/test/java/nu/validator/htmlparser/test/TokenPrinter.java similarity index 100% rename from src/test/java/nu/validator/htmlparser/test/TokenPrinter.java rename to htmlparser/src/test/java/nu/validator/htmlparser/test/TokenPrinter.java diff --git a/src/test/java/nu/validator/htmlparser/test/TokenizerTester.java b/htmlparser/src/test/java/nu/validator/htmlparser/test/TokenizerTester.java similarity index 100% rename from src/test/java/nu/validator/htmlparser/test/TokenizerTester.java rename to htmlparser/src/test/java/nu/validator/htmlparser/test/TokenizerTester.java diff --git a/src/test/java/nu/validator/htmlparser/test/TreeDumpContentHandler.java b/htmlparser/src/test/java/nu/validator/htmlparser/test/TreeDumpContentHandler.java similarity index 100% rename from src/test/java/nu/validator/htmlparser/test/TreeDumpContentHandler.java rename to 
htmlparser/src/test/java/nu/validator/htmlparser/test/TreeDumpContentHandler.java diff --git a/src/test/java/nu/validator/htmlparser/test/TreePrinter.java b/htmlparser/src/test/java/nu/validator/htmlparser/test/TreePrinter.java similarity index 100% rename from src/test/java/nu/validator/htmlparser/test/TreePrinter.java rename to htmlparser/src/test/java/nu/validator/htmlparser/test/TreePrinter.java diff --git a/src/test/java/nu/validator/htmlparser/test/TreeTester.java b/htmlparser/src/test/java/nu/validator/htmlparser/test/TreeTester.java similarity index 100% rename from src/test/java/nu/validator/htmlparser/test/TreeTester.java rename to htmlparser/src/test/java/nu/validator/htmlparser/test/TreeTester.java diff --git a/src/test/java/nu/validator/htmlparser/test/UntilHashInputStream.java b/htmlparser/src/test/java/nu/validator/htmlparser/test/UntilHashInputStream.java similarity index 100% rename from src/test/java/nu/validator/htmlparser/test/UntilHashInputStream.java rename to htmlparser/src/test/java/nu/validator/htmlparser/test/UntilHashInputStream.java diff --git a/src/test/java/nu/validator/htmlparser/test/XmlSerializerTester.java b/htmlparser/src/test/java/nu/validator/htmlparser/test/XmlSerializerTester.java similarity index 100% rename from src/test/java/nu/validator/htmlparser/test/XmlSerializerTester.java rename to htmlparser/src/test/java/nu/validator/htmlparser/test/XmlSerializerTester.java diff --git a/src/test/java/nu/validator/htmlparser/test/package.html b/htmlparser/src/test/java/nu/validator/htmlparser/test/package.html similarity index 100% rename from src/test/java/nu/validator/htmlparser/test/package.html rename to htmlparser/src/test/java/nu/validator/htmlparser/test/package.html diff --git a/src/test/java/nu/validator/saxtree/test/PassThruPrinter.java b/htmlparser/src/test/java/nu/validator/saxtree/test/PassThruPrinter.java similarity index 100% rename from src/test/java/nu/validator/saxtree/test/PassThruPrinter.java rename to 
htmlparser/src/test/java/nu/validator/saxtree/test/PassThruPrinter.java diff --git a/src/test/java/nu/validator/saxtree/test/package.html b/htmlparser/src/test/java/nu/validator/saxtree/test/package.html similarity index 100% rename from src/test/java/nu/validator/saxtree/test/package.html rename to htmlparser/src/test/java/nu/validator/saxtree/test/package.html diff --git a/src/main/java/nu/validator/saxtree/CDATA.java b/saxtree/src/main/java/nu/validator/saxtree/CDATA.java similarity index 100% rename from src/main/java/nu/validator/saxtree/CDATA.java rename to saxtree/src/main/java/nu/validator/saxtree/CDATA.java diff --git a/src/main/java/nu/validator/saxtree/CharBufferNode.java b/saxtree/src/main/java/nu/validator/saxtree/CharBufferNode.java similarity index 100% rename from src/main/java/nu/validator/saxtree/CharBufferNode.java rename to saxtree/src/main/java/nu/validator/saxtree/CharBufferNode.java diff --git a/src/main/java/nu/validator/saxtree/Characters.java b/saxtree/src/main/java/nu/validator/saxtree/Characters.java similarity index 100% rename from src/main/java/nu/validator/saxtree/Characters.java rename to saxtree/src/main/java/nu/validator/saxtree/Characters.java diff --git a/src/main/java/nu/validator/saxtree/Comment.java b/saxtree/src/main/java/nu/validator/saxtree/Comment.java similarity index 100% rename from src/main/java/nu/validator/saxtree/Comment.java rename to saxtree/src/main/java/nu/validator/saxtree/Comment.java diff --git a/src/main/java/nu/validator/saxtree/DTD.java b/saxtree/src/main/java/nu/validator/saxtree/DTD.java similarity index 100% rename from src/main/java/nu/validator/saxtree/DTD.java rename to saxtree/src/main/java/nu/validator/saxtree/DTD.java diff --git a/src/main/java/nu/validator/saxtree/Document.java b/saxtree/src/main/java/nu/validator/saxtree/Document.java similarity index 100% rename from src/main/java/nu/validator/saxtree/Document.java rename to saxtree/src/main/java/nu/validator/saxtree/Document.java diff --git 
a/src/main/java/nu/validator/saxtree/DocumentFragment.java b/saxtree/src/main/java/nu/validator/saxtree/DocumentFragment.java similarity index 100% rename from src/main/java/nu/validator/saxtree/DocumentFragment.java rename to saxtree/src/main/java/nu/validator/saxtree/DocumentFragment.java diff --git a/src/main/java/nu/validator/saxtree/Element.java b/saxtree/src/main/java/nu/validator/saxtree/Element.java similarity index 100% rename from src/main/java/nu/validator/saxtree/Element.java rename to saxtree/src/main/java/nu/validator/saxtree/Element.java diff --git a/src/main/java/nu/validator/saxtree/Entity.java b/saxtree/src/main/java/nu/validator/saxtree/Entity.java similarity index 100% rename from src/main/java/nu/validator/saxtree/Entity.java rename to saxtree/src/main/java/nu/validator/saxtree/Entity.java diff --git a/src/main/java/nu/validator/saxtree/IgnorableWhitespace.java b/saxtree/src/main/java/nu/validator/saxtree/IgnorableWhitespace.java similarity index 100% rename from src/main/java/nu/validator/saxtree/IgnorableWhitespace.java rename to saxtree/src/main/java/nu/validator/saxtree/IgnorableWhitespace.java diff --git a/src/main/java/nu/validator/saxtree/LocatorImpl.java b/saxtree/src/main/java/nu/validator/saxtree/LocatorImpl.java similarity index 100% rename from src/main/java/nu/validator/saxtree/LocatorImpl.java rename to saxtree/src/main/java/nu/validator/saxtree/LocatorImpl.java diff --git a/src/main/java/nu/validator/saxtree/Node.java b/saxtree/src/main/java/nu/validator/saxtree/Node.java similarity index 100% rename from src/main/java/nu/validator/saxtree/Node.java rename to saxtree/src/main/java/nu/validator/saxtree/Node.java diff --git a/src/main/java/nu/validator/saxtree/NodeType.java b/saxtree/src/main/java/nu/validator/saxtree/NodeType.java similarity index 100% rename from src/main/java/nu/validator/saxtree/NodeType.java rename to saxtree/src/main/java/nu/validator/saxtree/NodeType.java diff --git 
a/src/main/java/nu/validator/saxtree/NullLexicalHandler.java b/saxtree/src/main/java/nu/validator/saxtree/NullLexicalHandler.java similarity index 100% rename from src/main/java/nu/validator/saxtree/NullLexicalHandler.java rename to saxtree/src/main/java/nu/validator/saxtree/NullLexicalHandler.java diff --git a/src/main/java/nu/validator/saxtree/ParentNode.java b/saxtree/src/main/java/nu/validator/saxtree/ParentNode.java similarity index 100% rename from src/main/java/nu/validator/saxtree/ParentNode.java rename to saxtree/src/main/java/nu/validator/saxtree/ParentNode.java diff --git a/src/main/java/nu/validator/saxtree/PrefixMapping.java b/saxtree/src/main/java/nu/validator/saxtree/PrefixMapping.java similarity index 100% rename from src/main/java/nu/validator/saxtree/PrefixMapping.java rename to saxtree/src/main/java/nu/validator/saxtree/PrefixMapping.java diff --git a/src/main/java/nu/validator/saxtree/ProcessingInstruction.java b/saxtree/src/main/java/nu/validator/saxtree/ProcessingInstruction.java similarity index 100% rename from src/main/java/nu/validator/saxtree/ProcessingInstruction.java rename to saxtree/src/main/java/nu/validator/saxtree/ProcessingInstruction.java diff --git a/src/main/java/nu/validator/saxtree/SkippedEntity.java b/saxtree/src/main/java/nu/validator/saxtree/SkippedEntity.java similarity index 100% rename from src/main/java/nu/validator/saxtree/SkippedEntity.java rename to saxtree/src/main/java/nu/validator/saxtree/SkippedEntity.java diff --git a/src/main/java/nu/validator/saxtree/TreeBuilder.java b/saxtree/src/main/java/nu/validator/saxtree/TreeBuilder.java similarity index 100% rename from src/main/java/nu/validator/saxtree/TreeBuilder.java rename to saxtree/src/main/java/nu/validator/saxtree/TreeBuilder.java diff --git a/src/main/java/nu/validator/saxtree/TreeParser.java b/saxtree/src/main/java/nu/validator/saxtree/TreeParser.java similarity index 100% rename from src/main/java/nu/validator/saxtree/TreeParser.java rename to 
saxtree/src/main/java/nu/validator/saxtree/TreeParser.java diff --git a/src/main/java/nu/validator/saxtree/package.html b/saxtree/src/main/java/nu/validator/saxtree/package.html similarity index 100% rename from src/main/java/nu/validator/saxtree/package.html rename to saxtree/src/main/java/nu/validator/saxtree/package.html diff --git a/src/main/java/nu/validator/htmlparser/xom/FormPointer.java b/xom/src/main/java/nu/validator/htmlparser/xom/FormPointer.java similarity index 100% rename from src/main/java/nu/validator/htmlparser/xom/FormPointer.java rename to xom/src/main/java/nu/validator/htmlparser/xom/FormPointer.java diff --git a/src/main/java/nu/validator/htmlparser/xom/FormPtrElement.java b/xom/src/main/java/nu/validator/htmlparser/xom/FormPtrElement.java similarity index 100% rename from src/main/java/nu/validator/htmlparser/xom/FormPtrElement.java rename to xom/src/main/java/nu/validator/htmlparser/xom/FormPtrElement.java diff --git a/src/main/java/nu/validator/htmlparser/xom/HtmlBuilder.java b/xom/src/main/java/nu/validator/htmlparser/xom/HtmlBuilder.java similarity index 100% rename from src/main/java/nu/validator/htmlparser/xom/HtmlBuilder.java rename to xom/src/main/java/nu/validator/htmlparser/xom/HtmlBuilder.java diff --git a/src/main/java/nu/validator/htmlparser/xom/ModalDocument.java b/xom/src/main/java/nu/validator/htmlparser/xom/ModalDocument.java similarity index 100% rename from src/main/java/nu/validator/htmlparser/xom/ModalDocument.java rename to xom/src/main/java/nu/validator/htmlparser/xom/ModalDocument.java diff --git a/src/main/java/nu/validator/htmlparser/xom/Mode.java b/xom/src/main/java/nu/validator/htmlparser/xom/Mode.java similarity index 100% rename from src/main/java/nu/validator/htmlparser/xom/Mode.java rename to xom/src/main/java/nu/validator/htmlparser/xom/Mode.java diff --git a/src/main/java/nu/validator/htmlparser/xom/SimpleNodeFactory.java b/xom/src/main/java/nu/validator/htmlparser/xom/SimpleNodeFactory.java similarity index 
100% rename from src/main/java/nu/validator/htmlparser/xom/SimpleNodeFactory.java rename to xom/src/main/java/nu/validator/htmlparser/xom/SimpleNodeFactory.java diff --git a/src/main/java/nu/validator/htmlparser/xom/XOMTreeBuilder.java b/xom/src/main/java/nu/validator/htmlparser/xom/XOMTreeBuilder.java similarity index 100% rename from src/main/java/nu/validator/htmlparser/xom/XOMTreeBuilder.java rename to xom/src/main/java/nu/validator/htmlparser/xom/XOMTreeBuilder.java diff --git a/src/main/java/nu/validator/htmlparser/xom/package.html b/xom/src/main/java/nu/validator/htmlparser/xom/package.html similarity index 100% rename from src/main/java/nu/validator/htmlparser/xom/package.html rename to xom/src/main/java/nu/validator/htmlparser/xom/package.html diff --git a/src/test/java/nu/validator/htmlparser/test/XomTest.java b/xom/src/test/java/nu/validator/htmlparser/test/XomTest.java similarity index 100% rename from src/test/java/nu/validator/htmlparser/test/XomTest.java rename to xom/src/test/java/nu/validator/htmlparser/test/XomTest.java From 7c8b11f048b1c7e981d6414377bcb7154ddeb4d3 Mon Sep 17 00:00:00 2001 From: Anthony Vanelverdinghe Date: Wed, 12 Aug 2020 07:39:08 +0200 Subject: [PATCH 18/30] Fix Maven build --- htmlparser/pom.xml | 146 +++++++++++++++++++++++++++++++++++++++++++++ pom.xml | 144 +++++++++----------------------------------- saxtree/pom.xml | 35 +++++++++++ xom/pom.xml | 47 +++++++++++++++ 4 files changed, 257 insertions(+), 115 deletions(-) create mode 100644 htmlparser/pom.xml create mode 100644 saxtree/pom.xml create mode 100644 xom/pom.xml diff --git a/htmlparser/pom.xml b/htmlparser/pom.xml new file mode 100644 index 00000000..e6364e4c --- /dev/null +++ b/htmlparser/pom.xml @@ -0,0 +1,146 @@ + + + 4.0.0 + + + nu.validator.htmlparser + parent + 1.4 + + + htmlparser + + htmlparser + + + + nu.validator.htmlparser + saxtree + + + com.ibm.icu + icu4j + 4.0.1 + true + + + net.sourceforge.jchardet + jchardet + 1.0 + true + + + com.sdicons.jsontools 
+ jsontools-core + 1.4 + test + + + + + + + maven-resources-plugin + + + backup-tokenizer + process-sources + + copy-resources + + + true + ${project.build.directory}/hotspot-backup + + + src/main/java/nu/validator/htmlparser/impl + +
  • Tokenizer.java
  • +
    +
    +
    +
    +
    + + restore-tokenizer + process-classes + + copy-resources + + + true + src/main/java/nu/validator/htmlparser/impl + + + ${project.build.directory}/hotspot-backup + +
  • Tokenizer.java
  • +
    +
    +
    +
    +
    +
    +
    + + org.codehaus.mojo + exec-maven-plugin + + + tokenizer-hotspot-workaround-javac + process-sources + + exec + + + javac + + ${project.basedir}/src/hotspot/java/nu/validator/htmlparser/generator/ApplyHotSpotWorkaround.java + -d + ${project.build.directory}/hotspot-classes + + + + + tokenizer-hotspot-workaround-java + process-sources + + exec + + + java + + -cp + ${project.build.directory}/hotspot-classes + nu.validator.htmlparser.generator.ApplyHotSpotWorkaround + ${project.build.sourceDirectory}/nu/validator/htmlparser/impl/Tokenizer.java + ${project.basedir}/src/hotspot/resources/HotSpotWorkaround.txt + + + + + +
    +
    +
    diff --git a/pom.xml b/pom.xml index 03a6d589..d2963348 100644 --- a/pom.xml +++ b/pom.xml @@ -31,7 +31,7 @@ To build and sign, run: mvn clean source:jar javadoc:jar package gpg:sign - This POM file is used for creating the JAR for distribution via the + This POM file is used for creating the JARs for distribution via the Maven Central Repository. It is not used as part of the normal development process of the parser and the maintainer of the parser (Henri Sivonen) isn't experienced in POM tweaking. If you need this POM to do something @@ -41,10 +41,11 @@ --> nu.validator.htmlparser - htmlparser + parent 1.4 + pom - htmlparser + parent The Validator.nu HTML Parser is an implementation of the HTML5 parsing algorithm in Java for applications. The parser is designed to work as a drop-in replacement for the XML parser in applications that already support XHTML 1.x content with an XML parser and use SAX, DOM or XOM to interface with the parser. https://about.validator.nu/htmlparser/ @@ -69,6 +70,12 @@ + + htmlparser + saxtree + xom + + scm:git:https://github.com/validator/htmlparser.git https://github.com/validator/htmlparser @@ -81,35 +88,25 @@ UTF-8 - - - com.ibm.icu - icu4j - 4.0.1 - compile - true - - - xom - xom - 1.1 - compile - true - - - net.sourceforge.jchardet - jchardet - 1.0 - compile - true - - - com.sdicons.jsontools - jsontools-core - 1.4 - test - - + + + + nu.validator.htmlparser + htmlparser + 1.4 + + + nu.validator.htmlparser + saxtree + 1.4 + + + nu.validator.htmlparser + xom + 1.4 + + + @@ -145,88 +142,5 @@ - - - maven-resources-plugin - - - backup-tokenizer - process-sources - - copy-resources - - - true - ${project.build.directory}/hotspot-backup - - - src/main/java/nu/validator/htmlparser/impl - -
  • Tokenizer.java
  • -
    -
    -
    -
    -
    - - restore-tokenizer - process-classes - - copy-resources - - - true - src/main/java/nu/validator/htmlparser/impl - - - ${project.build.directory}/hotspot-backup - -
  • Tokenizer.java
  • -
    -
    -
    -
    -
    -
    -
    - - org.codehaus.mojo - exec-maven-plugin - - - tokenizer-hotspot-workaround-javac - process-sources - - exec - - - javac - - ${project.basedir}/src/hotspot/java/nu/validator/htmlparser/generator/ApplyHotSpotWorkaround.java - -d - ${project.build.directory}/hotspot-classes - - - - - tokenizer-hotspot-workaround-java - process-sources - - exec - - - java - - -cp - ${project.build.directory}/hotspot-classes - nu.validator.htmlparser.generator.ApplyHotSpotWorkaround - ${project.build.sourceDirectory}/nu/validator/htmlparser/impl/Tokenizer.java - ${project.basedir}/src/hotspot/resources/HotSpotWorkaround.txt - - - - - -
    diff --git a/saxtree/pom.xml b/saxtree/pom.xml new file mode 100644 index 00000000..b51ca7bf --- /dev/null +++ b/saxtree/pom.xml @@ -0,0 +1,35 @@ + + + 4.0.0 + + + nu.validator.htmlparser + parent + 1.4 + + + saxtree + + saxtree + diff --git a/xom/pom.xml b/xom/pom.xml new file mode 100644 index 00000000..7f46fec0 --- /dev/null +++ b/xom/pom.xml @@ -0,0 +1,47 @@ + + + 4.0.0 + + + nu.validator.htmlparser + parent + 1.4 + + + xom + + xom + + + + nu.validator.htmlparser + htmlparser + + + xom + xom + 1.1 + + + From 292855077617fa4485e7146760ad63d52668c3d1 Mon Sep 17 00:00:00 2001 From: Anthony Vanelverdinghe Date: Wed, 12 Aug 2020 17:51:58 +0200 Subject: [PATCH 19/30] Fix doclint errors --- .../validator/htmlparser/dom/HtmlDocumentBuilder.java | 6 +++--- .../htmlparser/extra/NormalizationChecker.java | 5 ++--- .../htmlparser/impl/ErrorReportingTokenizer.java | 2 +- .../java/nu/validator/htmlparser/impl/TreeBuilder.java | 2 -- .../validator/htmlparser/io/HtmlInputStreamReader.java | 4 +++- .../java/nu/validator/htmlparser/io/MetaSniffer.java | 1 - .../java/nu/validator/htmlparser/sax/HtmlParser.java | 10 +++++----- .../htmlparser/sax/NameCheckingXmlSerializer.java | 2 +- .../java/nu/validator/htmlparser/xom/HtmlBuilder.java | 6 +++--- 9 files changed, 18 insertions(+), 20 deletions(-) diff --git a/htmlparser/src/main/java/nu/validator/htmlparser/dom/HtmlDocumentBuilder.java b/htmlparser/src/main/java/nu/validator/htmlparser/dom/HtmlDocumentBuilder.java index 1674aa72..4dbe6613 100644 --- a/htmlparser/src/main/java/nu/validator/htmlparser/dom/HtmlDocumentBuilder.java +++ b/htmlparser/src/main/java/nu/validator/htmlparser/dom/HtmlDocumentBuilder.java @@ -375,7 +375,7 @@ public void setTransitionHander(TransitionHandler handler) { /** * Indicates whether NFC normalization of source is being checked. * @return true if NFC normalization of source is being checked. 
- * @see nu.validator.htmlparser.impl.Tokenizer#isCheckingNormalization() + * @see nu.validator.htmlparser.io.Driver#isCheckingNormalization() */ public boolean isCheckingNormalization() { return checkingNormalization; @@ -384,7 +384,7 @@ public boolean isCheckingNormalization() { /** * Toggles the checking of the NFC normalization of source. * @param enable true to check normalization - * @see nu.validator.htmlparser.impl.Tokenizer#setCheckingNormalization(boolean) + * @see nu.validator.htmlparser.io.Driver#setCheckingNormalization(boolean) */ public void setCheckingNormalization(boolean enable) { this.checkingNormalization = enable; @@ -609,7 +609,7 @@ public void setNamePolicy(XmlViolationPolicy namePolicy) { * Sets the encoding sniffing heuristics. * * @param heuristics the heuristics to set - * @see nu.validator.htmlparser.impl.Tokenizer#setHeuristics(nu.validator.htmlparser.common.Heuristics) + * @see nu.validator.htmlparser.io.Driver#setHeuristics(nu.validator.htmlparser.common.Heuristics) */ public void setHeuristics(Heuristics heuristics) { this.heuristics = heuristics; diff --git a/htmlparser/src/main/java/nu/validator/htmlparser/extra/NormalizationChecker.java b/htmlparser/src/main/java/nu/validator/htmlparser/extra/NormalizationChecker.java index 45df62fb..05b6598a 100644 --- a/htmlparser/src/main/java/nu/validator/htmlparser/extra/NormalizationChecker.java +++ b/htmlparser/src/main/java/nu/validator/htmlparser/extra/NormalizationChecker.java @@ -122,10 +122,9 @@ private static boolean isComposingChar(int c) { } /** - * Constructor with mode selection. + * Constructor with locator. * - * @param sourceTextMode whether the source text-related messages - * should be enabled. 
+ * @param locator */ public NormalizationChecker(Locator locator) { super(); diff --git a/htmlparser/src/main/java/nu/validator/htmlparser/impl/ErrorReportingTokenizer.java b/htmlparser/src/main/java/nu/validator/htmlparser/impl/ErrorReportingTokenizer.java index 19fbe7a6..a8c11ef2 100644 --- a/htmlparser/src/main/java/nu/validator/htmlparser/impl/ErrorReportingTokenizer.java +++ b/htmlparser/src/main/java/nu/validator/htmlparser/impl/ErrorReportingTokenizer.java @@ -128,7 +128,7 @@ public void setContentNonXmlCharPolicy( /** * Sets the errorProfile. * - * @param errorProfile + * @param errorProfileMap */ public void setErrorProfile(HashMap errorProfileMap) { this.errorProfileMap = errorProfileMap; diff --git a/htmlparser/src/main/java/nu/validator/htmlparser/impl/TreeBuilder.java b/htmlparser/src/main/java/nu/validator/htmlparser/impl/TreeBuilder.java index 7b78b1b7..2d15ca57 100644 --- a/htmlparser/src/main/java/nu/validator/htmlparser/impl/TreeBuilder.java +++ b/htmlparser/src/main/java/nu/validator/htmlparser/impl/TreeBuilder.java @@ -2898,8 +2898,6 @@ private boolean isSpecialParentInForeign(StackNode stackNode) { * C++ memory note: The return value must be released. 
* * @return - * @throws SAXException - * @throws StopSniffingException */ public static String extractCharsetFromContent(String attributeValue // CPPONLY: , TreeBuilder tb diff --git a/htmlparser/src/main/java/nu/validator/htmlparser/io/HtmlInputStreamReader.java b/htmlparser/src/main/java/nu/validator/htmlparser/io/HtmlInputStreamReader.java index 2971a27e..7b7dbff3 100644 --- a/htmlparser/src/main/java/nu/validator/htmlparser/io/HtmlInputStreamReader.java +++ b/htmlparser/src/main/java/nu/validator/htmlparser/io/HtmlInputStreamReader.java @@ -110,7 +110,9 @@ public final class HtmlInputStreamReader extends Reader implements ByteReadable, /** * @param inputStream * @param errorHandler - * @param locator + * @param tokenizer + * @param driver + * @param heuristics * @throws IOException * @throws SAXException */ diff --git a/htmlparser/src/main/java/nu/validator/htmlparser/io/MetaSniffer.java b/htmlparser/src/main/java/nu/validator/htmlparser/io/MetaSniffer.java index 47fbbfbc..dd4dee9f 100644 --- a/htmlparser/src/main/java/nu/validator/htmlparser/io/MetaSniffer.java +++ b/htmlparser/src/main/java/nu/validator/htmlparser/io/MetaSniffer.java @@ -91,7 +91,6 @@ protected int read() throws IOException { * * @throws SAXException * @throws IOException - * @throws */ public Encoding sniff(ByteReadable readable) throws SAXException, IOException { this.readable = readable; diff --git a/htmlparser/src/main/java/nu/validator/htmlparser/sax/HtmlParser.java b/htmlparser/src/main/java/nu/validator/htmlparser/sax/HtmlParser.java index ad17e892..8b66cd76 100644 --- a/htmlparser/src/main/java/nu/validator/htmlparser/sax/HtmlParser.java +++ b/htmlparser/src/main/java/nu/validator/htmlparser/sax/HtmlParser.java @@ -714,7 +714,7 @@ public void setProperty(String name, Object value) /** * Indicates whether NFC normalization of source is being checked. * @return true if NFC normalization of source is being checked. 
- * @see nu.validator.htmlparser.impl.Tokenizer#isCheckingNormalization() + * @see nu.validator.htmlparser.io.Driver#isCheckingNormalization() */ public boolean isCheckingNormalization() { return checkingNormalization; @@ -723,7 +723,7 @@ public boolean isCheckingNormalization() { /** * Toggles the checking of the NFC normalization of source. * @param enable true to check normalization - * @see nu.validator.htmlparser.impl.Tokenizer#setCheckingNormalization(boolean) + * @see nu.validator.htmlparser.io.Driver#setCheckingNormalization(boolean) */ public void setCheckingNormalization(boolean enable) { this.checkingNormalization = enable; @@ -941,8 +941,8 @@ public boolean isReportingDoctype() { } /** - * @param errorProfile - * @see nu.validator.htmlparser.impl.errorReportingTokenizer#setErrorProfile(set) + * @param errorProfileMap + * @see nu.validator.htmlparser.impl.ErrorReportingTokenizer#setErrorProfile(HashMap) */ public void setErrorProfile(HashMap errorProfileMap) { this.errorProfileMap = errorProfileMap; @@ -965,7 +965,7 @@ public void setNamePolicy(XmlViolationPolicy namePolicy) { * Sets the encoding sniffing heuristics. 
* * @param heuristics the heuristics to set - * @see nu.validator.htmlparser.impl.Tokenizer#setHeuristics(nu.validator.htmlparser.common.Heuristics) + * @see nu.validator.htmlparser.io.Driver#setHeuristics(nu.validator.htmlparser.common.Heuristics) */ public void setHeuristics(Heuristics heuristics) { this.heuristics = heuristics; diff --git a/htmlparser/src/main/java/nu/validator/htmlparser/sax/NameCheckingXmlSerializer.java b/htmlparser/src/main/java/nu/validator/htmlparser/sax/NameCheckingXmlSerializer.java index b6cb2f87..bacbbfce 100644 --- a/htmlparser/src/main/java/nu/validator/htmlparser/sax/NameCheckingXmlSerializer.java +++ b/htmlparser/src/main/java/nu/validator/htmlparser/sax/NameCheckingXmlSerializer.java @@ -40,7 +40,7 @@ public NameCheckingXmlSerializer(Writer out) { } /** - * @see nu.validator.htmlparser.sax.XmlSerializer#checkNCName() + * @see nu.validator.htmlparser.sax.XmlSerializer#checkNCName(String) */ @Override protected void checkNCName(String name) throws SAXException { if (!NCName.isNCName(name)) { diff --git a/xom/src/main/java/nu/validator/htmlparser/xom/HtmlBuilder.java b/xom/src/main/java/nu/validator/htmlparser/xom/HtmlBuilder.java index d5884723..4b822b06 100644 --- a/xom/src/main/java/nu/validator/htmlparser/xom/HtmlBuilder.java +++ b/xom/src/main/java/nu/validator/htmlparser/xom/HtmlBuilder.java @@ -412,7 +412,7 @@ public void setTransitionHander(TransitionHandler handler) { /** * Indicates whether NFC normalization of source is being checked. * @return true if NFC normalization of source is being checked. - * @see nu.validator.htmlparser.impl.Tokenizer#isCheckingNormalization() + * @see nu.validator.htmlparser.io.Driver#isCheckingNormalization() */ public boolean isCheckingNormalization() { return checkingNormalization; @@ -421,7 +421,7 @@ public boolean isCheckingNormalization() { /** * Toggles the checking of the NFC normalization of source. 
* @param enable true to check normalization - * @see nu.validator.htmlparser.impl.Tokenizer#setCheckingNormalization(boolean) + * @see nu.validator.htmlparser.io.Driver#setCheckingNormalization(boolean) */ public void setCheckingNormalization(boolean enable) { this.checkingNormalization = enable; @@ -646,7 +646,7 @@ public void setNamePolicy(XmlViolationPolicy namePolicy) { * Sets the encoding sniffing heuristics. * * @param heuristics the heuristics to set - * @see nu.validator.htmlparser.impl.Tokenizer#setHeuristics(nu.validator.htmlparser.common.Heuristics) + * @see nu.validator.htmlparser.io.Driver#setHeuristics(nu.validator.htmlparser.common.Heuristics) */ public void setHeuristics(Heuristics heuristics) { this.heuristics = heuristics; From 88184e12849578f45352b7038652070e40599d85 Mon Sep 17 00:00:00 2001 From: Anthony Vanelverdinghe Date: Wed, 12 Aug 2020 17:53:19 +0200 Subject: [PATCH 20/30] Fix typos --- .../htmlparser/io/HtmlInputStreamReader.java | 2 +- .../validator/htmlparser/xom/HtmlBuilder.java | 22 +++++++++---------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/htmlparser/src/main/java/nu/validator/htmlparser/io/HtmlInputStreamReader.java b/htmlparser/src/main/java/nu/validator/htmlparser/io/HtmlInputStreamReader.java index 7b7dbff3..493c6879 100644 --- a/htmlparser/src/main/java/nu/validator/htmlparser/io/HtmlInputStreamReader.java +++ b/htmlparser/src/main/java/nu/validator/htmlparser/io/HtmlInputStreamReader.java @@ -46,7 +46,7 @@ import org.xml.sax.SAXParseException; /** - * Be very careful with this class. It is not a general-purpose subclass of of + * Be very careful with this class. It is not a general-purpose subclass of * Reader. Instead, it is the minimal implementation that does * what Tokenizer needs while being an instance of * Reader. 
diff --git a/xom/src/main/java/nu/validator/htmlparser/xom/HtmlBuilder.java b/xom/src/main/java/nu/validator/htmlparser/xom/HtmlBuilder.java index 4b822b06..cb2d01c8 100644 --- a/xom/src/main/java/nu/validator/htmlparser/xom/HtmlBuilder.java +++ b/xom/src/main/java/nu/validator/htmlparser/xom/HtmlBuilder.java @@ -233,7 +233,7 @@ private void tokenize(InputSource is) throws ParsingException, IOException, * @param is the InputSource * @return the document * @throws ParsingException in case of an XML violation - * @throws IOException if IO goes wrang + * @throws IOException if IO goes wrong */ public Document build(InputSource is) throws ParsingException, IOException { lazyInit(); @@ -249,7 +249,7 @@ public Document build(InputSource is) throws ParsingException, IOException { * @param context the name of the context element (HTML namespace assumed) * @return the fragment * @throws ParsingException in case of an XML violation - * @throws IOException if IO goes wrang + * @throws IOException if IO goes wrong */ public Nodes buildFragment(InputSource is, String context) throws IOException, ParsingException { @@ -263,10 +263,10 @@ public Nodes buildFragment(InputSource is, String context) * Parse a fragment from SAX InputSource. 
* @param is the InputSource * @param contextLocal the local name of the context element - * @parem contextNamespace the namespace of the context element + * @param contextNamespace the namespace of the context element * @return the fragment * @throws ParsingException in case of an XML violation - * @throws IOException if IO goes wrang + * @throws IOException if IO goes wrong */ public Nodes buildFragment(InputSource is, String contextLocal, String contextNamespace) throws IOException, ParsingException { @@ -281,7 +281,7 @@ public Nodes buildFragment(InputSource is, String contextLocal, String contextNa * @param file the file * @return the document * @throws ParsingException in case of an XML violation - * @throws IOException if IO goes wrang + * @throws IOException if IO goes wrong * @see nu.xom.Builder#build(java.io.File) */ @Override @@ -296,7 +296,7 @@ public Document build(File file) throws ParsingException, * @param uri the base URI * @return the document * @throws ParsingException in case of an XML violation - * @throws IOException if IO goes wrang + * @throws IOException if IO goes wrong * @see nu.xom.Builder#build(java.io.InputStream, java.lang.String) */ @Override @@ -312,7 +312,7 @@ public Document build(InputStream stream, String uri) * @param stream the stream * @return the document * @throws ParsingException in case of an XML violation - * @throws IOException if IO goes wrang + * @throws IOException if IO goes wrong * @see nu.xom.Builder#build(java.io.InputStream) */ @Override @@ -327,7 +327,7 @@ public Document build(InputStream stream) throws ParsingException, * @param uri the base URI * @return the document * @throws ParsingException in case of an XML violation - * @throws IOException if IO goes wrang + * @throws IOException if IO goes wrong * @see nu.xom.Builder#build(java.io.Reader, java.lang.String) */ @Override @@ -343,7 +343,7 @@ public Document build(Reader stream, String uri) throws ParsingException, * @param stream the reader * @return the 
document * @throws ParsingException in case of an XML violation - * @throws IOException if IO goes wrang + * @throws IOException if IO goes wrong * @see nu.xom.Builder#build(java.io.Reader) */ @Override @@ -358,7 +358,7 @@ public Document build(Reader stream) throws ParsingException, * @param uri the base URI * @return the document * @throws ParsingException in case of an XML violation - * @throws IOException if IO goes wrang + * @throws IOException if IO goes wrong * @see nu.xom.Builder#build(java.lang.String, java.lang.String) */ @Override @@ -372,7 +372,7 @@ public Document build(String content, String uri) throws ParsingException, * @param uri the URI of the document * @return the document * @throws ParsingException in case of an XML violation - * @throws IOException if IO goes wrang + * @throws IOException if IO goes wrong * @see nu.xom.Builder#build(java.lang.String) */ @Override From 5304a2c0b2e76c1ea00a2bb47b362f9cd2def13e Mon Sep 17 00:00:00 2001 From: Anthony Vanelverdinghe Date: Thu, 23 Apr 2020 19:59:01 +0200 Subject: [PATCH 21/30] Fix some lint warnings --- .../java/nu/validator/htmlparser/dom/HtmlDocumentBuilder.java | 2 ++ .../nu/validator/htmlparser/extra/NormalizationChecker.java | 1 - .../src/main/java/nu/validator/htmlparser/sax/HtmlParser.java | 3 +++ xom/src/main/java/nu/validator/htmlparser/xom/HtmlBuilder.java | 2 ++ 4 files changed, 7 insertions(+), 1 deletion(-) diff --git a/htmlparser/src/main/java/nu/validator/htmlparser/dom/HtmlDocumentBuilder.java b/htmlparser/src/main/java/nu/validator/htmlparser/dom/HtmlDocumentBuilder.java index 4dbe6613..cbeccb51 100644 --- a/htmlparser/src/main/java/nu/validator/htmlparser/dom/HtmlDocumentBuilder.java +++ b/htmlparser/src/main/java/nu/validator/htmlparser/dom/HtmlDocumentBuilder.java @@ -650,6 +650,7 @@ public XmlViolationPolicy getNamePolicy() { * Does nothing. 
* @deprecated */ + @Deprecated public void setBogusXmlnsPolicy( XmlViolationPolicy bogusXmlnsPolicy) { } @@ -659,6 +660,7 @@ public void setBogusXmlnsPolicy( * @deprecated * @return XmlViolationPolicy.ALTER_INFOSET */ + @Deprecated public XmlViolationPolicy getBogusXmlnsPolicy() { return XmlViolationPolicy.ALTER_INFOSET; } diff --git a/htmlparser/src/main/java/nu/validator/htmlparser/extra/NormalizationChecker.java b/htmlparser/src/main/java/nu/validator/htmlparser/extra/NormalizationChecker.java index 05b6598a..973b9698 100644 --- a/htmlparser/src/main/java/nu/validator/htmlparser/extra/NormalizationChecker.java +++ b/htmlparser/src/main/java/nu/validator/htmlparser/extra/NormalizationChecker.java @@ -47,7 +47,6 @@ public final class NormalizationChecker implements CharacterHandler { /** * A thread-safe set of composing characters as per Charmod Norm. */ - @SuppressWarnings("deprecation") private static final UnicodeSet COMPOSING_CHARACTERS = (UnicodeSet) new UnicodeSet( "[[:nfc_qc=maybe:][:^ccc=0:]]").freeze(); diff --git a/htmlparser/src/main/java/nu/validator/htmlparser/sax/HtmlParser.java b/htmlparser/src/main/java/nu/validator/htmlparser/sax/HtmlParser.java index 8b66cd76..c7751761 100644 --- a/htmlparser/src/main/java/nu/validator/htmlparser/sax/HtmlParser.java +++ b/htmlparser/src/main/java/nu/validator/htmlparser/sax/HtmlParser.java @@ -552,6 +552,7 @@ public void setTransitionHandler(TransitionHandler handler) { * @see org.xml.sax.XMLReader#setErrorHandler(org.xml.sax.ErrorHandler) * @deprecated For Validator.nu internal use */ + @Deprecated public void setTreeBuilderErrorHandlerOverride(ErrorHandler handler) { treeBuilderErrorHandler = handler; if (driver != null) { @@ -1006,6 +1007,7 @@ public XmlViolationPolicy getNamePolicy() { * Does nothing. 
* @deprecated */ + @Deprecated public void setBogusXmlnsPolicy( XmlViolationPolicy bogusXmlnsPolicy) { } @@ -1015,6 +1017,7 @@ public void setBogusXmlnsPolicy( * @deprecated * @return XmlViolationPolicy.ALTER_INFOSET */ + @Deprecated public XmlViolationPolicy getBogusXmlnsPolicy() { return XmlViolationPolicy.ALTER_INFOSET; } diff --git a/xom/src/main/java/nu/validator/htmlparser/xom/HtmlBuilder.java b/xom/src/main/java/nu/validator/htmlparser/xom/HtmlBuilder.java index cb2d01c8..2dcb1d76 100644 --- a/xom/src/main/java/nu/validator/htmlparser/xom/HtmlBuilder.java +++ b/xom/src/main/java/nu/validator/htmlparser/xom/HtmlBuilder.java @@ -687,6 +687,7 @@ public XmlViolationPolicy getNamePolicy() { * Does nothing. * @deprecated */ + @Deprecated public void setBogusXmlnsPolicy( XmlViolationPolicy bogusXmlnsPolicy) { } @@ -696,6 +697,7 @@ public void setBogusXmlnsPolicy( * @deprecated * @return XmlViolationPolicy.ALTER_INFOSET */ + @Deprecated public XmlViolationPolicy getBogusXmlnsPolicy() { return XmlViolationPolicy.ALTER_INFOSET; } From 653a769fce9e8d1c77f0d214103234db207ac2d5 Mon Sep 17 00:00:00 2001 From: Anthony Vanelverdinghe Date: Thu, 23 Apr 2020 19:59:04 +0200 Subject: [PATCH 22/30] Suppress remaining lint warnings --- .../impl/ErrorReportingTokenizer.java | 3 +++ .../htmlparser/impl/HtmlAttributes.java | 1 + .../validator/htmlparser/impl/MetaScanner.java | 1 + .../validator/htmlparser/impl/Tokenizer.java | 1 + .../validator/htmlparser/impl/TreeBuilder.java | 18 +++++++++++------- .../nu/validator/htmlparser/io/Driver.java | 1 + 6 files changed, 18 insertions(+), 7 deletions(-) diff --git a/htmlparser/src/main/java/nu/validator/htmlparser/impl/ErrorReportingTokenizer.java b/htmlparser/src/main/java/nu/validator/htmlparser/impl/ErrorReportingTokenizer.java index a8c11ef2..4c41e33a 100644 --- a/htmlparser/src/main/java/nu/validator/htmlparser/impl/ErrorReportingTokenizer.java +++ b/htmlparser/src/main/java/nu/validator/htmlparser/impl/ErrorReportingTokenizer.java 
@@ -225,6 +225,7 @@ public boolean isNextCharOnNewLine() { cstart = 0x7fffffff; } + @SuppressWarnings("fallthrough") @Override protected char checkChar(@NoLength char[] buf, int pos) throws SAXException { linePrev = line; @@ -564,6 +565,7 @@ private boolean isAstralPrivateUse(int c) { } } + @SuppressWarnings("fallthrough") @Override protected char errNcrNonCharacter(char ch) throws SAXException { switch (contentNonXmlCharPolicy) { case FATAL: @@ -592,6 +594,7 @@ private boolean isAstralPrivateUse(int c) { err("Character reference expands to a surrogate."); } + @SuppressWarnings("fallthrough") @Override protected char errNcrControlChar(char ch) throws SAXException { switch (contentNonXmlCharPolicy) { case FATAL: diff --git a/htmlparser/src/main/java/nu/validator/htmlparser/impl/HtmlAttributes.java b/htmlparser/src/main/java/nu/validator/htmlparser/impl/HtmlAttributes.java index 7f747c3a..f34cfb9a 100644 --- a/htmlparser/src/main/java/nu/validator/htmlparser/impl/HtmlAttributes.java +++ b/htmlparser/src/main/java/nu/validator/htmlparser/impl/HtmlAttributes.java @@ -482,6 +482,7 @@ public boolean equalsAnother(HtmlAttributes other) { return true; } + @SuppressWarnings("fallthrough") void processNonNcNames(TreeBuilder treeBuilder, XmlViolationPolicy namePolicy) throws SAXException { for (int i = 0; i < length; i++) { AttributeName attName = names[i]; diff --git a/htmlparser/src/main/java/nu/validator/htmlparser/impl/MetaScanner.java b/htmlparser/src/main/java/nu/validator/htmlparser/impl/MetaScanner.java index a1c8eb80..10c24e40 100644 --- a/htmlparser/src/main/java/nu/validator/htmlparser/impl/MetaScanner.java +++ b/htmlparser/src/main/java/nu/validator/htmlparser/impl/MetaScanner.java @@ -208,6 +208,7 @@ protected int read() throws IOException { /** * The runs the meta scanning algorithm. 
*/ + @SuppressWarnings("fallthrough") protected final void stateLoop(int state) throws SAXException, IOException { int c = -1; diff --git a/htmlparser/src/main/java/nu/validator/htmlparser/impl/Tokenizer.java b/htmlparser/src/main/java/nu/validator/htmlparser/impl/Tokenizer.java index 345cef28..db6257e1 100644 --- a/htmlparser/src/main/java/nu/validator/htmlparser/impl/Tokenizer.java +++ b/htmlparser/src/main/java/nu/validator/htmlparser/impl/Tokenizer.java @@ -66,6 +66,7 @@ * @version $Id$ * @author hsivonen */ +@SuppressWarnings("fallthrough") public class Tokenizer implements Locator2 { private static final int DATA_AND_RCDATA_MASK = ~1; diff --git a/htmlparser/src/main/java/nu/validator/htmlparser/impl/TreeBuilder.java b/htmlparser/src/main/java/nu/validator/htmlparser/impl/TreeBuilder.java index 2d15ca57..475cf626 100644 --- a/htmlparser/src/main/java/nu/validator/htmlparser/impl/TreeBuilder.java +++ b/htmlparser/src/main/java/nu/validator/htmlparser/impl/TreeBuilder.java @@ -577,7 +577,7 @@ final void warn(String message, Locator locator) throws SAXException { // ]NOCPP] - @SuppressWarnings("unchecked") public final void startTokenization(Tokenizer self) throws SAXException { + @SuppressWarnings({"rawtypes", "unchecked"}) public final void startTokenization(Tokenizer self) throws SAXException { tokenizer = self; stackNodes = new StackNode[64]; stack = new StackNode[64]; @@ -844,6 +844,7 @@ public final void comment(@NoLength char[] buf, int start, int length) * @see nu.validator.htmlparser.common.TokenHandler#characters(char[], int, * int) */ + @SuppressWarnings("fallthrough") public final void characters(@Const @NoLength char[] buf, int start, int length) throws SAXException { // Note: Can't attach error messages to EOF in C++ yet @@ -1241,6 +1242,7 @@ public void zeroOriginatingReplacementCharacter() throws SAXException { } } + @SuppressWarnings("fallthrough") public final void eof() throws SAXException { flushCharacters(); // Note: Can't attach error 
messages to EOF in C++ yet @@ -1448,6 +1450,7 @@ public final void endTokenization() throws SAXException { end(); } + @SuppressWarnings("fallthrough") public final void startTag(ElementName elementName, HtmlAttributes attributes, boolean selfClosing) throws SAXException { flushCharacters(); @@ -3093,6 +3096,7 @@ private void checkMetaCharset(HtmlAttributes attributes) } } + @SuppressWarnings("fallthrough") public final void endTag(ElementName elementName) throws SAXException { flushCharacters(); needToDropLF = false; @@ -4290,7 +4294,7 @@ private void pushTemplateMode(int mode) { templateModeStack[templateModePtr] = mode; } - @SuppressWarnings("unchecked") private void push(StackNode node) throws SAXException { + @SuppressWarnings({"rawtypes", "unchecked"}) private void push(StackNode node) throws SAXException { currentPtr++; if (currentPtr == stack.length) { StackNode[] newStack = new StackNode[stack.length + 64]; @@ -4301,7 +4305,7 @@ private void pushTemplateMode(int mode) { elementPushed(node.ns, node.popName, node.node); } - @SuppressWarnings("unchecked") private void silentPush(StackNode node) throws SAXException { + @SuppressWarnings({"rawtypes", "unchecked"}) private void silentPush(StackNode node) throws SAXException { currentPtr++; if (currentPtr == stack.length) { StackNode[] newStack = new StackNode[stack.length + 64]; @@ -4311,7 +4315,7 @@ private void pushTemplateMode(int mode) { stack[currentPtr] = node; } - @SuppressWarnings("unchecked") private void append(StackNode node) { + @SuppressWarnings({"rawtypes", "unchecked"}) private void append(StackNode node) { listPtr++; if (listPtr == listOfActiveFormattingElements.length) { StackNode[] newList = new StackNode[listOfActiveFormattingElements.length + 64]; @@ -4785,7 +4789,7 @@ void notifyUnusedStackNode(int idxInStackNodes) { } } - @SuppressWarnings("unchecked") private StackNode getUnusedStackNode() { + @SuppressWarnings({"rawtypes", "unchecked"}) private StackNode getUnusedStackNode() { // Search 
for an unused stack node. while (stackNodesIdx < numStackNodes) { if (stackNodes[stackNodesIdx].isUnused()) { @@ -5869,7 +5873,7 @@ private boolean charBufferContainsNonWhitespace() { * @return a snapshot. * @throws SAXException */ - @SuppressWarnings("unchecked") public TreeBuilderState newSnapshot() + @SuppressWarnings({"rawtypes", "unchecked"}) public TreeBuilderState newSnapshot() throws SAXException { StackNode[] listCopy = new StackNode[listPtr + 1]; for (int i = 0; i < listCopy.length; i++) { @@ -5963,7 +5967,7 @@ public boolean snapshotMatches(TreeBuilderState snapshot) { return true; } - @SuppressWarnings("unchecked") public void loadState( + @SuppressWarnings({"rawtypes", "unchecked"}) public void loadState( TreeBuilderState snapshot) throws SAXException { // CPPONLY: mCurrentHtmlScriptIsAsyncOrDefer = false; diff --git a/htmlparser/src/main/java/nu/validator/htmlparser/io/Driver.java b/htmlparser/src/main/java/nu/validator/htmlparser/io/Driver.java index aa2354c9..0c971eef 100644 --- a/htmlparser/src/main/java/nu/validator/htmlparser/io/Driver.java +++ b/htmlparser/src/main/java/nu/validator/htmlparser/io/Driver.java @@ -497,6 +497,7 @@ protected Encoding whineAboutEncodingAndReturnActual(String encoding, } } + @SuppressWarnings("serial") private class ReparseException extends SAXException { } From 751136787b338c529098719a85700e1af1980c78 Mon Sep 17 00:00:00 2001 From: Anthony Vanelverdinghe Date: Thu, 23 Apr 2020 19:59:13 +0200 Subject: [PATCH 23/30] Remove unused imports --- .../java/nu/validator/htmlparser/impl/AttributeName.java | 5 ----- .../java/nu/validator/htmlparser/impl/HtmlAttributes.java | 1 - .../main/java/nu/validator/htmlparser/impl/TreeBuilder.java | 2 -- .../nu/validator/htmlparser/io/HtmlInputStreamReader.java | 1 - 4 files changed, 9 deletions(-) diff --git a/htmlparser/src/main/java/nu/validator/htmlparser/impl/AttributeName.java b/htmlparser/src/main/java/nu/validator/htmlparser/impl/AttributeName.java index ca748713..1f8ce5e4 100644 
--- a/htmlparser/src/main/java/nu/validator/htmlparser/impl/AttributeName.java +++ b/htmlparser/src/main/java/nu/validator/htmlparser/impl/AttributeName.java @@ -22,11 +22,6 @@ package nu.validator.htmlparser.impl; -import java.util.Arrays; -import java.util.Collections; -import java.util.LinkedList; -import java.util.List; - import nu.validator.htmlparser.annotation.Inline; import nu.validator.htmlparser.annotation.Local; import nu.validator.htmlparser.annotation.NoLength; diff --git a/htmlparser/src/main/java/nu/validator/htmlparser/impl/HtmlAttributes.java b/htmlparser/src/main/java/nu/validator/htmlparser/impl/HtmlAttributes.java index f34cfb9a..5401f233 100644 --- a/htmlparser/src/main/java/nu/validator/htmlparser/impl/HtmlAttributes.java +++ b/htmlparser/src/main/java/nu/validator/htmlparser/impl/HtmlAttributes.java @@ -31,7 +31,6 @@ import nu.validator.htmlparser.annotation.NsUri; import nu.validator.htmlparser.annotation.Prefix; import nu.validator.htmlparser.annotation.QName; -import nu.validator.htmlparser.common.Interner; import nu.validator.htmlparser.common.XmlViolationPolicy; /** diff --git a/htmlparser/src/main/java/nu/validator/htmlparser/impl/TreeBuilder.java b/htmlparser/src/main/java/nu/validator/htmlparser/impl/TreeBuilder.java index 475cf626..21c2469c 100644 --- a/htmlparser/src/main/java/nu/validator/htmlparser/impl/TreeBuilder.java +++ b/htmlparser/src/main/java/nu/validator/htmlparser/impl/TreeBuilder.java @@ -35,7 +35,6 @@ package nu.validator.htmlparser.impl; -import java.util.Arrays; import java.util.HashMap; import java.util.Map; @@ -54,7 +53,6 @@ import nu.validator.htmlparser.annotation.NsUri; import nu.validator.htmlparser.common.DocumentMode; import nu.validator.htmlparser.common.DocumentModeHandler; -import nu.validator.htmlparser.common.Interner; import nu.validator.htmlparser.common.TokenHandler; import nu.validator.htmlparser.common.XmlViolationPolicy; diff --git 
a/htmlparser/src/main/java/nu/validator/htmlparser/io/HtmlInputStreamReader.java b/htmlparser/src/main/java/nu/validator/htmlparser/io/HtmlInputStreamReader.java index 493c6879..c240bc3c 100644 --- a/htmlparser/src/main/java/nu/validator/htmlparser/io/HtmlInputStreamReader.java +++ b/htmlparser/src/main/java/nu/validator/htmlparser/io/HtmlInputStreamReader.java @@ -35,7 +35,6 @@ import nu.validator.htmlparser.common.ByteReadable; import nu.validator.htmlparser.common.Heuristics; -import nu.validator.htmlparser.common.XmlViolationPolicy; import nu.validator.htmlparser.extra.ChardetSniffer; import nu.validator.htmlparser.extra.IcuDetectorSniffer; import nu.validator.htmlparser.impl.Tokenizer; From eabf3dd2bc28238b7d671fe54360c1c212d16589 Mon Sep 17 00:00:00 2001 From: Anthony Vanelverdinghe Date: Thu, 23 Apr 2020 19:59:15 +0200 Subject: [PATCH 24/30] Upgrade to a module-aware Java SE version This sets the default language level to Java SE 11. However, the main sources (except for module-info.java) are still compiled with Java SE 8. 
--- htmlparser/pom.xml | 4 ++-- .../htmlparser/extra/NormalizationChecker.java | 10 +++++----- pom.xml | 17 +++++++++++++++-- xom/pom.xml | 2 +- 4 files changed, 23 insertions(+), 10 deletions(-) diff --git a/htmlparser/pom.xml b/htmlparser/pom.xml index e6364e4c..08b0d378 100644 --- a/htmlparser/pom.xml +++ b/htmlparser/pom.xml @@ -41,7 +41,7 @@ com.ibm.icu icu4j - 4.0.1 + 67.1 true @@ -53,7 +53,7 @@ com.sdicons.jsontools jsontools-core - 1.4 + 1.7 test diff --git a/htmlparser/src/main/java/nu/validator/htmlparser/extra/NormalizationChecker.java b/htmlparser/src/main/java/nu/validator/htmlparser/extra/NormalizationChecker.java index 973b9698..9adc4a17 100644 --- a/htmlparser/src/main/java/nu/validator/htmlparser/extra/NormalizationChecker.java +++ b/htmlparser/src/main/java/nu/validator/htmlparser/extra/NormalizationChecker.java @@ -31,7 +31,7 @@ import org.xml.sax.SAXParseException; import com.ibm.icu.lang.UCharacter; -import com.ibm.icu.text.Normalizer; +import com.ibm.icu.text.Normalizer2; import com.ibm.icu.text.UnicodeSet; /** @@ -47,7 +47,7 @@ public final class NormalizationChecker implements CharacterHandler { /** * A thread-safe set of composing characters as per Charmod Norm. 
*/ - private static final UnicodeSet COMPOSING_CHARACTERS = (UnicodeSet) new UnicodeSet( + private static final UnicodeSet COMPOSING_CHARACTERS = new UnicodeSet( "[[:nfc_qc=maybe:][:^ccc=0:]]").freeze(); // see http://sourceforge.net/mailarchive/message.php?msg_id=37279908 @@ -186,7 +186,7 @@ public void characters(char[] ch, int start, int length) if (i == stop) { return; } else { - if (!Normalizer.isNormalized(buf, 0, pos, Normalizer.NFC, 0)) { + if (!Normalizer2.getNFCInstance().isNormalized(new String(buf, 0, pos))) { errAboutTextRun(); } pos = 0; @@ -199,7 +199,7 @@ public void characters(char[] ch, int start, int length) i--; } if (i > start) { - if (!Normalizer.isNormalized(ch, start, i, Normalizer.NFC, 0)) { + if (!Normalizer2.getNFCInstance().isNormalized(new String(ch, start, i))) { errAboutTextRun(); } } @@ -248,7 +248,7 @@ private void appendToBuf(char[] ch, int start, int end) { */ public void end() throws SAXException { if (!alreadyComplainedAboutThisRun - && !Normalizer.isNormalized(buf, 0, pos, Normalizer.NFC, 0)) { + && !Normalizer2.getNFCInstance().isNormalized(new String(buf, 0, pos))) { errAboutTextRun(); } if (bufHolder != null) { diff --git a/pom.xml b/pom.xml index d2963348..bd683ef2 100644 --- a/pom.xml +++ b/pom.xml @@ -82,8 +82,7 @@
    - 1.5 - 1.5 + 11 true UTF-8 @@ -118,6 +117,20 @@ maven-compiler-plugin 3.8.1 + + + base-compile + + compile + + + 8 + + module-info.java + + + + maven-javadoc-plugin diff --git a/xom/pom.xml b/xom/pom.xml index 7f46fec0..4a12ff07 100644 --- a/xom/pom.xml +++ b/xom/pom.xml @@ -41,7 +41,7 @@ xom xom - 1.1 + 1.3.5 From f2c31263d7e4f299b990b1f8355ecd1a83e4a03a Mon Sep 17 00:00:00 2001 From: Anthony Vanelverdinghe Date: Wed, 12 Aug 2020 17:54:39 +0200 Subject: [PATCH 25/30] Modularize --- htmlparser/src/main/java/module-info.java | 43 +++++++++++++++++++ .../htmlparser/extra/ChardetSniffer.java | 1 + .../htmlparser/impl/StateSnapshot.java | 1 + .../htmlparser/impl/TreeBuilder.java | 1 + .../htmlparser/impl/TreeBuilderState.java | 1 + saxtree/src/main/java/module-info.java | 30 +++++++++++++ xom/pom.xml | 6 +++ xom/src/main/java/module-info.java | 35 +++++++++++++++ 8 files changed, 118 insertions(+) create mode 100644 htmlparser/src/main/java/module-info.java create mode 100644 saxtree/src/main/java/module-info.java create mode 100644 xom/src/main/java/module-info.java diff --git a/htmlparser/src/main/java/module-info.java b/htmlparser/src/main/java/module-info.java new file mode 100644 index 00000000..d2f6b24f --- /dev/null +++ b/htmlparser/src/main/java/module-info.java @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2020 Anthony Vanelverdinghe + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * Provides an implementation of the HTML5 parsing algorithm in Java for applications. + * The parser is designed to work as a drop-in replacement for the XML parser in applications + * that already support XHTML 1.x content with an XML parser and use SAX or DOM to interface with the parser. + */ +@SuppressWarnings("requires-automatic") +module nu.validator.htmlparser { + requires transitive java.xml; + requires nu.validator.saxtree; + requires static com.ibm.icu; + requires static jchardet; + + exports nu.validator.htmlparser.annotation; + exports nu.validator.htmlparser.common; + exports nu.validator.htmlparser.dom; + exports nu.validator.htmlparser.extra; + exports nu.validator.htmlparser.impl; + exports nu.validator.htmlparser.io; + exports nu.validator.htmlparser.rewindable; + exports nu.validator.htmlparser.sax; +} diff --git a/htmlparser/src/main/java/nu/validator/htmlparser/extra/ChardetSniffer.java b/htmlparser/src/main/java/nu/validator/htmlparser/extra/ChardetSniffer.java index a7575039..8153b4fa 100644 --- a/htmlparser/src/main/java/nu/validator/htmlparser/extra/ChardetSniffer.java +++ b/htmlparser/src/main/java/nu/validator/htmlparser/extra/ChardetSniffer.java @@ -33,6 +33,7 @@ import com.ibm.icu.text.CharsetDetector; +@SuppressWarnings("exports") public class ChardetSniffer implements nsICharsetDetectionObserver { private final byte[] source; diff --git a/htmlparser/src/main/java/nu/validator/htmlparser/impl/StateSnapshot.java 
b/htmlparser/src/main/java/nu/validator/htmlparser/impl/StateSnapshot.java index cba711f7..f6c3359a 100644 --- a/htmlparser/src/main/java/nu/validator/htmlparser/impl/StateSnapshot.java +++ b/htmlparser/src/main/java/nu/validator/htmlparser/impl/StateSnapshot.java @@ -25,6 +25,7 @@ import nu.validator.htmlparser.annotation.Auto; +@SuppressWarnings("exports") public class StateSnapshot implements TreeBuilderState { private final @Auto StackNode[] stack; diff --git a/htmlparser/src/main/java/nu/validator/htmlparser/impl/TreeBuilder.java b/htmlparser/src/main/java/nu/validator/htmlparser/impl/TreeBuilder.java index 21c2469c..784ab921 100644 --- a/htmlparser/src/main/java/nu/validator/htmlparser/impl/TreeBuilder.java +++ b/htmlparser/src/main/java/nu/validator/htmlparser/impl/TreeBuilder.java @@ -56,6 +56,7 @@ import nu.validator.htmlparser.common.TokenHandler; import nu.validator.htmlparser.common.XmlViolationPolicy; +@SuppressWarnings("exports") public abstract class TreeBuilder implements TokenHandler, TreeBuilderState { diff --git a/htmlparser/src/main/java/nu/validator/htmlparser/impl/TreeBuilderState.java b/htmlparser/src/main/java/nu/validator/htmlparser/impl/TreeBuilderState.java index 5b7a9dcf..42c019fc 100644 --- a/htmlparser/src/main/java/nu/validator/htmlparser/impl/TreeBuilderState.java +++ b/htmlparser/src/main/java/nu/validator/htmlparser/impl/TreeBuilderState.java @@ -29,6 +29,7 @@ * @version $Id$ * @author hsivonen */ +@SuppressWarnings("exports") public interface TreeBuilderState { /** diff --git a/saxtree/src/main/java/module-info.java b/saxtree/src/main/java/module-info.java new file mode 100644 index 00000000..077e6379 --- /dev/null +++ b/saxtree/src/main/java/module-info.java @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2020 Anthony Vanelverdinghe + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, 
including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * Provides SAX Tree: a tree model optimized for creation from SAX events and replay as SAX events. + */ +module nu.validator.saxtree { + requires transitive java.xml; + + exports nu.validator.saxtree; +} diff --git a/xom/pom.xml b/xom/pom.xml index 4a12ff07..eab9c5c1 100644 --- a/xom/pom.xml +++ b/xom/pom.xml @@ -42,6 +42,12 @@ xom xom 1.3.5 + + + xml-apis + xml-apis + + diff --git a/xom/src/main/java/module-info.java b/xom/src/main/java/module-info.java new file mode 100644 index 00000000..b42cd04f --- /dev/null +++ b/xom/src/main/java/module-info.java @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2020 Anthony Vanelverdinghe + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The 
above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * Provides an implementation of the HTML5 parsing algorithm in Java for applications. + * The parser is designed to work as a drop-in replacement for the XML parser in applications + * that already support XHTML 1.x content with an XML parser and use XOM to interface with the parser. + */ +@SuppressWarnings({"requires-automatic", "requires-transitive-automatic"}) +module nu.validator.htmlparser.xom { + requires transitive java.xml; + requires transitive nu.xom; + requires transitive nu.validator.htmlparser; + + exports nu.validator.htmlparser.xom; +} From 283955bb673f656ae1953a14ef3bf6659f631e3a Mon Sep 17 00:00:00 2001 From: Anthony Vanelverdinghe Date: Thu, 23 Apr 2020 19:59:32 +0200 Subject: [PATCH 26/30] Rename package.html to package-info.java --- .../htmlparser/annotation/{package.html => package-info.java} | 0 .../htmlparser/common/{package.html => package-info.java} | 0 .../validator/htmlparser/dom/{package.html => package-info.java} | 0 .../validator/htmlparser/impl/{package.html => package-info.java} | 0 .../validator/htmlparser/sax/{package.html => package-info.java} | 0 .../validator/htmlparser/test/{package.html => package-info.java} | 0 .../nu/validator/saxtree/test/{package.html => package-info.java} | 0 .../java/nu/validator/saxtree/{package.html => package-info.java} | 0 .../validator/htmlparser/xom/{package.html => 
package-info.java} | 0 9 files changed, 0 insertions(+), 0 deletions(-) rename htmlparser/src/main/java/nu/validator/htmlparser/annotation/{package.html => package-info.java} (100%) rename htmlparser/src/main/java/nu/validator/htmlparser/common/{package.html => package-info.java} (100%) rename htmlparser/src/main/java/nu/validator/htmlparser/dom/{package.html => package-info.java} (100%) rename htmlparser/src/main/java/nu/validator/htmlparser/impl/{package.html => package-info.java} (100%) rename htmlparser/src/main/java/nu/validator/htmlparser/sax/{package.html => package-info.java} (100%) rename htmlparser/src/test/java/nu/validator/htmlparser/test/{package.html => package-info.java} (100%) rename htmlparser/src/test/java/nu/validator/saxtree/test/{package.html => package-info.java} (100%) rename saxtree/src/main/java/nu/validator/saxtree/{package.html => package-info.java} (100%) rename xom/src/main/java/nu/validator/htmlparser/xom/{package.html => package-info.java} (100%) diff --git a/htmlparser/src/main/java/nu/validator/htmlparser/annotation/package.html b/htmlparser/src/main/java/nu/validator/htmlparser/annotation/package-info.java similarity index 100% rename from htmlparser/src/main/java/nu/validator/htmlparser/annotation/package.html rename to htmlparser/src/main/java/nu/validator/htmlparser/annotation/package-info.java diff --git a/htmlparser/src/main/java/nu/validator/htmlparser/common/package.html b/htmlparser/src/main/java/nu/validator/htmlparser/common/package-info.java similarity index 100% rename from htmlparser/src/main/java/nu/validator/htmlparser/common/package.html rename to htmlparser/src/main/java/nu/validator/htmlparser/common/package-info.java diff --git a/htmlparser/src/main/java/nu/validator/htmlparser/dom/package.html b/htmlparser/src/main/java/nu/validator/htmlparser/dom/package-info.java similarity index 100% rename from htmlparser/src/main/java/nu/validator/htmlparser/dom/package.html rename to 
htmlparser/src/main/java/nu/validator/htmlparser/dom/package-info.java diff --git a/htmlparser/src/main/java/nu/validator/htmlparser/impl/package.html b/htmlparser/src/main/java/nu/validator/htmlparser/impl/package-info.java similarity index 100% rename from htmlparser/src/main/java/nu/validator/htmlparser/impl/package.html rename to htmlparser/src/main/java/nu/validator/htmlparser/impl/package-info.java diff --git a/htmlparser/src/main/java/nu/validator/htmlparser/sax/package.html b/htmlparser/src/main/java/nu/validator/htmlparser/sax/package-info.java similarity index 100% rename from htmlparser/src/main/java/nu/validator/htmlparser/sax/package.html rename to htmlparser/src/main/java/nu/validator/htmlparser/sax/package-info.java diff --git a/htmlparser/src/test/java/nu/validator/htmlparser/test/package.html b/htmlparser/src/test/java/nu/validator/htmlparser/test/package-info.java similarity index 100% rename from htmlparser/src/test/java/nu/validator/htmlparser/test/package.html rename to htmlparser/src/test/java/nu/validator/htmlparser/test/package-info.java diff --git a/htmlparser/src/test/java/nu/validator/saxtree/test/package.html b/htmlparser/src/test/java/nu/validator/saxtree/test/package-info.java similarity index 100% rename from htmlparser/src/test/java/nu/validator/saxtree/test/package.html rename to htmlparser/src/test/java/nu/validator/saxtree/test/package-info.java diff --git a/saxtree/src/main/java/nu/validator/saxtree/package.html b/saxtree/src/main/java/nu/validator/saxtree/package-info.java similarity index 100% rename from saxtree/src/main/java/nu/validator/saxtree/package.html rename to saxtree/src/main/java/nu/validator/saxtree/package-info.java diff --git a/xom/src/main/java/nu/validator/htmlparser/xom/package.html b/xom/src/main/java/nu/validator/htmlparser/xom/package-info.java similarity index 100% rename from xom/src/main/java/nu/validator/htmlparser/xom/package.html rename to 
xom/src/main/java/nu/validator/htmlparser/xom/package-info.java From 9376c4f302e55405e698597fb9aa432741058a10 Mon Sep 17 00:00:00 2001 From: Anthony Vanelverdinghe Date: Thu, 23 Apr 2020 19:59:36 +0200 Subject: [PATCH 27/30] Fix package Javadoc after rename --- .../htmlparser/annotation/package-info.java | 19 +++---- .../htmlparser/common/package-info.java | 17 +++--- .../htmlparser/dom/package-info.java | 17 +++--- .../htmlparser/impl/package-info.java | 19 +++---- .../htmlparser/sax/package-info.java | 17 +++--- .../htmlparser/test/package-info.java | 17 +++--- .../validator/saxtree/test/package-info.java | 17 +++--- .../nu/validator/saxtree/package-info.java | 48 +++++++--------- .../htmlparser/xom/package-info.java | 55 +++++++++---------- 9 files changed, 98 insertions(+), 128 deletions(-) diff --git a/htmlparser/src/main/java/nu/validator/htmlparser/annotation/package-info.java b/htmlparser/src/main/java/nu/validator/htmlparser/annotation/package-info.java index af15d382..ca70c00b 100644 --- a/htmlparser/src/main/java/nu/validator/htmlparser/annotation/package-info.java +++ b/htmlparser/src/main/java/nu/validator/htmlparser/annotation/package-info.java @@ -1,7 +1,4 @@ - - -Package Overview - - - -

    This package provides annotations for facilitating automated translation -of the source code into other programming languages.

    - - \ No newline at end of file + */ + +/** + * This package provides annotations for facilitating automated translation + * of the source code into other programming languages. + */ +package nu.validator.htmlparser.annotation; diff --git a/htmlparser/src/main/java/nu/validator/htmlparser/common/package-info.java b/htmlparser/src/main/java/nu/validator/htmlparser/common/package-info.java index 43f141cd..0d04ee70 100644 --- a/htmlparser/src/main/java/nu/validator/htmlparser/common/package-info.java +++ b/htmlparser/src/main/java/nu/validator/htmlparser/common/package-info.java @@ -1,7 +1,4 @@ - - -Package Overview - - - -

    This package provides common interfaces and enumerations.

    - - \ No newline at end of file + */ + +/** + * This package provides common interfaces and enumerations. + */ +package nu.validator.htmlparser.common; diff --git a/htmlparser/src/main/java/nu/validator/htmlparser/dom/package-info.java b/htmlparser/src/main/java/nu/validator/htmlparser/dom/package-info.java index d793bcf8..8d874fd0 100644 --- a/htmlparser/src/main/java/nu/validator/htmlparser/dom/package-info.java +++ b/htmlparser/src/main/java/nu/validator/htmlparser/dom/package-info.java @@ -1,7 +1,4 @@ - - -Package Overview - - - -

    This package provides an HTML5 parser that exposes the document using the DOM API.

    - - \ No newline at end of file + */ + +/** + * This package provides an HTML5 parser that exposes the document using the DOM API. + */ +package nu.validator.htmlparser.dom; diff --git a/htmlparser/src/main/java/nu/validator/htmlparser/impl/package-info.java b/htmlparser/src/main/java/nu/validator/htmlparser/impl/package-info.java index 6d029a13..dfa43eec 100644 --- a/htmlparser/src/main/java/nu/validator/htmlparser/impl/package-info.java +++ b/htmlparser/src/main/java/nu/validator/htmlparser/impl/package-info.java @@ -1,7 +1,4 @@ - - -Package Overview - - - -

    This package contains the bulk of parser internals. Only implementors of -additional tree builders or token handlers should look here.

    - - \ No newline at end of file + */ + +/** + * This package contains the bulk of parser internals. + * Only implementors of additional tree builders or token handlers should look here. + */ +package nu.validator.htmlparser.impl; diff --git a/htmlparser/src/main/java/nu/validator/htmlparser/sax/package-info.java b/htmlparser/src/main/java/nu/validator/htmlparser/sax/package-info.java index 60532962..7ade27d9 100644 --- a/htmlparser/src/main/java/nu/validator/htmlparser/sax/package-info.java +++ b/htmlparser/src/main/java/nu/validator/htmlparser/sax/package-info.java @@ -1,7 +1,4 @@ - - -Package Overview - - - -

    This package provides an HTML5 parser that exposes the document through the SAX API.

    - - \ No newline at end of file + */ + +/** + * This package provides an HTML5 parser that exposes the document through the SAX API. + */ +package nu.validator.htmlparser.sax; diff --git a/htmlparser/src/test/java/nu/validator/htmlparser/test/package-info.java b/htmlparser/src/test/java/nu/validator/htmlparser/test/package-info.java index 57809b84..82d43a7d 100644 --- a/htmlparser/src/test/java/nu/validator/htmlparser/test/package-info.java +++ b/htmlparser/src/test/java/nu/validator/htmlparser/test/package-info.java @@ -1,7 +1,4 @@ - - -Package Overview - - - -

    Test drivers.

    - - \ No newline at end of file + */ + +/** + * Test drivers. + */ +package nu.validator.htmlparser.test; diff --git a/htmlparser/src/test/java/nu/validator/saxtree/test/package-info.java b/htmlparser/src/test/java/nu/validator/saxtree/test/package-info.java index 57809b84..ad8ce418 100644 --- a/htmlparser/src/test/java/nu/validator/saxtree/test/package-info.java +++ b/htmlparser/src/test/java/nu/validator/saxtree/test/package-info.java @@ -1,7 +1,4 @@ - - -Package Overview - - - -

    Test drivers.

    - - \ No newline at end of file + */ + +/** + * Test drivers. + */ +package nu.validator.saxtree.test; diff --git a/saxtree/src/main/java/nu/validator/saxtree/package-info.java b/saxtree/src/main/java/nu/validator/saxtree/package-info.java index 0c34dad8..54af5a4c 100644 --- a/saxtree/src/main/java/nu/validator/saxtree/package-info.java +++ b/saxtree/src/main/java/nu/validator/saxtree/package-info.java @@ -1,7 +1,4 @@ - - -Package Overview - - - -

    This package provides SAX Tree: a tree model optimized for creation from SAX -events and replay as SAX events.

    -

    Design Principles

    -
      -
    1. Preserve information exposed through ContentHandler, -LexicalHandler and Locator. -
    2. Creation from SAX events or as part of the parse of a conforming -HTML5 document should be fast.
    3. -
    4. Emitting SAX events based on the tree should be fast.
    5. -
    6. Mutations should be possible but should not make the above -"fast" cases slower.
    7. -
    8. Concurrent reads should work without locking when there are no -concurrent mutations.
    9. -
    10. The user of the API has the responsibility of using the API properly: -for the sake of performance, the model does not check if it is being -used properly. Improper use may, therefore, put the model in and -inconsistent state.
    11. -
    - - \ No newline at end of file + */ + +/** + * This package provides SAX Tree: a tree model optimized for creation from SAX events and replay as SAX events. + * + *

    Design Principles

    + *
      + *
    1. Preserve information exposed through ContentHandler, + * LexicalHandler and Locator. + *
    2. Creation from SAX events or as part of the parse of a conforming + * HTML5 document should be fast. + *
    3. Emitting SAX events based on the tree should be fast. + *
    4. Mutations should be possible but should not make the above "fast" cases slower. + *
    5. Concurrent reads should work without locking when there are no concurrent mutations. + *
    6. The user of the API has the responsibility of using the API properly: + * for the sake of performance, the model does not check if it is being used properly. + * Improper use may, therefore, put the model in an inconsistent state. + *
    + */ +package nu.validator.saxtree; diff --git a/xom/src/main/java/nu/validator/htmlparser/xom/package-info.java b/xom/src/main/java/nu/validator/htmlparser/xom/package-info.java index a936d5e3..6fde4174 100644 --- a/xom/src/main/java/nu/validator/htmlparser/xom/package-info.java +++ b/xom/src/main/java/nu/validator/htmlparser/xom/package-info.java @@ -1,29 +1,26 @@ - - -Package Overview - - - -

    This package provides an HTML5 parser that exposes the document through the XOM API.

    - - \ No newline at end of file +/* + Copyright (c) 2007 Henri Sivonen + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the "Software"), + to deal in the Software without restriction, including without limitation + the rights to use, copy, modify, merge, publish, distribute, sublicense, + and/or sell copies of the Software, and to permit persons to whom the + Software is furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + */ + +/** + * This package provides an HTML5 parser that exposes the document through the XOM API. 
+ */ +package nu.validator.htmlparser.xom; From 2ce23df04713faf3ec3f7da8fab1af1d5a4118e2 Mon Sep 17 00:00:00 2001 From: Anthony Vanelverdinghe Date: Wed, 12 Aug 2020 17:54:40 +0200 Subject: [PATCH 28/30] Make NormalizationChecker independent of ICU4J --- .../extra/NormalizationChecker.java | 108 +++++++++++++++--- 1 file changed, 92 insertions(+), 16 deletions(-) diff --git a/htmlparser/src/main/java/nu/validator/htmlparser/extra/NormalizationChecker.java b/htmlparser/src/main/java/nu/validator/htmlparser/extra/NormalizationChecker.java index 9adc4a17..44016348 100644 --- a/htmlparser/src/main/java/nu/validator/htmlparser/extra/NormalizationChecker.java +++ b/htmlparser/src/main/java/nu/validator/htmlparser/extra/NormalizationChecker.java @@ -25,15 +25,16 @@ import nu.validator.htmlparser.common.CharacterHandler; +import java.text.Normalizer; +import java.util.Arrays; +import java.util.HashSet; +import java.util.Set; + import org.xml.sax.ErrorHandler; import org.xml.sax.Locator; import org.xml.sax.SAXException; import org.xml.sax.SAXParseException; -import com.ibm.icu.lang.UCharacter; -import com.ibm.icu.text.Normalizer2; -import com.ibm.icu.text.UnicodeSet; - /** * @version $Id$ * @author hsivonen @@ -45,10 +46,79 @@ public final class NormalizationChecker implements CharacterHandler { private Locator locator; /** - * A thread-safe set of composing characters as per Charmod Norm. + * A set of composing characters as per Charmod Norm. 
+ * + * Generated with ICU4J 67.1 using: new UnicodeSet("[[:nfc_qc=maybe:][:^ccc=0:]]").freeze() */ - private static final UnicodeSet COMPOSING_CHARACTERS = new UnicodeSet( - "[[:nfc_qc=maybe:][:^ccc=0:]]").freeze(); + private static final Set COMPOSING_CHARACTERS = new HashSet(Arrays.asList( + 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 780, 781, 782, 783, 784, 785, + 786, 787, 788, 789, 790, 791, 792, 793, 794, 795, 796, 797, 798, 799, 800, 801, 802, 803, + 804, 805, 806, 807, 808, 809, 810, 811, 812, 813, 814, 815, 816, 817, 818, 819, 820, 821, + 822, 823, 824, 825, 826, 827, 828, 829, 830, 831, 832, 833, 834, 835, 836, 837, 838, 839, + 840, 841, 842, 843, 844, 845, 846, 848, 849, 850, 851, 852, 853, 854, 855, 856, 857, 858, + 859, 860, 861, 862, 863, 864, 865, 866, 867, 868, 869, 870, 871, 872, 873, 874, 875, 876, + 877, 878, 879, 1155, 1156, 1157, 1158, 1159, 1425, 1426, 1427, 1428, 1429, 1430, 1431, 1432, + 1433, 1434, 1435, 1436, 1437, 1438, 1439, 1440, 1441, 1442, 1443, 1444, 1445, 1446, 1447, + 1448, 1449, 1450, 1451, 1452, 1453, 1454, 1455, 1456, 1457, 1458, 1459, 1460, 1461, 1462, + 1463, 1464, 1465, 1466, 1467, 1468, 1469, 1471, 1473, 1474, 1476, 1477, 1479, 1552, 1553, + 1554, 1555, 1556, 1557, 1558, 1559, 1560, 1561, 1562, 1611, 1612, 1613, 1614, 1615, 1616, + 1617, 1618, 1619, 1620, 1621, 1622, 1623, 1624, 1625, 1626, 1627, 1628, 1629, 1630, 1631, + 1648, 1750, 1751, 1752, 1753, 1754, 1755, 1756, 1759, 1760, 1761, 1762, 1763, 1764, 1767, + 1768, 1770, 1771, 1772, 1773, 1809, 1840, 1841, 1842, 1843, 1844, 1845, 1846, 1847, 1848, + 1849, 1850, 1851, 1852, 1853, 1854, 1855, 1856, 1857, 1858, 1859, 1860, 1861, 1862, 1863, + 1864, 1865, 1866, 2027, 2028, 2029, 2030, 2031, 2032, 2033, 2034, 2035, 2045, 2070, 2071, + 2072, 2073, 2075, 2076, 2077, 2078, 2079, 2080, 2081, 2082, 2083, 2085, 2086, 2087, 2089, + 2090, 2091, 2092, 2093, 2137, 2138, 2139, 2259, 2260, 2261, 2262, 2263, 2264, 2265, 2266, + 2267, 2268, 2269, 2270, 2271, 2272, 2273, 
2275, 2276, 2277, 2278, 2279, 2280, 2281, 2282, + 2283, 2284, 2285, 2286, 2287, 2288, 2289, 2290, 2291, 2292, 2293, 2294, 2295, 2296, 2297, + 2298, 2299, 2300, 2301, 2302, 2303, 2364, 2381, 2385, 2386, 2387, 2388, 2492, 2494, 2509, + 2519, 2558, 2620, 2637, 2748, 2765, 2876, 2878, 2893, 2902, 2903, 3006, 3021, 3031, 3149, + 3157, 3158, 3260, 3266, 3277, 3285, 3286, 3387, 3388, 3390, 3405, 3415, 3530, 3535, 3551, + 3640, 3641, 3642, 3656, 3657, 3658, 3659, 3768, 3769, 3770, 3784, 3785, 3786, 3787, 3864, + 3865, 3893, 3895, 3897, 3953, 3954, 3956, 3962, 3963, 3964, 3965, 3968, 3970, 3971, 3972, + 3974, 3975, 4038, 4142, 4151, 4153, 4154, 4237, 4449, 4450, 4451, 4452, 4453, 4454, 4455, + 4456, 4457, 4458, 4459, 4460, 4461, 4462, 4463, 4464, 4465, 4466, 4467, 4468, 4469, 4520, + 4521, 4522, 4523, 4524, 4525, 4526, 4527, 4528, 4529, 4530, 4531, 4532, 4533, 4534, 4535, + 4536, 4537, 4538, 4539, 4540, 4541, 4542, 4543, 4544, 4545, 4546, 4957, 4958, 4959, 5908, + 5940, 6098, 6109, 6313, 6457, 6458, 6459, 6679, 6680, 6752, 6773, 6774, 6775, 6776, 6777, + 6778, 6779, 6780, 6783, 6832, 6833, 6834, 6835, 6836, 6837, 6838, 6839, 6840, 6841, 6842, + 6843, 6844, 6845, 6847, 6848, 6964, 6965, 6980, 7019, 7020, 7021, 7022, 7023, 7024, 7025, + 7026, 7027, 7082, 7083, 7142, 7154, 7155, 7223, 7376, 7377, 7378, 7380, 7381, 7382, 7383, + 7384, 7385, 7386, 7387, 7388, 7389, 7390, 7391, 7392, 7394, 7395, 7396, 7397, 7398, 7399, + 7400, 7405, 7412, 7416, 7417, 7616, 7617, 7618, 7619, 7620, 7621, 7622, 7623, 7624, 7625, + 7626, 7627, 7628, 7629, 7630, 7631, 7632, 7633, 7634, 7635, 7636, 7637, 7638, 7639, 7640, + 7641, 7642, 7643, 7644, 7645, 7646, 7647, 7648, 7649, 7650, 7651, 7652, 7653, 7654, 7655, + 7656, 7657, 7658, 7659, 7660, 7661, 7662, 7663, 7664, 7665, 7666, 7667, 7668, 7669, 7670, + 7671, 7672, 7673, 7675, 7676, 7677, 7678, 7679, 8400, 8401, 8402, 8403, 8404, 8405, 8406, + 8407, 8408, 8409, 8410, 8411, 8412, 8417, 8421, 8422, 8423, 8424, 8425, 8426, 8427, 8428, + 8429, 8430, 8431, 
8432, 11503, 11504, 11505, 11647, 11744, 11745, 11746, 11747, 11748, 11749, + 11750, 11751, 11752, 11753, 11754, 11755, 11756, 11757, 11758, 11759, 11760, 11761, 11762, + 11763, 11764, 11765, 11766, 11767, 11768, 11769, 11770, 11771, 11772, 11773, 11774, 11775, + 12330, 12331, 12332, 12333, 12334, 12335, 12441, 12442, 42607, 42612, 42613, 42614, 42615, + 42616, 42617, 42618, 42619, 42620, 42621, 42654, 42655, 42736, 42737, 43014, 43052, 43204, + 43232, 43233, 43234, 43235, 43236, 43237, 43238, 43239, 43240, 43241, 43242, 43243, 43244, + 43245, 43246, 43247, 43248, 43249, 43307, 43308, 43309, 43347, 43443, 43456, 43696, 43698, + 43699, 43700, 43703, 43704, 43710, 43711, 43713, 43766, 44013, 64286, 65056, 65057, 65058, + 65059, 65060, 65061, 65062, 65063, 65064, 65065, 65066, 65067, 65068, 65069, 65070, 65071, + 66045, 66272, 66422, 66423, 66424, 66425, 66426, 68109, 68111, 68152, 68153, 68154, 68159, + 68325, 68326, 68900, 68901, 68902, 68903, 69291, 69292, 69446, 69447, 69448, 69449, 69450, + 69451, 69452, 69453, 69454, 69455, 69456, 69702, 69759, 69817, 69818, 69888, 69889, 69890, + 69927, 69939, 69940, 70003, 70080, 70090, 70197, 70198, 70377, 70378, 70459, 70460, 70462, + 70477, 70487, 70502, 70503, 70504, 70505, 70506, 70507, 70508, 70512, 70513, 70514, 70515, + 70516, 70722, 70726, 70750, 70832, 70842, 70845, 70850, 70851, 71087, 71103, 71104, 71231, + 71350, 71351, 71467, 71737, 71738, 71984, 71997, 71998, 72003, 72160, 72244, 72263, 72345, + 72767, 73026, 73028, 73029, 73111, 92912, 92913, 92914, 92915, 92916, 92976, 92977, 92978, + 92979, 92980, 92981, 92982, 94192, 94193, 113822, 119141, 119142, 119143, 119144, 119145, + 119149, 119150, 119151, 119152, 119153, 119154, 119163, 119164, 119165, 119166, 119167, + 119168, 119169, 119170, 119173, 119174, 119175, 119176, 119177, 119178, 119179, 119210, + 119211, 119212, 119213, 119362, 119363, 119364, 122880, 122881, 122882, 122883, 122884, + 122885, 122886, 122888, 122889, 122890, 122891, 122892, 122893, 122894, 
122895, 122896, + 122897, 122898, 122899, 122900, 122901, 122902, 122903, 122904, 122907, 122908, 122909, + 122910, 122911, 122912, 122913, 122915, 122916, 122918, 122919, 122920, 122921, 122922, + 123184, 123185, 123186, 123187, 123188, 123189, 123190, 123628, 123629, 123630, 123631, + 125136, 125137, 125138, 125139, 125140, 125141, 125142, 125252, 125253, 125254, 125255, + 125256, 125257, 125258 + )); // see http://sourceforge.net/mailarchive/message.php?msg_id=37279908 @@ -102,7 +172,7 @@ public void err(String message) throws SAXException { * or a surrogate and false otherwise */ private static boolean isComposingCharOrSurrogate(char c) { - if (UCharacter.isHighSurrogate(c) || UCharacter.isLowSurrogate(c)) { + if (Character.isHighSurrogate(c) || Character.isLowSurrogate(c)) { return true; } return isComposingChar(c); @@ -151,19 +221,18 @@ public void characters(char[] ch, int start, int length) char c = ch[start]; if (pos == 1) { // there's a single high surrogate in buf - if (isComposingChar(UCharacter.getCodePoint(buf[0], c))) { + if (isComposingChar(getCodePoint(buf[0], c))) { err("Text run starts with a composing character."); } atStartOfRun = false; } else { - if (length == 1 && UCharacter.isHighSurrogate(c)) { + if (length == 1 && Character.isHighSurrogate(c)) { buf[0] = c; pos = 1; return; } else { - if (UCharacter.isHighSurrogate(c)) { - if (isComposingChar(UCharacter.getCodePoint(c, - ch[start + 1]))) { + if (Character.isHighSurrogate(c)) { + if (isComposingChar(getCodePoint(c, ch[start + 1]))) { err("Text run starts with a composing character."); } } else { @@ -186,7 +255,7 @@ public void characters(char[] ch, int start, int length) if (i == stop) { return; } else { - if (!Normalizer2.getNFCInstance().isNormalized(new String(buf, 0, pos))) { + if (!Normalizer.isNormalized(new String(buf, 0, pos), Normalizer.Form.NFC)) { errAboutTextRun(); } pos = 0; @@ -199,7 +268,7 @@ public void characters(char[] ch, int start, int length) i--; } if (i > start) { - 
if (!Normalizer2.getNFCInstance().isNormalized(new String(ch, start, i))) { + if (!Normalizer.isNormalized(new String(ch, start, i), Normalizer.Form.NFC)) { errAboutTextRun(); } } @@ -207,6 +276,13 @@ public void characters(char[] ch, int start, int length) } } + private static int getCodePoint(char lead, char trail) { + if (Character.isSurrogatePair(lead, trail)) { + return Character.toCodePoint(lead, trail); + } + throw new IllegalArgumentException("Illegal surrogate characters"); + } + /** * Emits an error stating that the current text run or the source * text is not in NFC. @@ -248,7 +324,7 @@ private void appendToBuf(char[] ch, int start, int end) { */ public void end() throws SAXException { if (!alreadyComplainedAboutThisRun - && !Normalizer2.getNFCInstance().isNormalized(new String(buf, 0, pos))) { + && !Normalizer.isNormalized(new String(buf, 0, pos), Normalizer.Form.NFC)) { errAboutTextRun(); } if (bufHolder != null) { From bd6aae9b3f6d4bb47487fd2141343a9619233da4 Mon Sep 17 00:00:00 2001 From: Anthony Vanelverdinghe Date: Wed, 12 Aug 2020 17:54:41 +0200 Subject: [PATCH 29/30] Make jchardet & ICU4J heuristics a no-op --- htmlparser/pom.xml | 12 --- htmlparser/src/main/java/module-info.java | 3 - .../htmlparser/extra/ChardetSniffer.java | 85 ------------------- .../htmlparser/extra/IcuDetectorSniffer.java | 77 ----------------- .../htmlparser/io/HtmlInputStreamReader.java | 11 --- 5 files changed, 188 deletions(-) delete mode 100644 htmlparser/src/main/java/nu/validator/htmlparser/extra/ChardetSniffer.java delete mode 100644 htmlparser/src/main/java/nu/validator/htmlparser/extra/IcuDetectorSniffer.java diff --git a/htmlparser/pom.xml b/htmlparser/pom.xml index 08b0d378..c05bdab9 100644 --- a/htmlparser/pom.xml +++ b/htmlparser/pom.xml @@ -38,18 +38,6 @@ nu.validator.htmlparser saxtree - - com.ibm.icu - icu4j - 67.1 - true - - - net.sourceforge.jchardet - jchardet - 1.0 - true - com.sdicons.jsontools jsontools-core diff --git 
a/htmlparser/src/main/java/module-info.java b/htmlparser/src/main/java/module-info.java index d2f6b24f..a9e5f2ef 100644 --- a/htmlparser/src/main/java/module-info.java +++ b/htmlparser/src/main/java/module-info.java @@ -25,12 +25,9 @@ * The parser is designed to work as a drop-in replacement for the XML parser in applications * that already support XHTML 1.x content with an XML parser and use SAX or DOM to interface with the parser. */ -@SuppressWarnings("requires-automatic") module nu.validator.htmlparser { requires transitive java.xml; requires nu.validator.saxtree; - requires static com.ibm.icu; - requires static jchardet; exports nu.validator.htmlparser.annotation; exports nu.validator.htmlparser.common; diff --git a/htmlparser/src/main/java/nu/validator/htmlparser/extra/ChardetSniffer.java b/htmlparser/src/main/java/nu/validator/htmlparser/extra/ChardetSniffer.java deleted file mode 100644 index 8153b4fa..00000000 --- a/htmlparser/src/main/java/nu/validator/htmlparser/extra/ChardetSniffer.java +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2008 Mozilla Foundation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -package nu.validator.htmlparser.extra; - -import java.io.IOException; -import java.nio.charset.UnsupportedCharsetException; - -import nu.validator.htmlparser.io.Encoding; - -import org.mozilla.intl.chardet.nsDetector; -import org.mozilla.intl.chardet.nsICharsetDetectionObserver; -import org.mozilla.intl.chardet.nsPSMDetector; - -import com.ibm.icu.text.CharsetDetector; - -@SuppressWarnings("exports") -public class ChardetSniffer implements nsICharsetDetectionObserver { - - private final byte[] source; - - private final int length; - - private Encoding returnValue = null; - - /** - * @param source - */ - public ChardetSniffer(final byte[] source, final int length) { - this.source = source; - this.length = length; - } - - public Encoding sniff() throws IOException { - nsDetector detector = new nsDetector(nsPSMDetector.ALL); - detector.Init(this); - detector.DoIt(source, length, false); - detector.DataEnd(); - if (returnValue != null && returnValue != Encoding.WINDOWS1252 && returnValue.isAsciiSuperset()) { - return returnValue; - } else { - return null; - } - } - - public static void main(String[] args) { - String[] detectable = CharsetDetector.getAllDetectableCharsets(); - for (int i = 0; i < detectable.length; i++) { - String charset = detectable[i]; - System.out.println(charset); - } - } - - public void Notify(String charsetName) { - try { - Encoding enc = Encoding.forName(charsetName); - Encoding actual = enc.getActualHtmlEncoding(); - if (actual != null) { - enc = actual; - } - returnValue = enc; - } catch (UnsupportedCharsetException e) { - returnValue = null; - } - } -} diff --git a/htmlparser/src/main/java/nu/validator/htmlparser/extra/IcuDetectorSniffer.java 
b/htmlparser/src/main/java/nu/validator/htmlparser/extra/IcuDetectorSniffer.java deleted file mode 100644 index f3caab5c..00000000 --- a/htmlparser/src/main/java/nu/validator/htmlparser/extra/IcuDetectorSniffer.java +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) 2008 Mozilla Foundation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- */ - -package nu.validator.htmlparser.extra; - -import java.io.IOException; -import java.io.InputStream; - -import nu.validator.htmlparser.common.ByteReadable; -import nu.validator.htmlparser.io.Encoding; - -import com.ibm.icu.text.CharsetDetector; -import com.ibm.icu.text.CharsetMatch; - -public class IcuDetectorSniffer extends InputStream { - - private final ByteReadable source; - - /** - * @param source - */ - public IcuDetectorSniffer(final ByteReadable source) { - this.source = source; - } - - @Override - public int read() throws IOException { - return source.readByte(); - } - - public Encoding sniff() throws IOException { - try { - CharsetDetector detector = new CharsetDetector(); - detector.setText(this); - CharsetMatch match = detector.detect(); - Encoding enc = Encoding.forName(match.getName()); - Encoding actual = enc.getActualHtmlEncoding(); - if (actual != null) { - enc = actual; - } - if (enc != Encoding.WINDOWS1252 && enc.isAsciiSuperset()) { - return enc; - } else { - return null; - } - } catch (Exception e) { - return null; - } - } - - public static void main(String[] args) { - String[] detectable = CharsetDetector.getAllDetectableCharsets(); - for (int i = 0; i < detectable.length; i++) { - String charset = detectable[i]; - System.out.println(charset); - } - } -} diff --git a/htmlparser/src/main/java/nu/validator/htmlparser/io/HtmlInputStreamReader.java b/htmlparser/src/main/java/nu/validator/htmlparser/io/HtmlInputStreamReader.java index c240bc3c..b7d7e14b 100644 --- a/htmlparser/src/main/java/nu/validator/htmlparser/io/HtmlInputStreamReader.java +++ b/htmlparser/src/main/java/nu/validator/htmlparser/io/HtmlInputStreamReader.java @@ -35,8 +35,6 @@ import nu.validator.htmlparser.common.ByteReadable; import nu.validator.htmlparser.common.Heuristics; -import nu.validator.htmlparser.extra.ChardetSniffer; -import nu.validator.htmlparser.extra.IcuDetectorSniffer; import nu.validator.htmlparser.impl.Tokenizer; import org.xml.sax.ErrorHandler; @@ 
-135,15 +133,6 @@ public HtmlInputStreamReader(InputStream inputStream, + encoding.getCanonName() + "\u201D used. Documents must use UTF-8."); } - if (encoding == null - && (heuristics == Heuristics.CHARDET || heuristics == Heuristics.ALL)) { - encoding = (new ChardetSniffer(byteArray, limit)).sniff(); - } - if (encoding == null - && (heuristics == Heuristics.ICU || heuristics == Heuristics.ALL)) { - position = 0; - encoding = (new IcuDetectorSniffer(this)).sniff(); - } sniffing = false; if (encoding == null) { encoding = Encoding.WINDOWS1252; From 51b5c920e2bf3cfb1a790290c2a1f24738c27761 Mon Sep 17 00:00:00 2001 From: Anthony Vanelverdinghe Date: Fri, 14 Aug 2020 15:28:14 +0200 Subject: [PATCH 30/30] Bump version number to 2.0 --- htmlparser/pom.xml | 2 +- pom.xml | 8 ++++---- saxtree/pom.xml | 2 +- xom/pom.xml | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/htmlparser/pom.xml b/htmlparser/pom.xml index c05bdab9..3922ae75 100644 --- a/htmlparser/pom.xml +++ b/htmlparser/pom.xml @@ -26,7 +26,7 @@ nu.validator.htmlparser parent - 1.4 + 2.0 htmlparser diff --git a/pom.xml b/pom.xml index bd683ef2..8720018f 100644 --- a/pom.xml +++ b/pom.xml @@ -42,7 +42,7 @@ nu.validator.htmlparser parent - 1.4 + 2.0 pom parent @@ -92,17 +92,17 @@ nu.validator.htmlparser htmlparser - 1.4 + 2.0 nu.validator.htmlparser saxtree - 1.4 + 2.0 nu.validator.htmlparser xom - 1.4 + 2.0 diff --git a/saxtree/pom.xml b/saxtree/pom.xml index b51ca7bf..fe31a185 100644 --- a/saxtree/pom.xml +++ b/saxtree/pom.xml @@ -26,7 +26,7 @@ nu.validator.htmlparser parent - 1.4 + 2.0 saxtree diff --git a/xom/pom.xml b/xom/pom.xml index eab9c5c1..4f3f6521 100644 --- a/xom/pom.xml +++ b/xom/pom.xml @@ -26,7 +26,7 @@ nu.validator.htmlparser parent - 1.4 + 2.0 xom