1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16 package com.quiotix.html.parser;
17
18 import java.util.ArrayList;
19 import java.util.Iterator;
20 import java.util.List;
21
22
23
24
25
26
27
28
29
30
31
32
33
34 public class HtmlDocument implements Visitable {
35 ElementSequence elements;
36
37
/**
 * Creates a document backed by the given element sequence.
 *
 * @param s the sequence stored in the {@code elements} field; it is kept by
 *          reference, not copied
 */
public HtmlDocument(ElementSequence s) {
    this.elements = s;
}
41
42 public void accept(HtmlVisitor v) {
43 v.visit(this);
44 }
45
46 private static String dequote(String s) {
47 if (s == null)
48 return "";
49 if ((s.startsWith("\"") && s.endsWith("\"")) ||
50 (s.startsWith("'") && s.endsWith("'")))
51 return s.substring(1, s.length()-1);
52 else
53 return s;
54 }
55
56
57
58
59
60
61
62
63
64
65
66
67
68 public static abstract class HtmlElement implements Visitable, Sized {
69 public abstract void accept(HtmlVisitor v);
70 }
71
72
73
74
75 public static class Tag extends HtmlElement {
76
77 public String tagName;
78
79 public AttributeList attributeList;
80
81
82
83
84
85 public boolean emptyTag = false;
86
87
88 public Tag(String t, AttributeList a) {
89 tagName = t;
90 attributeList = a;
91 }
92
93
94 public void setEmpty(boolean b) {
95 emptyTag = b;
96 }
97
98 public void accept(HtmlVisitor v) {
99 v.visit(this);
100 }
101
102
103 public boolean hasAttribute(String name) {
104 return attributeList.contains(name);
105 }
106
107
108
109
110
111 public boolean hasAttributeValue(String name) {
112 return attributeList.hasValue(name);
113 }
114
115
116
117
118 public String getAttributeValue(String name) {
119 return attributeList.getValue(name);
120 }
121
122 public int getLength() {
123 int length = 0;
124 for (Iterator iterator = attributeList.attributes.iterator(); iterator.hasNext();) {
125 Attribute attribute = (Attribute) iterator.next();
126 length += 1 + (attribute.getLength());
127 }
128 return length + tagName.length() + 2 + (emptyTag ? 1 : 0);
129 }
130
131 public String toString() {
132 StringBuffer s = new StringBuffer();
133 s.append("<");
134 s.append(tagName);
135 for (Iterator iterator = attributeList.attributes.iterator(); iterator.hasNext();) {
136 Attribute attribute = (Attribute) iterator.next();
137 s.append(" ");
138 s.append(attribute.toString());
139 }
140 if (emptyTag) s.append("/");
141 s.append(">");
142 return s.toString();
143 }
144 }
145
146
147
148
149 public static class EndTag extends HtmlElement {
150
151
152 public String tagName;
153
154
155 public EndTag(String t) {
156 tagName = t;
157 }
158
159 public void accept(HtmlVisitor v) {
160 v.visit(this);
161 }
162
163 public int getLength() {
164 return 3 + tagName.length();
165 }
166
167 public String toString() {
168 return "</" + tagName + ">";
169 }
170 }
171
172
173
174
175
176 public static class TagBlock extends HtmlElement {
177
178 public Tag startTag;
179
180 public EndTag endTag;
181
182 public ElementSequence body;
183
184
185 public TagBlock(String name, AttributeList aList, ElementSequence b) {
186 startTag = new Tag(name, aList);
187 endTag = new EndTag(name);
188 body = b;
189 }
190
191 public void accept(HtmlVisitor v) {
192 v.visit(this);
193 }
194
195 public int getLength() {
196 int bodyLength = 0;
197 for (Iterator iterator = body.iterator(); iterator.hasNext();) {
198 HtmlDocument.HtmlElement htmlElement = (HtmlDocument.HtmlElement) iterator.next();
199 bodyLength += htmlElement.getLength();
200 }
201 return startTag.getLength() + bodyLength + endTag.getLength();
202 }
203
204 public String toString() {
205 StringBuffer sb = new StringBuffer();
206 sb.append(startTag.toString());
207 for (Iterator iterator = body.iterator(); iterator.hasNext();) {
208 HtmlDocument.HtmlElement htmlElement = (HtmlDocument.HtmlElement) iterator.next();
209 sb.append(htmlElement.toString());
210 }
211 sb.append(endTag.toString());
212 return sb.toString();
213 }
214
215
216
217
218 public String text() {
219 StringBuffer sb = new StringBuffer();
220 for (Iterator iterator = body.iterator(); iterator.hasNext();) {
221 HtmlDocument.HtmlElement htmlElement = (HtmlDocument.HtmlElement) iterator.next();
222 if (htmlElement instanceof Text) {
223 sb.append(htmlElement.toString());
224 } else if(htmlElement instanceof TagBlock)
225 sb.append(((TagBlock)htmlElement).text());
226 }
227 return sb.toString();
228 }
229 }
230
231
232
233
234 public static class Comment extends HtmlElement {
235
236
237
238 public String comment;
239
240
241 public Comment(String c) {
242 comment = c;
243 }
244
245 public void accept(HtmlVisitor v) {
246 v.visit(this);
247 }
248
249 public int getLength() {
250 return 3 + comment.length();
251 }
252
253 public String toString() {
254 return "<!" + comment + ">";
255 }
256 }
257
258
259
260
261 public static class Text extends HtmlElement {
262
263 public String text;
264
265
266 public Text(String t) {
267 text = t;
268 }
269
270 public void accept(HtmlVisitor v) {
271 v.visit(this);
272 }
273
274 public int getLength() {
275 return text.length();
276 }
277
278 public String toString() {
279 return text;
280 }
281 }
282
283
284
285
286 public static class Newline extends HtmlElement {
287
288 public static final String NL = System.getProperty("line.separator");
289
290 public void accept(HtmlVisitor v) {
291 v.visit(this);
292 }
293
294 public int getLength() {
295 return NL.length();
296 }
297
298 public String toString() {
299 return NL;
300 }
301 }
302
303
304
305
306 public static class ElementSequence {
307 private List elements;
308
309
310 public ElementSequence(int n) {
311 elements = new ArrayList(n);
312 }
313
314
315 public ElementSequence() {
316 elements = new ArrayList();
317 }
318
319
320 public void addElement(HtmlElement o) {
321 elements.add(o);
322 }
323
324
325
326
327 public int size() {
328 return elements.size();
329 }
330
331
332
333
334 public Iterator iterator() {
335 return elements.iterator();
336 }
337
338
339
340
341
342
343 public void setElements(List collection) {
344 elements.clear();
345 elements.addAll(collection);
346 }
347 }
348
349
350
351
352
353
354
355
356
357
358 public static class Annotation extends HtmlElement {
359 String type, text;
360
361
362 public Annotation(String type, String text) {
363 this.type = type;
364 this.text = text;
365 }
366
367 public void accept(HtmlVisitor v) {
368 v.visit(this);
369 }
370
371 public int getLength() {
372 return 14 + type.length() + text.length();
373 }
374
375 public String toString() {
376 return "<!--NOTE(" + type + ") " + text + "-->";
377 }
378 }
379
380
381
382
383 public static class Attribute implements Sized {
384
385 public String name;
386
387 public String value;
388
389 public boolean hasValue;
390
391
392 public Attribute(String n) {
393 name = n;
394 hasValue = false;
395 }
396
397
398 public Attribute(String n, String v) {
399 name = n;
400 if (v != null) {
401 value = v;
402 hasValue = true;
403 }
404 }
405
406
407
408
409
410
411
412 public int getLength() {
413 return (hasValue ? name.length() + 1 + value.length() : name.length());
414 }
415
416 public String toString() {
417 return (hasValue ? name + "=" + value : name);
418 }
419
420
421
422
423 public String getValue() {
424 return dequote(value);
425 }
426
427
428
429
430 public void setValue(String v) {
431 value = v;
432 if (v == null)
433 hasValue = false;
434 else
435 hasValue = true;
436 }
437 }
438
439
440
441
442 public static class AttributeList {
443
444 public List attributes = new ArrayList();
445
446
447 public void addAttribute(Attribute a) {
448 attributes.add(a);
449 }
450
451
452 public boolean contains(String name) {
453 for (Iterator iterator = attributes.iterator(); iterator.hasNext();) {
454 Attribute attribute = (Attribute) iterator.next();
455 if (attribute.name.equalsIgnoreCase(name))
456 return true;
457 }
458 return false;
459 }
460
461
462
463
464
465 public boolean hasValue(String name) {
466 for (Iterator iterator = attributes.iterator(); iterator.hasNext();) {
467 Attribute attribute = (Attribute) iterator.next();
468 if (attribute.name.equalsIgnoreCase(name) && attribute.hasValue)
469 return true;
470 }
471 return false;
472 }
473
474
475
476
477
478 public String getValue(String name) {
479 for (Iterator iterator = attributes.iterator(); iterator.hasNext();) {
480 Attribute attribute = (Attribute) iterator.next();
481 if (attribute.name.equalsIgnoreCase(name) && attribute.hasValue)
482 return dequote(attribute.value);
483 }
484 return null;
485 }
486 }
487 }
488
489
490