1 /***
2 * Simple Web Spider - <http://simplewebspider.sourceforge.net/>
3 * Copyright (C) 2009 <berendona@users.sourceforge.net>
4 *
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41 package simplespider.simplespider.bot.extractor.html.stream;
42
import java.io.IOException;
import java.io.Writer;
import java.util.Locale;
import java.util.Properties;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
49
50 final class TagWriter extends Writer {
51
52 private static final Log LOG = LogFactory.getLog(TagWriter.class);
53
54 private static final char singlequote = '\'';
55 private static final char doublequote = '"';
56 private static final char equal = '=';
57
58 private char[] buffer;
59 private int offset;
60 private int length;
61
62 public TagWriter() {
63 this.buffer = new char[10];
64 this.length = 0;
65 this.offset = 0;
66 }
67
68 public TagWriter(final int initLength) {
69 this.buffer = new char[initLength];
70 this.length = 0;
71 this.offset = 0;
72 }
73
74 public TagWriter(final char[] bb) {
75 this.buffer = bb;
76 this.length = bb.length;
77 this.offset = 0;
78 }
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133 public void clear() {
134 this.buffer = new char[0];
135 this.length = 0;
136 this.offset = 0;
137 }
138
139 public int length() {
140 return this.length;
141 }
142
143 private void grow() {
144 int newsize = this.buffer.length * 2 + 1;
145 if (newsize < 256) {
146 newsize = 256;
147 }
148 if (LOG.isDebugEnabled()) {
149 LOG.debug("Increase tag writer buffer: from " + this.buffer.length + " to " + newsize);
150 }
151 final char[] tmp = new char[newsize];
152 System.arraycopy(this.buffer, this.offset, tmp, 0, this.length);
153 this.buffer = tmp;
154 this.offset = 0;
155 }
156
157 @Override
158 public void write(final int b) {
159 write((char) b);
160 }
161
162 public void write(final char b) {
163 if (this.offset + this.length + 1 > this.buffer.length) {
164 grow();
165 }
166 this.buffer[this.offset + this.length++] = b;
167 }
168
169 @Override
170 public void write(final char[] bb) {
171 write(bb, 0, bb.length);
172 }
173
174 @Override
175 public void write(final char[] bb, final int of, final int le) {
176 while (this.offset + this.length + le > this.buffer.length) {
177 grow();
178 }
179 System.arraycopy(bb, of, this.buffer, this.offset + this.length, le);
180 this.length += le;
181 }
182
183
184
185
186
187
188
189
190
191 public TagWriter append(final int i) {
192 write((char) (i));
193 return this;
194 }
195
196 public TagWriter append(final char[] bb) {
197 write(bb);
198 return this;
199 }
200
201 public TagWriter append(final char[] bb, final int of, final int le) {
202 write(bb, of, le);
203 return this;
204 }
205
206 public TagWriter append(final String s) {
207 return append(s, 0, s.length());
208 }
209
210 public TagWriter append(final String s, final int off, final int len) {
211 final char[] temp = new char[len];
212 s.getChars(off, (off + len), temp, 0);
213 return append(temp);
214 }
215
216 public TagWriter append(final TagWriter bb) {
217 return append(bb.buffer, bb.offset, bb.length);
218 }
219
220
221
222
223
224
225
226 public char charAt(final int pos) {
227 if (pos < 0) {
228 throw new IndexOutOfBoundsException();
229 }
230 if (pos > this.length) {
231 throw new IndexOutOfBoundsException();
232 }
233 return this.buffer[this.offset + pos];
234 }
235
236 public void deleteCharAt(final int pos) {
237 if (pos < 0) {
238 return;
239 }
240 if (pos >= this.length) {
241 return;
242 }
243 if (pos == this.length - 1) {
244 this.length--;
245 } else {
246 System.arraycopy(this.buffer, this.offset + pos + 1, this.buffer, this.offset + pos, this.length - pos - 1);
247 }
248 }
249
250 public int indexOf(final char b) {
251 return indexOf(b, 0);
252 }
253
254 public int indexOf(final char[] bs) {
255 return indexOf(bs, 0);
256 }
257
258 public int indexOf(final char b, final int start) {
259 if (start >= this.length) {
260 return -1;
261 }
262 for (int i = start; i < this.length; i++) {
263 if (this.buffer[this.offset + i] == b) {
264 return i;
265 }
266 }
267 return -1;
268 }
269
270 public int indexOf(final char[] bs, final int start) {
271 if (start + bs.length > this.length) {
272 return -1;
273 }
274 loop: for (int i = start; i <= this.length - bs.length; i++) {
275
276 if (this.buffer[this.offset + i] != bs[0]) {
277 continue loop;
278 }
279
280
281 for (int j = 1; j < bs.length; j++) {
282 if (this.buffer[this.offset + i + j] != bs[j]) {
283 continue loop;
284 }
285 }
286
287
288 return i;
289 }
290 return -1;
291 }
292
293 public int lastIndexOf(final char b) {
294 for (int i = this.length - 1; i >= 0; i--) {
295 if (this.buffer[this.offset + i] == b) {
296 return i;
297 }
298 }
299 return -1;
300 }
301
302 public boolean startsWith(final char[] bs) {
303 if (this.length < bs.length) {
304 return false;
305 }
306 for (int i = 0; i < bs.length; i++) {
307 if (this.buffer[this.offset + i] != bs[i]) {
308 return false;
309 }
310 }
311 return true;
312 }
313
314 public char[] getChars() {
315 return getChars(0);
316 }
317
318 public char[] getChars(final int start) {
319 return getChars(start, this.length);
320 }
321
322 public char[] getChars(final int start, final int end) {
323
324 if (end > this.length) {
325 throw new IndexOutOfBoundsException("getBytes: end > length");
326 }
327 if (start > this.length) {
328 throw new IndexOutOfBoundsException("getBytes: start > length");
329 }
330 final char[] tmp = new char[end - start];
331 System.arraycopy(this.buffer, this.offset + start, tmp, 0, end - start);
332 return tmp;
333 }
334
335 public TagWriter trim(final int start) {
336
337 if (start > this.length) {
338 throw new IndexOutOfBoundsException("trim: start > length");
339 }
340 this.offset = this.offset + start;
341 this.length = this.length - start;
342 return this;
343 }
344
345 public TagWriter trim(final int start, final int end) {
346
347 if (start > this.length) {
348 throw new IndexOutOfBoundsException("trim: start > length");
349 }
350 if (end > this.length) {
351 throw new IndexOutOfBoundsException("trim: end > length");
352 }
353 if (start > end) {
354 throw new IndexOutOfBoundsException("trim: start > end");
355 }
356 this.offset = this.offset + start;
357 this.length = end - start;
358 return this;
359 }
360
361 public TagWriter trim() {
362 int l = 0;
363 while ((l < this.length) && (this.buffer[this.offset + l] <= ' ')) {
364 l++;
365 }
366 int r = this.length;
367 while ((r > 0) && (this.buffer[this.offset + r - 1] <= ' ')) {
368 r--;
369 }
370 if (l > r) {
371 r = l;
372 }
373 return trim(l, r);
374 }
375
376 public boolean isWhitespace(final boolean includeNonLetterBytes) {
377
378 if (includeNonLetterBytes) {
379 char b;
380 for (int i = 0; i < this.length; i++) {
381 b = this.buffer[this.offset + i];
382 if (((b >= '0') && (b <= '9')) || ((b >= 'A') && (b <= 'Z')) || ((b >= 'a') && (b <= 'z'))) {
383 return false;
384 }
385 }
386 } else {
387 for (int i = 0; i < this.length; i++) {
388 if (this.buffer[this.offset + i] > 32) {
389 return false;
390 }
391 }
392 }
393 return true;
394 }
395
396 public int whitespaceStart(final boolean includeNonLetterBytes) {
397
398 if (includeNonLetterBytes) {
399 char b;
400 for (int i = 0; i < this.length; i++) {
401 b = this.buffer[this.offset + i];
402 if (((b >= '0') && (b <= '9')) || ((b >= 'A') && (b <= 'Z')) || ((b >= 'a') && (b <= 'z'))) {
403 return i;
404 }
405 }
406 } else {
407 for (int i = 0; i < this.length; i++) {
408 if (this.buffer[this.offset + i] > 32) {
409 return i;
410 }
411 }
412 }
413 return this.length;
414 }
415
416 public int whitespaceEnd(final boolean includeNonLetterBytes) {
417
418 if (includeNonLetterBytes) {
419 char b;
420 for (int i = this.length - 1; i >= 0; i--) {
421 b = this.buffer[this.offset + i];
422 if (((b >= '0') && (b <= '9')) || ((b >= 'A') && (b <= 'Z')) || ((b >= 'a') && (b <= 'z'))) {
423 return i + 1;
424 }
425 }
426 } else {
427 for (int i = this.length - 1; i >= 0; i--) {
428 if (this.buffer[this.offset + i] > 32) {
429 return i + 1;
430 }
431 }
432 }
433 return 0;
434 }
435
436 @Override
437 public String toString() {
438 return new String(this.buffer, this.offset, this.length);
439 }
440
441 public String toString(final int left, final int rightbound) {
442 return new String(this.buffer, this.offset + left, rightbound - left);
443 }
444
445 public Properties propParser() {
446
447 int pos = this.offset;
448 int start;
449 String key;
450 final Properties p = new Properties();
451
452 while ((pos < this.length) && (this.buffer[pos] <= 32)) {
453 pos++;
454 }
455 while (pos < this.length) {
456
457 start = pos;
458 while ((pos < this.length) && (this.buffer[pos] != equal)) {
459 pos++;
460 }
461 if (pos >= this.length) {
462 break;
463 }
464 key = new String(this.buffer, start, pos - start).trim().toLowerCase();
465
466 pos++;
467
468 while ((pos < this.length) && (this.buffer[pos] <= 32)) {
469 pos++;
470 }
471
472
473 if (pos >= this.length) {
474
475 break;
476 } else if (this.buffer[pos] == doublequote) {
477
478 pos++;
479 start = pos;
480 while ((pos < this.length) && (this.buffer[pos] != doublequote)) {
481 pos++;
482 }
483 if (pos >= this.length) {
484 break;
485 }
486 p.setProperty(key, new String(this.buffer, start, pos - start).trim());
487 pos++;
488 } else if (this.buffer[pos] == singlequote) {
489
490 pos++;
491 start = pos;
492 while ((pos < this.length) && (this.buffer[pos] != singlequote)) {
493 pos++;
494 }
495 if (pos >= this.length) {
496 break;
497 }
498 p.setProperty(key, new String(this.buffer, start, pos - start).trim());
499 pos++;
500 } else {
501
502 start = pos;
503 while ((pos < this.length) && (this.buffer[pos] > 32)) {
504 pos++;
505 }
506 p.setProperty(key, new String(this.buffer, start, pos - start).trim());
507 }
508
509 while ((pos < this.length) && (this.buffer[pos] <= 32)) {
510 pos++;
511
512 }
513 }
514 return p;
515 }
516
517 public static boolean equals(final char[] buffer, final char[] pattern) {
518 return equals(buffer, 0, pattern);
519 }
520
521 public static boolean equals(final char[] buffer, final int offset, final char[] pattern) {
522
523 if (buffer.length < offset + pattern.length) {
524 return false;
525 }
526 for (int i = 0; i < pattern.length; i++) {
527 if (buffer[offset + i] != pattern[i]) {
528 return false;
529 }
530 }
531 return true;
532 }
533
534 public void reset() {
535 this.length = 0;
536 this.offset = 0;
537 }
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556 public char toCharArray()[] {
557 final char[] newbuf = new char[this.length];
558 System.arraycopy(this.buffer, 0, newbuf, 0, this.length);
559 return newbuf;
560 }
561
562 @Override
563 public void close() throws IOException {
564
565 }
566
567 @Override
568 public void flush() throws IOException {
569
570 }
571
572 }