1 /***
2 * Simple Web Spider - <http://simplewebspider.sourceforge.net/>
3 * Copyright (C) 2009 <berendona@users.sourceforge.net>
4 *
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18 package simplespider.simplespider.util;
19
20 /*** Based on de.anomic.tools.Punycode */
21 public final class Punycode {
22
23 final static int TMIN = 1;
24 final static int TMAX = 26;
25 final static int BASE = 36;
26 final static int INITIAL_N = 128;
27 final static int INITIAL_BIAS = 72;
28 final static int DAMP = 700;
29 final static int SKEW = 38;
30 final static char DELIMITER = '-';
31
32 private Punycode() {
33
34 }
35
36 /***
37 * Punycodes a unicode string.
38 *
39 * @param input
40 * Unicode string.
41 * @return Punycoded string.
42 */
43 public static String encode(final String input) throws PunycodeException {
44 int n = INITIAL_N;
45 int delta = 0;
46 int bias = INITIAL_BIAS;
47 final StringBuilder output = new StringBuilder();
48
49
50 int b = 0;
51 for (int i = 0; i < input.length(); i++) {
52 final char c = input.charAt(i);
53 if (isBasic(c)) {
54 output.append(c);
55 b++;
56 }
57 }
58
59
60 if (b > 0) {
61 output.append(DELIMITER);
62 }
63
64 int h = b;
65 while (h < input.length()) {
66 int m = Integer.MAX_VALUE;
67
68
69 for (int i = 0; i < input.length(); i++) {
70 final int c = input.charAt(i);
71 if (c >= n && c < m) {
72 m = c;
73 }
74 }
75
76 if (m - n > (Integer.MAX_VALUE - delta) / (h + 1)) {
77 throw new PunycodeException(PunycodeException.OVERFLOW);
78 }
79 delta = delta + (m - n) * (h + 1);
80 n = m;
81
82 for (int j = 0; j < input.length(); j++) {
83 final int c = input.charAt(j);
84 if (c < n) {
85 delta++;
86 if (0 == delta) {
87 throw new PunycodeException(PunycodeException.OVERFLOW);
88 }
89 }
90 if (c == n) {
91 int q = delta;
92
93 for (int k = BASE;; k += BASE) {
94 int t;
95 if (k <= bias) {
96 t = TMIN;
97 } else if (k >= bias + TMAX) {
98 t = TMAX;
99 } else {
100 t = k - bias;
101 }
102 if (q < t) {
103 break;
104 }
105 output.append((char) digit2codepoint(t + (q - t) % (BASE - t)));
106 q = (q - t) / (BASE - t);
107 }
108
109 output.append((char) digit2codepoint(q));
110 bias = adapt(delta, h + 1, h == b);
111 delta = 0;
112 h++;
113 }
114 }
115
116 delta++;
117 n++;
118 }
119
120 return output.toString();
121 }
122
123 /***
124 * Decode a punycoded string.
125 *
126 * @param input
127 * Punycode string
128 * @return Unicode string.
129 */
130 public static String decode(final String input) throws PunycodeException {
131 int n = INITIAL_N;
132 int i = 0;
133 int bias = INITIAL_BIAS;
134 final StringBuilder output = new StringBuilder();
135
136 int d = input.lastIndexOf(DELIMITER);
137 if (d > 0) {
138 for (int j = 0; j < d; j++) {
139 final char c = input.charAt(j);
140 if (!isBasic(c)) {
141 throw new PunycodeException(PunycodeException.BAD_INPUT);
142 }
143 output.append(c);
144 }
145 d++;
146 } else {
147 d = 0;
148 }
149
150 while (d < input.length()) {
151 final int oldi = i;
152 int w = 1;
153
154 for (int k = BASE;; k += BASE) {
155 if (d == input.length()) {
156 throw new PunycodeException(PunycodeException.BAD_INPUT);
157 }
158 final int c = input.charAt(d++);
159 final int digit = codepoint2digit(c);
160 if (digit > (Integer.MAX_VALUE - i) / w) {
161 throw new PunycodeException(PunycodeException.OVERFLOW);
162 }
163
164 i = i + digit * w;
165
166 int t;
167 if (k <= bias) {
168 t = TMIN;
169 } else if (k >= bias + TMAX) {
170 t = TMAX;
171 } else {
172 t = k - bias;
173 }
174 if (digit < t) {
175 break;
176 }
177 w = w * (BASE - t);
178 }
179
180 bias = adapt(i - oldi, output.length() + 1, oldi == 0);
181
182 if (i / (output.length() + 1) > Integer.MAX_VALUE - n) {
183 throw new PunycodeException(PunycodeException.OVERFLOW);
184 }
185
186 n = n + i / (output.length() + 1);
187 i = i % (output.length() + 1);
188 output.insert(i, (char) n);
189 i++;
190 }
191
192 return output.toString();
193 }
194
195 public final static int adapt(int delta, final int numpoints, final boolean first) {
196 if (first) {
197 delta = delta / DAMP;
198 } else {
199 delta = delta / 2;
200 }
201
202 delta = delta + (delta / numpoints);
203
204 int k = 0;
205 while (delta > ((BASE - TMIN) * TMAX) / 2) {
206 delta = delta / (BASE - TMIN);
207 k = k + BASE;
208 }
209
210 return k + ((BASE - TMIN + 1) * delta) / (delta + SKEW);
211 }
212
213 public final static boolean isBasic(final char c) {
214 return c < 0x80;
215 }
216
217
218 public static boolean isBasic(final String input) {
219 for (int j = 0; j < input.length(); j++) {
220 if (!isBasic(input.charAt(j))) {
221 return false;
222 }
223 }
224 return true;
225 }
226
227 public final static int digit2codepoint(final int d) throws PunycodeException {
228 if (d < 26) {
229
230 return d + 'a';
231 } else if (d < 36) {
232
233 return d - 26 + '0';
234 } else {
235 throw new PunycodeException(PunycodeException.BAD_INPUT);
236 }
237 }
238
239 public final static int codepoint2digit(final int c) throws PunycodeException {
240 if (c - '0' < 10) {
241
242 return c - '0' + 26;
243 } else if (c - 'a' < 26) {
244
245 return c - 'a';
246 } else {
247 throw new PunycodeException(PunycodeException.BAD_INPUT);
248 }
249 }
250
251 public static class PunycodeException extends Exception {
252 /***
253 *
254 */
255 private static final long serialVersionUID = 1L;
256 public static final String OVERFLOW = "Overflow.";
257 public static final String BAD_INPUT = "Bad input.";
258
259 /***
260 * Creates a new PunycodeException.
261 *
262 * @param m
263 * message.
264 */
265 public PunycodeException(final String m) {
266 super(m);
267 }
268 }
269 }