View Javadoc

1   /***
2    * Simple Web Spider - <http://simplewebspider.sourceforge.net/>
3    * Copyright (C) 2009  <berendona@users.sourceforge.net>
4    *
5    * This program is free software: you can redistribute it and/or modify
6    * it under the terms of the GNU General Public License as published by
7    * the Free Software Foundation, either version 3 of the License, or
8    * (at your option) any later version.
9    *
10   * This program is distributed in the hope that it will be useful,
11   * but WITHOUT ANY WARRANTY; without even the implied warranty of
12   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   * GNU General Public License for more details.
14   *
15   * You should have received a copy of the GNU General Public License
16   * along with this program.  If not, see <http://www.gnu.org/licenses/>.
17   */
18  package simplespider.simplespider.util;
19  
20  /*** Based on de.anomic.tools.Punycode */
21  public final class Punycode {
22  	/* Punycode parameters */
23  	final static int	TMIN			= 1;
24  	final static int	TMAX			= 26;
25  	final static int	BASE			= 36;
26  	final static int	INITIAL_N		= 128;
27  	final static int	INITIAL_BIAS	= 72;
28  	final static int	DAMP			= 700;
29  	final static int	SKEW			= 38;
30  	final static char	DELIMITER		= '-';
31  
32  	private Punycode() {
33  		// Only static functions
34  	}
35  
36  	/***
37  	 * Punycodes a unicode string.
38  	 * 
39  	 * @param input
40  	 *            Unicode string.
41  	 * @return Punycoded string.
42  	 */
43  	public static String encode(final String input) throws PunycodeException {
44  		int n = INITIAL_N;
45  		int delta = 0;
46  		int bias = INITIAL_BIAS;
47  		final StringBuilder output = new StringBuilder();
48  
49  		// Copy all basic code points to the output
50  		int b = 0;
51  		for (int i = 0; i < input.length(); i++) {
52  			final char c = input.charAt(i);
53  			if (isBasic(c)) {
54  				output.append(c);
55  				b++;
56  			}
57  		}
58  
59  		// Append delimiter
60  		if (b > 0) {
61  			output.append(DELIMITER);
62  		}
63  
64  		int h = b;
65  		while (h < input.length()) {
66  			int m = Integer.MAX_VALUE;
67  
68  			// Find the minimum code point >= n
69  			for (int i = 0; i < input.length(); i++) {
70  				final int c = input.charAt(i);
71  				if (c >= n && c < m) {
72  					m = c;
73  				}
74  			}
75  
76  			if (m - n > (Integer.MAX_VALUE - delta) / (h + 1)) {
77  				throw new PunycodeException(PunycodeException.OVERFLOW);
78  			}
79  			delta = delta + (m - n) * (h + 1);
80  			n = m;
81  
82  			for (int j = 0; j < input.length(); j++) {
83  				final int c = input.charAt(j);
84  				if (c < n) {
85  					delta++;
86  					if (0 == delta) {
87  						throw new PunycodeException(PunycodeException.OVERFLOW);
88  					}
89  				}
90  				if (c == n) {
91  					int q = delta;
92  
93  					for (int k = BASE;; k += BASE) {
94  						int t;
95  						if (k <= bias) {
96  							t = TMIN;
97  						} else if (k >= bias + TMAX) {
98  							t = TMAX;
99  						} else {
100 							t = k - bias;
101 						}
102 						if (q < t) {
103 							break;
104 						}
105 						output.append((char) digit2codepoint(t + (q - t) % (BASE - t)));
106 						q = (q - t) / (BASE - t);
107 					}
108 
109 					output.append((char) digit2codepoint(q));
110 					bias = adapt(delta, h + 1, h == b);
111 					delta = 0;
112 					h++;
113 				}
114 			}
115 
116 			delta++;
117 			n++;
118 		}
119 
120 		return output.toString();
121 	}
122 
123 	/***
124 	 * Decode a punycoded string.
125 	 * 
126 	 * @param input
127 	 *            Punycode string
128 	 * @return Unicode string.
129 	 */
130 	public static String decode(final String input) throws PunycodeException {
131 		int n = INITIAL_N;
132 		int i = 0;
133 		int bias = INITIAL_BIAS;
134 		final StringBuilder output = new StringBuilder();
135 
136 		int d = input.lastIndexOf(DELIMITER);
137 		if (d > 0) {
138 			for (int j = 0; j < d; j++) {
139 				final char c = input.charAt(j);
140 				if (!isBasic(c)) {
141 					throw new PunycodeException(PunycodeException.BAD_INPUT);
142 				}
143 				output.append(c);
144 			}
145 			d++;
146 		} else {
147 			d = 0;
148 		}
149 
150 		while (d < input.length()) {
151 			final int oldi = i;
152 			int w = 1;
153 
154 			for (int k = BASE;; k += BASE) {
155 				if (d == input.length()) {
156 					throw new PunycodeException(PunycodeException.BAD_INPUT);
157 				}
158 				final int c = input.charAt(d++);
159 				final int digit = codepoint2digit(c);
160 				if (digit > (Integer.MAX_VALUE - i) / w) {
161 					throw new PunycodeException(PunycodeException.OVERFLOW);
162 				}
163 
164 				i = i + digit * w;
165 
166 				int t;
167 				if (k <= bias) {
168 					t = TMIN;
169 				} else if (k >= bias + TMAX) {
170 					t = TMAX;
171 				} else {
172 					t = k - bias;
173 				}
174 				if (digit < t) {
175 					break;
176 				}
177 				w = w * (BASE - t);
178 			}
179 
180 			bias = adapt(i - oldi, output.length() + 1, oldi == 0);
181 
182 			if (i / (output.length() + 1) > Integer.MAX_VALUE - n) {
183 				throw new PunycodeException(PunycodeException.OVERFLOW);
184 			}
185 
186 			n = n + i / (output.length() + 1);
187 			i = i % (output.length() + 1);
188 			output.insert(i, (char) n);
189 			i++;
190 		}
191 
192 		return output.toString();
193 	}
194 
195 	public final static int adapt(int delta, final int numpoints, final boolean first) {
196 		if (first) {
197 			delta = delta / DAMP;
198 		} else {
199 			delta = delta / 2;
200 		}
201 
202 		delta = delta + (delta / numpoints);
203 
204 		int k = 0;
205 		while (delta > ((BASE - TMIN) * TMAX) / 2) {
206 			delta = delta / (BASE - TMIN);
207 			k = k + BASE;
208 		}
209 
210 		return k + ((BASE - TMIN + 1) * delta) / (delta + SKEW);
211 	}
212 
213 	public final static boolean isBasic(final char c) {
214 		return c < 0x80;
215 	}
216 
217 	// the following method has been added by Michael Christen
218 	public static boolean isBasic(final String input) {
219 		for (int j = 0; j < input.length(); j++) {
220 			if (!isBasic(input.charAt(j))) {
221 				return false;
222 			}
223 		}
224 		return true;
225 	}
226 
227 	public final static int digit2codepoint(final int d) throws PunycodeException {
228 		if (d < 26) {
229 			// 0..25 : 'a'..'z'
230 			return d + 'a';
231 		} else if (d < 36) {
232 			// 26..35 : '0'..'9';
233 			return d - 26 + '0';
234 		} else {
235 			throw new PunycodeException(PunycodeException.BAD_INPUT);
236 		}
237 	}
238 
239 	public final static int codepoint2digit(final int c) throws PunycodeException {
240 		if (c - '0' < 10) {
241 			// '0'..'9' : 26..35
242 			return c - '0' + 26;
243 		} else if (c - 'a' < 26) {
244 			// 'a'..'z' : 0..25
245 			return c - 'a';
246 		} else {
247 			throw new PunycodeException(PunycodeException.BAD_INPUT);
248 		}
249 	}
250 
251 	public static class PunycodeException extends Exception {
252 		/***
253      * 
254      */
255 		private static final long	serialVersionUID	= 1L;
256 		public static final String	OVERFLOW			= "Overflow.";
257 		public static final String	BAD_INPUT			= "Bad input.";
258 
259 		/***
260 		 * Creates a new PunycodeException.
261 		 * 
262 		 * @param m
263 		 *            message.
264 		 */
265 		public PunycodeException(final String m) {
266 			super(m);
267 		}
268 	}
269 }