1 /***
2 * Simple Web Spider - <http://simplewebspider.sourceforge.net/>
3 * Copyright (C) 2009 <berendona@users.sourceforge.net>
4 *
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18 package simplespider.simplespider.bot.http.apache;
19
20 import org.apache.commons.httpclient.HostConfiguration;
21 import org.apache.commons.httpclient.HttpConnectionManager;
22 import org.apache.commons.httpclient.HttpState;
23 import org.apache.commons.httpclient.ProxyHost;
24 import org.apache.commons.httpclient.cookie.CookiePolicy;
25 import org.apache.commons.httpclient.params.HttpClientParams;
26 import org.apache.commons.httpclient.params.HttpConnectionManagerParams;
27 import org.apache.commons.httpclient.protocol.Protocol;
28
29 import simplespider.simplespider.bot.http.HttpClient;
30 import simplespider.simplespider.bot.http.HttpClientFactory;
31 import simplespider.simplespider.bot.http.apache.ssl.TrustAllSSLProtocolSocketFactory;
32 import simplespider.simplespider.util.ValidityHelper;
33
34 public class ApacheHttpClientFactory implements HttpClientFactory {
35
36
37 private static final String USER_AGENT = "Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 6.0)";
38
39 private static final int CONNECTION_TIMEOUT_MILLISECONDS = 30000;
40
41 private final ProxyHost proxyHost;
42
43 public ApacheHttpClientFactory() {
44 this.proxyHost = null;
45 setupSsl();
46 }
47
48 public ApacheHttpClientFactory(final String proxyServer, final int proxyPort) {
49 ValidityHelper.checkNotEmpty("proxyServer", proxyServer);
50 this.proxyHost = new ProxyHost(proxyServer, proxyPort);
51 setupSsl();
52 }
53
54 private void setupSsl() {
55 Protocol.registerProtocol("https", new Protocol("https", new TrustAllSSLProtocolSocketFactory(), 443));
56 }
57
58
59
60
61
62 public HttpClient buildHttpClient() {
63 final org.apache.commons.httpclient.HttpClient httpClient = new org.apache.commons.httpclient.HttpClient();
64
65 if (this.proxyHost != null) {
66 final HostConfiguration hostConfiguration = httpClient.getHostConfiguration();
67 hostConfiguration.setProxyHost(this.proxyHost);
68 }
69
70 final HttpConnectionManager httpConnectionManager = httpClient.getHttpConnectionManager();
71 final HttpConnectionManagerParams httpConnectionManagerParams = httpConnectionManager.getParams();
72 httpConnectionManagerParams.setConnectionTimeout(CONNECTION_TIMEOUT_MILLISECONDS);
73
74
75 final HttpState initialState = new HttpState();
76 httpClient.setState(initialState);
77
78 final HttpClientParams clientParams = httpClient.getParams();
79
80 clientParams.setCookiePolicy(CookiePolicy.BROWSER_COMPATIBILITY);
81
82 clientParams.makeLenient();
83
84 clientParams.setSoTimeout(CONNECTION_TIMEOUT_MILLISECONDS);
85
86 clientParams.setParameter("http.useragent", USER_AGENT);
87
88 return new ApacheHttpClient(httpClient);
89 }
90 }