View Javadoc

1   /***
2    * Simple Web Spider - <http://simplewebspider.sourceforge.net/>
3    * Copyright (C) 2009  <berendona@users.sourceforge.net>
4    *
5    * This program is free software: you can redistribute it and/or modify
6    * it under the terms of the GNU General Public License as published by
7    * the Free Software Foundation, either version 3 of the License, or
8    * (at your option) any later version.
9    *
10   * This program is distributed in the hope that it will be useful,
11   * but WITHOUT ANY WARRANTY; without even the implied warranty of
12   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   * GNU General Public License for more details.
14   *
15   * You should have received a copy of the GNU General Public License
16   * along with this program.  If not, see <http://www.gnu.org/licenses/>.
17   */
18  package simplespider.simplespider.bot.http.apache;
19  
20  import org.apache.commons.httpclient.HostConfiguration;
21  import org.apache.commons.httpclient.HttpConnectionManager;
22  import org.apache.commons.httpclient.HttpState;
23  import org.apache.commons.httpclient.ProxyHost;
24  import org.apache.commons.httpclient.cookie.CookiePolicy;
25  import org.apache.commons.httpclient.params.HttpClientParams;
26  import org.apache.commons.httpclient.params.HttpConnectionManagerParams;
27  import org.apache.commons.httpclient.protocol.Protocol;
28  
29  import simplespider.simplespider.bot.http.HttpClient;
30  import simplespider.simplespider.bot.http.HttpClientFactory;
31  import simplespider.simplespider.bot.http.apache.ssl.TrustAllSSLProtocolSocketFactory;
32  import simplespider.simplespider.util.ValidityHelper;
33  
34  public class ApacheHttpClientFactory implements HttpClientFactory {
35  
36  	// TODO Configure this
37  	private static final String	USER_AGENT						= "Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 6.0)";
38  	// TODO Configure this
39  	private static final int	CONNECTION_TIMEOUT_MILLISECONDS	= 30000;
40  
41  	private final ProxyHost		proxyHost;
42  
43  	public ApacheHttpClientFactory() {
44  		this.proxyHost = null;
45  		setupSsl();
46  	}
47  
48  	public ApacheHttpClientFactory(final String proxyServer, final int proxyPort) {
49  		ValidityHelper.checkNotEmpty("proxyServer", proxyServer);
50  		this.proxyHost = new ProxyHost(proxyServer, proxyPort);
51  		setupSsl();
52  	}
53  
54  	private void setupSsl() {
55  		Protocol.registerProtocol("https", new Protocol("https", new TrustAllSSLProtocolSocketFactory(), 443));
56  	}
57  
58  	/*
59  	 * (non-Javadoc)
60  	 * @see simplespider.simplespider_core.http.HttpClientFactory#buildHttpClient()
61  	 */
62  	public HttpClient buildHttpClient() {
63  		final org.apache.commons.httpclient.HttpClient httpClient = new org.apache.commons.httpclient.HttpClient();
64  
65  		if (this.proxyHost != null) {
66  			final HostConfiguration hostConfiguration = httpClient.getHostConfiguration();
67  			hostConfiguration.setProxyHost(this.proxyHost);
68  		}
69  
70  		final HttpConnectionManager httpConnectionManager = httpClient.getHttpConnectionManager();
71  		final HttpConnectionManagerParams httpConnectionManagerParams = httpConnectionManager.getParams();
72  		httpConnectionManagerParams.setConnectionTimeout(CONNECTION_TIMEOUT_MILLISECONDS);
73  
74  		// Get initial state object
75  		final HttpState initialState = new HttpState();
76  		httpClient.setState(initialState);
77  
78  		final HttpClientParams clientParams = httpClient.getParams();
79  		// More browser like behavior
80  		clientParams.setCookiePolicy(CookiePolicy.BROWSER_COMPATIBILITY);
81  		// More browser like behavior
82  		clientParams.makeLenient();
83  		// Setting client global socket timeout
84  		clientParams.setSoTimeout(CONNECTION_TIMEOUT_MILLISECONDS);
85  		// Setting user agent
86  		clientParams.setParameter("http.useragent", USER_AGENT);
87  
88  		return new ApacheHttpClient(httpClient);
89  	}
90  }