View Javadoc

1   /***
2    * Simple Web Spider - <http://simplewebspider.sourceforge.net/>
3    * Copyright (C) 2009  <berendona@users.sourceforge.net>
4    *
5    * This program is free software: you can redistribute it and/or modify
6    * it under the terms of the GNU General Public License as published by
7    * the Free Software Foundation, either version 3 of the License, or
8    * (at your option) any later version.
9    *
10   * This program is distributed in the hope that it will be useful,
11   * but WITHOUT ANY WARRANTY; without even the implied warranty of
12   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   * GNU General Public License for more details.
14   *
15   * You should have received a copy of the GNU General Public License
16   * along with this program.  If not, see <http://www.gnu.org/licenses/>.
17   */
18  package simplespider.simplespider;
19  
20  import java.util.Calendar;
21  import java.util.Date;
22  import java.util.Iterator;
23  import java.util.LinkedList;
24  import java.util.List;
25  import java.util.concurrent.TimeUnit;
26  
27  import org.apache.commons.logging.Log;
28  import org.apache.commons.logging.LogFactory;
29  
30  public class LimitThroughPut {
31  
32  	private static final Log	LOG		= LogFactory.getLog(LimitThroughPut.class);
33  
34  	final private int			maxPerMinute;
35  	final private List<Date>	times	= new LinkedList<Date>();
36  
37  	public LimitThroughPut(final int maxPerMinute) {
38  		this.maxPerMinute = maxPerMinute;
39  	}
40  
41  	public void next() {
42  		final long wait = cleanup();
43  		if (LOG.isDebugEnabled()) {
44  			LOG.debug("Wait for milliseconds: " + wait);
45  		}
46  		try {
47  			TimeUnit.MILLISECONDS.sleep(wait);
48  		} catch (final InterruptedException e) {
49  			LOG.warn("Sleep was interrupted", e);
50  		}
51  		put();
52  	}
53  
54  	private void put() {
55  		this.times.add(new Date());
56  	}
57  
58  	private long cleanup() {
59  		final Date beforeOneMinute = getDateBeforeOneMinute();
60  
61  		Date mostBlocking = null;
62  
63  		final int beforeCleanup = this.times.size();
64  
65  		for (final Iterator<Date> iterator = this.times.iterator(); iterator.hasNext();) {
66  			final Date item = iterator.next();
67  
68  			if (item.before(beforeOneMinute)) {
69  				// Removing all timestamps before the last minute
70  				iterator.remove();
71  			} else if (this.times.size() < this.maxPerMinute) {
72  				// If less than maximum allowed after removing all old timestamp no job to do
73  				break;
74  			} else {
75  				// Remove timestamp, that is already in time span, but hold it
76  				iterator.remove();
77  				mostBlocking = item;
78  			}
79  		}
80  
81  		if (LOG.isDebugEnabled()) {
82  			LOG.debug("Cleanup - before: " + beforeCleanup + ", after: " + this.times.size() + ", max per minute: " + this.maxPerMinute);
83  		}
84  
85  		if (mostBlocking == null) {
86  			// If there is no blocking, return zero for sleeping
87  			return 0;
88  		}
89  
90  		return mostBlocking.getTime() - beforeOneMinute.getTime();
91  	}
92  
93  	private Date getDateBeforeOneMinute() {
94  		final Calendar calendar = Calendar.getInstance();
95  		calendar.add(Calendar.MINUTE, -1);
96  		final Date current = calendar.getTime();
97  		return current;
98  	}
99  
100 }