1 /***
2 * Simple Web Spider - <http://simplewebspider.sourceforge.net/>
3 * Copyright (C) 2009 <berendona@users.sourceforge.net>
4 *
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19 package simplespider.simplespider.importing.simplefile;
20
21 import java.io.BufferedReader;
22 import java.io.FileNotFoundException;
23 import java.io.FileReader;
24 import java.io.IOException;
25 import java.sql.SQLException;
26
27 import org.apache.commons.logging.Log;
28 import org.apache.commons.logging.LogFactory;
29
30 import simplespider.simplespider.dao.DbHelper;
31 import simplespider.simplespider.dao.DbHelperFactory;
32 import simplespider.simplespider.dao.LinkDao;
33 import simplespider.simplespider.importing.EntityImporter;
34 import simplespider.simplespider.util.SimpleUrl;
35
36 public class SimpleFileImporter implements EntityImporter {
37
38 private static final Log LOG = LogFactory.getLog(SimpleFileImporter.class);
39
40 private final String filename;
41
42 public SimpleFileImporter(final String filename) {
43 this.filename = filename;
44 }
45
46 @Override
47 public long importLink(final DbHelperFactory dbHelperFactory) {
48
49
50 final FileReader fstream;
51 try {
52 fstream = new FileReader(this.filename);
53 } catch (final FileNotFoundException e) {
54 LOG.warn("Import links fails: Failed to open file \"" + this.filename + "\"", e);
55 return 0;
56 }
57
58 long count = 0;
59
60 try {
61 final DbHelper dbHelper = dbHelperFactory.buildDbHelper();
62 try {
63 final BufferedReader br = new BufferedReader(fstream);
64 try {
65
66 final LinkDao linkDao = dbHelper.getLinkDao();
67
68 String strLine;
69
70 while ((strLine = br.readLine()) != null) {
71 if (LOG.isDebugEnabled()) {
72 LOG.debug("Try to import link \"" + strLine + "\"");
73 }
74
75 final SimpleUrl simpleUrl;
76 try {
77 simpleUrl = new SimpleUrl(strLine);
78 } catch (final RuntimeException e) {
79 LOG.warn("Skipping link \"" + strLine + "\": Not valid", e);
80 continue;
81 }
82
83 final String normalizedUrl = simpleUrl.toNormalform(false, true);
84
85 try {
86 linkDao.saveForced(normalizedUrl);
87 count++;
88 if (LOG.isInfoEnabled()) {
89 LOG.info("Import link \"" + strLine + "\" (normalized: \"" + normalizedUrl + "\")");
90 }
91 } catch (final RuntimeException e) {
92 LOG.warn("Failed to import \"" + strLine + "\" (normalized: \"" + normalizedUrl + "\")", e);
93 try {
94 dbHelper.rollbackTransaction();
95 } catch (final Exception e2) {
96 LOG.warn("Failed to rollback database transaction", e2);
97 }
98 }
99 }
100 } catch (final IOException e) {
101 LOG.warn("Failure to read line of file \"" + this.filename + "\"", e);
102 } finally {
103 try {
104 br.close();
105 } catch (final IOException e) {
106 LOG.warn("Failed to close buffer of file \"" + this.filename + "\"", e);
107 }
108 }
109 } finally {
110 try {
111 dbHelper.close();
112 } catch (final Exception e) {
113 LOG.warn("Failed to close database connection", e);
114 }
115 }
116 } catch (final SQLException e) {
117 LOG.error("Failed to open conenction to database", e);
118 } finally {
119 try {
120 fstream.close();
121 } catch (final IOException e) {
122 LOG.warn("Failed to close file \"" + this.filename + "\"", e);
123 }
124 }
125
126 return count;
127 }
128 }