View Javadoc

1   /* $Id: DownloadURLConnection.java 4510 2006-08-18 16:13:32Z stack-sf $
2    *
3    * Created August 11th, 2006
4    *
5    * Copyright (C) 2006 Internet Archive.
6    *
7    * This file is part of the Heritrix web crawler (crawler.archive.org).
8    *
9    * Heritrix is free software; you can redistribute it and/or modify
10   * it under the terms of the GNU Lesser Public License as published by
11   * the Free Software Foundation; either version 2.1 of the License, or
12   * any later version.
13   *
14   * Heritrix is distributed in the hope that it will be useful,
15   * but WITHOUT ANY WARRANTY; without even the implied warranty of
16   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17   * GNU Lesser Public License for more details.
18   *
19   * You should have received a copy of the GNU Lesser Public License
20   * along with Heritrix; if not, write to the Free Software
21   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
22   */
23  package org.archive.net;
24  
25  import java.io.BufferedInputStream;
26  import java.io.File;
27  import java.io.FileInputStream;
28  import java.io.IOException;
29  import java.io.InputStream;
30  import java.net.URL;
31  import java.net.URLConnection;
32  import java.util.logging.Level;
33  import java.util.logging.Logger;
34  
35  import org.archive.util.ProcessUtils;
36  import org.archive.util.ProcessUtils.ProcessResult;
37  
38  /***
39   * An URL Connection that pre-downloads URL reference before passing back a
40   * Stream reference.  When closed, it removes the local download file.
41   * @author stack
42   * @version $Date: 2006-08-18 16:13:32 +0000 (Fri, 18 Aug 2006) $, $Revision: 4510 $
43   */
44  public abstract class DownloadURLConnection extends URLConnection {
45      private final String CLASSNAME = DownloadURLConnection.class.getName();
46      private final Logger LOGGER = Logger.getLogger(CLASSNAME);
47      private static final File TMPDIR =
48          new File(System.getProperty("java.io.tmpdir", "/tmp"));
49      private File downloadFile = null;
50  
51      protected DownloadURLConnection(URL u) {
52          super(u);
53      }
54      
55      protected String getScript() {
56      	return System.getProperty(this.getClass().getName() + ".path",
57      		"UNDEFINED");
58      }
59      
60      protected String [] getCommand(final URL thisUrl,
61      		final File downloadFile) {
62      	return new String[] {getScript(), thisUrl.getPath(),
63          	downloadFile.getAbsolutePath()};  
64      }
65  
66      /***
67       * Do script copy to local file.
68       * File is available via {@link #getFile()}.
69       * @throws IOException 
70       */
71      public void connect() throws IOException {
72          if (this.connected) {
73              return;
74          }
75          
76          this.downloadFile = File.createTempFile(CLASSNAME, null, TMPDIR);
77          try {
78              String [] cmd = getCommand(this.url, this.downloadFile);    
79              if (LOGGER.isLoggable(Level.FINE)) {
80                  StringBuffer buffer = new StringBuffer();
81                  for (int i = 0; i < cmd.length; i++) {
82                      if (i > 0) {
83                          buffer.append(" ");
84                      }
85                      buffer.append(cmd[i]);
86                  }
87                  LOGGER.fine("Command: " + buffer.toString());
88              }
89              ProcessResult pr = ProcessUtils.exec(cmd);
90              if (pr.getResult() != 0) {
91                  LOGGER.info(cmd + " returned non-null " + pr.getResult());
92              }
93              // Assume download went smoothly.
94              this.connected = true;
95          } catch (IOException ioe) {
96              // Clean up my tmp file.
97              this.downloadFile.delete();
98              this.downloadFile = null;
99              // Rethrow.
100             throw ioe;
101         }
102     }
103     
104     public File getFile() {
105         return this.downloadFile;
106     }
107     
108     protected void setFile(final File f) {
109         this.downloadFile = f;
110     }
111 
112     public InputStream getInputStream() throws IOException {
113         if (!this.connected) {
114             connect();
115         }
116         
117         // Return BufferedInputStream so 'delegation' is done for me, so
118         // I don't have to implement all IS methods and pass to my
119         // 'delegate' instance.
120         final DownloadURLConnection connection = this;
121         return new BufferedInputStream(new FileInputStream(this.downloadFile)) {
122             private DownloadURLConnection ruc = connection;
123 
124             public void close() throws IOException {
125                 super.close();
126                 if (this.ruc != null && this.ruc.getFile()!= null &&
127                     this.ruc.getFile().exists()) {
128                     this.ruc.getFile().delete();
129                     this.ruc.setFile(null);
130                 }
131             }
132         };
133     }
134 }