View Javadoc

1   /* BackgroundImageExtractionSelfTest
2    *
3    * Created on Jan 29, 2004
4    *
5    * Copyright (C) 2004 Internet Archive.
6    *
7    * This file is part of the Heritrix web crawler (crawler.archive.org).
8    *
9    * Heritrix is free software; you can redistribute it and/or modify
10   * it under the terms of the GNU Lesser Public License as published by
11   * the Free Software Foundation; either version 2.1 of the License, or
12   * any later version.
13   *
14   * Heritrix is distributed in the hope that it will be useful,
15   * but WITHOUT ANY WARRANTY; without even the implied warranty of
16   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17   * GNU Lesser Public License for more details.
18   *
19   * You should have received a copy of the GNU Lesser Public License
20   * along with Heritrix; if not, write to the Free Software
21   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
22   */
23  package org.archive.crawler.selftest;
24  
25  
26  import java.io.File;
27  import java.util.Iterator;
28  import java.util.List;
29  
30  import org.archive.io.arc.ARCRecordMetaData;
31  
32  
33  /***
34   * Test the crawler can find background images in pages.
35   *
36   * @author stack
37   * @version $Id: BackgroundImageExtractionSelfTestCase.java 4931 2007-02-21 18:48:17Z gojomo $
38   */
39  public class BackgroundImageExtractionSelfTestCase
40      extends SelfTestCase
41  {
42      /***
43       * The name of the background image the crawler is supposed to find.
44       */
45      private static final String IMAGE_NAME = "example-background-image.jpeg";
46  
47      private static final String JPEG = "image/jpeg";
48  
49  
50      /***
51       * Read ARC file for the background image the file that contained it.
52       *
53       * Look that there is only one instance of the background image in the
54       * ARC and that it is of the same size as the image in the webapp dir.
55       */
56      public void stestBackgroundImageExtraction()
57      {
58          assertInitialized();
59          String relativePath = getTestName() + '/' + IMAGE_NAME;
60          String url = getSelftestURLWithTrailingSlash() + relativePath;
61          File image = new File(getHtdocs(), relativePath);
62          assertTrue("Image exists", image.exists());
63          List [] metaDatas = getMetaDatas();
64          boolean found = false;
65          ARCRecordMetaData metaData = null;
66          for (int mi = 0; mi < metaDatas.length; mi++) {
67  			List list = metaDatas[mi];
68  			for (final Iterator i = list.iterator(); i.hasNext();) {
69  				metaData = (ARCRecordMetaData) i.next();
70  				if (metaData.getUrl().equals(url)
71  						&& metaData.getMimetype().equalsIgnoreCase(JPEG)) {
72  					if (!found) {
73  						found = true;
74  					} else {
75  						fail("Found a 2nd instance of " + url);
76  					}
77  				}
78  			}
79  		}
80      }
81  }