org.archive.crawler.extractor
Class ExtractorHTMLTest
java.lang.Object
junit.framework.Assert
junit.framework.TestCase
org.archive.util.TmpDirTestCase
org.archive.crawler.extractor.ExtractorHTMLTest
- All Implemented Interfaces:
- CoreAttributeConstants, junit.framework.Test
- public class ExtractorHTMLTest
- extends TmpDirTestCase
- implements CoreAttributeConstants
Tests the HTML extractor.
- Version:
- $Revision: 1.18 $, $Date: 2005/05/05 23:10:42 $
- Author:
- stack
Fields inherited from interface org.archive.crawler.datamodel.CoreAttributeConstants |
A_ANNOTATIONS, A_CONTENT_TYPE, A_DELAY_FACTOR, A_DISTANCE_FROM_SEED, A_DNS_FETCH_TIME, A_DNS_SERVER_IP_LABEL, A_FETCH_BEGAN_TIME, A_FETCH_COMPLETED_TIME, A_HTML_BASE, A_HTTP_TRANSACTION, A_LOCALIZED_ERRORS, A_META_ROBOTS, A_MINIMUM_DELAY, A_MIRROR_PATH, A_PREREQUISITE_URI, A_RETRY_DELAY, A_RRECORD_SET_LABEL, A_RUNTIME_EXCEPTION |
Methods inherited from class junit.framework.TestCase |
countTestCases, createResult, getName, run, run, runBare, runTest, setName, toString |
Methods inherited from class junit.framework.Assert |
assertEquals, assertEquals, assertEquals, assertEquals, assertEquals, assertEquals, assertEquals, assertEquals, assertEquals, assertEquals, assertEquals, assertEquals, assertEquals, assertEquals, assertEquals, assertEquals, assertEquals, assertEquals, assertEquals, assertEquals, assertFalse, assertFalse, assertNotNull, assertNotNull, assertNotSame, assertNotSame, assertNull, assertNull, assertSame, assertSame, assertTrue, assertTrue, fail, fail |
Methods inherited from class java.lang.Object |
clone, equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait |
ExtractorHTMLTest
public ExtractorHTMLTest()
createExtractor
protected ExtractorHTML createExtractor()
throws javax.management.InvalidAttributeValueException,
javax.management.AttributeNotFoundException,
javax.management.MBeanException,
javax.management.ReflectionException
- Throws:
javax.management.InvalidAttributeValueException
javax.management.AttributeNotFoundException
javax.management.MBeanException
javax.management.ReflectionException
setUp
protected void setUp()
throws java.lang.Exception
- Overrides:
setUp
in class TmpDirTestCase
- Throws:
java.lang.Exception
tearDown
protected void tearDown()
throws java.lang.Exception
- Overrides:
tearDown
in class TmpDirTestCase
- Throws:
java.lang.Exception
testInnerProcess
public void testInnerProcess()
throws java.io.IOException
- Throws:
java.io.IOException
testPageParse
public void testPageParse()
throws javax.management.InvalidAttributeValueException,
javax.management.AttributeNotFoundException,
javax.management.MBeanException,
javax.management.ReflectionException,
java.io.IOException
- Test parsing of a single page fetched from the net or read from the local filesystem.
Set the UURI to a net URL, or instead put in place a file
named for this class under the unit test directory.
- Throws:
java.io.IOException
javax.management.ReflectionException
javax.management.MBeanException
javax.management.AttributeNotFoundException
javax.management.InvalidAttributeValueException
getUURI
protected UURI getUURI(java.lang.String url)
throws org.apache.commons.httpclient.URIException
- Throws:
org.apache.commons.httpclient.URIException
runExtractor
protected void runExtractor(UURI baseUURI)
throws javax.management.InvalidAttributeValueException,
javax.management.AttributeNotFoundException,
javax.management.MBeanException,
javax.management.ReflectionException,
java.io.IOException
- Throws:
javax.management.InvalidAttributeValueException
javax.management.AttributeNotFoundException
javax.management.MBeanException
javax.management.ReflectionException
java.io.IOException
runExtractor
protected void runExtractor(UURI baseUURI,
java.lang.String encoding)
throws java.io.IOException,
javax.management.InvalidAttributeValueException,
javax.management.AttributeNotFoundException,
javax.management.MBeanException,
javax.management.ReflectionException
- Throws:
java.io.IOException
javax.management.InvalidAttributeValueException
javax.management.AttributeNotFoundException
javax.management.MBeanException
javax.management.ReflectionException
testEmbedSrc
public void testEmbedSrc()
throws org.apache.commons.httpclient.URIException
- Test link extraction from a particular &lt;embed src=...&gt; construct.
testHrefWhitespace
public void testHrefWhitespace()
throws org.apache.commons.httpclient.URIException
- Test a whitespace issue found in href.
See [ 963965 ] Either UURI or ExtractHTML should strip whitespace better.
https://sourceforge.net/tracker/?func=detail&atid=539099&aid=963965&group_id=73833
- Throws:
org.apache.commons.httpclient.URIException
main
public static void main(java.lang.String[] args)
throws java.lang.Exception
- Throws:
java.lang.Exception
Copyright © 2003-2005 Internet Archive. All Rights Reserved.