View Javadoc

1   /* UURIFactoryTest
2    *
3    * $Id: UURIFactoryTest.java 6707 2009-11-25 02:36:10Z gojomo $
4    *
5    * Created on Apr 2, 2004
6    *
7    * Copyright (C) 2004 Internet Archive.
8    *
9    * This file is part of the Heritrix web crawler (crawler.archive.org).
10   *
11   * Heritrix is free software; you can redistribute it and/or modify
12   * it under the terms of the GNU Lesser Public License as published by
13   * the Free Software Foundation; either version 2.1 of the License, or
14   * any later version.
15   *
16   * Heritrix is distributed in the hope that it will be useful,
17   * but WITHOUT ANY WARRANTY; without even the implied warranty of
18   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19   * GNU Lesser Public License for more details.
20   *
21   * You should have received a copy of the GNU Lesser Public License
22   * along with Heritrix; if not, write to the Free Software
23   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
24   */
25  
26  package org.archive.net;
27  
28  import java.util.Iterator;
29  import java.util.TreeMap;
30  
31  import junit.framework.TestCase;
32  
33  import org.apache.commons.httpclient.URIException;
34  
35  /***
36   * Test UURIFactory for proper UURI creation across variety of
37   * important/tricky cases.
38   * 
39   * Be careful writing this file.  Make sure you write it with UTF-8 encoding.
40   *
41   * @author igor stack gojomo
42   */
43  public class UURIFactoryTest extends TestCase {
44  	
45  	public final void testEscaping() throws URIException {
46  		// Note: single quote is not being escaped by URI class.
47  		final String ESCAPED_URISTR = "http://archive.org/" +
48  		    UURIFactory.ESCAPED_SPACE +
49  			UURIFactory.ESCAPED_SPACE +
50  			UURIFactory.ESCAPED_CIRCUMFLEX +
51  			UURIFactory.ESCAPED_QUOT +
52  			UURIFactory.SQUOT +
53  			UURIFactory.ESCAPED_APOSTROPH +
54  			UURIFactory.ESCAPED_LSQRBRACKET +
55  			UURIFactory.ESCAPED_RSQRBRACKET +
56  			UURIFactory.ESCAPED_LCURBRACKET +
57  			UURIFactory.ESCAPED_RCURBRACKET +
58  			UURIFactory.SLASH + "a.gif"; // NBSP and SPACE should be trimmed;
59  		
60  		final String URISTR = "http://archive.org/.././" + "\u00A0" +
61  		    UURIFactory.SPACE + UURIFactory.CIRCUMFLEX +
62  			UURIFactory.QUOT + UURIFactory.SQUOT +
63  			UURIFactory.APOSTROPH + UURIFactory.LSQRBRACKET +
64  			UURIFactory.RSQRBRACKET + UURIFactory.LCURBRACKET +
65  			UURIFactory.RCURBRACKET + UURIFactory.BACKSLASH +
66  			"test/../a.gif" + "\u00A0" + UURIFactory.SPACE;
67  		
68  		UURI uuri = UURIFactory.getInstance(URISTR);
69  		final String uuriStr = uuri.toString();
70  		assertEquals("expected escaping", ESCAPED_URISTR, uuriStr);
71  	}
72  
73      public final void testUnderscoreMakesPortParseFail() throws URIException {
74          UURI uuri = UURIFactory.getInstance("http://one-two_three:8080/index.html");
75          int port = uuri.getPort();
76          assertTrue("Failed find of port " + uuri, port == 8080);
77      }
78      
79      public final void testRelativeURIWithTwoSlashes() throws URIException {
80          UURI base = UURIFactory.getInstance("http://www.archive.org");
81          UURI uuri = UURIFactory.getInstance(base, "one//index.html");
82          assertTrue("Doesn't do right thing with two slashes " + uuri,
83              uuri.toString().equals(
84                  "http://www.archive.org/one//index.html"));
85      }
86      
87      public final void testTrailingEncodedSpace() throws URIException {
88          UURI uuri = UURIFactory.getInstance("http://www.nps-shoes.co.uk%20");
89          assertTrue("Doesn't strip trailing encoded space 1 " + uuri,
90              uuri.toString().equals("http://www.nps-shoes.co.uk/"));
91          uuri = UURIFactory.getInstance("http://www.nps-shoes.co.uk%20%20%20");
92          assertTrue("Doesn't strip trailing encoded space 2 " + uuri,
93              uuri.toString().equals("http://www.nps-shoes.co.uk/"));
94      }
95      
96      public final void testPort0080is80() throws URIException {
97          UURI uuri = UURIFactory.getInstance("http://archive.org:0080");
98          assertTrue("Doesn't strip leading zeros " + uuri,
99              uuri.toString().equals("http://archive.org/"));
100     }
101     
102 // DISABLING TEST AS PRECURSOR TO ELIMINATION
103 // The problematic input given -- specifically the "%6s" incomplete uri-escape --
104 // shouldn't necessarily be rejected as a bad URI. IE and Firefox, at least,
105 // will attempt to fetch such a URL (getting, in this case against that ad
106 // server, a bad-request error). Ideally, we'd generate exactly the same
107 // request against the server as they do. However, with the most recent
108 // fixup for stray '%' signs, we come close, but not exactly. That's enough
109 // to cause this test to fail (it's not getting the expected exception), but
110 // our almost-URI, which might be what was intended, is better than trying
111 // nothing.
112 //    public final void testBadPath() {
113 //        String message = null;
114 //        try {
115 //            UURIFactory.getInstance("http://ads.as4x.tmcs.net/" +
116 //                "html.ng/site=cs&pagepos=102&page=home&adsize=1x1&context=" +
117 //                "generic&Params.richmedia=yes%26city%3Dseattle%26" +
118 //                "rstid%3D2415%26market_id%3D86%26brand%3Dcitysearch" +
119 //                "%6state%3DWA");
120 //        } catch (URIException e) {
121 //            message = e.getMessage();
122 //        }
123 //        assertNotNull("Didn't get expected exception.", message);
124 //    }   
125     
126     public final void testEscapeEncoding() throws URIException {
127         UURI uuri = UURIFactory.getInstance("http://www.y1y1.com/" +
128             "albums/userpics/11111/normal_%E3%E4%EC%EC%EC.jpg", "windows-1256");
129         uuri.getPath();
130     }   
131     
132     public final void testTooLongAfterEscaping() {
133         StringBuffer buffer = new StringBuffer("http://www.archive.org/a/");
134         // Append bunch of spaces.  When escaped, they'll triple in size.
135         for (int i = 0; i < 1024; i++) {
136         	buffer.append(" ");
137         }
138         buffer.append("/index.html");
139         String message = null;
140         try {
141         	UURIFactory.getInstance(buffer.toString());
142         } catch (URIException e) {
143             message = e.getMessage();
144         }
145         assertTrue("Wrong or no exception: " + message, (message != null) &&
146             message.startsWith("Created (escaped) uuri >"));
147     }
148 	
149 	public final void testFtpUris() throws URIException {
150 		final String FTP = "ftp";
151 		final String AUTHORITY = "pfbuser:pfbuser@mprsrv.agri.gov.cn";
152 		final String PATH = "/clzreceive/";
153 		final String uri = FTP + "://" + AUTHORITY + PATH;
154 		UURI uuri = UURIFactory.getInstance(uri);
155 		assertTrue("Failed to get matching scheme: " + uuri.getScheme(),
156 				(uuri.getScheme()).equals(FTP));
157 		assertTrue("Failed to get matching authority: " +
158 				uuri.getAuthority(), (uuri.getAuthority()).equals(AUTHORITY));
159 		assertTrue("Failed to get matching path: " +
160 				uuri.getPath(), (uuri.getPath()).equals(PATH));       
161 	}
162     
163     public final void testWhitespaceEscaped() throws URIException {
164         // Test that we get all whitespace even if the uri is
165         // already escaped.
166         String uri = "http://archive.org/index%25 .html";
167         String tgtUri = "http://archive.org/index%25%20.html";
168         UURI uuri = UURIFactory.getInstance(uri);
169         assertTrue("Not equal " + uuri.toString(),
170                 uuri.toString().equals(tgtUri));     
171         uri = "http://archive.org/index%25\u001D.html";
172         tgtUri = "http://archive.org/index%25%1D.html".toLowerCase();
173         uuri = UURIFactory.getInstance(uri);
174         assertEquals("whitespace escaping", tgtUri, uuri.toString());
175         uri = "http://gemini.info.usaid.gov/directory/" +
176             "pbResults.cfm?&urlNameLast=Rumplestiltskin";
177         tgtUri = "http://gemini.info.usaid.gov/directory/faxResults.cfm?" +
178             "name=Ebenezer%20+Rumplestiltskin,&location=RRB%20%20%20%205%2E08%2D006";
179         uuri = UURIFactory.getInstance(UURIFactory.getInstance(uri),
180             "faxResults.cfm?name=Ebenezer +Rumplestiltskin,&location=" +
181             "RRB%20%20%20%205%2E08%2D006");
182         assertEquals("whitespace escaping", tgtUri, uuri.toString());
183     }
184     
185 //	public final void testFailedGetPath() throws URIException {
186 //		final String path = "/RealMedia/ads/" +
187 //		"click_lx.ads/%%PAGE%%/%%RAND%%/%%POS%%/%%CAMP%%/empty";
188 //        // decoding in getPath will interpret %CA as 8-bit escaped char,
189 //        // possibly incomplete
190 //		final String uri = "http://ads.nandomedia.com" + path;
191 //		final UURI uuri = UURIFactory.getInstance(uri);
192 //		String foundPath = uuri.getPath();
193 //		assertEquals("unexpected path", path, foundPath);
194 //	}
195     
196     public final void testDnsHost() throws URIException {
197         String uri = "dns://ads.nandomedia.com:81/one.html";
198         UURI uuri = UURIFactory.getInstance(uri);
199         String host = uuri.getReferencedHost();
200         assertTrue("Host is wrong " + host, host.equals("ads.nandomedia.com"));
201         uri = "dns:ads.nandomedia.com";
202         uuri = UURIFactory.getInstance(uri);
203         host = uuri.getReferencedHost();
204         assertTrue("Host is wrong " + host, host.equals("ads.nandomedia.com"));
205         uri = "dns:ads.nandomedia.com?a=b";
206         uuri = UURIFactory.getInstance(uri);
207         host = uuri.getReferencedHost();
208         assertTrue("Host is wrong " + host, host.equals("ads.nandomedia.com"));
209     }
210 	
211 	public final void testPercentEscaping() throws URIException {
212 		final String uri = "http://archive.org/%a%%%%%.html";
213         // tests indicate firefox (1.0.6) does not encode '%' at all
214         final String tgtUri = "http://archive.org/%a%%%%%.html";
215 		UURI uuri = UURIFactory.getInstance(uri);
216 		assertEquals("Not equal",tgtUri, uuri.toString());
217 	}
218     
219 	public final void testRelativeDblPathSlashes() throws URIException {
220 		UURI base = UURIFactory.getInstance("http://www.archive.org/index.html");
221 		UURI uuri = UURIFactory.getInstance(base, "JIGOU//KYC//INDEX.HTM");
222         assertTrue("Double slash not working " + uuri.toString(),
223                 uuri.getPath().equals("/JIGOU//KYC//INDEX.HTM"));
224 	}
225     
226     public final void testRelativeWithScheme() throws URIException {
227         UURI base = UURIFactory.getInstance("http://www.example.com/some/page");
228         UURI uuri = UURIFactory.getInstance(base, "http:boo");
229         assertTrue("Relative with scheme not working " + uuri.toString(),
230                 uuri.toString().equals("http://www.example.com/some/boo"));
231     }
232     
233     public final void testBadBaseResolve() throws URIException {
234         UURI base = UURIFactory.getInstance("http://license.joins.com/board/" +
235             "etc_board_list.asp?board_name=new_main&b_type=&nPage=" +
236             "2&category=G&lic_id=70&site=changeup&g_page=changeup&g_sPage=" +
237             "notice&gate=02");
238         UURIFactory.getInstance(base, "http://www.changeup.com/...</a");
239     }
240     
241     public final void testTilde() throws URIException {
242         noChangeExpected("http://license.joins.com/~igor");
243     }
244     
245     public final void testCurlies() throws URIException {
246         // Firefox allows curlies in the query string portion of a URL only
247         // (converts curlies if they are in the path portion ahead of the
248         // query string).
249         UURI uuri =
250             noChangeExpected("http://license.joins.com/igor?one={curly}");
251         assertEquals(uuri.getQuery(), "one={curly}");
252         assertEquals(UURIFactory.
253                 getInstance("http://license.joins.com/igor{curly}.html").
254                     toString(),
255             "http://license.joins.com/igor%7Bcurly%7D.html");
256         boolean exception = false;
257         try {
258             UURIFactory.getInstance("http://license.{curly}.com/igor.html");
259         } catch (URIException u) {
260             exception = true;
261         }
262         assertTrue("Did not get exception.", exception);
263     }
264     
265     protected UURI noChangeExpected(final String original)
266     throws URIException {
267         UURI uuri = UURIFactory.getInstance(original);
268         assertEquals(original, uuri.toString());
269         return uuri;
270     }
271     
272 	public final void testTrimSpaceNBSP() throws URIException {
273 		final String uri = "   http://archive.org/DIR WITH SPACES/" +
274 		UURIFactory.NBSP + "home.html    " + UURIFactory.NBSP + "   ";
275 		final String tgtUri =
276 			"http://archive.org/DIR%20WITH%20SPACES/%20home.html";
277 		UURI uuri = UURIFactory.getInstance(uri);
278 		assertTrue("Not equal " + uuri.toString(),
279 				uuri.toString().equals(tgtUri));
280 	}
281 	
282 	/***
283 	 * Test space plus encoding ([ 1010966 ] crawl.log has URIs with spaces in them).
284 	 * See <a href="http://sourceforge.net/tracker/index.php?func=detail&aid=1010966&group_id=73833&atid=539099">[ 1010966 ] crawl.log has URIs with spaces in them</a>.
285 	 * @throws URIException
286 	 */
287 	public final void testSpaceDoubleEncoding() throws URIException {
288 		final String uri = "http://www.brook.edu/i.html? %20taxonomy=Politics";
289 		final String encodedUri =
290 			"http://www.brook.edu/i.html?%20%20taxonomy=Politics";
291 		UURI uuri = UURIFactory.getInstance(uri, "ISO-8859-1");
292 		assertTrue("Not equal " + uuri.toString(),
293 				uuri.toString().equals(encodedUri));
294 	}
295 	
296 	/***
297 	 * Test for doubly-encoded sequences.
298 	 * See <a href="https://sourceforge.net/tracker/index.php?func=detail&aid=966219&group_id=73833&atid=539099">[ 966219 ] UURI doubly-encodes %XX sequences</a>.
299 	 * @throws URIException
300 	 */
301 	public final void testDoubleEncoding() throws URIException {
302 		final char ae = '\u00E6';
303 		final String uri = "http://archive.org/DIR WITH SPACES/home" +
304 		    ae + ".html";
305 		final String encodedUri =
306 			"http://archive.org/DIR%20WITH%20SPACES/home%E6.html";
307 		UURI uuri = UURIFactory.getInstance(uri, "ISO-8859-1");
308 		assertEquals("single encoding", encodedUri, uuri.toString());
309 		// Dbl-encodes.
310 		uuri = UURIFactory.getInstance(uuri.toString(), "ISO-8859-1");
311 		uuri = UURIFactory.getInstance(uuri.toString(), "ISO-8859-1");
312 		assertEquals("double encoding", encodedUri, uuri.toString());
313 		// Do default utf-8 test.
314 		uuri = UURIFactory.getInstance(uri);
315 		final String encodedUtf8Uri =
316 			"http://archive.org/DIR%20WITH%20SPACES/home%C3%A6.html";
317 		assertEquals("Not equal utf8", encodedUtf8Uri, uuri.toString());      
318 		// Now dbl-encode.
319 		uuri = UURIFactory.getInstance(uuri.toString());
320 		uuri = UURIFactory.getInstance(uuri.toString());
321 		assertEquals("Not equal (dbl-encoding) utf8", encodedUtf8Uri, uuri.toString());
322 	}
323 	
324 	/***
325 	 * Test for syntax errors stop page parsing.
326 	 * @see <a href="https://sourceforge.net/tracker/?func=detail&aid=788219&group_id=73833&atid=539099">[ 788219 ] URI Syntax Errors stop page parsing</a>
327 	 * @throws URIException
328 	 */
329 	public final void testThreeSlashes() throws URIException {
330 		UURI goodURI = UURIFactory.
331 		getInstance("http://lcweb.loc.gov/rr/goodtwo.html");
332 		String uuri = "http:///lcweb.loc.gov/rr/goodtwo.html";
333 		UURI rewrittenURI = UURIFactory.getInstance(uuri);
334 		assertTrue("Not equal " + goodURI + ", " + uuri,
335 				goodURI.toString().equals(rewrittenURI.toString()));
336 		uuri = "http:////lcweb.loc.gov/rr/goodtwo.html";
337 		rewrittenURI = UURIFactory.getInstance(uuri);
338 		assertTrue("Not equal " + goodURI + ", " + uuri,
339 				goodURI.toString().equals(rewrittenURI.toString()));
340 		// Check https.
341 		goodURI = UURIFactory.
342 		getInstance("https://lcweb.loc.gov/rr/goodtwo.html");
343 		uuri = "https:////lcweb.loc.gov/rr/goodtwo.html";
344 		rewrittenURI = UURIFactory.getInstance(uuri);
345 		assertTrue("Not equal " + goodURI + ", " + uuri,
346 				goodURI.toString().equals(rewrittenURI.toString()));
347 	}
348 	
349 	public final void testNoScheme() {
350 		boolean expectedException = false;
351 		String uuri = "www.loc.gov/rr/european/egw/polishex.html";
352 		try {
353 			UURIFactory.getInstance(uuri);
354 		} catch (URIException e) {
355 			// Expected exception.
356 			expectedException = true;
357 		}
358 		assertTrue("Didn't get expected exception: " + uuri, 
359 				expectedException); 
360 	}
361 	
362 	public final void testRelative() throws URIException {
363 		UURI uuriTgt = UURIFactory.
364 		getInstance("http://archive.org:83/home.html");
365 		UURI uri = UURIFactory.
366 		getInstance("http://archive.org:83/one/two/three.html");
367 		UURI uuri = UURIFactory.
368 		getInstance(uri, "/home.html");
369 		assertTrue("Not equal",
370 				uuriTgt.toString().equals(uuri.toString()));
371 	}
372 	
373 	/***
374 	 * Test that an empty uuri does the right thing -- that we get back the
375 	 * base.
376 	 *
377 	 * @throws URIException
378 	 */
379 	public final void testRelativeEmpty() throws URIException {
380 		UURI uuriTgt = UURIFactory.
381 		getInstance("http://archive.org:83/one/two/three.html");
382 		UURI uri = UURIFactory.
383 		getInstance("http://archive.org:83/one/two/three.html");
384 		UURI uuri = UURIFactory.
385 		getInstance(uri, "");
386 		assertTrue("Empty length don't work",
387 				uuriTgt.toString().equals(uuri.toString()));
388 	}
389 	
390 	public final void testAbsolute() throws URIException {
391 		UURI uuriTgt = UURIFactory.
392 		getInstance("http://archive.org:83/home.html");
393 		UURI uri = UURIFactory.
394 		getInstance("http://archive.org:83/one/two/three.html");
395 		UURI uuri = UURIFactory.
396 		getInstance(uri, "http://archive.org:83/home.html");
397 		assertTrue("Not equal",
398 				uuriTgt.toString().equals(uuri.toString()));
399 	}
400 	
401 	/***
402 	 * Test for [ 962892 ] UURI accepting/creating unUsable URIs (bad hosts).
403 	 * @see <a href="https://sourceforge.net/tracker/?func=detail&atid=539099&aid=962892&group_id=73833">[ 962892 ] UURI accepting/creating unUsable URIs (bad hosts)</a>
404 	 */
405 	public final void testHostWithLessThan() {
406 		checkExceptionOnIllegalDomainlabel("http://www.betamobile.com</A");
407 		checkExceptionOnIllegalDomainlabel(
408 		"http://C|/unzipped/426/spacer.gif");
409 		checkExceptionOnIllegalDomainlabel("http://www.lycos.co.uk\"/l/b/\"");
410 	}    
411 	
412 	/***
413 	 * Test for [ 1012520 ] UURI.length() &gt; 2k.
414 	 * @throws URIException
415 	 * @see <a href="http://sourceforge.net/tracker/index.php?func=detail&aid=1012520&group_id=73833&atid=539099">[ 1012520 ] UURI.length() &gt; 2k</a>
416 	 */
417 	public final void test2kURI() throws URIException {
418 		final StringBuffer buffer = new StringBuffer("http://a.b");
419 		final String subPath = "/123456789";
420 		for (int i = 0; i < 207; i++) {
421 			buffer.append(subPath);
422 		}
423 		// String should be 2080 characters long.  Legal.
424 		UURIFactory.getInstance(buffer.toString());
425 		boolean gotException = false;
426 		// Add ten more characters and make size illegal.
427 		buffer.append(subPath);
428 		try {
429 			UURIFactory.getInstance(buffer.toString()); 
430 		} catch (URIException e) {
431 			gotException = true;
432 		}
433 		assertTrue("No expected exception complaining about long URI",
434 				gotException);
435 	} 
436 	
437 	private void checkExceptionOnIllegalDomainlabel(String uuri) {
438 		boolean expectedException = false;
439         try {
440 			UURIFactory.getInstance(uuri);
441 		} catch (URIException e) {
442 			// Expected exception.
443 			expectedException = true;
444 		}
445 		assertTrue("Didn't get expected exception: " + uuri, 
446 				expectedException); 
447 	}
448 	
449 	/***
450 	 * Test for doing separate DNS lookup for same host
451 	 *
452 	 * @see <a href="https://sourceforge.net/tracker/?func=detail&aid=788277&group_id=73833&atid=539099">[ 788277 ] Doing separate DNS lookup for same host</a>
453 	 * @throws URIException
454 	 */
455 	public final void testHostWithPeriod() throws URIException {
456 		UURI uuri1 = UURIFactory.
457 		getInstance("http://www.loc.gov./index.html");
458 		UURI uuri2 = UURIFactory.
459 		getInstance("http://www.loc.gov/index.html");
460 		assertEquals("Failed equating hosts with dot",
461 				uuri1.getHost(), uuri2.getHost());
462 	}
463 	
464 	/***
465 	 * Test for NPE in java.net.URI.encode
466 	 *
467 	 * @see <a href="https://sourceforge.net/tracker/?func=detail&aid=874220&group_id=73833&atid=539099">[ 874220 ] NPE in java.net.URI.encode</a>
468 	 * @throws URIException
469 	 */
470 	public final void testHostEncodedChars() throws URIException {
471 		String s = "http://g.msn.co.kr/0nwkokr0/00/19??" +
472 		"PS=10274&NC=10009&CE=42&CP=949&HL=" +
473 		"&#65533;&#65533;&#65533;?&#65533;&#65533;";
474 		assertNotNull("Encoded chars " + s, 
475 				UURIFactory.getInstance(s));
476 	}
477 	
478 	/***
479 	 * Test for java.net.URI parses %20 but getHost null
480 	 *
481 	 * See <a href="https://sourceforge.net/tracker/?func=detail&aid=927940&group_id=73833&atid=539099">[ 927940 ] java.net.URI parses %20 but getHost null</a>
482 	 */
483 	public final void testSpaceInHost() {
484 		boolean expectedException = false;
485 		try {
486 			UURIFactory.getInstance(
487 					"http://www.local-regions.odpm%20.gov.uk" +
488 			"/lpsa/challenge/pdf/propect.pdf");
489 		} catch (URIException e) {
490 			expectedException = true;
491 		}
492 		assertTrue("Did not fail with escaped space.", expectedException);
493 		
494 		expectedException = false;
495 		try {
496 			UURIFactory.getInstance(
497 					"http://www.local-regions.odpm .gov.uk" +
498 			"/lpsa/challenge/pdf/propect.pdf");
499 		} catch (URIException e) {
500 			expectedException = true;
501 		}
502 		assertTrue("Did not fail with real space.", expectedException);
503 	}
504 	
505 	/***
506 	 * Test for java.net.URI chokes on hosts_with_underscores.
507 	 *
508 	 * @see  <a href="https://sourceforge.net/tracker/?func=detail&aid=808270&group_id=73833&atid=539099">[ 808270 ] java.net.URI chokes on hosts_with_underscores</a>
509 	 * @throws URIException
510 	 */
511 	public final void testHostWithUnderscores() throws URIException {
512 		UURI uuri = UURIFactory.getInstance(
513 		"http://x_underscore_underscore.2u.com.tw/nonexistent_page.html");
514 		assertEquals("Failed get of host with underscore",
515 				"x_underscore_underscore.2u.com.tw", uuri.getHost());
516 	}
517 	
518 	
519 	/***
520 	 * Two dots for igor.
521 	 */
522 	public final void testTwoDots() {
523 		boolean expectedException = false;
524 		try {
525 			UURIFactory.getInstance(
526 			"http://x_underscore_underscore..2u.com/nonexistent_page.html");
527 		} catch (URIException e) {
528 			expectedException = true;
529 		}
530 		assertTrue("Two dots did not throw exception", expectedException);
531 	}
532 	
533 	/***
534 	 * Test for java.net.URI#getHost fails when leading digit.
535 	 *
536 	 * @see <a href="https://sourceforge.net/tracker/?func=detail&aid=910120&group_id=73833&atid=539099">[ 910120 ] java.net.URI#getHost fails when leading digit.</a>
537 	 * @throws URIException
538 	 */
539 	public final void testHostWithDigit() throws URIException {
540 		UURI uuri = UURIFactory.
541 		getInstance("http://0204chat.2u.com.tw/nonexistent_page.html");
542 		assertEquals("Failed get of host with digit",
543 				"0204chat.2u.com.tw", uuri.getHost());
544 	}
545 	
546 	/***
547 	 * Test for Constraining java URI class.
548 	 *
549 	 * @see <a href="https://sourceforge.net/tracker/?func=detail&aid=949548&group_id=73833&atid=539099">[ 949548 ] Constraining java URI class</a>
550 	 */
551 	public final void testPort() {
552 		checkBadPort("http://www.tyopaikat.com:a/robots.txt");
553 		checkBadPort("http://158.144.21.3:80808/robots.txt");
554 		checkBadPort("http://pdb.rutgers.edu:81.rutgers.edu/robots.txt");
555 		checkBadPort(
556 		    "https://webmail.gse.harvard.edu:9100robots.txt/robots.txt");
557 		checkBadPort(
558 		    "https://webmail.gse.harvard.edu:0/robots.txt/robots.txt");
559 	}
560 	
561 	/***
562 	 * Test bad port throws exception.
563 	 * @param uri URI with bad port to check.
564 	 */
565 	private void checkBadPort(String uri) {
566 		boolean exception = false;
567 		try {
568 			UURIFactory.getInstance(uri);
569 		}
570 		catch (URIException e) {
571 			exception = true;
572 		}
573 		assertTrue("Didn't throw exception: " + uri, exception);
574 	}
575 	
576 	/***
577 	 * Preserve userinfo capitalization.
578 	 * @throws URIException
579 	 */
580 	public final void testUserinfo() throws URIException {
581         final String authority = "stack:StAcK@www.tyopaikat.com";
582         final String uri = "http://" + authority + "/robots.txt";
583 		UURI uuri = UURIFactory.getInstance(uri);
584 		assertEquals("Authority not equal", uuri.getAuthority(),
585             authority);
586         /*
587         String tmp = uuri.toString();
588         assertTrue("URI not equal", tmp.equals(uri));
589         */
590 	}
591 
592 	/***
593 	 * Test user info + port
594 	 * @throws URIException
595 	 */
596 	public final void testUserinfoPlusPort() throws URIException {
597 		final String userInfo = "stack:StAcK";
598         final String authority = "www.tyopaikat.com";
599         final int port = 8080;
600         final String uri = "http://" + userInfo + "@" + authority + ":" + port 
601         	+ "/robots.txt";
602 		UURI uuri = UURIFactory.getInstance(uri);
603 		assertEquals("Host not equal", authority,uuri.getHost());
604 		assertEquals("Userinfo Not equal",userInfo,uuri.getUserinfo());
605 		assertEquals("Port not equal",port,uuri.getPort());
606 		assertEquals("Authority wrong","stack:StAcK@www.tyopaikat.com:8080",
607 				uuri.getAuthority());
608 		assertEquals("AuthorityMinusUserinfo wrong","www.tyopaikat.com:8080",
609 				uuri.getAuthorityMinusUserinfo());
610 		
611 	}
612     
613     public final void testRFC3986RelativeChange() throws URIException {
614         UURI base = UURIFactory.getInstance("http://a/b/c/d;p?q");
615         tryRelative(base, "?y",     "http://a/b/c/d;p?y");
616     }
617         
618     /***
619      * Tests from rfc3986
620      *
621      * <pre>
622      *       "g:h"           =  "g:h"
623      *       "g"             =  "http://a/b/c/g"
624      *       "./g"           =  "http://a/b/c/g"
625      *       "g/"            =  "http://a/b/c/g/"
626      *       "/g"            =  "http://a/g"
627      *       "//g"           =  "http://g"
628      *       "?y"            =  "http://a/b/c/d;p?y"
629      *       "g?y"           =  "http://a/b/c/g?y"
630      *       "#s"            =  "http://a/b/c/d;p?q#s"
631      *       "g#s"           =  "http://a/b/c/g#s"
632      *       "g?y#s"         =  "http://a/b/c/g?y#s"
633      *       ";x"            =  "http://a/b/c/;x"
634      *       "g;x"           =  "http://a/b/c/g;x"
635      *       "g;x?y#s"       =  "http://a/b/c/g;x?y#s"
636      *       ""              =  "http://a/b/c/d;p?q"
637      *       "."             =  "http://a/b/c/"
638      *       "./"            =  "http://a/b/c/"
639      *       ".."            =  "http://a/b/"
640      *       "../"           =  "http://a/b/"
641      *       "../g"          =  "http://a/b/g"
642      *       "../.."         =  "http://a/"
643      *       "../../"        =  "http://a/"
644      *       "../../g"       =  "http://a/g"
645      * </pre>
646      *
647      * @throws URIException
648      */
649     public final void testRFC3986Relative() throws URIException {
650         UURI base = UURIFactory.getInstance("http://a/b/c/d;p?q");
651         tryRelative(base, "g:h",    "g:h");
652         tryRelative(base, "g",      "http://a/b/c/g");
653         tryRelative(base, "./g",    "http://a/b/c/g");
654         tryRelative(base, "g/",     "http://a/b/c/g/");
655         tryRelative(base, "/g",     "http://a/g");
656         tryRelative(base, "//g",    "http://g");
657         tryRelative(base, "?y",     "http://a/b/c/d;p?y");
658         tryRelative(base, "g?y",    "http://a/b/c/g?y");
659         tryRelative(base, "#s",     "http://a/b/c/d;p?q#s");
660         tryRelative(base, "g#s",    "http://a/b/c/g#s");
661         tryRelative(base, "g?y#s",  "http://a/b/c/g?y#s");
662         tryRelative(base, ";x",     "http://a/b/c/;x");
663         tryRelative(base, "g;x",    "http://a/b/c/g;x");
664         tryRelative(base, "g;x?y#s","http://a/b/c/g;x?y#s");
665         tryRelative(base, "",       "http://a/b/c/d;p?q");
666         tryRelative(base, ".",      "http://a/b/c/");
667         tryRelative(base, "./",     "http://a/b/c/");
668         tryRelative(base, "..",     "http://a/b/");
669         tryRelative(base, "../",    "http://a/b/");
670         tryRelative(base, "../g",   "http://a/b/g");
671         tryRelative(base, "../..",  "http://a/");
672         tryRelative(base, "../../", "http://a/");
673         tryRelative(base, "../../g","http://a/g");
674     }
675     
676 	protected void tryRelative(UURI base, String relative, String expected) 
677     throws URIException {
678         UURI uuri = UURIFactory.getInstance(base, relative);
679         assertEquals("Derelativized " + relative + " gave " 
680                 + uuri + " not " + expected,
681                 uuri,UURIFactory.getInstance(expected));
682     }
683 
684     /***
685 	 * Tests from rfc2396 with amendments to accomodate differences
686 	 * intentionally added to make our URI handling like IEs.
687 	 *
688 	 * <pre>
689 	 *       g:h           =  g:h
690 	 *       g             =  http://a/b/c/g
691 	 *       ./g           =  http://a/b/c/g
692 	 *       g/            =  http://a/b/c/g/
693 	 *       /g            =  http://a/g
694 	 *       //g           =  http://g
695 	 *       ?y            =  http://a/b/c/?y
696 	 *       g?y           =  http://a/b/c/g?y
697 	 *       #s            =  (current document)#s
698 	 *       g#s           =  http://a/b/c/g#s
699 	 *       g?y#s         =  http://a/b/c/g?y#s
700 	 *       ;x            =  http://a/b/c/;x
701 	 *       g;x           =  http://a/b/c/g;x
702 	 *       g;x?y#s       =  http://a/b/c/g;x?y#s
703 	 *       .             =  http://a/b/c/
704 	 *       ./            =  http://a/b/c/
705 	 *       ..            =  http://a/b/
706 	 *       ../           =  http://a/b/
707 	 *       ../g          =  http://a/b/g
708 	 *       ../..         =  http://a/
709 	 *       ../../        =  http://a/
710 	 *       ../../g       =  http://a/g
711 	 * </pre>
712 	 *
713 	 * @throws URIException
714 	 */
715 	public final void testRFC2396Relative() throws URIException {
716 		UURI base = UURIFactory.
717 		getInstance("http://a/b/c/d;p?q");
718 		TreeMap<String,String> m = new TreeMap<String,String>();
719 		m.put("..", "http://a/b/");
720 		m.put("../", "http://a/b/");
721 		m.put("../g", "http://a/b/g");
722 		m.put("../..", "http://a/");
723 		m.put("../../", "http://a/");
724 		m.put("../../g", "http://a/g");
725 		m.put("g#s", "http://a/b/c/g#s");
726 		m.put("g?y#s ", "http://a/b/c/g?y#s");
727 		m.put(";x", "http://a/b/c/;x");
728 		m.put("g;x", "http://a/b/c/g;x");
729 		m.put("g;x?y#s", "http://a/b/c/g;x?y#s");
730 		m.put(".", "http://a/b/c/");
731 		m.put("./", "http://a/b/c/");
732 		m.put("g", "http://a/b/c/g");
733 		m.put("./g", "http://a/b/c/g");
734 		m.put("g/", "http://a/b/c/g/");
735 		m.put("/g", "http://a/g");
736 		m.put("//g", "http://g");
737         // CHANGED BY RFC3986
738 		// m.put("?y", "http://a/b/c/?y");
739 		m.put("g?y", "http://a/b/c/g?y");
740 		// EXTRAS beyond the RFC set.
741 		// TODO: That these resolve to a path of /a/g might be wrong.  Perhaps
742 		// it should be '/g'?.
743 		m.put("/../../../../../../../../g", "http://a/g");
744 		m.put("../../../../../../../../g", "http://a/g");
745 		m.put("../G", "http://a/b/G");
746 		for (Iterator i = m.keySet().iterator(); i.hasNext();) {
747 			String key = (String)i.next();
748 			String value = (String)m.get(key);
749 			UURI uuri = UURIFactory.getInstance(base, key);
750 			assertTrue("Unexpected " + key + " " + value + " " + uuri,
751 					uuri.equals(UURIFactory.getInstance(value)));
752 		}
753 	}
754 	
755 	/***
756 	 * A UURI should always be without a 'fragment' segment, which is
757 	 * unused and irrelevant for network fetches. 
758 	 *  
759 	 * See [ 970666 ] #anchor links not trimmed, and thus recrawled 
760 	 * 
761 	 * @throws URIException
762 	 */
763 	public final void testAnchors() throws URIException {
764 		UURI uuri = UURIFactory.
765 		getInstance("http://www.example.com/path?query#anchor");
766 		assertEquals("Not equal", "http://www.example.com/path?query",
767 				uuri.toString());
768 	}
769     
770 
771     /***
772      * Ensure that URI strings beginning with a colon are treated
773      * the same as browsers do (as relative, rather than as absolute
774      * with zero-length scheme). 
775      * 
776      * @throws URIException
777      */
778     public void testStartsWithColon() throws URIException {
779         UURI base = UURIFactory.getInstance("http://www.example.com/path/page");
780         UURI uuri = UURIFactory.getInstance(base,":foo");
781         assertEquals("derelativize starsWithColon",
782                 uuri.getURI(),
783                 "http://www.example.com/path/:foo");
784     }
785     
786     /***
787      * Ensure that relative URIs with colons in late positions 
788      * aren't mistakenly interpreted as absolute URIs with long, 
789      * illegal schemes. 
790      * 
791      * @throws URIException
792      */
793     public void testLateColon() throws URIException {
794         UURI base = UURIFactory.getInstance("http://www.example.com/path/page");
795         UURI uuri1 = UURIFactory.getInstance(base,"example.html;jsessionid=deadbeef:deadbeed?parameter=this:value");
796         assertEquals("derelativize lateColon",
797                 uuri1.getURI(),
798                 "http://www.example.com/path/example.html;jsessionid=deadbeef:deadbeed?parameter=this:value");
799         UURI uuri2 = UURIFactory.getInstance(base,"example.html?parameter=this:value");
800         assertEquals("derelativize lateColon",
801                 uuri2.getURI(),
802                 "http://www.example.com/path/example.html?parameter=this:value");
803     }
804     
805     /***
806      * Ensure that stray trailing '%' characters do not prevent
807      * UURI instances from being created, and are reasonably 
808      * escaped when encountered. 
809      *
810      * @throws URIException
811      */
812     public void testTrailingPercents() throws URIException {
813         String plainPath = "http://www.example.com/path%";
814         UURI plainPathUuri = UURIFactory.getInstance(plainPath);
815         assertEquals("plainPath getURI", plainPath, plainPathUuri.getURI());
816         assertEquals("plainPath getEscapedURI", 
817                 "http://www.example.com/path%", // browsers don't escape '%'
818                 plainPathUuri.getEscapedURI());
819         
820         String partiallyEscapedPath = "http://www.example.com/pa%20th%";
821         UURI partiallyEscapedPathUuri = UURIFactory.getInstance(
822                 partiallyEscapedPath);
823 //        assertEquals("partiallyEscapedPath getURI", 
824 //                "http://www.example.com/pa th%", // TODO: is this desirable?
825 ////              partiallyEscapedPath,
826 //                partiallyEscapedPathUuri.getURI());
827         assertEquals("partiallyEscapedPath getEscapedURI", 
828                 "http://www.example.com/pa%20th%",
829                 partiallyEscapedPathUuri.getEscapedURI());
830         
831         String plainQueryString = "http://www.example.com/path?q=foo%";
832         UURI plainQueryStringUuri = UURIFactory.getInstance(
833                 plainQueryString);
834 //        assertEquals("plainQueryString getURI", 
835 //                plainQueryString,
836 //                plainQueryStringUuri.getURI());
837         assertEquals("plainQueryString getEscapedURI", 
838                 "http://www.example.com/path?q=foo%",
839                 plainQueryStringUuri.getEscapedURI());        
840         
841         String partiallyEscapedQueryString = 
842             "http://www.example.com/pa%20th?q=foo%";
843         UURI partiallyEscapedQueryStringUuri = UURIFactory.getInstance(
844                 partiallyEscapedQueryString);
845         assertEquals("partiallyEscapedQueryString getURI", 
846                 "http://www.example.com/pa th?q=foo%",
847                 partiallyEscapedQueryStringUuri.getURI());
848         assertEquals("partiallyEscapedQueryString getEscapedURI", 
849                 "http://www.example.com/pa%20th?q=foo%",
850                 partiallyEscapedQueryStringUuri.getEscapedURI());  
851     }
852     
853     /***
854      * Ensure that stray '%' characters do not prevent
855      * UURI instances from being created, and are reasonably 
856      * escaped when encountered. 
857      *
858      * @throws URIException
859      */
860     public void testStrayPercents() throws URIException {
861         String oneStray = "http://www.example.com/pa%th";
862         UURI oneStrayUuri = UURIFactory.getInstance(oneStray);
863         assertEquals("oneStray getURI", oneStray, oneStrayUuri.getURI());
864         assertEquals("oneStray getEscapedURI", 
865                 "http://www.example.com/pa%th", // browsers don't escape '%'
866                 oneStrayUuri.getEscapedURI());
867         
868         String precededByValidEscape = "http://www.example.com/pa%20th%way";
869         UURI precededByValidEscapeUuri = UURIFactory.getInstance(
870                 precededByValidEscape);
871         assertEquals("precededByValidEscape getURI", 
872                 "http://www.example.com/pa th%way", // getURI interprets escapes
873                 precededByValidEscapeUuri.getURI());
874         assertEquals("precededByValidEscape getEscapedURI", 
875                 "http://www.example.com/pa%20th%way",
876                 precededByValidEscapeUuri.getEscapedURI());
877         
878         String followedByValidEscape = "http://www.example.com/pa%th%20way";
879         UURI followedByValidEscapeUuri = UURIFactory.getInstance(
880                 followedByValidEscape);
881         assertEquals("followedByValidEscape getURI", 
882                 "http://www.example.com/pa%th way", // getURI interprets escapes
883                 followedByValidEscapeUuri.getURI());
884         assertEquals("followedByValidEscape getEscapedURI", 
885                 "http://www.example.com/pa%th%20way",
886                 followedByValidEscapeUuri.getEscapedURI());        
887     }
888     
889     public void testEscapingNotNecessary() throws URIException {
890         String escapesUnnecessary = 
891             "http://www.example.com/misc;reserved:chars@that&don't=need"
892             +"+escaping$even,though!you(might)initially?think#so";
893         // expect everything but the #fragment
894         String expected = escapesUnnecessary.substring(0, escapesUnnecessary
895                 .length() - 3);
896         assertEquals("escapes unnecessary", 
897                 expected, 
898                 UURIFactory.getInstance(escapesUnnecessary).toString());
899     }
900     
901     public void testIdn() throws URIException {
902         // See http://www.josefsson.org/idn.php.
903         String idn1 = new String("http://räksmörgås.josefßon.org/");
904         String puny1 = "http://xn--rksmrgs-5wao1o.josefsson.org/";
905         assertEquals("encoding of " + idn1, puny1, UURIFactory
906                 .getInstance(idn1).toString());
907         String idn2 = "http://www.pølse.dk/";
908         String puny2 = "http://www.xn--plse-gra.dk/";
909         assertEquals("encoding of " + idn2, puny2, UURIFactory
910                 .getInstance(idn2).toString());
911         String idn3 = "http://例子.測試";
912         String puny3 = "http://xn--fsqu00a.xn--g6w251d/";
913         assertEquals("encoding of " + idn3, puny3, UURIFactory
914                 .getInstance(idn3).toString());
915         
916     }
917     
918     public void testNewLineInURL() throws URIException {
919     	UURI uuri = UURIFactory.getInstance("http://www.ar\rchive\n." +
920     	    "org/i\n\n\r\rndex.html");
921     	assertEquals("http://www.archive.org/index.html", uuri.toString());
922     }
923     
924     public void testTabsInURL() throws URIException {
925         UURI uuri = UURIFactory.getInstance("http://www.ar\tchive\t." +
926             "org/i\t\r\n\tndex.html");
927         assertEquals("http://www.archive.org/index.html", uuri.toString());
928     }
929     
930     public void testQueryEscaping() throws URIException {
931         UURI uuri = UURIFactory.getInstance(
932             "http://www.yahoo.com/foo?somechars!@$%^&*()_-+={[}]|\'\";:/?.>,<");
933         assertEquals(
934             // tests in FF1.5 indicate it only escapes " < > 
935             "http://www.yahoo.com/foo?somechars!@$%^&*()_-+={[}]|\'%22;:/?.%3E,%3C",
936             uuri.toString());
937     }
938     
939     /***
940      * Check that our 'normalization' does same as Nutch's
941      * Below before-and-afters were taken from the nutch urlnormalizer-basic
942      * TestBasicURLNormalizer class  (December 2006, Nutch 0.9-dev).
943      * @throws URIException
944      */
945     public void testSameAsNutchURLFilterBasic() throws URIException {
946         assertEquals(UURIFactory.getInstance(" http://foo.com/ ").toString(),
947             "http://foo.com/");
948 
949         // check that protocol is lower cased
950         assertEquals(UURIFactory.getInstance("HTTP://foo.com/").toString(),
951             "http://foo.com/");
952         
953         // check that host is lower cased
954         assertEquals(UURIFactory.
955                 getInstance("http://Foo.Com/index.html").toString(),
956             "http://foo.com/index.html");
957         assertEquals(UURIFactory.
958                 getInstance("http://Foo.Com/index.html").toString(),
959             "http://foo.com/index.html");
960 
961         // check that port number is normalized
962         assertEquals(UURIFactory.
963                 getInstance("http://foo.com:80/index.html").toString(),
964             "http://foo.com/index.html");
965         assertEquals(UURIFactory.getInstance("http://foo.com:81/").toString(),
966             "http://foo.com:81/");
967 
968         // check that null path is normalized
969         assertEquals(UURIFactory.getInstance("http://foo.com").toString(),
970             "http://foo.com/");
971 
972         // check that references are removed
973         assertEquals(UURIFactory.
974                 getInstance("http://foo.com/foo.html#ref").toString(),
975             "http://foo.com/foo.html");
976 
977         //     // check that encoding is normalized
978         //     normalizeTest("http://foo.com/%66oo.html", "http://foo.com/foo.html");
979 
980         // check that unnecessary "../" are removed
981         assertEquals(UURIFactory.
982                 getInstance("http://foo.com/aa/../").toString(),
983             "http://foo.com/" );
984         assertEquals(UURIFactory.
985                 getInstance("http://foo.com/aa/bb/../").toString(),
986             "http://foo.com/aa/");
987 
988         /* We fail this one.  Here we produce: 'http://foo.com/'." target="alexandria_uri">http://foo.com/'.
989         assertEquals(UURIFactory.
990                 getInstance("http://foo.com/aa/..").toString(),
991             "http://foo.com/aa/..");
992          */
993         
994         assertEquals(UURIFactory.
995             getInstance("http://foo.com/aa/bb/cc/../../foo.html").toString(),
996                 "http://foo.com/aa/foo.html");
997         assertEquals(UURIFactory.
998             getInstance("http://foo.com/aa/bb/../cc/dd/../ee/foo.html").
999                 toString(),
1000                     "http://foo.com/aa/cc/ee/foo.html");
1001         assertEquals(UURIFactory.
1002             getInstance("http://foo.com/../foo.html").toString(),
1003                 "http://foo.com/foo.html" );
1004         assertEquals(UURIFactory.
1005             getInstance("http://foo.com/../../foo.html").toString(),
1006                 "http://foo.com/foo.html" );
1007         assertEquals(UURIFactory.
1008             getInstance("http://foo.com/../aa/../foo.html").toString(),
1009                 "http://foo.com/foo.html" );
1010         assertEquals(UURIFactory.
1011             getInstance("http://foo.com/aa/../../foo.html").toString(),
1012                 "http://foo.com/foo.html" );
1013         assertEquals(UURIFactory.
1014                 getInstance("http://foo.com/aa/../bb/../foo.html/../../").
1015                     toString(),
1016             "http://foo.com/" );
1017         assertEquals(UURIFactory.getInstance("http://foo.com/../aa/foo.html").
1018             toString(), "http://foo.com/aa/foo.html" );
1019         assertEquals(UURIFactory.
1020                 getInstance("http://foo.com/../aa/../foo.html").toString(),
1021             "http://foo.com/foo.html" );
1022         assertEquals(UURIFactory.
1023                 getInstance("http://foo.com/a..a/foo.html").toString(),
1024             "http://foo.com/a..a/foo.html" );
1025         assertEquals(UURIFactory.
1026                 getInstance("http://foo.com/a..a/../foo.html").toString(),
1027             "http://foo.com/foo.html" );
1028         assertEquals(UURIFactory.
1029             getInstance("http://foo.com/foo.foo/../foo.html").toString(),
1030                  "http://foo.com/foo.html" );
1031     }
1032     
1033     public void testHttpSchemeColonSlash() {
1034     	boolean exception = false;
1035     	try {
1036     		UURIFactory.getInstance("https:/");
1037     	} catch (URIException e) {
1038     		exception = true;
1039     	}
1040     	assertTrue("Didn't throw exception when one expected", exception);
1041     	exception = false;
1042     	try {
1043     		UURIFactory.getInstance("http://");
1044     	} catch (URIException e) {
1045     		exception = true;
1046     	}
1047     	assertTrue("Didn't throw exception when one expected", exception);
1048     }
1049     
1050     public void testNakedHttpsSchemeColon() {
1051         boolean exception = false;
1052         try {
1053             UURIFactory.getInstance("https:");
1054         } catch (URIException e) {
1055             exception = true;
1056         }
1057         assertTrue("Didn't throw exception when one expected", exception);
1058         exception = false;
1059         try {
1060             UURI base = UURIFactory.getInstance("http://www.example.com");
1061             UURIFactory.getInstance(base, "https:");
1062         } catch (URIException e) {
1063             exception = true;
1064         }
1065         assertTrue("Didn't throw exception when one expected", exception);
1066     }
1067     
1068     /***
1069      * Test motivated by [#HER-616] The UURI class may throw 
1070      * NullPointerException in getReferencedHost()
1071      * 
1072      * @throws URIException
1073      */
1074     public void testMissingHttpColon() throws URIException {
1075         String suspectUri = "http//www.test.foo";
1076         UURI base = UURIFactory.getInstance("http://www.example.com");
1077         boolean exceptionThrown = false; 
1078         try {
1079             UURI badUuri = UURIFactory.getInstance(suspectUri);
1080             badUuri.getReferencedHost(); // not reached
1081         } catch (URIException e) {
1082             // should get relative-uri-no-base exception
1083             exceptionThrown = true;
1084         } finally {
1085             assertTrue("expected exception not thrown",exceptionThrown);
1086         }
1087         UURI goodUuri = UURIFactory.getInstance(base,suspectUri);
1088         goodUuri.getReferencedHost();
1089     }
1090     
1091     /***
1092      * Test bad port throws URIException not NumberFormatException
1093      */
1094     public void testExtremePort() {
1095         try {
1096             UURI uuri = UURIFactory.getInstance("http://Tel.:016099117464");
1097             System.out.println(uuri); 
1098             fail("expected exception not thrown");
1099         } catch (URIException ue){
1100             // expected
1101         }
1102     }
1103 }