View Javadoc

1   /* MemFPMergeUriUniqFilter
2   *
3   * $Id: MemFPMergeUriUniqFilter.java 4647 2006-09-22 18:39:39Z paul_jack $
4   *
5   * Created on Dec 14, 2005
6   *
7   * Copyright (C) 2005 Internet Archive.
8   *
9   * This file is part of the Heritrix web crawler (crawler.archive.org).
10  *
11  * Heritrix is free software; you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser Public License as published by
13  * the Free Software Foundation; either version 2.1 of the License, or
14  * any later version.
15  *
16  * Heritrix is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19  * GNU Lesser Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser Public License
22  * along with Heritrix; if not, write to the Free Software
23  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
24  */ 
25  package org.archive.crawler.util;
26  
27  import it.unimi.dsi.fastutil.longs.LongArrayList;
28  import it.unimi.dsi.fastutil.longs.LongIterator;
29  
30  /***
31   * Crude all-in-memory FP-merging UriUniqFilter. 
32   * 
33   * @author gojomo
34   */
35  public class MemFPMergeUriUniqFilter extends FPMergeUriUniqFilter {
36      protected LongArrayList allFps = new LongArrayList();
37      protected LongArrayList newFps;
38      
39      /* (non-Javadoc)
40       * @see org.archive.crawler.util.FPMergeUriUniqFilter#beginFpMerge()
41       */
42      protected LongIterator beginFpMerge() {
43          newFps = new LongArrayList((int) (allFps.size()+(pending()/2)));
44          return allFps.iterator();
45      }
46  
47      /* (non-Javadoc)
48       * @see org.archive.crawler.util.FPMergeUriUniqFilter#addNewFp(java.lang.Long)
49       */
50      protected void addNewFp(long currFp) {
51          newFps.add(currFp);
52      }
53  
54      /* (non-Javadoc)
55       * @see org.archive.crawler.util.FPMergeUriUniqFilter#finishFpMerge()
56       */
57      protected void finishFpMerge() {
58          allFps = newFps;
59          newFps = null; 
60      }
61  
62      /* (non-Javadoc)
63       * @see org.archive.crawler.datamodel.UriUniqFilter#count()
64       */
65      public long count() {
66          return allFps.size();
67      }
68  
69  }