org.apache.nutch.crawl
Class GeneratorJob

java.lang.Object
  extended by org.apache.hadoop.conf.Configured
      extended by org.apache.nutch.util.NutchTool
          extended by org.apache.nutch.crawl.GeneratorJob
All Implemented Interfaces:
Configurable, Tool

public class GeneratorJob
extends NutchTool
implements Tool


Nested Class Summary
static class GeneratorJob.SelectorEntry
           
static class GeneratorJob.SelectorEntryComparator
           
 
Field Summary
static String BATCH_ID
           
static String GENERATE_UPDATE_CRAWLDB
           
static String GENERATOR_COUNT_MODE
           
static String GENERATOR_COUNT_VALUE_DOMAIN
           
static String GENERATOR_COUNT_VALUE_HOST
           
static String GENERATOR_COUNT_VALUE_IP
           
static String GENERATOR_CUR_TIME
           
static String GENERATOR_DELAY
           
static String GENERATOR_FILTER
           
static String GENERATOR_MAX_COUNT
           
static String GENERATOR_MIN_SCORE
           
static String GENERATOR_NORMALISE
           
static String GENERATOR_RANDOM_SEED
           
static String GENERATOR_TOP_N
           
static org.slf4j.Logger LOG
           
 
Fields inherited from class org.apache.nutch.util.NutchTool
currentJob, currentJobNum, numJobs, results, status
 
Constructor Summary
GeneratorJob()
           
GeneratorJob(Configuration conf)
           
 
Method Summary
 String generate(long topN, long curTime, boolean filter, boolean norm)
          Marks URLs as ready for fetching.
static void main(String[] args)
           
 Map<String,Object> run(Map<String,Object> args)
          Runs the tool, using a map of arguments.
 int run(String[] args)
           
 
Methods inherited from class org.apache.nutch.util.NutchTool
getProgress, getStatus, killJob, stopJob
 
Methods inherited from class org.apache.hadoop.conf.Configured
getConf, setConf
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 
Methods inherited from interface org.apache.hadoop.conf.Configurable
getConf, setConf
 

Field Detail

GENERATE_UPDATE_CRAWLDB

public static final String GENERATE_UPDATE_CRAWLDB
See Also:
Constant Field Values

GENERATOR_MIN_SCORE

public static final String GENERATOR_MIN_SCORE
See Also:
Constant Field Values

GENERATOR_FILTER

public static final String GENERATOR_FILTER
See Also:
Constant Field Values

GENERATOR_NORMALISE

public static final String GENERATOR_NORMALISE
See Also:
Constant Field Values

GENERATOR_MAX_COUNT

public static final String GENERATOR_MAX_COUNT
See Also:
Constant Field Values

GENERATOR_COUNT_MODE

public static final String GENERATOR_COUNT_MODE
See Also:
Constant Field Values

GENERATOR_COUNT_VALUE_DOMAIN

public static final String GENERATOR_COUNT_VALUE_DOMAIN
See Also:
Constant Field Values

GENERATOR_COUNT_VALUE_HOST

public static final String GENERATOR_COUNT_VALUE_HOST
See Also:
Constant Field Values

GENERATOR_COUNT_VALUE_IP

public static final String GENERATOR_COUNT_VALUE_IP
See Also:
Constant Field Values

GENERATOR_TOP_N

public static final String GENERATOR_TOP_N
See Also:
Constant Field Values

GENERATOR_CUR_TIME

public static final String GENERATOR_CUR_TIME
See Also:
Constant Field Values

GENERATOR_DELAY

public static final String GENERATOR_DELAY
See Also:
Constant Field Values

GENERATOR_RANDOM_SEED

public static final String GENERATOR_RANDOM_SEED
See Also:
Constant Field Values

BATCH_ID

public static final String BATCH_ID
See Also:
Constant Field Values

LOG

public static final org.slf4j.Logger LOG
Constructor Detail

GeneratorJob

public GeneratorJob()

GeneratorJob

public GeneratorJob(Configuration conf)
Method Detail

run

public Map<String,Object> run(Map<String,Object> args)
                       throws Exception
Description copied from class: NutchTool
Runs the tool, using a map of arguments. May return results, or null.

Specified by:
run in class NutchTool
Throws:
Exception

generate

public String generate(long topN,
                       long curTime,
                       boolean filter,
                       boolean norm)
                throws Exception
Marks URLs as ready for fetching.

Throws:
ClassNotFoundException
InterruptedException
Exception

run

public int run(String[] args)
        throws Exception
Specified by:
run in interface Tool
Throws:
Exception

main

public static void main(String[] args)
                 throws Exception
Throws:
Exception


Copyright © 2012 The Apache Software Foundation