|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object org.apache.nutch.protocol.http.api.HttpBase
public abstract class HttpBase
Field Summary | |
---|---|
protected String |
accept
The "Accept" request header value. |
protected String |
acceptLanguage
The "Accept-Language" request header value. |
static int |
BUFFER_SIZE
|
protected boolean |
ip_header
The "_ip" request header value. |
protected int |
maxContent
The length limit for downloaded content, in bytes. |
protected String |
proxyHost
The proxy hostname. |
protected int |
proxyPort
The proxy port. |
protected int |
timeout
The network timeout in milliseconds. |
protected boolean |
useHttp11
Do we use HTTP/1.1? |
protected boolean |
useProxy
Indicates if a proxy is used |
protected String |
userAgent
The Nutch 'User-Agent' request header |
Fields inherited from interface org.apache.nutch.protocol.Protocol |
---|
CHECK_BLOCKING, CHECK_ROBOTS, X_POINT_ID |
Constructor Summary | |
---|---|
HttpBase()
Creates a new instance of HttpBase |
|
HttpBase(org.slf4j.Logger logger)
Creates a new instance of HttpBase |
Method Summary | |
---|---|
String |
getAccept()
|
String |
getAcceptLanguage()
Value of "Accept-Language" request header sent by Nutch. |
Configuration |
getConf()
|
boolean |
getIP_Header()
|
int |
getMaxContent()
|
ProtocolOutput |
getProtocolOutput(String url,
WebPage page)
Returns the Content for a fetchlist entry. |
String |
getProxyHost()
|
int |
getProxyPort()
|
protected abstract Response |
getResponse(URL url,
WebPage page,
boolean followRedirects)
|
RobotRules |
getRobotRules(String url,
WebPage page)
Retrieve robot rules applicable for this url. |
int |
getTimeout()
|
boolean |
getUseHttp11()
|
String |
getUserAgent()
|
protected void |
logConf()
|
protected static void |
main(HttpBase http,
String[] args)
|
byte[] |
processDeflateEncoded(byte[] compressed,
URL url)
|
byte[] |
processGzipEncoded(byte[] compressed,
URL url)
|
void |
setConf(Configuration conf)
|
boolean |
useProxy()
|
Methods inherited from class java.lang.Object |
---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
Methods inherited from interface org.apache.nutch.plugin.FieldPluggable |
---|
getFields |
Field Detail |
---|
public static final int BUFFER_SIZE
protected String proxyHost
protected int proxyPort
protected boolean useProxy
protected int timeout
protected int maxContent
protected String userAgent
protected String acceptLanguage
protected String accept
protected boolean ip_header
protected boolean useHttp11
Constructor Detail |
---|
public HttpBase()
public HttpBase(org.slf4j.Logger logger)
Method Detail |
---|
public void setConf(Configuration conf)
Specified by: setConf in interface Configurable
public Configuration getConf()
Specified by: getConf in interface Configurable
public ProtocolOutput getProtocolOutput(String url, WebPage page)
Description copied from interface: Protocol
Returns the Content for a fetchlist entry.
Specified by: getProtocolOutput in interface Protocol
public String getProxyHost()
public int getProxyPort()
public boolean useProxy()
public int getTimeout()
public int getMaxContent()
public String getUserAgent()
public String getAcceptLanguage()
public String getAccept()
public boolean getUseHttp11()
public boolean getIP_Header()
protected void logConf()
public byte[] processGzipEncoded(byte[] compressed, URL url) throws IOException
Throws: IOException
public byte[] processDeflateEncoded(byte[] compressed, URL url) throws IOException
Throws: IOException
protected static void main(HttpBase http, String[] args) throws Exception
Throws: Exception
protected abstract Response getResponse(URL url, WebPage page, boolean followRedirects) throws ProtocolException, IOException
Throws: ProtocolException, IOException
public RobotRules getRobotRules(String url, WebPage page)
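Description copied from interface: Protocol
Retrieve robot rules applicable for this url.
Specified by: getRobotRules in interface Protocol
Parameters: url - url to check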
|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |