org.tartarus.snowball
Class SnowballProgram

java.lang.Object
  extended by org.tartarus.snowball.SnowballProgram
Direct Known Subclasses:
ArmenianStemmer, BasqueStemmer, CatalanStemmer, DanishStemmer, DutchStemmer, EnglishStemmer, FinnishStemmer, FrenchStemmer, German2Stemmer, GermanStemmer, HungarianStemmer, IrishStemmer, ItalianStemmer, KpStemmer, LovinsStemmer, NorwegianStemmer, PorterStemmer, PortugueseStemmer, RomanianStemmer, RussianStemmer, SpanishStemmer, SwedishStemmer, TurkishStemmer

public abstract class SnowballProgram
extends Object

This is rev 502 of the Snowball SVN trunk, with the following modifications:
- made abstract and introduced the abstract method stem() to avoid expensive reflection in the filter class
- refactored StringBuffers to StringBuilder
- uses char[] as the buffer instead of StringBuffer/StringBuilder
- eq_s, eq_s_b, insert, and replace_s take CharSequence, like eq_v and eq_v_b
- reflection calls (Lovins, etc.) use EMPTY_ARGS/EMPTY_PARAMS


Field Summary
protected  int bra
           
protected  int cursor
           
protected  int ket
           
protected  int limit
           
protected  int limit_backward
           
 
Constructor Summary
protected SnowballProgram()
           
 
Method Summary
protected  StringBuilder assign_to(StringBuilder s)
           
protected  void copy_from(SnowballProgram other)
           
protected  boolean eq_s_b(int s_size, CharSequence s)
           
protected  boolean eq_s_b(int s_size, String s)
          Deprecated. Kept only for binary backward compatibility; will be removed in Lucene 4.0.
protected  boolean eq_s(int s_size, CharSequence s)
           
protected  boolean eq_s(int s_size, String s)
          Deprecated. Kept only for binary backward compatibility; will be removed in Lucene 4.0.
protected  boolean eq_v_b(CharSequence s)
           
protected  boolean eq_v_b(StringBuilder s)
          Deprecated. Kept only for binary backward compatibility; will be removed in Lucene 4.0.
protected  boolean eq_v(CharSequence s)
           
protected  boolean eq_v(StringBuilder s)
          Deprecated. Kept only for binary backward compatibility; will be removed in Lucene 4.0.
protected  int find_among_b(Among[] v, int v_size)
           
protected  int find_among(Among[] v, int v_size)
           
 String getCurrent()
          Get the current string.
 char[] getCurrentBuffer()
          Get the current buffer containing the stem.
 int getCurrentBufferLength()
          Get the valid length of the character array in getCurrentBuffer().
protected  boolean in_grouping_b(char[] s, int min, int max)
           
protected  boolean in_grouping(char[] s, int min, int max)
           
protected  boolean in_range_b(int min, int max)
           
protected  boolean in_range(int min, int max)
           
protected  void insert(int c_bra, int c_ket, CharSequence s)
           
protected  void insert(int c_bra, int c_ket, String s)
          Deprecated. Kept only for binary backward compatibility; will be removed in Lucene 4.0.
protected  void insert(int c_bra, int c_ket, StringBuilder s)
          Deprecated. Kept only for binary backward compatibility; will be removed in Lucene 4.0.
protected  boolean out_grouping_b(char[] s, int min, int max)
           
protected  boolean out_grouping(char[] s, int min, int max)
           
protected  boolean out_range_b(int min, int max)
           
protected  boolean out_range(int min, int max)
           
protected  int replace_s(int c_bra, int c_ket, CharSequence s)
           
protected  int replace_s(int c_bra, int c_ket, String s)
          Deprecated. Kept only for binary backward compatibility; will be removed in Lucene 4.0.
 void setCurrent(char[] text, int length)
          Set the current string.
 void setCurrent(String value)
          Set the current string.
protected  void slice_check()
           
protected  void slice_del()
           
protected  void slice_from(CharSequence s)
           
protected  void slice_from(String s)
          Deprecated. Kept only for binary backward compatibility; will be removed in Lucene 4.0.
protected  void slice_from(StringBuilder s)
          Deprecated. Kept only for binary backward compatibility; will be removed in Lucene 4.0.
protected  StringBuilder slice_to(StringBuilder s)
           
abstract  boolean stem()
           
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

cursor

protected int cursor

limit

protected int limit

limit_backward

protected int limit_backward

bra

protected int bra

ket

protected int ket
Constructor Detail

SnowballProgram

protected SnowballProgram()
Method Detail

stem

public abstract boolean stem()

setCurrent

public void setCurrent(String value)
Set the current string.


getCurrent

public String getCurrent()
Get the current string.


setCurrent

public void setCurrent(char[] text,
                       int length)
Set the current string.

Parameters:
text - character array containing input
length - valid length of text.

getCurrentBuffer

public char[] getCurrentBuffer()
Get the current buffer containing the stem.

NOTE: this may be a reference to a different character array than the one originally provided with setCurrent, in the exceptional case that stemming produced a longer intermediate or result string.

It is necessary to use getCurrentBufferLength() to determine the valid length of the returned buffer. For example, many words are stemmed simply by subtracting from the length to remove suffixes.

See Also:
getCurrentBufferLength()

getCurrentBufferLength

public int getCurrentBufferLength()
Get the valid length of the character array in getCurrentBuffer().

Returns:
valid length of the array.

copy_from

protected void copy_from(SnowballProgram other)

in_grouping

protected boolean in_grouping(char[] s,
                              int min,
                              int max)

in_grouping_b

protected boolean in_grouping_b(char[] s,
                                int min,
                                int max)

out_grouping

protected boolean out_grouping(char[] s,
                               int min,
                               int max)

out_grouping_b

protected boolean out_grouping_b(char[] s,
                                 int min,
                                 int max)

in_range

protected boolean in_range(int min,
                           int max)

in_range_b

protected boolean in_range_b(int min,
                             int max)

out_range

protected boolean out_range(int min,
                            int max)

out_range_b

protected boolean out_range_b(int min,
                              int max)

eq_s

protected boolean eq_s(int s_size,
                       CharSequence s)

eq_s

@Deprecated
protected boolean eq_s(int s_size,
                       String s)
Deprecated. Kept only for binary backward compatibility; will be removed in Lucene 4.0. Use eq_s(int, CharSequence) instead.


eq_s_b

protected boolean eq_s_b(int s_size,
                         CharSequence s)

eq_s_b

@Deprecated
protected boolean eq_s_b(int s_size,
                         String s)
Deprecated. Kept only for binary backward compatibility; will be removed in Lucene 4.0. Use eq_s_b(int, CharSequence) instead.


eq_v

protected boolean eq_v(CharSequence s)

eq_v

@Deprecated
protected boolean eq_v(StringBuilder s)
Deprecated. Kept only for binary backward compatibility; will be removed in Lucene 4.0. Use eq_v(CharSequence) instead.


eq_v_b

protected boolean eq_v_b(CharSequence s)

eq_v_b

@Deprecated
protected boolean eq_v_b(StringBuilder s)
Deprecated. Kept only for binary backward compatibility; will be removed in Lucene 4.0. Use eq_v_b(CharSequence) instead.


find_among

protected int find_among(Among[] v,
                         int v_size)

find_among_b

protected int find_among_b(Among[] v,
                           int v_size)

replace_s

protected int replace_s(int c_bra,
                        int c_ket,
                        CharSequence s)

replace_s

@Deprecated
protected int replace_s(int c_bra,
                        int c_ket,
                        String s)
Deprecated. Kept only for binary backward compatibility; will be removed in Lucene 4.0. Use replace_s(int, int, CharSequence) instead.


slice_check

protected void slice_check()

slice_from

protected void slice_from(CharSequence s)

slice_from

@Deprecated
protected void slice_from(String s)
Deprecated. Kept only for binary backward compatibility; will be removed in Lucene 4.0. Use slice_from(CharSequence) instead.


slice_from

@Deprecated
protected void slice_from(StringBuilder s)
Deprecated. Kept only for binary backward compatibility; will be removed in Lucene 4.0. Use slice_from(CharSequence) instead.


slice_del

protected void slice_del()

insert

protected void insert(int c_bra,
                      int c_ket,
                      CharSequence s)

insert

@Deprecated
protected void insert(int c_bra,
                      int c_ket,
                      String s)
Deprecated. Kept only for binary backward compatibility; will be removed in Lucene 4.0. Use insert(int, int, CharSequence) instead.


insert

@Deprecated
protected void insert(int c_bra,
                      int c_ket,
                      StringBuilder s)
Deprecated. Kept only for binary backward compatibility; will be removed in Lucene 4.0. Use insert(int, int, CharSequence) instead.


slice_to

protected StringBuilder slice_to(StringBuilder s)

assign_to

protected StringBuilder assign_to(StringBuilder s)