modnlp.idx.inverted
public class TokeniserGNU extends Tokeniser
encoding, indexPuntuation, originalText, SEPTKARR, SEPTOKEN, tagIndexing, tokenMap, verbose
Constructor and Description |
---|
TokeniserGNU(java.io.File t,
java.lang.String e) |
TokeniserGNU(java.lang.String t) |
TokeniserGNU(java.net.URL t,
java.lang.String e) |
Modifier and Type | Method and Description |
---|---|
java.lang.String |
getIgnoredElements()
Get the
IgnoredElements value. |
boolean |
getTagIndexing() |
static void |
main(java.lang.String[] args) |
void |
setIgnoredElements(java.lang.String newIgnoredElements)
Set the
IgnoredElements value. |
void |
setTagIndexing(boolean v) |
void |
tokenise()
tokenise : Very basic tokenisation; Serious tokenisers
must override this method. |
disbar, fixType, getEncoding, getIndexPuntuation, getOriginalText, getTokenIndex, getTokenMap, getVerbose, isBar, setEncoding, setIndexPuntuation, setTokenMap, setVerbose, split, splitWordOnly
public TokeniserGNU(java.lang.String t)
public TokeniserGNU(java.io.File t, java.lang.String e) throws java.io.IOException
Throws: java.io.IOException
public TokeniserGNU(java.net.URL t, java.lang.String e) throws java.io.IOException
Throws: java.io.IOException
public final java.lang.String getIgnoredElements()
Get the IgnoredElements value.
Returns: a String value
public final void setIgnoredElements(java.lang.String newIgnoredElements)
Set the IgnoredElements value.
Overrides: setIgnoredElements in class Tokeniser
Parameters: newIgnoredElements - The new IgnoredElements value.
public void tokenise()
Description copied from class: Tokeniser
tokenise: Very basic tokenisation; Serious tokenisers
must override this method. Note that positions in the tokenMap
here correspond to the ORDER in which the token appears in
originalText, not its actual OFFSET.
Overrides: tokenise in class Tokeniser
See Also: [link target not preserved in this text] for a proper implementation.
public boolean getTagIndexing()
Overrides: getTagIndexing in class Tokeniser
public void setTagIndexing(boolean v)
Overrides: setTagIndexing in class Tokeniser
public static void main(java.lang.String[] args)