modnlp.dstruct
public class BagOfWords extends java.util.HashMap
Constructor and Description |
---|
BagOfWords() |
BagOfWords(java.lang.String text) |
BagOfWords(java.lang.String text,
StopWordList swlist) |
Modifier and Type | Method and Description |
---|---|
int |
addToken(java.lang.String type) |
void |
addTokens(java.lang.String text) |
void |
addTokens(java.lang.String text,
StopWordList swlist) |
void |
addTypesToFileCount(java.lang.String text)
addTypesToFileCount: tokenize the text and add 1 to the frequency list for each
type (not each token) it contains; the text is assumed to be a single file.
|
boolean |
containsTerm(java.lang.String type) |
static java.util.Set |
extractTermCollection(WordFrequencyPair[] wfp) |
static java.lang.String[] |
extractTermSet(WordFrequencyPair[] wfp) |
static java.lang.String[] |
extractTermSet(WordScorePair[] wfp) |
int |
getCount(java.lang.String type) |
java.lang.String[] |
getTermSet() |
WordFrequencyPair[] |
getWordFrequencyArray()
Return an array of comparable objects (WordFrequencyPair instances).
|
WordScorePair[] |
getWordScoreArray()
Return an array of objects comparable by double-precision
floating point numbers
|
boolean |
isIgnoreCase()
Get the value of ignoreCase.
|
java.util.Set |
keySet() |
void |
removeLessThan(int noccur) |
void |
removeStopWords(StopWordList swl) |
void |
setIgnoreCase(boolean v)
Set the value of ignoreCase.
|
Methods inherited from class java.util.HashMap: clear, clone, containsKey, containsValue, entrySet, get, isEmpty, put, putAll, remove, size, values
public BagOfWords()
public BagOfWords(java.lang.String text)
public BagOfWords(java.lang.String text, StopWordList swlist)
public void addTokens(java.lang.String text)
public void addTokens(java.lang.String text, StopWordList swlist)
public void addTypesToFileCount(java.lang.String text) throws java.io.IOException
Throws: java.io.IOException
public int addToken(java.lang.String type)
public void removeStopWords(StopWordList swl)
public void removeLessThan(int noccur)
public int getCount(java.lang.String type)
public boolean containsTerm(java.lang.String type)
public WordScorePair[] getWordScoreArray()
public WordFrequencyPair[] getWordFrequencyArray()
public java.lang.String[] getTermSet()
public static java.util.Set extractTermCollection(WordFrequencyPair[] wfp)
public static java.lang.String[] extractTermSet(WordFrequencyPair[] wfp)
public static java.lang.String[] extractTermSet(WordScorePair[] wfp)
public java.util.Set keySet()
Specified by: keySet in interface java.util.Map
Overrides: keySet in class java.util.HashMap
public boolean isIgnoreCase()
public void setIgnoreCase(boolean v)
Parameters: v - Value to assign to ignoreCase.