modnlp.dstruct
public class CorpusFile extends java.lang.Object
Modifier and Type | Field and Description |
---|---|
static char |
ENSPACE |
protected boolean |
ignoreSGML
The
ignoreSGML flag controls whether
(SGML/XML-style) markup will be output. |
static char |
JPSPACE |
static byte |
WHITESPACE |
Constructor and Description |
---|
CorpusFile(java.lang.String fname,
java.lang.String e) |
Modifier and Type | Method and Description |
---|---|
void |
close() |
boolean |
getIgnoreSGML() |
java.lang.String |
getPosContext(java.lang.Integer pos,
int ctx) |
java.lang.String |
getPosContext(long offset,
int ctx) |
java.lang.String |
getPreContext(java.lang.Integer pos,
int ctx)
Get a number (ctx) of characters before a position. |
java.lang.String |
getPreContext(long offset,
int ctx) |
java.lang.String |
getWordInContext(java.lang.Integer pos,
java.lang.String wrd,
int ctx)
Get a number (ctx) of characters surrounding
the word (wrd) starting at position. |
java.lang.String |
getWordInContextWithTags(java.lang.Integer pos,
java.lang.String wrd,
int ctx) |
static void |
main(java.lang.String[] a) |
void |
setIgnoreSGML(boolean v) |
void |
setLangEN() |
void |
setLangJP() |
void |
setLanguage(int la) |
void |
setSGMLFlag(java.lang.String yn)
Set ignoreSGML. |
public static final char JPSPACE
public static final char ENSPACE
public static final byte WHITESPACE
protected boolean ignoreSGML
The ignoreSGML flag controls whether
(SGML/XML-style) markup will be output. If true,
character-reading methods are forced to skip tags of the form '<.*>'.
See Also:
setSGMLFlag, readNextChar, readPreviousChar
public CorpusFile(java.lang.String fname, java.lang.String e) throws java.io.IOException
java.io.IOException
public void setLanguage(int la)
public void setLangJP()
public void setLangEN()
public java.lang.String getWordInContext(java.lang.Integer pos, java.lang.String wrd, int ctx) throws java.io.IOException
Get a number (ctx) of characters surrounding
the word (wrd) starting at position pos.
Parameters:
wrd - the keyword
pos - a (randomly-accessible) position in this file
ctx - the number of characters before and after wrd to be returned
Returns:
ctx + ctx + wrd.length()
(Note: control characters found in the file will be
replaced by whitespace, and markup will be ignored
if ignoreSGML is true)
Throws:
java.io.IOException
See Also:
ignoreSGML, readPreviousChar, readBack, readNextChar, readByteNoControl
public java.lang.String getWordInContextWithTags(java.lang.Integer pos, java.lang.String wrd, int ctx) throws java.io.IOException
java.io.IOException
public java.lang.String getPreContext(java.lang.Integer pos, int ctx) throws java.io.IOException
Get a number (ctx) of characters before a position.
Parameters:
pos - a (randomly-accessible) position in this file
ctx - the number of characters before position to be returned
Returns:
ctx
Throws:
java.io.IOException
public java.lang.String getPreContext(long offset, int ctx) throws java.io.IOException
java.io.IOException
public java.lang.String getPosContext(java.lang.Integer pos, int ctx) throws java.io.IOException
java.io.IOException
public java.lang.String getPosContext(long offset, int ctx) throws java.io.IOException
java.io.IOException
public void setSGMLFlag(java.lang.String yn)
Set ignoreSGML. Default is false.
See Also:
ignoreSGML
public void setIgnoreSGML(boolean v)
public boolean getIgnoreSGML()
public void close()
public static void main(java.lang.String[] a)