Skip to content

Commit

Permalink
Add servlet and static linking
Browse files Browse the repository at this point in the history
  • Loading branch information
kariminf committed Apr 4, 2016
1 parent dd33e2d commit 75d78a8
Show file tree
Hide file tree
Showing 8 changed files with 931 additions and 13 deletions.
1 change: 1 addition & 0 deletions .classpath
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,6 @@
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
<classpathentry combineaccessrules="false" kind="src" path="/KToolJa"/>
<classpathentry combineaccessrules="false" kind="src" path="/LangPi"/>
<classpathentry kind="lib" path="/usr/share/tomcat7/lib/servlet-api.jar"/>
<classpathentry kind="output" path="bin"/>
</classpath>
201 changes: 201 additions & 0 deletions src/kariminf/as/preProcess/ISO639_1.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@
package kariminf.as.preProcess;

/**
 * Two-letter language codes (named after ISO 639-1), each paired with an
 * English display name.
 *
 * <p>The constant identifier is the upper-cased two-letter code, so a code
 * held as text can be resolved with {@code ISO639_1.valueOf(code.toUpperCase())}.
 * Each constant is immutable: its display name is fixed at construction.
 */
public enum ISO639_1 {

    AB("Abkhaz"),
    AA("Afar"),
    AF("Afrikaans"),
    AK("Akan"),
    SQ("Albanian"),
    AM("Amharic"),
    AR("Arabic"),
    AN("Aragonese"),
    HY("Armenian"),
    AS("Assamese"),
    AV("Avaric"),
    AE("Avestan"),
    AY("Aymara"),
    AZ("Azerbaijani"),
    BM("Bambara"),
    BA("Bashkir"),
    EU("Basque"),
    BE("Belarusian"),
    BN("Bengali, Bangla"),
    BH("Bihari"),
    BI("Bislama"),
    BS("Bosnian"),
    BR("Breton"),
    BG("Bulgarian"),
    MY("Burmese"),
    CA("Catalan"),
    CH("Chamorro"),
    CE("Chechen"),
    NY("Chichewa, Chewa, Nyanja"),
    ZH("Chinese"),
    CV("Chuvash"),
    KW("Cornish"),
    CO("Corsican"),
    CR("Cree"),
    HR("Croatian"),
    CS("Czech"),
    DA("Danish"),
    DV("Divehi, Dhivehi, Maldivian"),
    NL("Dutch"),
    DZ("Dzongkha"),
    EN("English"),
    EO("Esperanto"),
    ET("Estonian"),
    EE("Ewe"),
    FO("Faroese"),
    FJ("Fijian"),
    FI("Finnish"),
    FR("French"),
    FF("Fula, Fulah, Pulaar, Pular"),
    GL("Galician"),
    KA("Georgian"),
    DE("German"),
    EL("Greek (modern)"),
    GN("Guaraní"),
    GU("Gujarati"),
    HT("Haitian, Haitian Creole"),
    HA("Hausa"),
    HE("Hebrew (modern)"),
    HZ("Herero"),
    HI("Hindi"),
    HO("Hiri Motu"),
    HU("Hungarian"),
    IA("Interlingua"),
    ID("Indonesian"),
    IE("Interlingue"),
    GA("Irish"),
    IG("Igbo"),
    IK("Inupiaq"),
    IO("Ido"),
    IS("Icelandic"),
    IT("Italian"),
    IU("Inuktitut"),
    JA("Japanese"),
    JV("Javanese"),
    KL("Kalaallisut, Greenlandic"),
    KN("Kannada"),
    KR("Kanuri"),
    KS("Kashmiri"),
    KK("Kazakh"),
    KM("Khmer"),
    KI("Kikuyu, Gikuyu"),
    RW("Kinyarwanda"),
    KY("Kyrgyz"),
    KV("Komi"),
    KG("Kongo"),
    KO("Korean"),
    KU("Kurdish"),
    KJ("Kwanyama, Kuanyama"),
    LA("Latin"),
    LB("Luxembourgish, Letzeburgesch"),
    LG("Ganda"),
    LI("Limburgish, Limburgan, Limburger"),
    LN("Lingala"),
    LO("Lao"),
    LT("Lithuanian"),
    LU("Luba-Katanga"),
    LV("Latvian"),
    GV("Manx"),
    MK("Macedonian"),
    MG("Malagasy"),
    MS("Malay"),
    ML("Malayalam"),
    MT("Maltese"),
    MI("Māori"),
    MR("Marathi (Marāṭhī)"),
    MH("Marshallese"),
    MN("Mongolian"),
    NA("Nauruan"),
    NV("Navajo, Navaho"),
    ND("Northern Ndebele"),
    NE("Nepali"),
    NG("Ndonga"),
    NB("Norwegian Bokmål"),
    NN("Norwegian Nynorsk"),
    NO("Norwegian"),
    II("Nuosu"),
    NR("Southern Ndebele"),
    OC("Occitan"),
    OJ("Ojibwe, Ojibwa"),
    CU("Old Church Slavonic, Church Slavonic, Old Bulgarian"),
    OM("Oromo"),
    OR("Oriya"),
    OS("Ossetian, Ossetic"),
    PA("Panjabi, Punjabi"),
    PI("Pāli"),
    FA("Persian (Farsi)"),
    PL("Polish"),
    PS("Pashto, Pushto"),
    PT("Portuguese"),
    QU("Quechua"),
    RM("Romansh"),
    RN("Kirundi"),
    RO("Romanian"),
    RU("Russian"),
    SA("Sanskrit (Saṁskṛta)"),
    SC("Sardinian"),
    SD("Sindhi"),
    SE("Northern Sami"),
    SM("Samoan"),
    SG("Sango"),
    SR("Serbian"),
    GD("Scottish Gaelic, Gaelic"),
    SN("Shona"),
    SI("Sinhala, Sinhalese"),
    SK("Slovak"),
    SL("Slovene"),
    SO("Somali"),
    ST("Southern Sotho"),
    ES("Spanish"),
    SU("Sundanese"),
    SW("Swahili"),
    SS("Swati"),
    SV("Swedish"),
    TA("Tamil"),
    TE("Telugu"),
    TG("Tajik"),
    TH("Thai"),
    TI("Tigrinya"),
    BO("Tibetan Standard, Tibetan, Central"),
    TK("Turkmen"),
    TL("Tagalog"),
    TN("Tswana"),
    TO("Tonga (Tonga Islands)"),
    TR("Turkish"),
    TS("Tsonga"),
    TT("Tatar"),
    TW("Twi"),
    TY("Tahitian"),
    UG("Uyghur"),
    UK("Ukrainian"),
    UR("Urdu"),
    UZ("Uzbek"),
    VE("Venda"),
    VI("Vietnamese"),
    VO("Volapük"),
    WA("Walloon"),
    CY("Welsh"),
    WO("Wolof"),
    FY("Western Frisian"),
    XH("Xhosa"),
    YI("Yiddish"),
    YO("Yoruba"),
    ZA("Zhuang, Chuang"),
    ZU("Zulu");

    /**
     * English display name of the language. Final because enum constants
     * are shared singletons and must not be mutated after construction.
     */
    private final String name;

    /**
     * Binds a constant to its display name.
     *
     * @param name English display name; may list several comma-separated
     *             alternatives (e.g. "Bengali, Bangla")
     */
    ISO639_1(String name) {
        this.name = name;
    }

    /**
     * Returns the English display name given to this constant.
     *
     * <p>This is distinct from {@link #name()}, which returns the constant's
     * identifier (the two-letter code).
     *
     * @return the display name passed to the constructor
     */
    public String getName() {
        return name;
    }

}
25 changes: 16 additions & 9 deletions src/kariminf/as/preProcess/PreProcessor.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
import kariminf.langpi.basic.def.DefSegmenter;
import kariminf.langpi.basic.def.DefStemmer;
import kariminf.langpi.basic.Normalizer;
import kariminf.langpi.basic.PreProcessInfo;
import kariminf.langpi.basic.BasicInfo;
import kariminf.langpi.basic.SWEliminator;
import kariminf.langpi.basic.Segmenter;
import kariminf.langpi.basic.Stemmer;
Expand All @@ -51,7 +51,7 @@
public class PreProcessor {

//The location of the preprocessing plugins
private final String location = "preProcess/";
private String location = "preProcess/";

// In multi-document summarization, we have many texts
private List<String> texts = new ArrayList<String>();
Expand All @@ -60,7 +60,7 @@ public class PreProcessor {
private Data data;

//Different preprocessing tasks
private PreProcessInfo info = null;
private BasicInfo info = null;
private Normalizer normalizer = null;
private Segmenter segmenter = null;
private SWEliminator sweliminator = null;
Expand All @@ -76,6 +76,13 @@ public class PreProcessor {
/**
 * Creates a preprocessor for the given language, leaving the
 * {@code location} field at whatever value it already holds.
 *
 * @param lang language identifier handed to {@link #setLanguage(String)}
 * @param data object stored in this instance's {@code data} field
 */
public PreProcessor(String lang, Data data){
setLanguage(lang);
this.data = data;

}

/**
 * Creates a preprocessor for the given language, using a caller-supplied
 * plugin location.
 *
 * @param lang language identifier handed to {@link #setLanguage(String)}
 * @param data object stored in this instance's {@code data} field
 * @param location value assigned to the {@code location} field
 */
public PreProcessor(String lang, Data data, String location){
// Must be assigned before setLanguage(), which passes location to the JarLoader.
this.location = location;
setLanguage(lang);
this.data = data;
}


Expand All @@ -88,32 +95,32 @@ public void setLanguage(String lang){

//Search for all preprocessing plugins
JarLoader jarLoader =
new JarLoader(location, "kariminf/as/preProcess", PreProcessInfo.version);
new JarLoader(location, "kariminf/langpi/basic", BasicInfo.version);

//get the info class for the preprocessed language
info = jarLoader.getInfoService(lang, PreProcessInfo.class);
info = jarLoader.getInfoService(lang, BasicInfo.class);

//Try to get the preprocessing tasks' classes, otherwise use the default

normalizer = jarLoader.getLangService(info, Normalizer.class);
normalizer = jarLoader.getClassService(info, Normalizer.class);
if (normalizer == null){
System.out.println(lang + ": No Normalizer, using default");
normalizer = new DefNormalizer();
}

segmenter = jarLoader.getLangService(info,Segmenter.class);
segmenter = jarLoader.getClassService(info,Segmenter.class);
if (segmenter == null){
System.out.println(lang + ": No Segmenter, using default");
segmenter = new DefSegmenter();
}

sweliminator = jarLoader.getLangService(info, SWEliminator.class);
sweliminator = jarLoader.getClassService(info, SWEliminator.class);
if (sweliminator == null){
System.out.println(lang + ": No SWEliminator, using default");
sweliminator = new DefSWEliminator();
}

stemmer = jarLoader.getLangService(info, Stemmer.class);
stemmer = jarLoader.getClassService(info, Stemmer.class);

if (stemmer == null){
System.out.println(lang + ": No Stemmer, using default");
Expand Down
Loading

0 comments on commit 75d78a8

Please sign in to comment.