From 1f5ff65139bdc96f49fee8ff36e25408138af841 Mon Sep 17 00:00:00 2001
From: Yen Ching-Hsuan
Date: Thu, 31 Mar 2016 07:49:37 -0700
Subject: [PATCH 1/2] Add new API to get the frequency of phrases

---
 include/chewingio.h |  10 +++
 src/chewingio.c     | 154 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 164 insertions(+)

diff --git a/include/chewingio.h b/include/chewingio.h
index b9e5455a7..eb4c81cba 100644
--- a/include/chewingio.h
+++ b/include/chewingio.h
@@ -529,6 +529,16 @@ CHEWING_API int chewing_userphrase_get(ChewingContext *ctx,
                                        char *phrase_buf, unsigned int phrase_len,
                                        char *bopomofo_buf, unsigned int bopomofo_len);
 
+/*
+ * Look up the frequency data stored for the user phrase identified by
+ * phrase_buf and bopomofo_buf. Returns 0 and fills in all four outputs on
+ * success; returns -1 and leaves the outputs untouched on failure.
+ */
+CHEWING_API int chewing_userphrase_get_freq(ChewingContext *ctx,
+                                            const char phrase_buf[], const char bopomofo_buf[],
+                                            unsigned int *orig_freq, unsigned int *max_freq,
+                                            unsigned int *user_freq, unsigned int *time);
+
 CHEWING_API int chewing_userphrase_add(ChewingContext *ctx, const char *phrase_buf, const char *bopomofo_buf);
 
 CHEWING_API int chewing_userphrase_remove(ChewingContext *ctx, const char *phrase_buf, const char *bopomofo_buf);
diff --git a/src/chewingio.c b/src/chewingio.c
index 2ea172e21..5ced9acf5 100644
--- a/src/chewingio.c
+++ b/src/chewingio.c
@@ -1979,6 +1979,160 @@ CHEWING_API int chewing_userphrase_get(ChewingContext *ctx,
 #endif
 }
 
+/*
+ * Look up the frequency data stored for one user phrase.
+ *
+ * phrase_buf and bopomofo_buf identify the phrase; both must describe the
+ * same number of units, and no more than MAX_PHRASE_LEN of them.
+ *
+ * Returns 0 and fills in all four output pointers on success. Returns -1,
+ * leaving the outputs untouched, on a NULL argument, invalid or mismatched
+ * input, a database error, or when no matching user phrase is found.
+ */
+CHEWING_API int chewing_userphrase_get_freq(ChewingContext *ctx,
+                                            const char phrase_buf[], const char bopomofo_buf[],
+                                            unsigned int *orig_freq, unsigned int *max_freq,
+                                            unsigned int *user_freq, unsigned int *time)
+{
+    ChewingData *pgdata = NULL;
+
+    int ret;
+    int phone_len;
+    int phrase_len;
+    uint16_t phone_buf[MAX_PHONE_SEQ_LEN];
+#if WITH_SQLITE3
+    int i;
+    sqlite3_stmt *stmt;
+#else
+    const TreeType *tree_pos;
+    Phrase phrase;
+    UserPhraseData *uphrase;
+    unsigned int dict_freq = 0;
+#endif
+
+    if (!ctx || !phrase_buf || !bopomofo_buf) {
+        return -1;
+    }
+
+    /* Every output is written through on success, so none may be NULL. */
+    if (!orig_freq || !max_freq || !user_freq || !time) {
+        return -1;
+    }
+
+    pgdata = ctx->data;
+    phrase_len = ueStrLen(phrase_buf);
+    phone_len = UintArrayFromBopomofo(NULL, 0, bopomofo_buf);
+
+    if (phone_len <= 0) {
+        return -1;
+    }
+
+    if (phone_len != phrase_len) {
+        LOG_WARN("Cannot look up userphrase frequency because phone_buf length %d != phrase_buf length %d",
+                 phone_len, phrase_len);
+        return -1;
+    }
+
+    if (phrase_len > MAX_PHRASE_LEN) {
+        LOG_WARN("phrase_buf length %d > MAX_PHRASE_LEN (%d)", phrase_len, MAX_PHRASE_LEN);
+        return -1;
+    }
+
+    /*
+     * The conversion below may write phone_len + 1 entries, so make sure the
+     * fixed-size phone_buf can hold them before filling it.
+     */
+    if (phone_len + 1 > MAX_PHONE_SEQ_LEN) {
+        return -1;
+    }
+
+    ret = UintArrayFromBopomofo(phone_buf, phone_len + 1, bopomofo_buf);
+    if (ret == -1) {
+        return -1;
+    }
+
+#if WITH_SQLITE3
+    stmt = pgdata->static_data.stmt_userphrase[STMT_USERPHRASE_SELECT_BY_PHONE_PHRASE];
+
+    ret = sqlite3_reset(stmt);
+    if (ret != SQLITE_OK) {
+        LOG_ERROR("sqlite3_reset returns %d.", ret);
+        return -1;
+    }
+
+    ret = sqlite3_bind_int(stmt, BIND_USERPHRASE_LENGTH, phone_len);
+    if (ret != SQLITE_OK) {
+        LOG_ERROR("sqlite3_bind_int returns %d.", ret);
+        return -1;
+    }
+
+    ret = sqlite3_bind_text(stmt, BIND_USERPHRASE_PHRASE, phrase_buf, -1, SQLITE_STATIC);
+    if (ret != SQLITE_OK) {
+        LOG_ERROR("sqlite3_bind_text returns %d", ret);
+        return -1;
+    }
+
+    /* Bind every phone slot; slots past the end of the phrase are bound to 0. */
+    for (i = 0; i < MAX_PHRASE_LEN; ++i) {
+        ret = sqlite3_bind_int(stmt, BIND_USERPHRASE_PHONE_0 + i, i < phone_len ? phone_buf[i] : 0);
+        if (ret != SQLITE_OK) {
+            LOG_ERROR("sqlite3_bind_int returns %d", ret);
+            return -1;
+        }
+    }
+
+    /* Anything other than a result row means there is nothing to report. */
+    ret = sqlite3_step(stmt);
+    if (ret != SQLITE_ROW) {
+        return -1;
+    }
+
+    *orig_freq = sqlite3_column_int(stmt,
+                                    SQL_STMT_USERPHRASE[STMT_USERPHRASE_SELECT_BY_PHONE_PHRASE].column
+                                    [COLUMN_USERPHRASE_ORIG_FREQ]);
+
+    *max_freq = sqlite3_column_int(stmt,
+                                   SQL_STMT_USERPHRASE[STMT_USERPHRASE_SELECT_BY_PHONE_PHRASE].column
+                                   [COLUMN_USERPHRASE_MAX_FREQ]);
+
+    *user_freq = sqlite3_column_int(stmt,
+                                    SQL_STMT_USERPHRASE[STMT_USERPHRASE_SELECT_BY_PHONE_PHRASE].column
+                                    [COLUMN_USERPHRASE_USER_FREQ]);
+
+    *time = sqlite3_column_int(stmt,
+                               SQL_STMT_USERPHRASE[STMT_USERPHRASE_SELECT_BY_PHONE_PHRASE].column
+                               [COLUMN_USERPHRASE_TIME]);
+#else
+    /* The static dictionary supplies orig_freq/max_freq when it has the phrase. */
+    tree_pos = TreeFindPhrase(pgdata, 0, phone_len - 1, phone_buf);
+    if (tree_pos) {
+        GetPhraseFirst(pgdata, &phrase, tree_pos);
+        do {
+            if (!strcmp(phrase.phrase, phrase_buf)) {
+                dict_freq = phrase.freq;
+                break;
+            }
+        } while (GetVocabNext(pgdata, &phrase));
+    }
+
+    uphrase = UserGetPhraseFirst(pgdata, phone_buf);
+    while (uphrase && strcmp(uphrase->wordSeq, phrase_buf)) {
+        uphrase = UserGetPhraseNext(pgdata, phone_buf);
+    }
+    if (!uphrase) {
+        return -1;
+    }
+
+    *orig_freq = dict_freq;
+    *max_freq = dict_freq;
+    *user_freq = uphrase->userfreq;
+    /* NOTE(review): no timestamp is read in this backend; report 0 instead of garbage. */
+    *time = 0;
+#endif
+
+    return 0;
+}
+
 CHEWING_API int chewing_userphrase_add(ChewingContext *ctx, const char *phrase_buf, const char *bopomofo_buf)
 {
     ChewingData *pgdata;

From 41a01f10579003c4430b3dbce9fc1da91a8da076 Mon Sep 17 00:00:00 2001
From: Yen Ching-Hsuan
Date: Sat, 16 Apr 2016 20:41:53 -0700
Subject: [PATCH 2/2] Add a new test case for chewing_userphrase_get_freq()

---
 test/test-userphrase.c | 77 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 77 insertions(+)

diff --git a/test/test-userphrase.c b/test/test-userphrase.c
index ef063c696..55f50352f 100644
--- a/test/test-userphrase.c
+++ b/test/test-userphrase.c
@@ -447,11 +447,88 @@ void test_userphrase_enumerate_rewind()
     chewing_delete(ctx);
 }
 
+/* Typing a user phrase repeatedly shall raise only its user frequency. */
+void test_userphrase_enumerate_frequency()
+{
+    ChewingContext *ctx;
+    int ret;
+    unsigned int expect_len;
+
+    const char phrase[] = "\xE6\xB8\xAC\xE8\xA9\xA6" /* 測試 */ ;
+    char phrase_buf[50];
+    unsigned int phrase_len;
+
+    const char bopomofo[] = "\xE3\x84\x98\xE3\x84\x9C\xCB\x8B \xE3\x84\x95\xCB\x8B";     /* ㄘㄜˋ ㄕˋ */
+    char bopomofo_buf[50];
+    unsigned int bopomofo_len;
+
+    unsigned int orig_freq = 0, orig_freq_b = 0;
+    unsigned int max_freq = 0, max_freq_b = 0;
+    unsigned int user_freq = 0, user_freq_b = 0;
+    unsigned int time = 0, time_b = 0;
+
+    int i;
+
+    clean_userphrase();
+
+    ctx = chewing_new();
+    start_testcase(ctx, fd);
+
+    ret = chewing_userphrase_add(ctx, phrase, bopomofo);
+    ok(ret == 1, "chewing_userphrase_add() return value `%d' shall be `%d'", ret, 1);
+    ret = chewing_userphrase_lookup(ctx, phrase, bopomofo);
+    ok(ret == 1, "chewing_userphrase_lookup() return value `%d' shall be `%d'", ret, 1);
+
+    ret = chewing_userphrase_enumerate(ctx);
+    ok(ret == 0, "chewing_userphrase_enumerate() return value `%d' shall be `%d'", ret, 0);
+
+    ret = chewing_userphrase_has_next(ctx, &phrase_len, &bopomofo_len);
+    ok(ret == 1, "chewing_userphrase_has_next() return value `%d' shall be `%d'", ret, 1);
+    expect_len = strlen(phrase) + 1;
+    ok(phrase_len >= expect_len, "chewing_userphrase_has_next() shall set phrase_len `%u' >= `%u'", phrase_len,
+       expect_len);
+    expect_len = strlen(bopomofo) + 1;
+    ok(bopomofo_len >= expect_len, "chewing_userphrase_has_next() shall set bopomofo_len `%u' >= `%u'", bopomofo_len,
+       expect_len);
+    ret = chewing_userphrase_get(ctx, phrase_buf, sizeof(phrase_buf), bopomofo_buf, sizeof(bopomofo_buf));
+    ok(ret == 0, "chewing_userphrase_get() return value `%d' shall be `%d'", ret, 0);
+    ok(strcmp(phrase_buf, phrase) == 0, "chewing_userphrase_get() shall set phrase_buf `%s' to `%s'", phrase_buf,
+       phrase);
+    ok(strcmp(bopomofo_buf, bopomofo) == 0, "chewing_userphrase_get() shall set bopomofo_buf `%s' to `%s'",
+       bopomofo_buf, bopomofo);
+
+    /* A NULL output pointer shall be rejected rather than dereferenced. */
+    ret = chewing_userphrase_get_freq(ctx, phrase_buf, bopomofo_buf, NULL, &max_freq, &user_freq, &time);
+    ok(ret == -1, "chewing_userphrase_get_freq() return value `%d' shall be `%d'", ret, -1);
+
+    ret = chewing_userphrase_get_freq(ctx, phrase_buf, bopomofo_buf, &orig_freq, &max_freq, &user_freq, &time);
+    ok(ret == 0, "chewing_userphrase_get_freq() return value `%d' shall be `%d'", ret, 0);
+
+    /*
+     * Type 測試 repeatedly so that its user frequency increases.
+     * NOTE(review): no commit key is sent here -- confirm that these
+     * keystrokes alone update the stored frequency.
+     */
+    for (i = 0; i < 32; ++i) {
+        type_keystroke_by_string(ctx, "hk4g4");
+    }
+
+    ret = chewing_userphrase_get_freq(ctx, phrase_buf, bopomofo_buf, &orig_freq_b, &max_freq_b, &user_freq_b, &time_b);
+    ok(ret == 0, "chewing_userphrase_get_freq() return value `%d' shall be `%d'", ret, 0);
+    ok(orig_freq_b == orig_freq, "chewing_userphrase_get_freq() orig_freq `%u' shall be `%u'", orig_freq_b, orig_freq);
+    ok(max_freq_b == max_freq, "chewing_userphrase_get_freq() max_freq `%u' shall be `%u'", max_freq_b, max_freq);
+    ok(user_freq_b > user_freq, "chewing_userphrase_get_freq() shall set user_freq `%u' > `%u'", user_freq_b, user_freq);
+    ok(time_b >= time, "chewing_userphrase_get_freq() shall set time `%u' >= `%u'", time_b, time);
+
+    chewing_delete(ctx);
+}
+
 void test_userphrase_enumerate()
 {
     test_userphrase_enumerate_normal();
     test_userphrase_enumerate_empty();
     test_userphrase_enumerate_rewind();
+    test_userphrase_enumerate_frequency();
 }
 
 void test_userphrase_manipulate_normal()