Commit 697658fc authored by Dmitri Plotnikov's avatar Dmitri Plotnikov Committed by Android (Google) Code Review
Browse files

Merge "Implementing traditional grouping of Hiragana characters in contacts lists"

parents 3a4f6d71 4b2aeb80
......@@ -25,7 +25,7 @@
#include "PhonebookIndex.h"
#include "PhoneticStringUtils.h"
#define SMALL_BUFFER_SIZE 10
#define MIN_OUTPUT_SIZE 6 // Minimum required size for the output buffer (in bytes)
namespace android {
......@@ -118,45 +118,79 @@ static bool is_CJK(UChar c) {
|| (0xf900 <= c && c <= 0xfaff); // CJK_COMPATIBILITY_IDEOGRAPHS
}
// NOTE(review): this span appears to come from a diff rendered without +/- markers:
// the earlier version (returns a single UChar, works on a local `dest` buffer) and the
// newer version (writes into the caller's `out` buffer and returns a count) are
// interleaved below. Confirm against the real file before relying on statement order.
UChar GetPhonebookIndex(UCharIterator * iter, const char * locale) {
UChar dest[SMALL_BUFFER_SIZE];
// Newer entry point: the index is written to `out`, the return value is the number of
// UChars stored there, and failures are reported through *isError.
int32_t GetPhonebookIndex(UCharIterator * iter, const char * locale, UChar * out, int32_t size,
UBool * isError)
{
// Refuse output buffers smaller than MIN_OUTPUT_SIZE and report that as an error.
if (size < MIN_OUTPUT_SIZE) {
*isError = TRUE;
return 0;
}
// Normalize the first character to remove accents using the NFD normalization
UErrorCode errorCode = U_ZERO_ERROR;
int32_t len = unorm_next(iter, dest, SMALL_BUFFER_SIZE * sizeof(UChar), UNORM_NFD,
0 /* options */, TRUE /* normalize */, NULL, &errorCode);
if (U_FAILURE(errorCode) || len == 0) {
return 0;
}
// From here on, a return value of 0 means "no index", not a failure, unless
// *isError is set again below.
*isError = FALSE;
UChar c = dest[0];
// Normalize the first character to remove accents using the NFD normalization
// NOTE(review): `size` is forwarded as unorm_next()'s destination capacity, which ICU
// counts in UChars, while this API describes `size` in bytes — confirm the unit.
UErrorCode errorCode = U_ZERO_ERROR;
int32_t len = unorm_next(iter, out, size, UNORM_NFD,
0 /* options */, TRUE /* normalize */, NULL, &errorCode);
if (U_FAILURE(errorCode)) {
*isError = TRUE;
return 0;
}
// We are only interested in letters
if (!u_isalpha(c)) {
return 0;
}
if (len == 0) { // Empty input string
return 0;
}
c = u_toupper(c);
// Only the first UChar of the normalized output is classified below.
UChar c = out[0];
// Check for explicitly mapped characters
UChar c_mapped = map_character(c, DEFAULT_CHAR_MAP, sizeof(DEFAULT_CHAR_MAP) / sizeof(UChar));
if (c_mapped != 0) {
return c_mapped;
}
// We are only interested in letters
if (!u_isalpha(c)) {
return 0;
}
c = u_toupper(c);
// Check for explicitly mapped characters
UChar c_mapped = map_character(c, DEFAULT_CHAR_MAP, sizeof(DEFAULT_CHAR_MAP) / sizeof(UChar));
if (c_mapped != 0) {
out[0] = c_mapped;
return 1;
}
// Convert Kanas to Hiragana
UChar next = len > 2 ? dest[1] : 0;
c = android::GetNormalizedCodePoint(c, next, NULL);
// Convert Kanas to Hiragana
// NOTE(review): out[1] is read only when len > 2, so a two-unit normalization result
// passes next == 0 — confirm whether `len >= 2` was intended.
UChar next = len > 2 ? out[1] : 0;
c = android::GetNormalizedCodePoint(c, next, NULL);
// Traditional grouping of Hiragana characters
// Every code point in U+3042..U+309F is replaced by the first character of its range
// (the per-line comments name the group each range maps to).
if (0x3042 <= c && c <= 0x309F) {
if (c < 0x304B) c = 0x3042; // a
else if (c < 0x3055) c = 0x304B; // ka
else if (c < 0x305F) c = 0x3055; // sa
else if (c < 0x306A) c = 0x305F; // ta
else if (c < 0x306F) c = 0x306A; // na
else if (c < 0x307E) c = 0x306F; // ha
else if (c < 0x3084) c = 0x307E; // ma
else if (c < 0x3089) c = 0x3084; // ya
else if (c < 0x308F) c = 0x3089; // ra
else c = 0x308F; // wa
out[0] = c;
return 1;
}
if (is_CJK(c)) {
if (strncmp(locale, "ja", 2) == 0) {
return 0x8A18; // Kanji character used as a heading in letters, notices and other documents
} else {
return 0;
}
// CJK ideographs get one shared three-UChar heading when the locale string starts
// with "ja"; for any other locale no index is produced.
if (is_CJK(c)) {
if (strncmp(locale, "ja", 2) == 0) {
// Japanese word meaning "misc" or "other"
out[0] = 0x305D;
out[1] = 0x306E;
out[2] = 0x4ED6;
return 3;
} else {
return 0;
}
}
return c;
// Any other letter is its own index (already upper-cased above).
out[0] = c;
return 1;
}
} // namespace android
......@@ -25,11 +25,21 @@ namespace android {
/**
* A character converter that takes a UNICODE character and produces the
* phonebook index for it in the specified locale. For example, "a" becomes "A"
* phone book index for it in the specified locale. For example, "a" becomes "A"
* and so does A with accents. Conversion rules differ from locale to
* locale, which is why this function takes locale as an argument.
*
* @param iter iterator of input characters
* @param locale the string representation of the current locale, e.g. "ja"
* @param out output buffer
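* @param size size of the output buffer in bytes. The buffer should be large enough
* to hold the longest phone book index (e.g. a three-character word in Japanese).
* NOTE(review): the implementation passes this value to unorm_next() as its
* destination capacity, which ICU counts in UChars — confirm the intended unit.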
* @param isError will be set to TRUE if an error occurs
*
* @return number of UChars written to out, or 0 when no index applies or an error occurs
*/
UChar GetPhonebookIndex(UCharIterator * iter, const char * locale);
int32_t GetPhonebookIndex(UCharIterator * iter, const char * locale, UChar * out, int32_t size,
UBool * isError);
} // namespace android
......
......@@ -95,17 +95,25 @@ static void get_phonebook_index(
UCharIterator iter;
uiter_setUTF8(&iter, src, -1);
UChar index = android::GetPhonebookIndex(&iter, locale);
if (index == 0) {
UBool isError = FALSE;
UChar index[SMALL_BUFFER_SIZE];
uint32_t len = android::GetPhonebookIndex(&iter, locale, index, sizeof(index), &isError);
if (isError) {
sqlite3_result_null(context);
return;
}
uint32_t outlen = 0;
uint8_t out[SMALL_BUFFER_SIZE];
UBool isError = FALSE;
U8_APPEND(out, outlen, SMALL_BUFFER_SIZE * sizeof(uint8_t), index, isError);
if (isError || outlen == 0) {
for (uint32_t i = 0; i < len; i++) {
U8_APPEND(out, outlen, sizeof(out), index[i], isError);
if (isError) {
sqlite3_result_null(context);
return;
}
}
if (outlen == 0) {
sqlite3_result_null(context);
return;
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment