diff --git a/app/src/main/java/com/urik/keyboard/service/SpellCheckManager.kt b/app/src/main/java/com/urik/keyboard/service/SpellCheckManager.kt index 9b6fdf8..cce5abc 100644 --- a/app/src/main/java/com/urik/keyboard/service/SpellCheckManager.kt +++ b/app/src/main/java/com/urik/keyboard/service/SpellCheckManager.kt @@ -68,10 +68,14 @@ class SpellCheckManager private val initScope = CoroutineScope(SupervisorJob() + Dispatchers.Main) private val spellCheckers = ConcurrentHashMap() + private val spellCheckerAccessOrder = ConcurrentHashMap() private var currentLanguage: String = "en" private val wordFrequencies = ConcurrentHashMap() + private val parsedDictionaryWords = ConcurrentHashMap>>() + private val indexedDictionaryWords = ConcurrentHashMap>() + private val suggestionCache: ManagedCache> = cacheMemoryManager.createCache( name = "spell_suggestions", @@ -93,15 +97,6 @@ class SpellCheckManager val accentStrippedWord: String, ) - @Volatile - private var commonWordsCache = emptyList>() - - @Volatile - private var commonWordsCacheIndexed = emptyList() - - @Volatile - private var commonWordsCacheLanguage = "" - @Volatile private var isInitialized = false @@ -210,7 +205,6 @@ class SpellCheckManager val spellChecker = createSpellChecker(context, languageCode) if (spellChecker != null) { spellCheckers[languageCode] = spellChecker - loadCommonWordsCache(languageCode) } } } catch (e: Exception) { @@ -268,18 +262,9 @@ class SpellCheckManager } } - private suspend fun loadCommonWordsCache(languageCode: String) { - if (languageCode == commonWordsCacheLanguage && commonWordsCache.isNotEmpty()) { - return - } - - try { - val words = getCommonWords(languageCode) - commonWordsCache = words - commonWordsCacheLanguage = languageCode - } catch (_: Exception) { - commonWordsCache = emptyList() - } + private suspend fun ensureDictionaryParsed(languageCode: String) { + if (parsedDictionaryWords.containsKey(languageCode)) return + getSpellCheckerForLanguage(languageCode) } private suspend fun createSpellChecker( @@ -299,6 +284,7 @@ class SpellCheckManager val symSpell = SymSpell(settings) val dictionaryFile = "dictionaries/${languageCode}_symspell.txt" val inputStream = context.assets.open(dictionaryFile) + val collectedWords = ArrayList>(INITIAL_WORD_LIST_CAPACITY) inputStream.bufferedReader().use { reader -> val lines = reader.readLines() @@ -314,9 +300,11 @@ class SpellCheckManager val frequency = parts[1].toLongOrNull() ?: 1L wordFrequencies[word.lowercase()] = frequency symSpell.createDictionaryEntry(word, frequency.toInt()) + collectedWords.add(word.lowercase() to frequency.toInt()) } else if (parts.size == 1) { wordFrequencies[parts[0].lowercase()] = 1L symSpell.createDictionaryEntry(parts[0], 1) + collectedWords.add(parts[0].lowercase() to 1) } } } @@ -324,6 +312,23 @@ class SpellCheckManager yield() } + val sorted = collectedWords.sortedByDescending { it.second } + parsedDictionaryWords[languageCode] = sorted + indexedDictionaryWords[languageCode] = + sorted.map { (word, freq) -> + CachedWord( + word = word, + frequency = freq, + strippedWord = + com.urik.keyboard.utils.TextMatchingUtils + .stripWordPunctuation(word), + accentStrippedWord = wordNormalizer.stripDiacritics(word).lowercase(), + ) + } + + evictExcessSpellCheckers(languageCode) + spellCheckerAccessOrder[languageCode] = System.nanoTime() + symSpell } } catch (e: CancellationException) { @@ -355,8 +360,26 @@ class SpellCheckManager } } + private fun evictExcessSpellCheckers(preserveLanguage: String) { + if (spellCheckers.size < MAX_CACHED_SPELL_CHECKERS) return + + val evictionTarget = + spellCheckerAccessOrder.entries + .filter { it.key != preserveLanguage && it.key != currentLanguage } + .minByOrNull { it.value } + ?.key ?: return + + spellCheckers.remove(evictionTarget) + spellCheckerAccessOrder.remove(evictionTarget) + parsedDictionaryWords.remove(evictionTarget) + indexedDictionaryWords.remove(evictionTarget) + } + private suspend fun getSpellCheckerForLanguage(languageCode: String): SpellChecker? { - spellCheckers[languageCode]?.let { return it } + spellCheckers[languageCode]?.let { + spellCheckerAccessOrder[languageCode] = System.nanoTime() + return it + } if (!isInitialized || languageCode !in KeyboardSettings.SUPPORTED_LANGUAGES) { return null @@ -364,7 +387,6 @@ class SpellCheckManager return createSpellChecker(context, languageCode)?.also { newChecker -> spellCheckers.putIfAbsent(languageCode, newChecker) - loadCommonWordsCache(languageCode) } } @@ -886,19 +908,14 @@ class SpellCheckManager prefix: String, languageCode: String, ): List> { - if (languageCode != commonWordsCacheLanguage || commonWordsCacheIndexed.isEmpty()) { - try { - getCommonWords(languageCode) - } catch (_: Exception) { - return emptyList() - } - } + ensureDictionaryParsed(languageCode) + val indexed = indexedDictionaryWords[languageCode] ?: return emptyList() val hasApostrophe = prefix.contains('\'') val apostropheMatches = if (hasApostrophe) { - commonWordsCacheIndexed + indexed .filter { cached -> cached.word.startsWith(prefix, ignoreCase = true) && cached.word.length > prefix.length @@ -918,7 +935,7 @@ class SpellCheckManager val apostropheWords = apostropheMatches.map { it.first }.toSet() val exactPrefixMatches = - commonWordsCacheIndexed + indexed .filter { cached -> cached.word !in apostropheWords && cached.strippedWord.startsWith(strippedPrefix, ignoreCase = true) && @@ -932,7 +949,7 @@ class SpellCheckManager val seenWords = combined.map { it.first }.toSet() val accentFallbackMatches = - commonWordsCacheIndexed + indexed .filter { cached -> cached.word !in seenWords && cached.accentStrippedWord.startsWith(accentStrippedPrefix) && @@ -959,31 +976,6 @@ class SpellCheckManager return hasValidChars && codePointCount in 1..MAX_INPUT_CODEPOINTS } - private fun parseDictionaryLine(line: String): Pair? { - if (line.isBlank()) return null - - val parts = line.trim().split(" ", limit = 2) - val word = parts[0].lowercase().trim() - val frequency = - if (parts.size >= 2) { - parts[1].toIntOrNull() ?: 1 - } else { - 1 - } - - val isValid = - word.length in COMMON_WORD_MIN_LENGTH..COMMON_WORD_MAX_LENGTH && - word.all { - Character.isLetter(it.code) || - Character.getType(it.code) == Character.OTHER_LETTER.toInt() || - com.urik.keyboard.utils.TextMatchingUtils - .isValidWordPunctuation(it) - } && - !isWordBlacklisted(word) - - return if (isValid) word to frequency else null - } - private fun getCurrentLanguage(): String = try { val currentLanguage = languageManager.currentLanguage.value @@ -1071,8 +1063,6 @@ class SpellCheckManager val cacheKey = buildCacheKey(normalizedWord, currentLang) dictionaryCache.invalidate(cacheKey) suggestionCache.invalidateAll() - commonWordsCache = emptyList() - commonWordsCacheIndexed = emptyList() } catch (_: Exception) { } } @@ -1095,8 +1085,6 @@ class SpellCheckManager val cacheKey = buildCacheKey(normalizedWord, currentLang) dictionaryCache.invalidate(cacheKey) suggestionCache.invalidateAll() - commonWordsCache = emptyList() - commonWordsCacheIndexed = emptyList() } } catch (_: Exception) { } @@ -1124,18 +1112,29 @@ class SpellCheckManager android.content.ComponentCallbacks2.TRIM_MEMORY_COMPLETE, -> { wordFrequencies.clear() - commonWordsCache = emptyList() - commonWordsCacheIndexed = emptyList() - commonWordsCacheLanguage = "" clearCaches() + + val keepLanguage = currentLanguage + val toEvict = spellCheckers.keys.filter { it != keepLanguage } + toEvict.forEach { lang -> + spellCheckers.remove(lang) + spellCheckerAccessOrder.remove(lang) + parsedDictionaryWords.remove(lang) + indexedDictionaryWords.remove(lang) + } } android.content.ComponentCallbacks2.TRIM_MEMORY_RUNNING_MODERATE, android.content.ComponentCallbacks2.TRIM_MEMORY_MODERATE, -> { - commonWordsCache = emptyList() - commonWordsCacheIndexed = emptyList() - commonWordsCacheLanguage = "" + val activeLangs = languageManager.activeLanguages.value.toSet() + val toEvict = parsedDictionaryWords.keys.filter { it !in activeLangs } + toEvict.forEach { lang -> + parsedDictionaryWords.remove(lang) + indexedDictionaryWords.remove(lang) + spellCheckers.remove(lang) + spellCheckerAccessOrder.remove(lang) + } } } } @@ -1159,44 +1158,13 @@ class SpellCheckManager return@withContext emptyList() } - if (targetLang == commonWordsCacheLanguage && commonWordsCache.isNotEmpty()) { - return@withContext commonWordsCache - } - - val dictionaryFile = "dictionaries/${targetLang}_symspell.txt" - val wordFrequencies = mutableListOf>() - - try { - context.assets.open(dictionaryFile).bufferedReader().use { reader -> - reader.forEachLine { line -> - parseDictionaryLine(line)?.let { wordFrequency -> - wordFrequencies.add(wordFrequency) - } - } - } - } catch (_: IOException) { - return@withContext emptyList() + if (parsedDictionaryWords[targetLang] == null) { + ensureDictionaryParsed(targetLang) } - val sortedWords = wordFrequencies.sortedByDescending { it.second } - - val sortedWordsWithStripped = - sortedWords.map { (word, freq) -> - CachedWord( - word = word, - frequency = freq, - strippedWord = - com.urik.keyboard.utils.TextMatchingUtils - .stripWordPunctuation(word), - accentStrippedWord = wordNormalizer.stripDiacritics(word).lowercase(), - ) - } - - commonWordsCache = sortedWords - commonWordsCacheIndexed = sortedWordsWithStripped - commonWordsCacheLanguage = targetLang - - return@withContext sortedWords + return@withContext parsedDictionaryWords[targetLang] + ?.filter { !isWordBlacklisted(it.first) } + ?: emptyList() } catch (_: Exception) { return@withContext emptyList() } @@ -1209,25 +1177,19 @@ class SpellCheckManager return@withContext emptyMap() } - val mergedWords = mutableMapOf() + val mergedWords = HashMap(INITIAL_WORD_LIST_CAPACITY) languages.forEach { lang -> if (lang !in KeyboardSettings.SUPPORTED_LANGUAGES) { return@forEach } - val dictionaryFile = "dictionaries/${lang}_symspell.txt" - - try { - context.assets.open(dictionaryFile).bufferedReader().use { reader -> - reader.forEachLine { line -> - parseDictionaryLine(line)?.let { (word, frequency) -> - val currentFreq = mergedWords[word] ?: 0 - mergedWords[word] = maxOf(currentFreq, frequency) - } - } + ensureDictionaryParsed(lang) + parsedDictionaryWords[lang]?.forEach { (word, frequency) -> + if (!isWordBlacklisted(word)) { + val currentFreq = mergedWords[word] ?: 0 + mergedWords[word] = maxOf(currentFreq, frequency) } - } catch (_: IOException) { } } @@ -1338,6 +1300,8 @@ class SpellCheckManager const val CONTRACTION_GUARANTEED_CONFIDENCE = 0.995 const val DICTIONARY_BATCH_SIZE = 2000 + const val INITIAL_WORD_LIST_CAPACITY = 50000 + const val MAX_CACHED_SPELL_CHECKERS = 4 const val INITIALIZATION_TIMEOUT_MS = 5000L const val FREQUENCY_BOOST_MULTIPLIER = 0.02 @@ -1366,9 +1330,6 @@ class SpellCheckManager const val MAX_PREFIX_COMPLETION_RESULTS = 10 const val MAX_INPUT_CODEPOINTS = 100 - const val COMMON_WORD_MIN_LENGTH = 2 - const val COMMON_WORD_MAX_LENGTH = 15 - const val HIGH_FREQUENCY_THRESHOLD = 10 const val MEDIUM_FREQUENCY_THRESHOLD = 3 const val HIGH_FREQUENCY_BASE_BOOST = 0.15 diff --git a/app/src/main/java/com/urik/keyboard/service/WordLearningEngine.kt b/app/src/main/java/com/urik/keyboard/service/WordLearningEngine.kt index 2f3b716..f938e27 100644 --- a/app/src/main/java/com/urik/keyboard/service/WordLearningEngine.kt +++ b/app/src/main/java/com/urik/keyboard/service/WordLearningEngine.kt @@ -12,6 +12,7 @@ import com.urik.keyboard.settings.SettingsRepository import com.urik.keyboard.utils.CacheMemoryManager import com.urik.keyboard.utils.ErrorLogger import com.urik.keyboard.utils.ManagedCache +import com.urik.keyboard.utils.MemoryPressureSubscriber import kotlinx.coroutines.CoroutineDispatcher import kotlinx.coroutines.CoroutineScope import kotlinx.coroutines.Dispatchers @@ -69,7 +70,7 @@ class WordLearningEngine private val ioDispatcher: CoroutineDispatcher = Dispatchers.IO, private val defaultDispatcher: CoroutineDispatcher = Dispatchers.Default, mainDispatcher: CoroutineDispatcher = Dispatchers.Main, - ) { + ) : MemoryPressureSubscriber { private val config = LearningConfig() private val lastDatabaseError = AtomicLong(0L) @@ -82,6 +83,8 @@ class WordLearningEngine maxSize = LEARNED_WORDS_CACHE_SIZE, ) + private val hotFrequencyBuffer = ConcurrentHashMap>() + private val cacheLock = Any() private val learnWordMutex = Mutex() @@ -102,6 +105,27 @@ class WordLearningEngine .onEach { newSettings -> currentSettings = newSettings }.launchIn(engineScope) + cacheMemoryManager.registerPressureSubscriber(this) + } + + override fun onMemoryPressure(level: Int) { + when (level) { + android.content.ComponentCallbacks2.TRIM_MEMORY_RUNNING_CRITICAL, + android.content.ComponentCallbacks2.TRIM_MEMORY_COMPLETE, + -> { + hotFrequencyBuffer.clear() + swipeWordsCache = emptyList() + swipeWordsCacheLanguage = "" + } + + android.content.ComponentCallbacks2.TRIM_MEMORY_RUNNING_MODERATE, + android.content.ComponentCallbacks2.TRIM_MEMORY_MODERATE, + -> { + val currentLang = languageManager.currentLanguage.value + val toEvict = hotFrequencyBuffer.keys.filter { it != currentLang } + toEvict.forEach { hotFrequencyBuffer.remove(it) } + } + } } /** @@ -121,6 +145,15 @@ class WordLearningEngine learnedWordsCache.put(languageTag, normalizedSet) } + val freqMap = ConcurrentHashMap( + minOf(learnedWords.size, HOT_BUFFER_MAX_SIZE), + ) + learnedWords + .sortedByDescending { it.frequency } + .take(HOT_BUFFER_MAX_SIZE) + .forEach { freqMap[it.wordNormalized] = it.frequency } + hotFrequencyBuffer[languageTag] = freqMap + Result.success(Unit) } catch (e: SQLiteException) { ErrorLogger.logException( @@ -219,6 +252,11 @@ class WordLearningEngine cacheSet.add(normalized) learnedWordsCache.put(currentLanguage, cacheSet) + hotFrequencyBuffer[currentLanguage]?.let { freqMap -> + val current = freqMap[normalized] ?: 0 + freqMap[normalized] = current + 1 + } + if (currentLanguage == swipeWordsCacheLanguage) { swipeWordsCache = emptyList() swipeWordsCacheLanguage = "" @@ -376,16 +414,42 @@ class WordLearningEngine com.urik.keyboard.utils.TextMatchingUtils .stripWordPunctuation(normalized) - try { - val exactMatch = - learnedWordDao.findExactWord( - languageTag = languageCode, - normalizedWord = normalized, - ) - if (exactMatch != null) { - results[exactMatch.word] = exactMatch.frequency + val hotBuffer = hotFrequencyBuffer[languageCode] + + if (hotBuffer != null) { + hotBuffer[normalized]?.let { freq -> + results[normalized] = freq + } + + if (results.size < maxResults && normalized.length >= MIN_PREFIX_MATCH_LENGTH) { + hotBuffer.entries + .filter { (w, _) -> + w.startsWith(normalized) && w != normalized + }.sortedByDescending { it.value } + .take(maxResults - results.size) + .forEach { (w, f) -> results[w] = f } + } + + if (results.size >= maxResults) { + onSuccessfulOperation() + return@withContext results.toList() + .sortedByDescending { it.second } + .take(maxResults) + } + } + + if (!results.containsKey(normalized)) { + try { + val exactMatch = + learnedWordDao.findExactWord( + languageTag = languageCode, + normalizedWord = normalized, + ) + if (exactMatch != null) { + results[exactMatch.word] = exactMatch.frequency + } + } catch (_: Exception) { } - } catch (_: Exception) { } if (results.size < maxResults && normalized.length >= MIN_PREFIX_MATCH_LENGTH) { @@ -522,6 +586,7 @@ class WordLearningEngine if (removed > 0) { learnedWordsCache.getIfPresent(currentLanguage)?.remove(normalized) + hotFrequencyBuffer[currentLanguage]?.remove(normalized) if (currentLanguage == swipeWordsCacheLanguage) { swipeWordsCache = emptyList() @@ -633,13 +698,22 @@ class WordLearningEngine } } - val allWords = learnedWordDao.getAllLearnedWordsForLanguage(languageTag).map { it.wordNormalized }.toSet() + val allWords = learnedWordDao.getAllLearnedWordsForLanguage(languageTag) val cacheSet = ConcurrentHashMap.newKeySet() - cacheSet.addAll(allWords) + allWords.forEach { cacheSet.add(it.wordNormalized) } + + val freqMap = ConcurrentHashMap( + minOf(allWords.size, HOT_BUFFER_MAX_SIZE), + ) + allWords + .sortedByDescending { it.frequency } + .take(HOT_BUFFER_MAX_SIZE) + .forEach { freqMap[it.wordNormalized] = it.frequency } synchronized(cacheLock) { learnedWordsCache.put(languageTag, cacheSet) } + hotFrequencyBuffer[languageTag] = freqMap } suspend fun areWordsLearned(words: List): Map = @@ -843,6 +917,7 @@ class WordLearningEngine fun clearCurrentLanguageCache() { val currentLanguage = languageManager.currentLanguage.value learnedWordsCache.invalidate(currentLanguage) + hotFrequencyBuffer.remove(currentLanguage) if (currentLanguage == swipeWordsCacheLanguage) { swipeWordsCache = emptyList() @@ -902,6 +977,7 @@ class WordLearningEngine private companion object { const val LEARNED_WORDS_CACHE_SIZE = 100 + const val HOT_BUFFER_MAX_SIZE = 1000 const val MAX_SIMILAR_WORD_LENGTH = 50 const val MAX_NORMALIZED_WORD_LENGTH = 50 diff --git a/app/src/main/java/com/urik/keyboard/ui/keyboard/KeyboardViewModel.kt b/app/src/main/java/com/urik/keyboard/ui/keyboard/KeyboardViewModel.kt index 8f6aad4..3c55462 100644 --- a/app/src/main/java/com/urik/keyboard/ui/keyboard/KeyboardViewModel.kt +++ b/app/src/main/java/com/urik/keyboard/ui/keyboard/KeyboardViewModel.kt @@ -89,13 +89,6 @@ class KeyboardViewModel } } - viewModelScope.launch { - themeManager.currentTheme - .drop(1) - .collect { theme -> - startLoadLayout(_state.value.currentMode) - } - } } fun onEvent(event: KeyboardEvent) { diff --git a/app/src/main/java/com/urik/keyboard/ui/keyboard/components/CharacterVariationPopup.kt b/app/src/main/java/com/urik/keyboard/ui/keyboard/components/CharacterVariationPopup.kt index 8d19399..bb4b017 100644 --- a/app/src/main/java/com/urik/keyboard/ui/keyboard/components/CharacterVariationPopup.kt +++ b/app/src/main/java/com/urik/keyboard/ui/keyboard/components/CharacterVariationPopup.kt @@ -27,6 +27,9 @@ class CharacterVariationPopup( private var onVariationSelected: ((String) -> Unit)? = null private val characterButtons = mutableListOf