Prefer Language Detector over FastText (#6526)
This commit is contained in:
parent
12b241c2cf
commit
6a37bfbf43
8
package-lock.json
generated
8
package-lock.json
generated
@ -47,6 +47,7 @@
|
||||
"@tauri-apps/cli": "^2.9.4",
|
||||
"@testing-library/jest-dom": "^6.9.1",
|
||||
"@twbs/fantasticon": "^3.1.0",
|
||||
"@types/dom-chromium-ai": "^0.0.11",
|
||||
"@types/dom-view-transitions": "^1.0.6",
|
||||
"@types/hast": "^3.0.4",
|
||||
"@types/jest": "^30.0.0",
|
||||
@ -5616,6 +5617,13 @@
|
||||
"@types/node": "*"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/dom-chromium-ai": {
|
||||
"version": "0.0.11",
|
||||
"resolved": "https://registry.npmjs.org/@types/dom-chromium-ai/-/dom-chromium-ai-0.0.11.tgz",
|
||||
"integrity": "sha512-Li04Mac9ic1vbX/te9re8v1010fh5YB/30dMcJLpIuIyDoT7xE/dIdg9r9UrFZLs5Ztmonb3nP7+LhPpFuHBGw==",
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/@types/dom-view-transitions": {
|
||||
"version": "1.0.6",
|
||||
"resolved": "https://registry.npmjs.org/@types/dom-view-transitions/-/dom-view-transitions-1.0.6.tgz",
|
||||
|
||||
@ -59,6 +59,7 @@
|
||||
"@tauri-apps/cli": "^2.9.4",
|
||||
"@testing-library/jest-dom": "^6.9.1",
|
||||
"@twbs/fantasticon": "^3.1.0",
|
||||
"@types/dom-chromium-ai": "^0.0.11",
|
||||
"@types/dom-view-transitions": "^1.0.6",
|
||||
"@types/hast": "^3.0.4",
|
||||
"@types/jest": "^30.0.0",
|
||||
|
||||
@ -1,4 +1,3 @@
|
||||
import type { FC } from '../../../lib/teact/teact';
|
||||
import {
|
||||
memo, useMemo, useState,
|
||||
} from '../../../lib/teact/teact';
|
||||
@ -8,43 +7,15 @@ import type { AccountSettings } from '../../../types';
|
||||
|
||||
import { SUPPORTED_TRANSLATION_LANGUAGES } from '../../../config';
|
||||
import buildClassName from '../../../util/buildClassName';
|
||||
import { partition } from '../../../util/iteratees';
|
||||
|
||||
import useEffectWithPrevDeps from '../../../hooks/useEffectWithPrevDeps';
|
||||
import useHistoryBack from '../../../hooks/useHistoryBack';
|
||||
import useLang from '../../../hooks/useLang';
|
||||
import useLastCallback from '../../../hooks/useLastCallback';
|
||||
import useOldLang from '../../../hooks/useOldLang';
|
||||
|
||||
import ItemPicker, { type ItemPickerOption } from '../../common/pickers/ItemPicker';
|
||||
|
||||
import styles from './SettingsDoNotTranslate.module.scss';
|
||||
|
||||
// https://fasttext.cc/docs/en/language-identification.html
|
||||
const LOCAL_SUPPORTED_DETECTION_LANGUAGES = [
|
||||
'af', 'als', 'am', 'an', 'ar', 'arz', 'as', 'ast', 'av', 'az',
|
||||
'azb', 'ba', 'bar', 'bcl', 'be', 'bg', 'bh', 'bn', 'bo', 'bpy',
|
||||
'br', 'bs', 'bxr', 'ca', 'cbk', 'ce', 'ceb', 'ckb', 'co', 'cs',
|
||||
'cv', 'cy', 'da', 'de', 'diq', 'dsb', 'dty', 'dv', 'el', 'eml',
|
||||
'en', 'eo', 'es', 'et', 'eu', 'fa', 'fi', 'fr', 'frr', 'fy',
|
||||
'ga', 'gd', 'gl', 'gn', 'gom', 'gu', 'gv', 'he', 'hi', 'hif',
|
||||
'hr', 'hsb', 'ht', 'hu', 'hy', 'ia', 'id', 'ie', 'ilo', 'io',
|
||||
'is', 'it', 'ja', 'jbo', 'jv', 'ka', 'kk', 'km', 'kn', 'ko',
|
||||
'krc', 'ku', 'kv', 'kw', 'ky', 'la', 'lb', 'lez', 'li', 'lmo',
|
||||
'lo', 'lrc', 'lt', 'lv', 'mai', 'mg', 'mhr', 'min', 'mk', 'ml',
|
||||
'mn', 'mr', 'mrj', 'ms', 'mt', 'mwl', 'my', 'myv', 'mzn', 'nah',
|
||||
'nap', 'nds', 'ne', 'new', 'nl', 'nn', 'no', 'oc', 'or', 'os',
|
||||
'pa', 'pam', 'pfl', 'pl', 'pms', 'pnb', 'ps', 'pt', 'qu', 'rm',
|
||||
'ro', 'ru', 'rue', 'sa', 'sah', 'sc', 'scn', 'sco', 'sd', 'sh',
|
||||
'si', 'sk', 'sl', 'so', 'sq', 'sr', 'su', 'sv', 'sw', 'ta', 'te',
|
||||
'tg', 'th', 'tk', 'tl', 'tr', 'tt', 'tyv', 'ug', 'uk', 'ur', 'uz',
|
||||
'vec', 'vep', 'vi', 'vls', 'vo', 'wa', 'war', 'wuu', 'xal', 'xmf',
|
||||
'yi', 'yo', 'yue', 'zh',
|
||||
];
|
||||
|
||||
// Entries of SUPPORTED_TRANSLATION_LANGUAGES whose code is also listed in LOCAL_SUPPORTED_DETECTION_LANGUAGES.
const SUPPORTED_LANGUAGES = SUPPORTED_TRANSLATION_LANGUAGES.filter((lang: string) => (
|
||||
LOCAL_SUPPORTED_DETECTION_LANGUAGES.includes(lang)
|
||||
));
|
||||
|
||||
type OwnProps = {
|
||||
isActive?: boolean;
|
||||
onReset: () => void;
|
||||
@ -52,36 +23,35 @@ type OwnProps = {
|
||||
|
||||
type StateProps = Pick<AccountSettings, 'doNotTranslate'>;
|
||||
|
||||
const SettingsDoNotTranslate: FC<OwnProps & StateProps> = ({
|
||||
const SettingsDoNotTranslate = ({
|
||||
isActive,
|
||||
doNotTranslate,
|
||||
onReset,
|
||||
}) => {
|
||||
}: OwnProps & StateProps) => {
|
||||
const { setSettingOption } = getActions();
|
||||
|
||||
const lang = useOldLang();
|
||||
const language = lang.code || 'en';
|
||||
const [displayedOptions, setDisplayedOptions] = useState<string[]>([]);
|
||||
const lang = useLang();
|
||||
const language = lang.code;
|
||||
const [searchQuery, setSearchQuery] = useState<string>('');
|
||||
|
||||
const displayedOptionList: ItemPickerOption[] = useMemo(() => {
|
||||
const options = SUPPORTED_LANGUAGES.map((langCode: string) => {
|
||||
const translatedNames = new Intl.DisplayNames([language], { type: 'language' });
|
||||
const translatedName = translatedNames.of(langCode)!;
|
||||
const translatedNames = new Intl.DisplayNames([language], { type: 'language' });
|
||||
const options = SUPPORTED_TRANSLATION_LANGUAGES.map((langCode: string) => {
|
||||
const translatedName = translatedNames.of(langCode);
|
||||
|
||||
const originalNames = new Intl.DisplayNames([langCode], { type: 'language' });
|
||||
const originalName = originalNames.of(langCode)!;
|
||||
const originalName = new Intl.DisplayNames([langCode], { type: 'language' })
|
||||
.of(langCode);
|
||||
|
||||
if (!translatedName || !originalName) {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
return {
|
||||
langCode,
|
||||
translatedName,
|
||||
originalName,
|
||||
value: langCode,
|
||||
label: translatedName,
|
||||
subLabel: originalName,
|
||||
};
|
||||
}).filter(Boolean).map(({ langCode, translatedName, originalName }) => ({
|
||||
label: translatedName,
|
||||
subLabel: originalName,
|
||||
value: langCode,
|
||||
}));
|
||||
}).filter(Boolean);
|
||||
|
||||
if (!searchQuery.trim()) {
|
||||
const currentLanguageOption = options.find((option) => option.value === language);
|
||||
@ -89,17 +59,14 @@ const SettingsDoNotTranslate: FC<OwnProps & StateProps> = ({
|
||||
return currentLanguageOption ? [currentLanguageOption, ...otherOptionList] : options;
|
||||
}
|
||||
|
||||
return options?.filter((option) => option.label.toLowerCase().includes(searchQuery.toLowerCase()));
|
||||
return options?.filter((option) => (
|
||||
option.label.toLowerCase().includes(searchQuery.toLowerCase())
|
||||
|| option.subLabel?.toLowerCase().includes(searchQuery.toLowerCase())
|
||||
|| option.value.toLowerCase().includes(searchQuery.toLowerCase())
|
||||
));
|
||||
}, [language, searchQuery]);
|
||||
|
||||
useEffectWithPrevDeps(([prevIsActive, prevLanguage]) => {
|
||||
if (prevIsActive === isActive && prevLanguage?.find((option) => option === language)) return;
|
||||
const [selected] = partition(displayedOptionList, (option) => doNotTranslate.includes(option.value));
|
||||
setDisplayedOptions([...selected.map((option) => option.value)]);
|
||||
}, [isActive, doNotTranslate, displayedOptions.length, language, displayedOptionList]);
|
||||
|
||||
const handleChange = useLastCallback((newSelectedIds: string[]) => {
|
||||
setDisplayedOptions(newSelectedIds);
|
||||
setSettingOption({
|
||||
doNotTranslate: newSelectedIds,
|
||||
});
|
||||
@ -116,7 +83,7 @@ const SettingsDoNotTranslate: FC<OwnProps & StateProps> = ({
|
||||
<ItemPicker
|
||||
className={styles.picker}
|
||||
items={displayedOptionList}
|
||||
selectedValues={displayedOptions}
|
||||
selectedValues={doNotTranslate}
|
||||
onSelectedValuesChange={handleChange}
|
||||
filterValue={searchQuery}
|
||||
onFilterChange={setSearchQuery}
|
||||
|
||||
@ -782,7 +782,7 @@ const Message = ({
|
||||
|
||||
const detectedLanguage = useTextLanguage(
|
||||
text?.text,
|
||||
!(areTranslationsEnabled && shouldDetectChatLanguage),
|
||||
!(areTranslationsEnabled && shouldDetectChatLanguage) || isTypingDraft,
|
||||
getIsMessageListReady,
|
||||
);
|
||||
useDetectChatLanguage(message, detectedLanguage, !shouldDetectChatLanguage, getIsMessageListReady);
|
||||
|
||||
@ -1,20 +1,41 @@
|
||||
import { useEffect, useState } from '../lib/teact/teact';
|
||||
import { useEffect, useRef, useState } from '../lib/teact/teact';
|
||||
|
||||
import type { Signal } from '../util/signals';
|
||||
|
||||
import { detectLanguage } from '../util/languageDetection';
|
||||
|
||||
export default function useTextLanguage(text?: string, isDisabled?: boolean, getIsReady?: Signal<boolean>) {
|
||||
const [language, setLanguage] = useState<string | undefined>();
|
||||
const [language, setLanguage] = useState<string>();
|
||||
const lastTextRef = useRef<string>();
|
||||
|
||||
useEffect(() => {
|
||||
if (isDisabled || (getIsReady && !getIsReady())) return;
|
||||
if (isDisabled || (getIsReady && !getIsReady()) || lastTextRef.current === text) return;
|
||||
|
||||
if (text) {
|
||||
detectLanguage(text).then(setLanguage);
|
||||
} else {
|
||||
let isCancelled = false;
|
||||
|
||||
if (!text) {
|
||||
setLanguage(undefined);
|
||||
lastTextRef.current = undefined;
|
||||
return;
|
||||
}
|
||||
|
||||
detectLanguage(text).then((lang) => {
|
||||
if (isCancelled) {
|
||||
return;
|
||||
}
|
||||
|
||||
setLanguage(lang);
|
||||
}).finally(() => {
|
||||
if (isCancelled) {
|
||||
return;
|
||||
}
|
||||
|
||||
lastTextRef.current = text;
|
||||
});
|
||||
|
||||
return () => {
|
||||
isCancelled = true;
|
||||
};
|
||||
}, [isDisabled, text, getIsReady]);
|
||||
|
||||
return language;
|
||||
|
||||
@ -39,8 +39,8 @@ function parseLabelsWithProbabilities(labels: string) {
|
||||
.map((labelWithProb: string) => {
|
||||
const [label, prob] = labelWithProb.split(' ');
|
||||
return {
|
||||
label: parseLabel(label),
|
||||
prob: parseFloat(prob),
|
||||
detectedLanguage: parseLabel(label),
|
||||
confidence: parseFloat(prob),
|
||||
};
|
||||
});
|
||||
}
|
||||
|
||||
@ -111,6 +111,7 @@ export const IS_BACKDROP_BLUR_SUPPORTED = CSS.supports('backdrop-filter: blur()'
|
||||
export const IS_INSTALL_PROMPT_SUPPORTED = 'onbeforeinstallprompt' in window;
|
||||
export const IS_OPEN_IN_NEW_TAB_SUPPORTED = !(IS_PWA && IS_MOBILE);
|
||||
export const IS_TRANSLATION_SUPPORTED = !IS_TEST;
|
||||
// True when a `LanguageDetector` property exists on `window`; this checks presence only, not that a detector can be created.
export const IS_TRANSLATION_DETECTOR_SUPPORTED = 'LanguageDetector' in window;
|
||||
export const IS_VIEW_TRANSITION_SUPPORTED = CSS.supports('view-transition-class: test')
|
||||
&& !IS_FIREFOX; // Fix flashing elements before removing
|
||||
|
||||
|
||||
@ -1,24 +1,40 @@
|
||||
import type { FastTextApi } from '../lib/fasttextweb/fasttext.worker';
|
||||
import type { Connector } from './PostMessageConnector';
|
||||
|
||||
import { IS_TRANSLATION_SUPPORTED } from './browser/windowEnvironment';
|
||||
import { DEBUG } from '../config';
|
||||
import { IS_TRANSLATION_DETECTOR_SUPPORTED, IS_TRANSLATION_SUPPORTED } from './browser/windowEnvironment';
|
||||
|
||||
import Deferred from './Deferred';
|
||||
import { createConnector } from './PostMessageConnector';
|
||||
|
||||
const WORKER_INIT_DELAY = 4000;
|
||||
const DETECTOR_INIT_DELAY = 4000;
|
||||
|
||||
const DEFAULT_THRESHOLD = 0.2;
|
||||
const DEFAULT_LABELS_COUNT = 5;
|
||||
|
||||
const UNDEFINED_LANGUAGE = 'und';
|
||||
|
||||
let worker: Connector<FastTextApi> | undefined;
|
||||
let languageDetector: LanguageDetector | undefined;
|
||||
const initializationDeferred = new Deferred();
|
||||
|
||||
if (IS_TRANSLATION_SUPPORTED) {
|
||||
setTimeout(initWorker, WORKER_INIT_DELAY);
|
||||
setTimeout(initLanguageDetection, DETECTOR_INIT_DELAY);
|
||||
}
|
||||
|
||||
function initWorker() {
|
||||
async function initLanguageDetection() {
|
||||
if (isInitialized()) return;
|
||||
if (IS_TRANSLATION_DETECTOR_SUPPORTED) {
|
||||
try {
|
||||
languageDetector = await LanguageDetector.create();
|
||||
initializationDeferred.resolve();
|
||||
return;
|
||||
} catch (error) {
|
||||
// eslint-disable-next-line no-console
|
||||
if (DEBUG) console.error('Failed to initialize language detector: ', error);
|
||||
}
|
||||
}
|
||||
|
||||
if (!worker) {
|
||||
worker = createConnector<FastTextApi>(
|
||||
new Worker(new URL('../lib/fasttextweb/fasttext.worker.ts', import.meta.url)),
|
||||
@ -27,16 +43,53 @@ function initWorker() {
|
||||
}
|
||||
}
|
||||
|
||||
export async function detectLanguage(text: string, threshold = DEFAULT_THRESHOLD) {
|
||||
if (!worker) await initializationDeferred.promise;
|
||||
/**
 * Reports whether a detection backend has been assigned.
 *
 * @returns `true` when either `languageDetector` or `worker` holds a truthy value, `false` otherwise.
 */
function isInitialized() {
|
||||
return Boolean(languageDetector || worker);
|
||||
}
|
||||
|
||||
/**
 * Detects the language of `text`.
 *
 * If no backend has been assigned yet, waits for `initializationDeferred.promise` first.
 * When `languageDetector` is set, only the first entry returned by `detect()` is considered;
 * otherwise a `detectLanguage` request is sent to `worker` and its result is returned as is.
 *
 * @param text Text to analyze.
 * @param threshold Minimum confidence the first result must reach (defaults to `DEFAULT_THRESHOLD`).
 * @returns The detected language of the first result, or `undefined` when there is no result,
 * the result is `UNDEFINED_LANGUAGE`, its confidence is missing/zero or below `threshold`,
 * or `detect()` throws (the error is logged only when `DEBUG` is set).
 */
export async function detectLanguage(text: string, threshold = DEFAULT_THRESHOLD): Promise<string | undefined> {
|
||||
if (!isInitialized()) await initializationDeferred.promise;
|
||||
|
||||
if (languageDetector) {
|
||||
try {
|
||||
const results = await languageDetector.detect(text);
|
||||
// NOTE(review): presumably `detect()` orders results by confidence, highest first — confirm against the API docs.
const first = results[0];
|
||||
if (
|
||||
!first
|
||||
|| first.detectedLanguage === UNDEFINED_LANGUAGE
|
||||
|| !first.confidence
|
||||
|| first.confidence < threshold
|
||||
) return undefined;
|
||||
|
||||
return first.detectedLanguage;
|
||||
} catch (error) {
|
||||
// eslint-disable-next-line no-console
|
||||
if (DEBUG) console.error('Failed to detect language: ', error);
|
||||
return undefined;
|
||||
}
|
||||
}
|
||||
|
||||
// Reached only when `languageDetector` is not set; `worker` is asserted non-null here rather than checked.
const result = await worker!.request({ name: 'detectLanguage', args: [text, threshold] });
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
 * Returns candidate languages for `text`.
 *
 * If no backend has been assigned yet, waits for `initializationDeferred.promise` first.
 * When `languageDetector` is set, its `detect()` results are filtered and truncated here;
 * otherwise a `detectLanguageProbability` request is sent to `worker` and its result is returned as is.
 *
 * @param text Text to analyze.
 * @param labelsCount Maximum number of results to keep (defaults to `DEFAULT_LABELS_COUNT`).
 * @param threshold Minimum confidence a result must reach (defaults to `DEFAULT_THRESHOLD`).
 * @returns The retained results, or `undefined` when `detect()` throws
 * (the error is logged only when `DEBUG` is set).
 */
export async function detectLanguageProbability(
|
||||
text: string, labelsCount = DEFAULT_LABELS_COUNT, threshold = DEFAULT_THRESHOLD,
|
||||
// NOTE(review): the untyped `) {` / `if (!worker)` pair below and the typed pair after it look like the
// before/after sides of the same change shown together in this view — confirm which one applies.
) {
|
||||
if (!worker) await initializationDeferred.promise;
|
||||
): Promise<LanguageDetectionResult[] | undefined> {
|
||||
if (!isInitialized()) await initializationDeferred.promise;
|
||||
if (languageDetector) {
|
||||
try {
|
||||
const results = await languageDetector.detect(text);
|
||||
// Keep results that are not UNDEFINED_LANGUAGE and whose confidence is truthy and at least `threshold`,
// then take the first `labelsCount` of them.
return results.filter((result) => result.detectedLanguage !== UNDEFINED_LANGUAGE
|
||||
&& (result.confidence && result.confidence >= threshold))
|
||||
.slice(0, labelsCount);
|
||||
} catch (error) {
|
||||
// eslint-disable-next-line no-console
|
||||
if (DEBUG) console.error('Failed to detect language probability: ', error);
|
||||
return undefined;
|
||||
}
|
||||
}
|
||||
|
||||
// Reached only when `languageDetector` is not set; `worker` is asserted non-null here rather than checked.
const result = await worker!.request({ name: 'detectLanguageProbability', args: [text, labelsCount, threshold] });
|
||||
return result;
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user