72 lines
2.2 KiB
TypeScript
72 lines
2.2 KiB
TypeScript
import { DEBUG } from '../../config';
|
|
import { createWorkerInterface } from '../../util/createPostMessageInterface';
|
|
import fasttextInitializer from './fasttext-wasm.cjs';
|
|
import fasttextWasmPath from './fasttext-wasm.wasm';
|
|
|
|
type FastTextMethods = {
|
|
makePrediction: (type: 'predict' | 'predict-prob', text: string, k: string, threshold: string) => Promise<string>;
|
|
};
|
|
|
|
const LABEL_PREFIX = '__label__';
|
|
|
|
// Since webpack will change the name and potentially the path of the
|
|
// `.wasm` file, we have to provide a `locateFile()` hook to redirect
|
|
// to the appropriate URL.
|
|
// More details: https://kripken.github.io/emscripten-site/docs/api_reference/module.html
|
|
let fastTextInstance: FastTextMethods;
|
|
const fastTextPromise = fasttextInitializer({
|
|
locateFile: (path: string, prefix: string) => {
|
|
if (path.endsWith('.wasm')) {
|
|
return fasttextWasmPath;
|
|
}
|
|
return prefix + path;
|
|
},
|
|
}).then((fastText: FastTextMethods) => {
|
|
fastTextInstance = fastText;
|
|
if (DEBUG) {
|
|
// eslint-disable-next-line no-console
|
|
console.log('[FASTTEXT] Worker ready');
|
|
}
|
|
});
|
|
|
|
function parseLabel(label: string) {
|
|
return label.split('\n')[0].replace(LABEL_PREFIX, '').trim();
|
|
}
|
|
|
|
function parseLabelsWithProbabilities(labels: string) {
|
|
return labels.trim()
|
|
.split('\n')
|
|
.map((labelWithProb: string) => {
|
|
const [label, prob] = labelWithProb.split(' ');
|
|
return {
|
|
detectedLanguage: parseLabel(label),
|
|
confidence: parseFloat(prob),
|
|
};
|
|
});
|
|
}
|
|
|
|
export async function detectLanguage(text: string, threshold: number) {
|
|
if (!fastTextInstance) await fastTextPromise;
|
|
const label = await fastTextInstance.makePrediction('predict', text, '1', threshold.toString());
|
|
if (!label.length) return undefined;
|
|
return parseLabel(label);
|
|
}
|
|
|
|
export async function detectLanguageProbability(text: string, labelsCount: number, threshold: number) {
|
|
if (!fastTextInstance) await fastTextPromise;
|
|
const labels = await fastTextInstance.makePrediction(
|
|
'predict-prob', text, labelsCount.toString(), threshold.toString(),
|
|
);
|
|
if (!labels.length) return undefined;
|
|
return parseLabelsWithProbabilities(labels);
|
|
}
|
|
|
|
const api = {
|
|
detectLanguage,
|
|
detectLanguageProbability,
|
|
};
|
|
|
|
createWorkerInterface(api);
|
|
|
|
export type FastTextApi = typeof api;
|