fix(cld2): stack overflow - increase WASM stack size

The CLD2 WASM module has a stack size of only 8KB (TOTAL_STACK=8192), leading to stack overflow crashes when processing long texts.

Error Manifestation:
- Abort when processing 282-character mixed English and Chinese text
- Error message: `RuntimeError: Aborted()`
- Crash location: `detectLanguageWithCLD` calling CLD2

- **Makefile**: Increase TOTAL_STACK from 8192 (8KB) to 65536 (64KB)
- An 8-fold increase is sufficient for language detection of complex texts.
- The WASM module size remains unchanged at 1.1MB.

- Input sanitization: Remove null bytes and control characters
- UTF-8 byte boundary truncation: 512-byte limit
- Enhanced error logging: Record crash context
- API request limit: 10MB JSON parsing limit

- `detectLanguageWithLength` method accepts explicit length
- WebIDL interface definition completed
- `detectLanguageWithLength` is temporarily disabled; the original interface is still used, with input sanitization applied

fix: detectMultipleLanguages

fix: detectMultipleLanguages
This commit is contained in:
xxnuo
2026-01-01 17:10:36 +08:00
parent b62330d2be
commit 88010e9aa7
10 changed files with 319 additions and 78 deletions

View File

@@ -20,7 +20,7 @@ WEBIDL ?= $(PYTHON2) $(EMSCRIPTEN_ROOT)/tools/webidl_binder.py
# analyzing one 20MB web page gives us a 30-40MB heap for the life of the
# worker.
FLAGS=-s -O3 -s INLINING_LIMIT=1 -s NO_FILESYSTEM=1 -s NO_EXIT_RUNTIME=1 -s INVOKE_RUN=0 \
-s TOTAL_STACK=8192 -s TOTAL_MEMORY=2097152 -s ALLOW_MEMORY_GROWTH=1 \
-s TOTAL_STACK=65536 -s TOTAL_MEMORY=2097152 -s ALLOW_MEMORY_GROWTH=1 \
-s MODULARIZE=1 -s EXPORT_NAME=loadCLD2 \
--closure 1

View File

@@ -49,6 +49,14 @@ void EMSCRIPTEN_KEEPALIVE emscripten_bind_LanguageGuess___destroy___0(LanguageGu
// Interface: LanguageInfo
// Glue entry point for the three-argument form of
// LanguageInfo::detectLanguageWithLength. The arguments are forwarded
// unchanged and the pointer produced by the static method is returned as-is.
LanguageInfo* EMSCRIPTEN_KEEPALIVE emscripten_bind_LanguageInfo_detectLanguageWithLength_3(char* buffer, int bufferLength, bool isPlainText) {
  LanguageInfo* const detected =
      LanguageInfo::detectLanguageWithLength(buffer, bufferLength, isPlainText);
  return detected;
}
// Glue entry point for the six-argument form of
// LanguageInfo::detectLanguageWithLength, which adds the TLD, encoding and
// language hints. Every argument is forwarded unchanged and the pointer
// produced by the static method is returned as-is.
LanguageInfo* EMSCRIPTEN_KEEPALIVE emscripten_bind_LanguageInfo_detectLanguageWithLength_6(char* buffer, int bufferLength, bool isPlainText, char* tldHint, int encodingHint, char* languageHint) {
  LanguageInfo* const detected = LanguageInfo::detectLanguageWithLength(
      buffer, bufferLength, isPlainText, tldHint, encodingHint, languageHint);
  return detected;
}
// Glue entry point for the two-argument form of LanguageInfo::detectLanguage.
// Both arguments are forwarded unchanged and the pointer produced by the
// static method is returned as-is.
LanguageInfo* EMSCRIPTEN_KEEPALIVE emscripten_bind_LanguageInfo_detectLanguage_2(char* buffer, bool isPlainText) {
  LanguageInfo* const detected = LanguageInfo::detectLanguage(buffer, isPlainText);
  return detected;
}

View File

@@ -32,6 +32,12 @@ interface LanguageGuess {
};
interface LanguageInfo {
static LanguageInfo detectLanguageWithLength(DOMString buffer, long bufferLength, boolean isPlainText);
static LanguageInfo detectLanguageWithLength(DOMString buffer, long bufferLength, boolean isPlainText,
DOMString? tldHint, long encodingHint,
DOMString? languageHint);
static LanguageInfo detectLanguage(DOMString buffer, boolean isPlainText);
static LanguageInfo detectLanguage(DOMString buffer, boolean isPlainText,

View File

@@ -268,6 +268,24 @@ LanguageInfo.prototype.constructor = LanguageInfo;
LanguageInfo.prototype.__class__ = LanguageInfo;
LanguageInfo.__cache__ = {};
Module['LanguageInfo'] = LanguageInfo;
// JS glue for LanguageInfo.detectLanguageWithLength.
//
// Parameters:
//   buffer        - text to analyse; a wrapper object is replaced by its .ptr,
//                   anything else is passed through ensureString().
//   bufferLength  - explicit length handed to the native side.
//                   NOTE(review): presumably a UTF-8 byte count rather than a
//                   JS string length - confirm against the C++ implementation.
//   isPlainText   - forwarded as-is.
//   tldHint, encodingHint, languageHint - optional hints; supply all three or
//                   none of them.
//
// Returns the native result wrapped as a LanguageInfo via wrapPointer().
//
// Throws TypeError when only some of the hints are supplied. The native glue
// exports just the 3- and 6-argument bindings; the previous dispatch to
// `..._4` / `..._5` referenced functions that do not exist and failed with a
// ReferenceError, so the unsupported arities are now rejected explicitly.
/** @suppress {undefinedVars, duplicate} @this{Object} */
LanguageInfo.prototype['detectLanguageWithLength'] = LanguageInfo.prototype.detectLanguageWithLength = function(buffer, bufferLength, isPlainText, tldHint, encodingHint, languageHint) {
  ensureCache.prepare();
  if (buffer && typeof buffer === 'object') buffer = buffer.ptr;
  else buffer = ensureString(buffer);
  if (bufferLength && typeof bufferLength === 'object') bufferLength = bufferLength.ptr;
  if (isPlainText && typeof isPlainText === 'object') isPlainText = isPlainText.ptr;
  if (tldHint && typeof tldHint === 'object') tldHint = tldHint.ptr;
  else tldHint = ensureString(tldHint);
  if (encodingHint && typeof encodingHint === 'object') encodingHint = encodingHint.ptr;
  if (languageHint && typeof languageHint === 'object') languageHint = languageHint.ptr;
  else languageHint = ensureString(languageHint);
  // No hints at all: use the three-argument native binding.
  if (tldHint === undefined) { return wrapPointer(_emscripten_bind_LanguageInfo_detectLanguageWithLength_3(buffer, bufferLength, isPlainText), LanguageInfo) }
  // Partial hint lists have no native counterpart; fail with a clear message
  // instead of calling an undefined binding.
  if (encodingHint === undefined || languageHint === undefined) {
    throw new TypeError('detectLanguageWithLength expects 3 or 6 arguments: buffer, bufferLength, isPlainText[, tldHint, encodingHint, languageHint]');
  }
  return wrapPointer(_emscripten_bind_LanguageInfo_detectLanguageWithLength_6(buffer, bufferLength, isPlainText, tldHint, encodingHint, languageHint), LanguageInfo);
};
/** @suppress {undefinedVars, duplicate} @this{Object} */
LanguageInfo.prototype['detectLanguage'] = LanguageInfo.prototype.detectLanguage = function(buffer, isPlainText, tldHint, encodingHint, languageHint) {
ensureCache.prepare();

View File

@@ -1,47 +1,48 @@
var loadCLD2=(()=>{var _scriptName=globalThis.document?.currentScript?.src;return async function(moduleArg={}){var moduleRtn;function aa(a){var b=0;return function(){return b<a.length?{done:!1,value:a[b++]}:{done:!0}}}function g(a){var b="undefined"!=typeof Symbol&&Symbol.iterator&&a[Symbol.iterator];if(b)return b.call(a);if("number"==typeof a.length)return{next:aa(a)};throw Error(String(a)+" is not an iterable or ArrayLike");}var l="function"==typeof Object.defineProperties?Object.defineProperty:function(a,b,c){if(a==Array.prototype||a==Object.prototype)return a;a[b]=c.value;return a};
function ba(a){a=["object"==typeof globalThis&&globalThis,a,"object"==typeof window&&window,"object"==typeof self&&self,"object"==typeof global&&global];for(var b=0;b<a.length;++b){var c=a[b];if(c&&c.Math==Math)return c}throw Error("Cannot find global object");}var p=ba(this);function r(a,b){if(b)a:{var c=p;a=a.split(".");for(var d=0;d<a.length-1;d++){var f=a[d];if(!(f in c))break a;c=c[f]}a=a[a.length-1];d=c[a];b=b(d);b!=d&&null!=b&&l(c,a,{configurable:!0,writable:!0,value:b})}}
function t(){this.G=!1;this.B=null;this.C=void 0;this.v=1;this.J=this.F=0;this.D=null}function u(a){if(a.G)throw new TypeError("Generator is already running");a.G=!0}t.prototype.H=function(a){this.C=a};function v(a,b){a.D={R:b,$:!0};a.v=a.F||a.J}t.prototype.return=function(a){this.D={return:a};this.v=this.J};function x(a,b,c){a.v=c;return{value:b}}function y(a){a.F=0;var b=a.D.R;a.D=null;return b}function ca(a){this.v=new t;this.B=a}
function da(a,b){u(a.v);var c=a.v.B;if(c)return z(a,"return"in c?c["return"]:function(d){return{value:d,done:!0}},b,a.v.return);a.v.return(b);return C(a)}function z(a,b,c,d){try{var f=b.call(a.v.B,c);if(!(f instanceof Object))throw new TypeError("Iterator result "+f+" is not an object");if(!f.done)return a.v.G=!1,f;var h=f.value}catch(e){return a.v.B=null,v(a.v,e),C(a)}a.v.B=null;d.call(a.v,h);return C(a)}
function C(a){for(;a.v.v;)try{var b=a.B(a.v);if(b)return a.v.G=!1,{value:b.value,done:!1}}catch(c){a.v.C=void 0,v(a.v,c)}a.v.G=!1;if(a.v.D){b=a.v.D;a.v.D=null;if(b.$)throw b.R;return{value:b.return,done:!0}}return{value:void 0,done:!0}}
function ea(a){this.next=function(b){u(a.v);a.v.B?b=z(a,a.v.B.next,b,a.v.H):(a.v.H(b),b=C(a));return b};this.throw=function(b){u(a.v);a.v.B?b=z(a,a.v.B["throw"],b,a.v.H):(v(a.v,b),b=C(a));return b};this.return=function(b){return da(a,b)};this[Symbol.iterator]=function(){return this}}function fa(a){function b(d){return a.next(d)}function c(d){return a.throw(d)}return new Promise(function(d,f){function h(e){e.done?d(e.value):Promise.resolve(e.value).then(b,c).then(h,f)}h(a.next())})}
function D(a){return fa(new ea(new ca(a)))}r("Symbol",function(a){function b(h){if(this instanceof b)throw new TypeError("Symbol is not a constructor");return new c(d+(h||"")+"_"+f++,h)}function c(h,e){this.v=h;l(this,"description",{configurable:!0,writable:!0,value:e})}if(a)return a;c.prototype.toString=function(){return this.v};var d="jscomp_symbol_"+(1E9*Math.random()>>>0)+"_",f=0;return b});
r("Symbol.iterator",function(a){if(a)return a;a=Symbol("Symbol.iterator");for(var b="Array Int8Array Uint8Array Uint8ClampedArray Int16Array Uint16Array Int32Array Uint32Array Float32Array Float64Array".split(" "),c=0;c<b.length;c++){var d=p[b[c]];"function"===typeof d&&"function"!=typeof d.prototype[a]&&l(d.prototype,a,{configurable:!0,writable:!0,value:function(){return ha(aa(this))}})}return a});function ha(a){a={next:a};a[Symbol.iterator]=function(){return this};return a}
r("Promise",function(a){function b(e){this.B=0;this.C=void 0;this.v=[];this.H=!1;var k=this.D();try{e(k.resolve,k.reject)}catch(m){k.reject(m)}}function c(){this.v=null}function d(e){return e instanceof b?e:new b(function(k){k(e)})}if(a)return a;c.prototype.B=function(e){if(null==this.v){this.v=[];var k=this;this.C(function(){k.F()})}this.v.push(e)};var f=p.setTimeout;c.prototype.C=function(e){f(e,0)};c.prototype.F=function(){for(;this.v&&this.v.length;){var e=this.v;this.v=[];for(var k=0;k<e.length;++k){var m=
e[k];e[k]=null;try{m()}catch(n){this.D(n)}}}this.v=null};c.prototype.D=function(e){this.C(function(){throw e;})};b.prototype.D=function(){function e(n){return function(q){m||(m=!0,n.call(k,q))}}var k=this,m=!1;return{resolve:e(this.V),reject:e(this.F)}};b.prototype.V=function(e){if(e===this)this.F(new TypeError("A Promise cannot resolve to itself"));else if(e instanceof b)this.X(e);else{a:switch(typeof e){case "object":var k=null!=e;break a;case "function":k=!0;break a;default:k=!1}k?this.U(e):this.G(e)}};
b.prototype.U=function(e){var k=void 0;try{k=e.then}catch(m){this.F(m);return}"function"==typeof k?this.Y(k,e):this.G(e)};b.prototype.F=function(e){this.J(2,e)};b.prototype.G=function(e){this.J(1,e)};b.prototype.J=function(e,k){if(0!=this.B)throw Error("Cannot settle("+e+", "+k+"): Promise already settled in state"+this.B);this.B=e;this.C=k;2===this.B&&this.W();this.S()};b.prototype.W=function(){var e=this;f(function(){if(e.T()){var k=p.console;"undefined"!==typeof k&&k.error(e.C)}},1)};b.prototype.T=
function(){if(this.H)return!1;var e=p.CustomEvent,k=p.Event,m=p.dispatchEvent;if("undefined"===typeof m)return!0;"function"===typeof e?e=new e("unhandledrejection",{cancelable:!0}):"function"===typeof k?e=new k("unhandledrejection",{cancelable:!0}):(e=p.document.createEvent("CustomEvent"),e.initCustomEvent("unhandledrejection",!1,!0,e));e.promise=this;e.reason=this.C;return m(e)};b.prototype.S=function(){if(null!=this.v){for(var e=0;e<this.v.length;++e)h.B(this.v[e]);this.v=null}};var h=new c;b.prototype.X=
function(e){var k=this.D();e.N(k.resolve,k.reject)};b.prototype.Y=function(e,k){var m=this.D();try{e.call(k,m.resolve,m.reject)}catch(n){m.reject(n)}};b.prototype.then=function(e,k){function m(w,A){return"function"==typeof w?function(Y){try{n(w(Y))}catch(Z){q(Z)}}:A}var n,q,B=new b(function(w,A){n=w;q=A});this.N(m(e,n),m(k,q));return B};b.prototype.catch=function(e){return this.then(void 0,e)};b.prototype.N=function(e,k){function m(){switch(n.B){case 1:e(n.C);break;case 2:k(n.C);break;default:throw Error("Unexpected state: "+
n.B);}}var n=this;null==this.v?h.B(m):this.v.push(m);this.H=!0};b.resolve=d;b.reject=function(e){return new b(function(k,m){m(e)})};b.race=function(e){return new b(function(k,m){for(var n=g(e),q=n.next();!q.done;q=n.next())d(q.value).N(k,m)})};b.all=function(e){var k=g(e),m=k.next();return m.done?d([]):new b(function(n,q){function B(Y){return function(Z){w[Y]=Z;A--;0==A&&n(w)}}var w=[],A=0;do w.push(void 0),A++,d(m.value).N(B(w.length-1),q),m=k.next();while(!m.done)})};return b});
r("globalThis",function(a){return a||p});function ia(a,b,c){if(null==a)throw new TypeError("The 'this' value for String.prototype."+c+" must not be null or undefined");if(b instanceof RegExp)throw new TypeError("First argument to String.prototype."+c+" must not be a regular expression");return a+""}
r("String.prototype.startsWith",function(a){return a?a:function(b,c){var d=ia(this,b,"startsWith"),f=d.length,h=b.length;c=Math.max(0,Math.min(c|0,d.length));for(var e=0;e<h&&c<f;)if(d[c++]!=b[e++])return!1;return e>=h}});r("String.prototype.codePointAt",function(a){return a?a:function(b){var c=ia(this,null,"codePointAt"),d=c.length;b=Number(b)||0;if(0<=b&&b<d){b|=0;var f=c.charCodeAt(b);if(55296>f||56319<f||b+1===d)return f;b=c.charCodeAt(b+1);return 56320>b||57343<b?f:1024*(f-55296)+b+9216}}});
function ja(a,b){a instanceof String&&(a+="");var c=0,d=!1,f={next:function(){if(!d&&c<a.length){var h=c++;return{value:b(h,a[h]),done:!1}}d=!0;return{done:!0,value:void 0}}};f[Symbol.iterator]=function(){return f};return f}r("Array.prototype.keys",function(a){return a?a:function(){return ja(this,function(b){return b})}});
var E=moduleArg,ka=!!globalThis.window,la=!!globalThis.WorkerGlobalScope,ma,na,oa,pa=(null==(ma=globalThis.process)?void 0:null==(na=ma.versions)?void 0:na.node)&&"renderer"!=(null==(oa=globalThis.process)?void 0:oa.type);"undefined"!=typeof __filename?_scriptName=__filename:la&&(_scriptName=self.location.href);var F="",qa,G;
if(pa){var fs=require("fs");F=__dirname+"/";G=function(a){a=H(a)?new URL(a):a;return fs.readFileSync(a)};qa=function(a){var b=void 0===b?!0:b;var c;return D(function(d){a=H(a)?new URL(a):a;c=fs.readFileSync(a,b?void 0:"utf8");return d.return(c)})};process.argv.slice(2)}else if(ka||la){try{F=(new URL(".",_scriptName)).href}catch(a){}la&&(G=function(a){var b=new XMLHttpRequest;b.open("GET",a,!1);b.responseType="arraybuffer";b.send(null);return new Uint8Array(b.response)});qa=function(a){var b;return D(function(c){if(1==
c.v)return H(a)?c.return(new Promise(function(d,f){var h=new XMLHttpRequest;h.open("GET",a,!0);h.responseType="arraybuffer";h.onload=function(){200==h.status||0==h.status&&h.response?d(h.response):f(h.status)};h.onerror=f;h.send(null)})):x(c,fetch(a,{credentials:"same-origin"}),2);b=c.C;if(b.ok)return c.return(b.arrayBuffer());throw Error(b.status+" : "+b.url);})}}var ra=console.log.bind(console),I=console.error.bind(console),J,sa=!1;function H(a){return a.startsWith("file://")}
var ta,ua,va,K,L,wa=!1;function xa(){var a=M.buffer;va=new Int8Array(a);new Int16Array(a);K=new Uint8Array(a);new Uint16Array(a);new Int32Array(a);L=new Uint32Array(a);new Float32Array(a);new Float64Array(a);new BigInt64Array(a);new BigUint64Array(a)}function N(a){var b;null==(b=E.onAbort)||b.call(E,a);a="Aborted("+a+")";I(a);sa=!0;a=new WebAssembly.RuntimeError(a+". Build with -sASSERTIONS for more info.");var c;null==(c=ua)||c(a);throw a;}var O;
function ya(a){var b;return D(function(c){switch(c.v){case 1:if(J){c.v=2;break}c.F=3;return x(c,qa(a),5);case 5:return b=c.C,c.return(new Uint8Array(b));case 3:y(c);case 2:var d=c.return;if(a==O&&J)var f=new Uint8Array(J);else if(G)f=G(a);else throw"both async and sync fetching of the wasm failed";return d.call(c,f)}})}
function za(a,b){var c,d,f;return D(function(h){switch(h.v){case 1:return h.F=2,x(h,ya(a),4);case 4:return c=h.C,x(h,WebAssembly.instantiate(c,b),5);case 5:return d=h.C,h.return(d);case 2:f=y(h),I("failed to asynchronously prepare wasm: "+f),N(f),h.v=0}})}
function Aa(a){var b=J,c=O,d,f,h;return D(function(e){switch(e.v){case 1:if(b||H(c)||pa){e.v=2;break}e.F=3;d=fetch(c,{credentials:"same-origin"});return x(e,WebAssembly.instantiateStreaming(d,a),5);case 5:return f=e.C,e.return(f);case 3:h=y(e),I("wasm streaming compile failed: "+h),I("falling back to ArrayBuffer instantiation");case 2:return e.return(za(c,a))}})}function Ba(a){for(;0<a.length;)a.shift()(E)}var Ca=[],Da=[];function Ea(){var a=E.preRun.shift();Da.push(a)}
function Fa(a){this.A=a-24}var Ga=0,Ha=0,Ia=[null,[],[]],Ja=globalThis.TextDecoder&&new TextDecoder;
function Ka(a,b){b=void 0===b?0:b;var c=b;for(var d=c+void 0;a[c]&&!(c>=d);)++c;if(16<c-b&&a.buffer&&Ja)return Ja.decode(a.subarray(b,c));for(d="";b<c;){var f=a[b++];if(f&128){var h=a[b++]&63;if(192==(f&224))d+=String.fromCharCode((f&31)<<6|h);else{var e=a[b++]&63;f=224==(f&240)?(f&15)<<12|h<<6|e:(f&7)<<18|h<<12|e<<6|a[b++]&63;65536>f?d+=String.fromCharCode(f):(f-=65536,d+=String.fromCharCode(55296|f>>10,56320|f&1023))}}else d+=String.fromCharCode(f)}return d}function La(a){return a?Ka(K,a):""}
var Ma=[];E.print&&(ra=E.print);E.printErr&&(I=E.printErr);E.wasmBinary&&(J=E.wasmBinary);if(E.preInit)for("function"==typeof E.preInit&&(E.preInit=[E.preInit]);0<E.preInit.length;)E.preInit.shift()();
var Na,Oa,Pa,Qa,Ra,Sa,Ta,Ua,Va,Wa,Xa,Ya,M,Za={a:function(a,b,c){var d=new Fa(a);L[d.A+16>>2]=0;L[d.A+4>>2]=b;L[d.A+8>>2]=c;Ga=a;Ha++;throw Ga;},c:function(){return N("")},e:function(a,b){throw"Array index "+a+" out of bounds: [0,"+b+")";},d:function(a){var b=K.length;a>>>=0;if(2147483648<a)return!1;for(var c=1;4>=c;c*=2){var d=b*(1+.2/c);d=Math.min(d,a+100663296);a:{d=(Math.min(2147483648,65536*Math.ceil(Math.max(a,d)/65536))-M.buffer.byteLength+65535)/65536|0;try{M.grow(d);xa();var f=1;break a}catch(h){}f=
void 0}if(f)return!0}return!1},b:function(a,b,c,d){for(var f=0,h=0;h<c;h++){var e=L[b>>2],k=L[b+4>>2];b+=8;for(var m=0;m<k;m++){var n=a,q=K[e+m],B=Ia[n];0===q||10===q?((1===n?ra:I)(Ka(B)),B.length=0):B.push(q)}f+=k}L[d>>2]=f;return 0}},P;
P=await (function(){function a(f){f=P=f.exports;E._webidl_free=f.h;E._webidl_malloc=f.i;Na=E._emscripten_bind_Language_getLanguageCode_0=f.j;Oa=E._emscripten_bind_Language___destroy___0=f.k;Pa=E._emscripten_bind_VoidPtr___destroy___0=f.l;Qa=E._emscripten_bind_LanguageGuess_getPercent_0=f.m;Ra=E._emscripten_bind_LanguageGuess_getLanguageCode_0=f.n;Sa=E._emscripten_bind_LanguageGuess___destroy___0=f.o;Ta=E._emscripten_bind_LanguageInfo_detectLanguage_2=f.p;Ua=E._emscripten_bind_LanguageInfo_detectLanguage_5=
f.q;Va=E._emscripten_bind_LanguageInfo_getIsReliable_0=f.r;Wa=E._emscripten_bind_LanguageInfo_getLanguageCode_0=f.s;Xa=E._emscripten_bind_LanguageInfo_get_languages_1=f.t;Ya=E._emscripten_bind_LanguageInfo___destroy___0=f.u;M=f.f;xa();return P}var b,c,d;return D(function(f){if(1==f.v){b={a:Za};if(E.instantiateWasm)return f.return(new Promise(function(h){E.instantiateWasm(b,function(e,k){h(a(e,k))})}));null!=O||(O=E.locateFile?E.locateFile("cld2.wasm",F):F+"cld2.wasm");return x(f,Aa(b),2)}c=f.C;d=
a(c.instance);return f.return(d)})}());
(function(){function a(){E.calledRun=!0;if(!sa){wa=!0;Ba(Ma);P.g();var b;null==(b=ta)||b(E);var c;null==(c=E.onRuntimeInitialized)||c.call(E);if(E.postRun)for("function"==typeof E.postRun&&(E.postRun=[E.postRun]);E.postRun.length;)b=E.postRun.shift(),Ca.push(b);Ba(Ca)}}if(E.preRun)for("function"==typeof E.preRun&&(E.preRun=[E.preRun]);E.preRun.length;)Ea();Ba(Da);E.setStatus?(E.setStatus("Running..."),setTimeout(function(){setTimeout(function(){return E.setStatus("")},1);a()},1)):a()})();
function Q(){}Q.prototype=Object.create(Q.prototype);Q.prototype.constructor=Q;Q.prototype.I=Q;Q.K={};E.WrapperObject=Q;function $a(a){return(a||Q).K}E.getCache=$a;function R(a,b){var c=$a(b),d=c[a];if(d)return d;d=Object.create((b||Q).prototype);d.A=a;return c[a]=d}E.wrapPointer=R;E.castObject=function(a,b){return R(a.A,b)};E.NULL=R(0);E.destroy=function(a){if(!a.__destroy__)throw"Error: Cannot destroy object. (Did you create it yourself?)";a.__destroy__();delete $a(a.I)[a.A]};
E.compare=function(a,b){return a.A===b.A};E.getPointer=function(a){return a.A};E.getClass=function(a){return a.I};
var S={buffer:0,size:0,M:0,O:[],L:0,P:function(){if(S.L){for(var a=0;a<S.O.length;a++)E._webidl_free(S.O[a]);S.O.length=0;E._webidl_free(S.buffer);S.buffer=0;S.size+=S.L;S.L=0}S.buffer||(S.size+=128,S.buffer=E._webidl_malloc(S.size),S.buffer||N());S.M=0},alloc:function(a,b){S.buffer||N();a=a.length*b.BYTES_PER_ELEMENT;a=8*Math.ceil(a/8);S.M+a>=S.size?(0<a||N(),S.L+=a,b=E._webidl_malloc(a),S.O.push(b)):(b=S.buffer+S.M,S.M+=a);return b}};
function ab(a){if("string"===typeof a){for(var b=0,c=0;c<a.length;++c){var d=a.charCodeAt(c);127>=d?b++:2047>=d?b+=2:55296<=d&&57343>=d?(b+=4,++c):b+=3}b=Array(b+1);d=b.length;c=0;if(0<d){d=c+d-1;for(var f=0;f<a.length;++f){var h=a.codePointAt(f);if(127>=h){if(c>=d)break;b[c++]=h}else if(2047>=h){if(c+1>=d)break;b[c++]=192|h>>6;b[c++]=128|h&63}else if(65535>=h){if(c+2>=d)break;b[c++]=224|h>>12;b[c++]=128|h>>6&63;b[c++]=128|h&63}else{if(c+3>=d)break;b[c++]=240|h>>18;b[c++]=128|h>>12&63;b[c++]=128|
h>>6&63;b[c++]=128|h&63;f++}}b[c]=0}a=S.alloc(b,va);for(c=0;c<b.length;c++)va[a+c]=b[c];return a}return a}function T(){throw"cannot construct a Language, no constructor in IDL";}T.prototype=Object.create(Q.prototype);T.prototype.constructor=T;T.prototype.I=T;T.K={};E.Language=T;T.prototype.getLanguageCode=function(){return La(Na(this.A))};T.prototype.__destroy__=function(){Oa(this.A)};function U(){throw"cannot construct a VoidPtr, no constructor in IDL";}U.prototype=Object.create(Q.prototype);
U.prototype.constructor=U;U.prototype.I=U;U.K={};E.VoidPtr=U;U.prototype.__destroy__=function(){Pa(this.A)};function V(){throw"cannot construct a LanguageGuess, no constructor in IDL";}V.prototype=Object.create(T.prototype);V.prototype.constructor=V;V.prototype.I=V;V.K={};E.LanguageGuess=V;V.prototype.getPercent=function(){return Qa(this.A)};V.prototype.getLanguageCode=function(){return La(Ra(this.A))};V.prototype.__destroy__=function(){Sa(this.A)};
function W(){throw"cannot construct a LanguageInfo, no constructor in IDL";}W.prototype=Object.create(T.prototype);W.prototype.constructor=W;W.prototype.I=W;W.K={};E.LanguageInfo=W;
W.prototype.detectLanguage=function(a,b,c,d,f){S.P();a=a&&"object"===typeof a?a.A:ab(a);b&&"object"===typeof b&&(b=b.A);c=c&&"object"===typeof c?c.A:ab(c);d&&"object"===typeof d&&(d=d.A);f=f&&"object"===typeof f?f.A:ab(f);return void 0===c?R(Ta(a,b),W):void 0===d?R(_emscripten_bind_LanguageInfo_detectLanguage_3(a,b,c),W):void 0===f?R(_emscripten_bind_LanguageInfo_detectLanguage_4(a,b,c,d),W):R(Ua(a,b,c,d,f),W)};W.prototype.getIsReliable=function(){return!!Va(this.A)};W.prototype.getLanguageCode=function(){return La(Wa(this.A))};
W.prototype.get_languages=W.prototype.Z=function(a){var b=this.A;a&&"object"===typeof a&&(a=a.A);return R(Xa(b,a),V)};Object.defineProperty(W.prototype,"languages",{get:W.prototype.Z});W.prototype.__destroy__=function(){Ya(this.A)};S.alloc=S.alloc.bind(S);S.P=S.P.bind(S);
for(var X={ISO_8859_1:0,ISO_8859_2:1,ISO_8859_3:2,ISO_8859_4:3,ISO_8859_5:4,ISO_8859_6:5,ISO_8859_7:6,ISO_8859_8:7,ISO_8859_9:8,ISO_8859_10:9,JAPANESE_EUC_JP:10,EUC_JP:10,JAPANESE_SHIFT_JIS:11,SHIFT_JIS:11,JAPANESE_JIS:12,JIS:12,CHINESE_BIG5:13,BIG5:13,CHINESE_GB:14,CHINESE_EUC_CN:15,EUC_CN:15,KOREAN_EUC_KR:16,EUC_KR:16,UNICODE_UNUSED:17,CHINESE_EUC_DEC:18,EUC_DEC:18,CHINESE_CNS:19,CNS:19,CHINESE_BIG5_CP950:20,BIG5_CP950:20,JAPANESE_CP932:21,CP932:21,UTF8:22,UNKNOWN_ENCODING:23,ASCII_7BIT:24,RUSSIAN_KOI8_R:25,
var loadCLD2=(()=>{var _scriptName=globalThis.document?.currentScript?.src;return async function(moduleArg={}){var moduleRtn;function ba(a){var b=0;return function(){return b<a.length?{done:!1,value:a[b++]}:{done:!0}}}function h(a){var b="undefined"!=typeof Symbol&&Symbol.iterator&&a[Symbol.iterator];if(b)return b.call(a);if("number"==typeof a.length)return{next:ba(a)};throw Error(String(a)+" is not an iterable or ArrayLike");}var l="function"==typeof Object.defineProperties?Object.defineProperty:function(a,b,c){if(a==Array.prototype||a==Object.prototype)return a;a[b]=c.value;return a};
function ca(a){a=["object"==typeof globalThis&&globalThis,a,"object"==typeof window&&window,"object"==typeof self&&self,"object"==typeof global&&global];for(var b=0;b<a.length;++b){var c=a[b];if(c&&c.Math==Math)return c}throw Error("Cannot find global object");}var n=ca(this);function r(a,b){if(b)a:{var c=n;a=a.split(".");for(var d=0;d<a.length-1;d++){var e=a[d];if(!(e in c))break a;c=c[e]}a=a[a.length-1];d=c[a];b=b(d);b!=d&&null!=b&&l(c,a,{configurable:!0,writable:!0,value:b})}}
function t(){this.H=!1;this.C=null;this.D=void 0;this.A=1;this.K=this.G=0;this.F=null}function u(a){if(a.H)throw new TypeError("Generator is already running");a.H=!0}t.prototype.I=function(a){this.D=a};function v(a,b){a.F={S:b,aa:!0};a.A=a.G||a.K}t.prototype.return=function(a){this.F={return:a};this.A=this.K};function w(a,b,c){a.A=c;return{value:b}}function y(a){a.G=0;var b=a.F.S;a.F=null;return b}function da(a){this.A=new t;this.C=a}
function ea(a,b){u(a.A);var c=a.A.C;if(c)return z(a,"return"in c?c["return"]:function(d){return{value:d,done:!0}},b,a.A.return);a.A.return(b);return C(a)}function z(a,b,c,d){try{var e=b.call(a.A.C,c);if(!(e instanceof Object))throw new TypeError("Iterator result "+e+" is not an object");if(!e.done)return a.A.H=!1,e;var g=e.value}catch(f){return a.A.C=null,v(a.A,f),C(a)}a.A.C=null;d.call(a.A,g);return C(a)}
function C(a){for(;a.A.A;)try{var b=a.C(a.A);if(b)return a.A.H=!1,{value:b.value,done:!1}}catch(c){a.A.D=void 0,v(a.A,c)}a.A.H=!1;if(a.A.F){b=a.A.F;a.A.F=null;if(b.aa)throw b.S;return{value:b.return,done:!0}}return{value:void 0,done:!0}}
function fa(a){this.next=function(b){u(a.A);a.A.C?b=z(a,a.A.C.next,b,a.A.I):(a.A.I(b),b=C(a));return b};this.throw=function(b){u(a.A);a.A.C?b=z(a,a.A.C["throw"],b,a.A.I):(v(a.A,b),b=C(a));return b};this.return=function(b){return ea(a,b)};this[Symbol.iterator]=function(){return this}}function ha(a){function b(d){return a.next(d)}function c(d){return a.throw(d)}return new Promise(function(d,e){function g(f){f.done?d(f.value):Promise.resolve(f.value).then(b,c).then(g,e)}g(a.next())})}
function D(a){return ha(new fa(new da(a)))}r("Symbol",function(a){function b(g){if(this instanceof b)throw new TypeError("Symbol is not a constructor");return new c(d+(g||"")+"_"+e++,g)}function c(g,f){this.A=g;l(this,"description",{configurable:!0,writable:!0,value:f})}if(a)return a;c.prototype.toString=function(){return this.A};var d="jscomp_symbol_"+(1E9*Math.random()>>>0)+"_",e=0;return b});
r("Symbol.iterator",function(a){if(a)return a;a=Symbol("Symbol.iterator");for(var b="Array Int8Array Uint8Array Uint8ClampedArray Int16Array Uint16Array Int32Array Uint32Array Float32Array Float64Array".split(" "),c=0;c<b.length;c++){var d=n[b[c]];"function"===typeof d&&"function"!=typeof d.prototype[a]&&l(d.prototype,a,{configurable:!0,writable:!0,value:function(){return ia(ba(this))}})}return a});function ia(a){a={next:a};a[Symbol.iterator]=function(){return this};return a}
r("Promise",function(a){function b(f){this.C=0;this.D=void 0;this.A=[];this.I=!1;var k=this.F();try{f(k.resolve,k.reject)}catch(m){k.reject(m)}}function c(){this.A=null}function d(f){return f instanceof b?f:new b(function(k){k(f)})}if(a)return a;c.prototype.C=function(f){if(null==this.A){this.A=[];var k=this;this.D(function(){k.G()})}this.A.push(f)};var e=n.setTimeout;c.prototype.D=function(f){e(f,0)};c.prototype.G=function(){for(;this.A&&this.A.length;){var f=this.A;this.A=[];for(var k=0;k<f.length;++k){var m=
f[k];f[k]=null;try{m()}catch(p){this.F(p)}}}this.A=null};c.prototype.F=function(f){this.D(function(){throw f;})};b.prototype.F=function(){function f(p){return function(q){m||(m=!0,p.call(k,q))}}var k=this,m=!1;return{resolve:f(this.W),reject:f(this.G)}};b.prototype.W=function(f){if(f===this)this.G(new TypeError("A Promise cannot resolve to itself"));else if(f instanceof b)this.Y(f);else{a:switch(typeof f){case "object":var k=null!=f;break a;case "function":k=!0;break a;default:k=!1}k?this.V(f):this.H(f)}};
b.prototype.V=function(f){var k=void 0;try{k=f.then}catch(m){this.G(m);return}"function"==typeof k?this.Z(k,f):this.H(f)};b.prototype.G=function(f){this.K(2,f)};b.prototype.H=function(f){this.K(1,f)};b.prototype.K=function(f,k){if(0!=this.C)throw Error("Cannot settle("+f+", "+k+"): Promise already settled in state"+this.C);this.C=f;this.D=k;2===this.C&&this.X();this.T()};b.prototype.X=function(){var f=this;e(function(){if(f.U()){var k=n.console;"undefined"!==typeof k&&k.error(f.D)}},1)};b.prototype.U=
function(){if(this.I)return!1;var f=n.CustomEvent,k=n.Event,m=n.dispatchEvent;if("undefined"===typeof m)return!0;"function"===typeof f?f=new f("unhandledrejection",{cancelable:!0}):"function"===typeof k?f=new k("unhandledrejection",{cancelable:!0}):(f=n.document.createEvent("CustomEvent"),f.initCustomEvent("unhandledrejection",!1,!0,f));f.promise=this;f.reason=this.D;return m(f)};b.prototype.T=function(){if(null!=this.A){for(var f=0;f<this.A.length;++f)g.C(this.A[f]);this.A=null}};var g=new c;b.prototype.Y=
function(f){var k=this.F();f.O(k.resolve,k.reject)};b.prototype.Z=function(f,k){var m=this.F();try{f.call(k,m.resolve,m.reject)}catch(p){m.reject(p)}};b.prototype.then=function(f,k){function m(x,A){return"function"==typeof x?function(Z){try{p(x(Z))}catch(aa){q(aa)}}:A}var p,q,B=new b(function(x,A){p=x;q=A});this.O(m(f,p),m(k,q));return B};b.prototype.catch=function(f){return this.then(void 0,f)};b.prototype.O=function(f,k){function m(){switch(p.C){case 1:f(p.D);break;case 2:k(p.D);break;default:throw Error("Unexpected state: "+
p.C);}}var p=this;null==this.A?g.C(m):this.A.push(m);this.I=!0};b.resolve=d;b.reject=function(f){return new b(function(k,m){m(f)})};b.race=function(f){return new b(function(k,m){for(var p=h(f),q=p.next();!q.done;q=p.next())d(q.value).O(k,m)})};b.all=function(f){var k=h(f),m=k.next();return m.done?d([]):new b(function(p,q){function B(Z){return function(aa){x[Z]=aa;A--;0==A&&p(x)}}var x=[],A=0;do x.push(void 0),A++,d(m.value).O(B(x.length-1),q),m=k.next();while(!m.done)})};return b});
r("globalThis",function(a){return a||n});function ja(a,b,c){if(null==a)throw new TypeError("The 'this' value for String.prototype."+c+" must not be null or undefined");if(b instanceof RegExp)throw new TypeError("First argument to String.prototype."+c+" must not be a regular expression");return a+""}
r("String.prototype.startsWith",function(a){return a?a:function(b,c){var d=ja(this,b,"startsWith"),e=d.length,g=b.length;c=Math.max(0,Math.min(c|0,d.length));for(var f=0;f<g&&c<e;)if(d[c++]!=b[f++])return!1;return f>=g}});r("String.prototype.codePointAt",function(a){return a?a:function(b){var c=ja(this,null,"codePointAt"),d=c.length;b=Number(b)||0;if(0<=b&&b<d){b|=0;var e=c.charCodeAt(b);if(55296>e||56319<e||b+1===d)return e;b=c.charCodeAt(b+1);return 56320>b||57343<b?e:1024*(e-55296)+b+9216}}});
function ka(a,b){a instanceof String&&(a+="");var c=0,d=!1,e={next:function(){if(!d&&c<a.length){var g=c++;return{value:b(g,a[g]),done:!1}}d=!0;return{done:!0,value:void 0}}};e[Symbol.iterator]=function(){return e};return e}r("Array.prototype.keys",function(a){return a?a:function(){return ka(this,function(b){return b})}});
var E=moduleArg,la=!!globalThis.window,ma=!!globalThis.WorkerGlobalScope,na,oa,pa,qa=(null==(na=globalThis.process)?void 0:null==(oa=na.versions)?void 0:oa.node)&&"renderer"!=(null==(pa=globalThis.process)?void 0:pa.type);"undefined"!=typeof __filename?_scriptName=__filename:ma&&(_scriptName=self.location.href);var F="",ra,G;
if(qa){var fs=require("fs");F=__dirname+"/";G=function(a){a=H(a)?new URL(a):a;return fs.readFileSync(a)};ra=function(a){var b=void 0===b?!0:b;var c;return D(function(d){a=H(a)?new URL(a):a;c=fs.readFileSync(a,b?void 0:"utf8");return d.return(c)})};process.argv.slice(2)}else if(la||ma){try{F=(new URL(".",_scriptName)).href}catch(a){}ma&&(G=function(a){var b=new XMLHttpRequest;b.open("GET",a,!1);b.responseType="arraybuffer";b.send(null);return new Uint8Array(b.response)});ra=function(a){var b;return D(function(c){if(1==
c.A)return H(a)?c.return(new Promise(function(d,e){var g=new XMLHttpRequest;g.open("GET",a,!0);g.responseType="arraybuffer";g.onload=function(){200==g.status||0==g.status&&g.response?d(g.response):e(g.status)};g.onerror=e;g.send(null)})):w(c,fetch(a,{credentials:"same-origin"}),2);b=c.D;if(b.ok)return c.return(b.arrayBuffer());throw Error(b.status+" : "+b.url);})}}var sa=console.log.bind(console),I=console.error.bind(console),J,ta=!1;function H(a){return a.startsWith("file://")}
var ua,va,wa,K,L,xa=!1;function ya(){var a=M.buffer;wa=new Int8Array(a);new Int16Array(a);K=new Uint8Array(a);new Uint16Array(a);new Int32Array(a);L=new Uint32Array(a);new Float32Array(a);new Float64Array(a);new BigInt64Array(a);new BigUint64Array(a)}function N(a){var b;null==(b=E.onAbort)||b.call(E,a);a="Aborted("+a+")";I(a);ta=!0;a=new WebAssembly.RuntimeError(a+". Build with -sASSERTIONS for more info.");var c;null==(c=va)||c(a);throw a;}var O;
// za(a): resolves the wasm binary for path/URL `a` as a Uint8Array.
// Flow of the state machine below:
//   - state 1: if a preloaded binary `J` exists, jump straight to the sync path (state 2);
//     otherwise await the async reader `ra(a)` and, on success (state 5), wrap the result
//     in a Uint8Array and return it.
//   - state 3 then falls through to state 2 (sync path): use `J` when `a` is the default
//     wasm location `O`, else the synchronous reader `G(a)` if one was installed, else throw.
// NOTE(review): `D`, `w` and `y` are defined outside this chunk; they look like the
// Closure-generated async/generator helpers (run, yield, take-caught-error), and
// `c.G=3` looks like it registers state 3 as the catch target - confirm against the full file.
function za(a){var b;return D(function(c){switch(c.A){case 1:if(J){c.A=2;break}c.G=3;return w(c,ra(a),5);case 5:return b=c.D,c.return(new Uint8Array(b));case 3:y(c);case 2:var d=c.return;if(a==O&&J)var e=new Uint8Array(J);else if(G)e=G(a);else throw"both async and sync fetching of the wasm failed";return d.call(c,e)}})}
// Aa(a, b): non-streaming instantiation path.
// Obtains the wasm bytes via za(a), then calls WebAssembly.instantiate(bytes, b) where `b`
// is the import object, and returns the instantiation result.
// In state 2 the caught error is logged through `I` ("failed to asynchronously prepare wasm: ...")
// and then passed to `N`, which aborts the module by throwing a WebAssembly.RuntimeError.
// NOTE(review): `g.G=2` presumably marks state 2 as the error handler for the awaited
// calls - `D`/`w`/`y` are defined outside this chunk, confirm there.
function Aa(a,b){var c,d,e;return D(function(g){switch(g.A){case 1:return g.G=2,w(g,za(a),4);case 4:return c=g.D,w(g,WebAssembly.instantiate(c,b),5);case 5:return d=g.D,g.return(d);case 2:e=y(g),I("failed to asynchronously prepare wasm: "+e),N(e),g.A=0}})}
function Ba(a){var b=J,c=O,d,e,g;return D(function(f){switch(f.A){case 1:if(b||H(c)||qa){f.A=2;break}f.G=3;d=fetch(c,{credentials:"same-origin"});return w(f,WebAssembly.instantiateStreaming(d,a),5);case 5:return e=f.D,f.return(e);case 3:g=y(f),I("wasm streaming compile failed: "+g),I("falling back to ArrayBuffer instantiation");case 2:return f.return(Aa(c,a))}})}function Ca(a){for(;0<a.length;)a.shift()(E)}var Da=[],Ea=[];function Fa(){var a=E.preRun.shift();Ea.push(a)}
function Ga(a){this.B=a-24}var Ha=0,Ia=0,Ja=[null,[],[]],Ka=globalThis.TextDecoder&&new TextDecoder;
function La(a,b){b=void 0===b?0:b;var c=b;for(var d=c+void 0;a[c]&&!(c>=d);)++c;if(16<c-b&&a.buffer&&Ka)return Ka.decode(a.subarray(b,c));for(d="";b<c;){var e=a[b++];if(e&128){var g=a[b++]&63;if(192==(e&224))d+=String.fromCharCode((e&31)<<6|g);else{var f=a[b++]&63;e=224==(e&240)?(e&15)<<12|g<<6|f:(e&7)<<18|g<<12|f<<6|a[b++]&63;65536>e?d+=String.fromCharCode(e):(e-=65536,d+=String.fromCharCode(55296|e>>10,56320|e&1023))}}else d+=String.fromCharCode(e)}return d}function Ma(a){return a?La(K,a):""}
var Na=[];E.print&&(sa=E.print);E.printErr&&(I=E.printErr);E.wasmBinary&&(J=E.wasmBinary);if(E.preInit)for("function"==typeof E.preInit&&(E.preInit=[E.preInit]);0<E.preInit.length;)E.preInit.shift()();
var Oa,Pa,Qa,Ra,Sa,Ta,Ua,Va,Wa,Xa,Ya,Za,$a,ab,M,bb={a:function(a,b,c){var d=new Ga(a);L[d.B+16>>2]=0;L[d.B+4>>2]=b;L[d.B+8>>2]=c;Ha=a;Ia++;throw Ha;},c:function(){return N("")},e:function(a,b){throw"Array index "+a+" out of bounds: [0,"+b+")";},d:function(a){var b=K.length;a>>>=0;if(2147483648<a)return!1;for(var c=1;4>=c;c*=2){var d=b*(1+.2/c);d=Math.min(d,a+100663296);a:{d=(Math.min(2147483648,65536*Math.ceil(Math.max(a,d)/65536))-M.buffer.byteLength+65535)/65536|0;try{M.grow(d);ya();var e=1;break a}catch(g){}e=
void 0}if(e)return!0}return!1},b:function(a,b,c,d){for(var e=0,g=0;g<c;g++){var f=L[b>>2],k=L[b+4>>2];b+=8;for(var m=0;m<k;m++){var p=a,q=K[f+m],B=Ja[p];0===q||10===q?((1===p?sa:I)(La(B)),B.length=0):B.push(q)}e+=k}L[d>>2]=e;return 0}},P;
P=await (function(){function a(e){e=P=e.exports;E._webidl_free=e.h;E._webidl_malloc=e.i;Oa=E._emscripten_bind_Language_getLanguageCode_0=e.j;Pa=E._emscripten_bind_Language___destroy___0=e.k;Qa=E._emscripten_bind_VoidPtr___destroy___0=e.l;Ra=E._emscripten_bind_LanguageGuess_getPercent_0=e.m;Sa=E._emscripten_bind_LanguageGuess_getLanguageCode_0=e.n;Ta=E._emscripten_bind_LanguageGuess___destroy___0=e.o;Ua=E._emscripten_bind_LanguageInfo_detectLanguageWithLength_3=e.p;Va=E._emscripten_bind_LanguageInfo_detectLanguageWithLength_6=
e.q;Wa=E._emscripten_bind_LanguageInfo_detectLanguage_2=e.r;Xa=E._emscripten_bind_LanguageInfo_detectLanguage_5=e.s;Ya=E._emscripten_bind_LanguageInfo_getIsReliable_0=e.t;Za=E._emscripten_bind_LanguageInfo_getLanguageCode_0=e.u;$a=E._emscripten_bind_LanguageInfo_get_languages_1=e.v;ab=E._emscripten_bind_LanguageInfo___destroy___0=e.w;M=e.f;ya();return P}var b,c,d;return D(function(e){if(1==e.A){b={a:bb};if(E.instantiateWasm)return e.return(new Promise(function(g){E.instantiateWasm(b,function(f,k){g(a(f,
k))})}));null!=O||(O=E.locateFile?E.locateFile("cld2.wasm",F):F+"cld2.wasm");return w(e,Ba(b),2)}c=e.D;d=a(c.instance);return e.return(d)})}());
(function(){function a(){E.calledRun=!0;if(!ta){xa=!0;Ca(Na);P.g();var b;null==(b=ua)||b(E);var c;null==(c=E.onRuntimeInitialized)||c.call(E);if(E.postRun)for("function"==typeof E.postRun&&(E.postRun=[E.postRun]);E.postRun.length;)b=E.postRun.shift(),Da.push(b);Ca(Da)}}if(E.preRun)for("function"==typeof E.preRun&&(E.preRun=[E.preRun]);E.preRun.length;)Fa();Ca(Ea);E.setStatus?(E.setStatus("Running..."),setTimeout(function(){setTimeout(function(){return E.setStatus("")},1);a()},1)):a()})();
function Q(){}Q.prototype=Object.create(Q.prototype);Q.prototype.constructor=Q;Q.prototype.J=Q;Q.L={};E.WrapperObject=Q;function cb(a){return(a||Q).L}E.getCache=cb;function R(a,b){var c=cb(b),d=c[a];if(d)return d;d=Object.create((b||Q).prototype);d.B=a;return c[a]=d}E.wrapPointer=R;E.castObject=function(a,b){return R(a.B,b)};E.NULL=R(0);E.destroy=function(a){if(!a.__destroy__)throw"Error: Cannot destroy object. (Did you create it yourself?)";a.__destroy__();delete cb(a.J)[a.B]};
E.compare=function(a,b){return a.B===b.B};E.getPointer=function(a){return a.B};E.getClass=function(a){return a.J};
var S={buffer:0,size:0,N:0,R:[],M:0,P:function(){if(S.M){for(var a=0;a<S.R.length;a++)E._webidl_free(S.R[a]);S.R.length=0;E._webidl_free(S.buffer);S.buffer=0;S.size+=S.M;S.M=0}S.buffer||(S.size+=128,S.buffer=E._webidl_malloc(S.size),S.buffer||N());S.N=0},alloc:function(a,b){S.buffer||N();a=a.length*b.BYTES_PER_ELEMENT;a=8*Math.ceil(a/8);S.N+a>=S.size?(0<a||N(),S.M+=a,b=E._webidl_malloc(a),S.R.push(b)):(b=S.buffer+S.N,S.N+=a);return b}};
function T(a){if("string"===typeof a){for(var b=0,c=0;c<a.length;++c){var d=a.charCodeAt(c);127>=d?b++:2047>=d?b+=2:55296<=d&&57343>=d?(b+=4,++c):b+=3}b=Array(b+1);d=b.length;c=0;if(0<d){d=c+d-1;for(var e=0;e<a.length;++e){var g=a.codePointAt(e);if(127>=g){if(c>=d)break;b[c++]=g}else if(2047>=g){if(c+1>=d)break;b[c++]=192|g>>6;b[c++]=128|g&63}else if(65535>=g){if(c+2>=d)break;b[c++]=224|g>>12;b[c++]=128|g>>6&63;b[c++]=128|g&63}else{if(c+3>=d)break;b[c++]=240|g>>18;b[c++]=128|g>>12&63;b[c++]=128|g>>
6&63;b[c++]=128|g&63;e++}}b[c]=0}a=S.alloc(b,wa);for(c=0;c<b.length;c++)wa[a+c]=b[c];return a}return a}function U(){throw"cannot construct a Language, no constructor in IDL";}U.prototype=Object.create(Q.prototype);U.prototype.constructor=U;U.prototype.J=U;U.L={};E.Language=U;U.prototype.getLanguageCode=function(){return Ma(Oa(this.B))};U.prototype.__destroy__=function(){Pa(this.B)};function V(){throw"cannot construct a VoidPtr, no constructor in IDL";}V.prototype=Object.create(Q.prototype);
V.prototype.constructor=V;V.prototype.J=V;V.L={};E.VoidPtr=V;V.prototype.__destroy__=function(){Qa(this.B)};function W(){throw"cannot construct a LanguageGuess, no constructor in IDL";}W.prototype=Object.create(U.prototype);W.prototype.constructor=W;W.prototype.J=W;W.L={};E.LanguageGuess=W;W.prototype.getPercent=function(){return Ra(this.B)};W.prototype.getLanguageCode=function(){return Ma(Sa(this.B))};W.prototype.__destroy__=function(){Ta(this.B)};
function X(){throw"cannot construct a LanguageInfo, no constructor in IDL";}X.prototype=Object.create(U.prototype);X.prototype.constructor=X;X.prototype.J=X;X.L={};E.LanguageInfo=X;
// Generated WebIDL glue for LanguageInfo.detectLanguageWithLength.
// Arguments: a = buffer, b = bufferLength, c = isPlainText, d = tldHint, e = encodingHint,
// g = languageHint (order taken from the C++ bindings for the _3 and _6 entry points).
// Steps:
//   1. S.P() prepares the temporary string cache used by T().
//   2. String-like arguments (a, d, g) are converted with T(), which copies a JS string
//      into wasm memory as NUL-terminated UTF-8 and returns the pointer; wrapper objects
//      are unwrapped to their raw pointer `.B`; non-string values pass through T() unchanged.
//   3. The call is dispatched on how many optional arguments are defined, and the returned
//      pointer is wrapped as a LanguageInfo (`X`) via R().
// NOTE(review): only the 3-argument (`Ua`) and 6-argument (`Va`) entry points are wired to
// wasm exports in this chunk. The 4- and 5-argument branches reference
// `_emscripten_bind_LanguageInfo_detectLanguageWithLength_4` / `_5`, which are not assigned
// anywhere visible here, so passing tldHint without languageHint would likely raise a
// ReferenceError - confirm against the full file before relying on those arities.
X.prototype.detectLanguageWithLength=function(a,b,c,d,e,g){S.P();a=a&&"object"===typeof a?a.B:T(a);b&&"object"===typeof b&&(b=b.B);c&&"object"===typeof c&&(c=c.B);d=d&&"object"===typeof d?d.B:T(d);e&&"object"===typeof e&&(e=e.B);g=g&&"object"===typeof g?g.B:T(g);return void 0===d?R(Ua(a,b,c),X):void 0===e?R(_emscripten_bind_LanguageInfo_detectLanguageWithLength_4(a,b,c,d),X):void 0===g?R(_emscripten_bind_LanguageInfo_detectLanguageWithLength_5(a,b,c,d,e),X):R(Va(a,b,c,d,e,g),X)};
X.prototype.detectLanguage=function(a,b,c,d,e){S.P();a=a&&"object"===typeof a?a.B:T(a);b&&"object"===typeof b&&(b=b.B);c=c&&"object"===typeof c?c.B:T(c);d&&"object"===typeof d&&(d=d.B);e=e&&"object"===typeof e?e.B:T(e);return void 0===c?R(Wa(a,b),X):void 0===d?R(_emscripten_bind_LanguageInfo_detectLanguage_3(a,b,c),X):void 0===e?R(_emscripten_bind_LanguageInfo_detectLanguage_4(a,b,c,d),X):R(Xa(a,b,c,d,e),X)};X.prototype.getIsReliable=function(){return!!Ya(this.B)};X.prototype.getLanguageCode=function(){return Ma(Za(this.B))};
X.prototype.get_languages=X.prototype.$=function(a){var b=this.B;a&&"object"===typeof a&&(a=a.B);return R($a(b,a),W)};Object.defineProperty(X.prototype,"languages",{get:X.prototype.$});X.prototype.__destroy__=function(){ab(this.B)};S.alloc=S.alloc.bind(S);S.P=S.P.bind(S);
for(var Y={ISO_8859_1:0,ISO_8859_2:1,ISO_8859_3:2,ISO_8859_4:3,ISO_8859_5:4,ISO_8859_6:5,ISO_8859_7:6,ISO_8859_8:7,ISO_8859_9:8,ISO_8859_10:9,JAPANESE_EUC_JP:10,EUC_JP:10,JAPANESE_SHIFT_JIS:11,SHIFT_JIS:11,JAPANESE_JIS:12,JIS:12,CHINESE_BIG5:13,BIG5:13,CHINESE_GB:14,CHINESE_EUC_CN:15,EUC_CN:15,KOREAN_EUC_KR:16,EUC_KR:16,UNICODE_UNUSED:17,CHINESE_EUC_DEC:18,EUC_DEC:18,CHINESE_CNS:19,CNS:19,CHINESE_BIG5_CP950:20,BIG5_CP950:20,JAPANESE_CP932:21,CP932:21,UTF8:22,UNKNOWN_ENCODING:23,ASCII_7BIT:24,RUSSIAN_KOI8_R:25,
KOI8_R:25,RUSSIAN_CP1251:26,CP1251:26,MSFT_CP1252:27,CP1252:27,RUSSIAN_KOI8_RU:28,KOI8_RU:28,MSFT_CP1250:29,CP1250:29,ISO_8859_15:30,MSFT_CP1254:31,CP1254:31,MSFT_CP1257:32,CP1257:32,ISO_8859_11:33,MSFT_CP874:34,CP874:34,MSFT_CP1256:35,CP1256:35,MSFT_CP1255:36,CP1255:36,ISO_8859_8_I:37,HEBREW_VISUAL:38,CZECH_CP852:39,CP852:39,CZECH_CSN_369103:40,CSN_369103:40,MSFT_CP1253:41,CP1253:41,RUSSIAN_CP866:42,CP866:42,ISO_8859_13:43,ISO_2022_KR:44,GBK:45,GB18030:46,BIG5_HKSCS:47,ISO_2022_CN:48,TSCII:49,TAMIL_MONO:50,
TAMIL_BI:51,JAGRAN:52,MACINTOSH_ROMAN:53,UTF7:54,BHASKAR:55,HTCHANAKYA:56,UTF16BE:57,UTF16LE:58,UTF32BE:59,UTF32LE:60,BINARYENC:61,HZ_GB_2312:62,UTF8UTF8:63,TAM_ELANGO:64,TAM_LTTMBARANI:65,TAM_SHREE:66,TAM_TBOOMIS:67,TAM_TMNEWS:68,TAM_WEBTAMIL:69,KDDI_SHIFT_JIS:70,DOCOMO_SHIFT_JIS:71,SOFTBANK_SHIFT_JIS:72,KDDI_ISO_2022_JP:73,ISO_2022_JP:73,SOFTBANK_ISO_2022_JP:74},bb=g(Object.keys(X)),cb=bb.next();!cb.done;cb=bb.next()){var db=cb.value;db.includes("_")&&(X[db.replace(/_/g,"")]=X[db])}
E.Encodings=X;wa?moduleRtn=E:moduleRtn=new Promise(function(a,b){ta=a;ua=b});
TAMIL_BI:51,JAGRAN:52,MACINTOSH_ROMAN:53,UTF7:54,BHASKAR:55,HTCHANAKYA:56,UTF16BE:57,UTF16LE:58,UTF32BE:59,UTF32LE:60,BINARYENC:61,HZ_GB_2312:62,UTF8UTF8:63,TAM_ELANGO:64,TAM_LTTMBARANI:65,TAM_SHREE:66,TAM_TBOOMIS:67,TAM_TMNEWS:68,TAM_WEBTAMIL:69,KDDI_SHIFT_JIS:70,DOCOMO_SHIFT_JIS:71,SOFTBANK_SHIFT_JIS:72,KDDI_ISO_2022_JP:73,ISO_2022_JP:73,SOFTBANK_ISO_2022_JP:74},db=h(Object.keys(Y)),eb=db.next();!eb.done;eb=db.next()){var fb=eb.value;fb.includes("_")&&(Y[fb.replace(/_/g,"")]=Y[fb])}
E.Encodings=Y;xa?moduleRtn=E:moduleRtn=new Promise(function(a,b){ua=a;va=b});
;return moduleRtn}})();if(typeof exports==="object"&&typeof module==="object"){module.exports=loadCLD2;module.exports.default=loadCLD2}else if(typeof define==="function"&&define["amd"])define([],()=>loadCLD2);

Binary file not shown.

View File

@@ -36,13 +36,50 @@ private:
class LanguageInfo : public Language {
public:
/// @brief Detects the language of a length-delimited text buffer.
///
/// Unlike detectLanguage(), the buffer does not need to be NUL-terminated;
/// exactly @p bufferLength bytes are handed to CLD2.
///
/// @param buffer        Pointer to the text bytes. May be null, in which case
///                      the input is treated as empty.
/// @param bufferLength  Number of bytes to analyse. Negative values are
///                      treated as an empty input.
/// @param isPlainText   Forwarded to CLD2 as its is_plain_text flag.
/// @return A heap-allocated LanguageInfo holding the best guess, the
///         reliability flag and the per-language results. The caller is
///         responsible for destroying it (the JS bindings expose __destroy__).
static LanguageInfo* detectLanguageWithLength(const char* buffer, int bufferLength, bool isPlainText)
{
    // The arguments come straight from JS glue code; never hand CLD2 a null
    // pointer or a negative length, analyse an empty string instead.
    if (buffer == nullptr || bufferLength < 0) {
        buffer = "";
        bufferLength = 0;
    }

    CLD2::Language languages[MAX_RESULTS] = {};
    int percentages[MAX_RESULTS] = {};
    bool isReliable = false;
    // Required out-parameter of the CLD2 API; its value is not used here.
    int textBytes = 0;

    const CLD2::Language bestGuess = DetectLanguageSummary(
        buffer, bufferLength, isPlainText,
        languages, percentages, &textBytes,
        &isReliable);

    return new LanguageInfo(isReliable, bestGuess, languages, percentages);
}
/// @brief Detects the language of a length-delimited text buffer, using hints.
///
/// @param buffer        Pointer to the text bytes. May be null, in which case
///                      the input is treated as empty.
/// @param bufferLength  Number of bytes to analyse. Negative values are
///                      treated as an empty input.
/// @param isPlainText   Forwarded to CLD2 as its is_plain_text flag.
/// @param tldHint       Second field of the CLD2::CLDHints passed to CLD2.
/// @param encodingHint  Third field of the CLD2::CLDHints passed to CLD2.
/// @param languageHint  First field of the CLD2::CLDHints passed to CLD2.
/// @return A heap-allocated LanguageInfo holding the best guess, the
///         reliability flag and the per-language results. The caller is
///         responsible for destroying it (the JS bindings expose __destroy__).
static LanguageInfo* detectLanguageWithLength(const char* buffer, int bufferLength, bool isPlainText,
                                              const char* tldHint, int encodingHint,
                                              const char* languageHint)
{
    // The arguments come straight from JS glue code; never hand CLD2 a null
    // pointer or a negative length, analyse an empty string instead.
    if (buffer == nullptr || bufferLength < 0) {
        buffer = "";
        bufferLength = 0;
    }

    // Field order must stay exactly as below; the last field is always
    // CLD2::UNKNOWN_LANGUAGE.
    CLD2::CLDHints hints = {languageHint, tldHint, encodingHint, CLD2::UNKNOWN_LANGUAGE};

    CLD2::Language languages[MAX_RESULTS] = {};
    int percentages[MAX_RESULTS] = {};
    bool isReliable = false;
    // Out-parameters required by the CLD2 API; their values are not used here.
    double scores[MAX_RESULTS] = {};
    int textBytes = 0;

    const CLD2::Language bestGuess = ExtDetectLanguageSummary(
        buffer, bufferLength, isPlainText,
        &hints, 0,
        languages, percentages, scores,
        nullptr, &textBytes, &isReliable);

    return new LanguageInfo(isReliable, bestGuess, languages, percentages);
}
static LanguageInfo* detectLanguage(const char* buffer, bool isPlainText)
{
CLD2::Language languages[MAX_RESULTS] = {};
int percentages[MAX_RESULTS] = {};
bool isReliable = false;
// This is ignored.
int textBytes;
CLD2::Language bestGuess = DetectLanguageSummary(

View File

@@ -12,6 +12,7 @@ import swaggerDocument from '@/generated/swagger.json';
import { uiStatic } from '@/middleware/ui.js';
import { swaggerStatic } from '@/middleware/swagger.js';
import { checkForUpdate } from '@/utils/update-checker.js';
import { VERSION } from '@/version';
export async function run() {
const config = getConfig();
@@ -32,7 +33,7 @@ export async function run() {
app.use(express.json());
app.use(cors());
if (config.logRequests) {
app.use(requestLogger());
app.use(requestLogger());
}
RegisterRoutes(app);
@@ -58,6 +59,7 @@ export async function run() {
app.use(errorHandler());
const server = app.listen(parseInt(config.port), config.host, () => {
logger.important(`MTranServer v${VERSION} is running!`);
logger.important(`Web UI: http://${config.host}:${config.port}/ui/`);
logger.important(`Swagger Docs: http://${config.host}:${config.port}/docs/`);
logger.important(`Log level set to: ${config.logLevel}`);

View File

@@ -15,15 +15,69 @@ export interface TextSegment {
const DEFAULT_CONFIDENCE_THRESHOLD = 0.5;
const MAXIMUM_LANGUAGES_IN_ONE_TEXT = 2;
const MAX_DETECTION_LENGTH = 1024;
const MAX_DETECTION_BYTES = 512;
const MAX_FALLBACK_DETECTION_BYTES = 1024;
let cldModule: any = null;
let initPromise: Promise<void> | null = null;
function handleCldError(error: any) {
/**
 * Removes characters that must not reach the native detector.
 *
 * Two passes are applied in order:
 *  1. every NUL character is dropped;
 *  2. the remaining C0 control characters and DEL are dropped, while
 *     tab (\x09), line feed (\x0A) and carriage return (\x0D) are kept.
 *
 * @param text Raw input string.
 * @returns The input with the characters above removed; all other characters are untouched.
 */
function sanitizeInput(text: string): string {
  return text
    .replace(/\0/g, '')
    .replace(/[\x01-\x08\x0B-\x0C\x0E-\x1F\x7F]/g, '');
}
/**
 * Returns the longest prefix of `text` whose UTF-8 encoding fits in `maxBytes`,
 * never cutting a code point (or a surrogate pair) in half.
 *
 * Only the first `maxBytes` bytes are ever encoded, so the cost is bounded by
 * the limit rather than by the input size (inputs may be megabytes long while
 * the limit is a few hundred bytes).
 *
 * @param text     String to truncate.
 * @param maxBytes Maximum size of the result in UTF-8 bytes. Zero, negative or
 *                 NaN limits yield an empty string; fractional limits are floored.
 * @returns `text` itself when it already fits, otherwise an exact prefix of `text`.
 */
function truncateByUtf8Bytes(text: string, maxBytes: number): string {
  if (!(maxBytes > 0)) {
    return '';
  }

  // A UTF-16 code unit never needs more than 3 UTF-8 bytes (a surrogate pair
  // is 2 units -> 4 bytes), so this input is guaranteed to fit without encoding.
  if (text.length * 3 <= maxBytes) {
    return text;
  }

  // encodeInto stops before the first character that does not fit completely
  // and reports how many UTF-16 code units it consumed.
  const scratch = new Uint8Array(Math.floor(maxBytes));
  const { read } = new TextEncoder().encodeInto(text, scratch);

  return read === text.length ? text : text.slice(0, read);
}
/**
 * Prepares text for the native detector: strips unsafe control characters
 * and then caps the result at `maxBytes` UTF-8 bytes.
 *
 * @param text     Raw input. Empty/falsy input is returned unchanged.
 * @param maxBytes UTF-8 byte budget for the returned string
 *                 (defaults to MAX_DETECTION_BYTES).
 * @returns The sanitized and truncated text; may be an empty string when
 *          nothing survives sanitization.
 */
function validateAndSanitizeInput(text: string, maxBytes: number = MAX_DETECTION_BYTES): string {
  if (!text) {
    return text;
  }

  const sanitized = sanitizeInput(text);
  const truncated = truncateByUtf8Bytes(sanitized, maxBytes);

  if (truncated !== text) {
    // Lengths are UTF-16 code units; the limit is expressed in UTF-8 bytes.
    logger.debug(
      `Input sanitized/truncated: ${text.length} -> ${truncated.length} chars (limit: ${maxBytes} bytes)`
    );
  }

  return truncated;
}
function handleCldError(error: any, context?: {
text?: string;
operation?: string
}) {
const errStr = error.toString();
if (errStr.includes('RuntimeError') || errStr.includes('memory access')) {
logger.error(`CLD2 crashed (RuntimeError), resetting module: ${error}`);
logger.error('CLD2 crashed (RuntimeError), resetting module', {
error: errStr,
stack: error.stack,
textLength: context?.text?.length,
textPreview: context?.text?.substring(0, 100),
operation: context?.operation
});
cldModule = null;
initPromise = null;
}
@@ -51,8 +105,13 @@ async function initCLD(): Promise<void> {
wasmBinary: wasmBuffer,
});
if (module.LanguageInfo && module.LanguageInfo.prototype && module.LanguageInfo.prototype.detectLanguage) {
module.LanguageInfo.detectLanguage = module.LanguageInfo.prototype.detectLanguage;
if (module.LanguageInfo && module.LanguageInfo.prototype) {
if (module.LanguageInfo.prototype.detectLanguage) {
module.LanguageInfo.detectLanguage = module.LanguageInfo.prototype.detectLanguage;
}
if (module.LanguageInfo.prototype.detectLanguageWithLength) {
module.LanguageInfo.detectLanguageWithLength = module.LanguageInfo.prototype.detectLanguageWithLength;
}
}
cldModule = module;
@@ -66,17 +125,29 @@ async function initCLD(): Promise<void> {
return initPromise;
}
function detectLanguageWithCLD(text: string, isHTML: boolean = false) {
function detectLanguageWithCLD(text: string, isHTML: boolean = false, maxBytes: number = MAX_DETECTION_BYTES) {
if (!cldModule) {
throw new Error('CLD2 module not initialized');
}
const validatedText = validateAndSanitizeInput(text, maxBytes);
if (!validatedText) {
logger.warn('Input validation resulted in empty text');
return {
language: 'un',
confident: false,
languages: [],
percentScore: 0
};
}
const LanguageInfo = cldModule.LanguageInfo;
if (!LanguageInfo || !LanguageInfo.detectLanguage) {
throw new Error('CLD2 LanguageInfo or detectLanguage not available');
}
const result = LanguageInfo.detectLanguage(text, !isHTML);
const result = LanguageInfo.detectLanguage(validatedText, !isHTML);
const languages = Array(3).fill(0).map((_, i) => {
const lang = result.get_languages(i);
@@ -107,7 +178,7 @@ function bcp47Normalize(code: string): string {
}
}
export async function detectLanguage(text: string): Promise<string> {
export async function detectLanguage(text: string, maxBytes: number = MAX_DETECTION_BYTES): Promise<string> {
if (!text) {
return '';
}
@@ -115,22 +186,19 @@ export async function detectLanguage(text: string): Promise<string> {
await initCLD();
try {
const processText = text.length > MAX_DETECTION_LENGTH
? text.slice(0, MAX_DETECTION_LENGTH)
: text;
const result = detectLanguageWithCLD(processText);
const result = detectLanguageWithCLD(text, false, maxBytes);
return bcp47Normalize(result.language);
} catch (error) {
logger.warn(`Language detection failed: ${error}`);
handleCldError(error);
handleCldError(error, { text, operation: 'detectLanguage' });
return 'en';
}
}
export async function detectLanguageWithConfidence(
text: string,
minConfidence: number = DEFAULT_CONFIDENCE_THRESHOLD
minConfidence: number = DEFAULT_CONFIDENCE_THRESHOLD,
maxBytes: number = MAX_DETECTION_BYTES
): Promise<{ language: string; confidence: number }> {
if (!text) {
return { language: '', confidence: 0 };
@@ -139,11 +207,7 @@ export async function detectLanguageWithConfidence(
await initCLD();
try {
const processText = text.length > MAX_DETECTION_LENGTH
? text.slice(0, MAX_DETECTION_LENGTH)
: text;
const result = detectLanguageWithCLD(processText);
const result = detectLanguageWithCLD(text, false, maxBytes);
const confidence = result.percentScore / 100;
if (confidence < minConfidence) {
@@ -156,7 +220,7 @@ export async function detectLanguageWithConfidence(
};
} catch (error) {
logger.warn(`Language detection with confidence failed: ${error}`);
handleCldError(error);
handleCldError(error, { text, operation: 'detectLanguageWithConfidence' });
return { language: 'en', confidence: 0 };
}
}
@@ -187,6 +251,36 @@ function hasMixedScripts(text: string): boolean {
return false;
}
/**
 * Classifies which scripts occur in `text`.
 *
 * A character counts as CJK when its first UTF-16 code unit lies in one of the
 * CJK Unified Ideographs, Hiragana, Katakana or Hangul Syllables blocks, and as
 * Latin when it is an ASCII letter. Everything else (digits, punctuation,
 * other scripts) is ignored.
 *
 * @param text Text to inspect.
 * @returns 'Mixed' as soon as both kinds have been seen, otherwise 'CJK',
 *          'Latin', or 'Other' when neither kind occurs.
 */
function getScriptType(text: string): 'Latin' | 'CJK' | 'Mixed' | 'Other' {
  const cjkRanges: Array<[number, number]> = [
    [0x4e00, 0x9fff], // CJK Unified Ideographs
    [0x3040, 0x309f], // Hiragana
    [0x30a0, 0x30ff], // Katakana
    [0xac00, 0xd7af], // Hangul Syllables
  ];
  const latinRanges: Array<[number, number]> = [
    [0x0041, 0x005a], // A-Z
    [0x0061, 0x007a], // a-z
  ];
  const inAnyRange = (value: number, ranges: Array<[number, number]>): boolean =>
    ranges.some(([low, high]) => value >= low && value <= high);

  let sawCJK = false;
  let sawLatin = false;

  for (const symbol of text) {
    const unit = symbol.charCodeAt(0);

    // CJK takes precedence; a unit is only tested as Latin when it is not CJK.
    if (inAnyRange(unit, cjkRanges)) {
      sawCJK = true;
    } else if (inAnyRange(unit, latinRanges)) {
      sawLatin = true;
    }

    if (sawCJK && sawLatin) {
      return 'Mixed';
    }
  }

  if (sawCJK) {
    return 'CJK';
  }
  return sawLatin ? 'Latin' : 'Other';
}
/**
 * Tells whether a language tag denotes Chinese, Japanese or Korean.
 *
 * Matching is case-sensitive: the exact tags 'zh', 'zh-Hans', 'zh-Hant', 'ja'
 * and 'ko' match, as does any tag starting with 'zh-'.
 *
 * @param lang Language tag to test.
 * @returns true for a CJK language tag, false otherwise.
 */
function isCJKLanguage(lang: string): boolean {
  switch (lang) {
    case 'zh':
    case 'zh-Hans':
    case 'zh-Hant':
    case 'ja':
    case 'ko':
      return true;
    default:
      // Any other regional or script variant of Chinese.
      return lang.startsWith('zh-');
  }
}
/**
 * Splits `text` into language-tagged segments using the default confidence
 * threshold.
 *
 * Thin convenience wrapper: it forwards to
 * detectMultipleLanguagesWithThreshold with DEFAULT_CONFIDENCE_THRESHOLD.
 *
 * @param text Text to analyse.
 * @returns The segments produced by detectMultipleLanguagesWithThreshold.
 */
export async function detectMultipleLanguages(text: string): Promise<TextSegment[]> {
return detectMultipleLanguagesWithThreshold(text, DEFAULT_CONFIDENCE_THRESHOLD);
}
@@ -201,7 +295,7 @@ export async function detectMultipleLanguagesWithThreshold(
await initCLD();
const fallbackLang = await detectLanguage(text);
const fallbackLang = await detectLanguage(text, MAX_FALLBACK_DETECTION_BYTES);
const effectiveFallback = fallbackLang || 'en';
if (!hasMixedScripts(text)) {
@@ -220,29 +314,51 @@ export async function detectMultipleLanguagesWithThreshold(
const segments: TextSegment[] = [];
const segmenterAny = new (Intl as any).Segmenter(undefined, { granularity: 'sentence' });
const sentenceSegments = Array.from(segmenterAny.segment(text)) as Array<{segment: string, index: number}>;
const sentenceSegments = Array.from(segmenterAny.segment(text)) as Array<{ segment: string, index: number }>;
for (const { segment, index } of sentenceSegments) {
try {
await initCLD();
const processSegment = segment.length > MAX_DETECTION_LENGTH
? segment.slice(0, MAX_DETECTION_LENGTH)
: segment;
const result = detectLanguageWithCLD(processSegment);
const result = detectLanguageWithCLD(segment);
const detectedLang = bcp47Normalize(result.language);
const confidence = result.percentScore / 100;
const scriptType = getScriptType(segment);
let finalLang = effectiveFallback;
let usedLogic = 'fallback';
if (confidence >= threshold) {
finalLang = detectedLang;
usedLogic = 'confidence';
} else {
if (scriptType === 'Latin' && isCJKLanguage(effectiveFallback)) {
if (detectedLang && detectedLang !== 'un') {
finalLang = detectedLang;
usedLogic = 'script-override-latin';
} else {
finalLang = 'en';
usedLogic = 'script-override-en';
}
} else if (scriptType === 'CJK' && !isCJKLanguage(effectiveFallback)) {
if (detectedLang && detectedLang !== 'un') {
finalLang = detectedLang;
usedLogic = 'script-override-cjk';
}
}
}
logger.debug(`Segment[${segments.length}]: "${segment.replace(/\n/g, '\\n')}" -> lang=${detectedLang}, conf=${confidence.toFixed(2)}, script=${scriptType}, final=${finalLang} (${usedLogic})`);
segments.push({
text: segment,
language: confidence >= threshold ? detectedLang : effectiveFallback,
language: finalLang,
start: index,
end: index + segment.length,
confidence
});
} catch (error) {
logger.warn(`Failed to detect language for segment: ${error}`);
handleCldError(error);
handleCldError(error, { text: segment, operation: 'detectMultipleLanguages' });
segments.push({
text: segment,
language: effectiveFallback,

View File

@@ -0,0 +1,53 @@
import { describe, test, expect } from 'bun:test';
import { detectLanguage } from '@/services/detector';
// Smoke tests that feed hostile or unusual input to detectLanguage and check
// that a value comes back instead of the process aborting.
// NOTE(review): most cases only assert toBeDefined(); if detectLanguage swallows
// detector failures and returns a fallback language, these assertions cannot
// tell a recovered crash from a successful detection - confirm whether stronger
// expectations are wanted.
describe('CLD2 Memory Safety Tests', () => {
// Title: "string containing a null byte".
test('包含 null 字节的字符串', async () => {
const text = 'Hello\0World';
const result = await detectLanguage(text);
expect(result).toBeDefined();
expect(typeof result).toBe('string');
});
// Title: "very long text, 1MB" - one million-plus ASCII characters.
test('超长文本1MB', async () => {
const text = 'A'.repeat(1024 * 1024);
const result = await detectLanguage(text);
expect(result).toBeDefined();
});
// Title: "mixed UTF-8 multi-byte characters" - 3-byte CJK, a 4-byte emoji and ASCII,
// repeated so that truncation has to land on a character boundary.
test('混合 UTF-8 多字节字符', async () => {
const text = '你好世界🌍Hello'.repeat(1000);
const result = await detectLanguage(text);
expect(result).toBeDefined();
});
// Title: "control characters".
test('控制字符', async () => {
const text = 'Test\x01\x02\x03Text';
const result = await detectLanguage(text);
expect(result).toBeDefined();
});
// Title: "repeated detections do not crash" - no explicit expectation; the test
// passes as long as none of the 100 calls rejects.
test('连续多次检测不崩溃', async () => {
for (let i = 0; i < 100; i++) {
const text = `Test ${i} with special chars 你好\0\x01`;
await detectLanguage(text);
}
});
// Title: "empty text" - the only case with an exact expected value.
test('空文本', async () => {
const result = await detectLanguage('');
expect(result).toBe('');
});
// Title: "whitespace only".
test('纯空白字符', async () => {
const text = ' \n\t ';
const result = await detectLanguage(text);
expect(result).toBeDefined();
});
// Title: "emoji" - input made solely of 4-byte UTF-8 sequences.
test('emoji 表情符号', async () => {
const text = '🎉🎊🎈🎁🎀';
const result = await detectLanguage(text);
expect(result).toBeDefined();
});
});