From 3c3087dc4e64615821237c6895e96e7bdb166e38 Mon Sep 17 00:00:00 2001 From: jock Date: Tue, 13 Jan 2026 18:59:32 +0800 Subject: [PATCH] update --- package.json | 1 + server/api/test.get.ts | 21 ++++++++++++++ shared/utils/html.ts | 42 +++++++++++++++++++++++++++ test/parse_cgi_data.ts | 66 ++++++++++++++++++++++++++++++++++++++++++ yarn.lock | 40 +++++++++++++++++++++++++ 5 files changed, 170 insertions(+) create mode 100644 server/api/test.get.ts create mode 100644 test/parse_cgi_data.ts diff --git a/package.json b/package.json index 9d2ff35..3c78ff3 100644 --- a/package.json +++ b/package.json @@ -34,6 +34,7 @@ "dompurify": "^3.2.5", "exceljs": "^4.4.0", "file-saver": "^2.0.5", + "happy-dom": "^20.1.0", "highlight.js": "^11.11.1", "jszip": "^3.10.1", "lucide-vue-next": "^0.441.0", diff --git a/server/api/test.get.ts b/server/api/test.get.ts new file mode 100644 index 0000000..ae0ddaa --- /dev/null +++ b/server/api/test.get.ts @@ -0,0 +1,21 @@ +import { parseCgiDataNewServer } from '#shared/utils/html'; + +interface DebugQuery { + key: string; + url: string; +} + +export default defineEventHandler(async event => { + const { key, url } = getQuery(event); + if (key && key === process.env.DEBUG_KEY) { + const html = await fetch(`https://10.workers-proxy-2.shop?url=${url}`, { + method: 'GET', + headers: { + referer: 'https://down.mptext.top', + }, + }).then(resp => resp.text()); + return parseCgiDataNewServer(html); + } else { + return 'not set debug key'; + } +}); diff --git a/shared/utils/html.ts b/shared/utils/html.ts index a17d9c5..74262c0 100644 --- a/shared/utils/html.ts +++ b/shared/utils/html.ts @@ -1,4 +1,5 @@ import * as cheerio from 'cheerio'; +import { Window } from 'happy-dom'; import { extractCommentId } from '~/utils/comment'; /** @@ -122,3 +123,44 @@ export function validateHTMLContent(html: string): ['Success' | 'Deleted' | 'Exc return ['Error', null]; } } + +// 识别文章的类型 +function detectArticleType(html: string) {} + +function parseCgiDataNewClient(html: string): Promise { + const iframe = document.createElement('iframe'); + iframe.style.display = 'none'; + iframe.srcdoc = html; + document.body.appendChild(iframe); + + return new Promise((resolve, reject) => { + iframe.onload = function () { + // @ts-ignore + const data = iframe.contentWindow.cgiDataNew; + + // 用完后清理 + document.body.removeChild(iframe); + resolve(data); + }; + iframe.onerror = function (e) { + reject(e); + }; + }); +} + +// 解析 html 中的 window.cgiDataNew 对象 +export function parseCgiDataNewServer(html: string): any { + const window = new Window(); + + // 关键:显式启用 JavaScript 执行 + // @ts-ignore – happyDOM 是内部属性,但社区广泛使用 + window.happyDOM.settings.enableJavaScriptEvaluation = true; + + window.document.write(html); + window.document.close(); + + const data = (window as any).cgiDataNew; + window.close(); + + return data; +} diff --git a/test/parse_cgi_data.ts b/test/parse_cgi_data.ts new file mode 100644 index 0000000..e99561a --- /dev/null +++ b/test/parse_cgi_data.ts @@ -0,0 +1,66 @@ +import { normalizeHtml, parseCgiDataNewServer } from '#shared/utils/html'; +import path from 'node:path'; +import fs from 'node:fs'; + +const samplesDirectory = path.join(__dirname, '../samples'); + +const samples = [ + { + group: '图片分享', + samples: [ + path.join(samplesDirectory, '图片分享/01.html'), + path.join(samplesDirectory, '图片分享/02.html'), + path.join(samplesDirectory, '图片分享/03.html'), + path.join(samplesDirectory, '图片分享/04.html'), + path.join(samplesDirectory, '图片分享/05.html'), + ], + }, + { + group: '文本分享', + samples: [ + path.join(samplesDirectory, '文本分享/01.html'), + path.join(samplesDirectory, '文本分享/02.html'), + path.join(samplesDirectory, '文本分享/03.html'), + path.join(samplesDirectory, '文本分享/04.html'), + path.join(samplesDirectory, '文本分享/c01.html'), + path.join(samplesDirectory, '文本分享/c02.html'), + path.join(samplesDirectory, '文本分享/c03.html'), + path.join(samplesDirectory, '文本分享/c04.html'), + path.join(samplesDirectory, '文本分享/c05.html'), + ], + }, + { + group: '文章分享', + samples: [ + path.join(samplesDirectory, '文章分享/01.html'), + path.join(samplesDirectory, '文章分享/02.html'), + path.join(samplesDirectory, '文章分享/03.html'), + path.join(samplesDirectory, '文章分享/04.html'), + ], + }, + { + group: '普通图文', + samples: [ + path.join(samplesDirectory, '普通图文/01.html'), + path.join(samplesDirectory, '普通图文/02.html'), + path.join(samplesDirectory, '普通图文/03.html'), + path.join(samplesDirectory, '普通图文/04.html'), + path.join(samplesDirectory, '普通图文/c01.html'), + ], + }, +]; + +async function run() { + for (const example of samples) { + console.group(example.group); + for (const samplePath of example.samples) { + const rawHTMLContent = fs.readFileSync(samplePath, 'utf8'); + const data = await parseCgiDataNewServer(rawHTMLContent); + console.log(data.item_show_type); + } + console.groupEnd(); + console.log(); + } +} + +run(); diff --git a/yarn.lock b/yarn.lock index a57899e..c012677 100644 --- a/yarn.lock +++ b/yarn.lock @@ -2222,6 +2222,13 @@ resolved "https://registry.npmjs.org/@types/node/-/node-14.18.63.tgz#1788fa8da838dbb5f9ea994b834278205db6ca2b" integrity sha512-fAtCfv4jJg+ExtXhvCkCqUKZ+4ok/JQk01qDKhL5BDDoS3AxKXhV5/MAVUZyQnSEd2GT92fkgZl0pz0Q0AzcIQ== +"@types/node@^20.0.0": + version "20.19.28" + resolved "https://registry.npmjs.org/@types/node/-/node-20.19.28.tgz#d968c492e405e4a572f5c27e4f5efc24f0eb9937" + integrity sha512-VyKBr25BuFDzBFCK5sUM6ZXiWfqgCTwTAOK8qzGV/m9FCirXYDlmczJ+d5dXBAQALGCdRRdbteKYfJ84NGEusw== + dependencies: + undici-types "~6.21.0" + "@types/parse-path@^7.0.0": version "7.1.0" resolved "https://registry.npmjs.org/@types/parse-path/-/parse-path-7.1.0.tgz#1bdddfe4fb2038e76c7e622234a97d6a050a1be3" @@ -2282,6 +2289,18 @@ resolved "https://registry.npmjs.org/@types/web-bluetooth/-/web-bluetooth-0.0.21.tgz#525433c784aed9b457aaa0ee3d92aeb71f346b63" integrity sha512-oIQLCGWtcFZy2JW77j9k8nHzAOpqMHLQejDA48XXMWH6tjCQHz5RCFz1bzsmROyL6PUm+LLnUiI4BCn221inxA== +"@types/whatwg-mimetype@^3.0.2": + version "3.0.2" + resolved "https://registry.npmjs.org/@types/whatwg-mimetype/-/whatwg-mimetype-3.0.2.tgz#e5e06dcd3e92d4e622ef0129637707d66c28d6a4" + integrity sha512-c2AKvDT8ToxLIOUlN51gTiHXflsfIFisS4pO7pDPoKouJCESkhZnEy623gwP9laCy5lnLDAw1vAzu2vM2YLOrA== + +"@types/ws@^8.18.1": + version "8.18.1" + resolved "https://registry.npmjs.org/@types/ws/-/ws-8.18.1.tgz#48464e4bf2ddfd17db13d845467f6070ffea4aa9" + integrity sha512-ThVF6DCVhA8kUGy+aazFQ4kXQ7E1Ty7A3ypFOe0IcJV8O/M511G99AW24irKrW56Wt44yG9+ij8FaqoBGkuBXg== + dependencies: + "@types/node" "*" + "@unhead/vue@^2.0.19": version "2.0.19" resolved "https://registry.npmjs.org/@unhead/vue/-/vue-2.0.19.tgz#d628b88526c1e92bb960b2997fdd676b279fe6d9" @@ -4244,6 +4263,17 @@ h3@^1.12.0, h3@^1.15.1, h3@^1.15.4: ufo "^1.6.1" uncrypto "^0.1.3" +happy-dom@^20.1.0: + version "20.1.0" + resolved "https://registry.npmjs.org/happy-dom/-/happy-dom-20.1.0.tgz#bb85b1fa696a0780bc3665a137bd8da04a4337eb" + integrity sha512-ebvqjBqzenBk2LjzNEAzoj7yhw7rW/R2/wVevMu6Mrq3MXtcI/RUz4+ozpcOcqVLEWPqLfg2v9EAU7fFXZUUJw== + dependencies: + "@types/node" "^20.0.0" + "@types/whatwg-mimetype" "^3.0.2" + "@types/ws" "^8.18.1" + whatwg-mimetype "^3.0.0" + ws "^8.18.3" + has-flag@^4.0.0: version "4.0.0" resolved "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz#944771fd9c81c81265c4d6941860da06bb59479b" @@ -7022,6 +7052,11 @@ unctx@^2.4.1: magic-string "^0.30.17" unplugin "^2.1.0" +undici-types@~6.21.0: + version "6.21.0" + resolved "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz#691d00af3909be93a7faa13be61b3a5b50ef12cb" + integrity sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ== + undici-types@~7.16.0: version "7.16.0" resolved "https://registry.npmjs.org/undici-types/-/undici-types-7.16.0.tgz#ffccdff36aea4884cbfce9a750a0580224f58a46" @@ -7407,6 +7442,11 @@ whatwg-encoding@^3.1.1: dependencies: iconv-lite "0.6.3" +whatwg-mimetype@^3.0.0: + version "3.0.0" + resolved "https://registry.npmjs.org/whatwg-mimetype/-/whatwg-mimetype-3.0.0.tgz#5fa1a7623867ff1af6ca3dc72ad6b8a4208beba7" + integrity sha512-nt+N2dzIutVRxARx1nghPKGv1xHikU7HKdfafKkLNLindmPU/ch3U31NOCGGA/dmPcmb1VlofO0vnKAcsm0o/Q== + whatwg-mimetype@^4.0.0: version "4.0.0" resolved "https://registry.npmjs.org/whatwg-mimetype/-/whatwg-mimetype-4.0.0.tgz#bc1bf94a985dc50388d54a9258ac405c3ca2fc0a"