帖子导出工具

导出帖子内容到数据库

  1. // ==UserScript==
  2. // @name:zh-CN 帖子导出工具
  3. // @name Posts_Dumper
  4. // @namespace https://blog.chrxw.com
  5. // @version 1.4
  6. // @description:zh-CN 导出帖子内容到数据库
  7. // @description 导出帖子内容到数据库
  8. // @author Chr_
  9. // @match https://keylol.com/*
  10. // @match https://dev.keylol.com/*
  11. // @connect 127.0.0.1
  12. // @connect store.steampowered.com
  13. // @license AGPL-3.0
  14. // @icon https://blog.chrxw.com/favicon.ico
  15. // @grant GM_setValue
  16. // @grant GM_getValue
  17. // @grant GM_deleteValue
  18. // @grant GM_xmlhttpRequest
  19. // @grant GM_addStyle
  20. // ==/UserScript==
  21.  
  22. setTimeout(async () => {
  23. 'use strict';
  24.  
  // Address of the local backend service that every API request is sent to.
  const host = '127.0.0.1';
  const port = 8000;

  // Captures the numeric thread id from a "t<id>" or "tid=<id>" fragment of a URL.
  const matchTid = /(?:t|tid=)(\d+)/;

  // Thread list table of the current page; null when the page has no such element.
  const treadList = document.querySelector('#threadlisttableid');
  31.  
  32. if (treadList !== null) {//获取帖子列表
  33.  
  /**
   * Creates a panel button.
   * @param {string} name - Caption shown on the button.
   * @param {Function} foo - Listener invoked on every click.
   * @returns {HTMLButtonElement} The new button, carrying the `pd_btn` class.
   */
  function genBtn(name, foo) {
    const button = Object.assign(document.createElement('button'), {
      textContent: name,
      className: 'pd_btn',
    });
    button.addEventListener('click', foo);
    return button;
  }
  /**
   * Creates a <div>.
   * @param {string} [cls] - Class name to apply; `pd_div` is used when null or undefined.
   * @returns {HTMLDivElement} The new element.
   */
  function genDiv(cls) {
    const className = cls ?? 'pd_div';
    return Object.assign(document.createElement('div'), { className });
  }
  /**
   * Creates a <span> holding plain text.
   * @param {string} text - Text content of the span.
   * @returns {HTMLSpanElement} The new element.
   */
  function genSpan(text) {
    return Object.assign(document.createElement('span'), { textContent: text });
  }
  /**
   * Creates a horizontal rule used as a separator inside the panel.
   * @returns {HTMLHRElement} The new element.
   */
  function genHr() {
    return document.createElement('hr');
  }
  /**
   * Creates a line break element.
   * @returns {HTMLBRElement} The new element.
   */
  function genBr() {
    return document.createElement('br');
  }
  /**
   * Creates an empty <iframe>; callers assign its `src` later.
   * @returns {HTMLIFrameElement} The new element.
   */
  function genIframe() {
    return document.createElement('iframe');
  }
  /**
   * Creates the text input in which the user types a thread id.
   * @returns {HTMLInputElement} The new input, carrying the `pd_text` class.
   */
  function genText() {
    return Object.assign(document.createElement('input'), {
      placeholder: '帖子ID',
      className: 'pd_text',
    });
  }
  69.  
  // Container for the status line, the buttons and the helper iframes.
  const panel = genDiv('pd_panel');

  // Helper iframes; each one loads a single thread page at a time while scraping.
  const tempIFrames = [genIframe(), genIframe(), genIframe()];
  const [tempIframe, tempIframe2, tempIframe3] = tempIFrames;

  // Probe the backend once; the result decides which controls the panel shows.
  const status = await testBackend();

  const statusText = status ? '连接成功' : '连接失败';
  const statusTips = genSpan(statusText);
  81.  
  /**
   * Scrapes every thread link in the list that matches `selector`.
   *
   * Links are processed in batches as large as the number of helper iframes:
   * each link of a batch is loaded into its own iframe (with the `utm=114514`
   * marker that makes the script scrape that page), the link is flagged as
   * `pd_done`, and the next batch starts once every thread of the current one
   * has reported completion or timed out. The list markers are refreshed from
   * the backend at the end, whether or not anything was scraped.
   *
   * @param {string} selector - CSS selector, relative to the thread list, of the links to scrape.
   * @returns {Promise<void>}
   */
  const grubPosts = async (selector) => {
    const postLists = treadList.querySelectorAll(selector);
    const total = postLists.length;
    if (total > 0) {
      statusTips.textContent = `开始抓取,共 ${total} 篇`;
      const workTread = tempIFrames.length;
      for (let i = 0; i < total; i += workTread) {
        const max = Math.min(i + workTread, total);
        const tasks = [];
        for (let j = i; j < max; j++) {
          const postTag = postLists[j];
          const tid = grubTid(postTag.href);
          // Slot j - i of the batch always uses the iframe with the same index.
          tempIFrames[j - i].src = `${genUrl(tid)}?utm=114514`;
          postTag.classList.remove('pd_not_added');
          postTag.classList.add('pd_done');
          tasks.push(waitUnitlDone(tid));
        }

        await Promise.all(tasks);

        statusTips.textContent = `抓取进度 ${max}/${total}`;
      }
      statusTips.textContent = '抓取结束';
    } else {
      statusTips.textContent = '没有可以抓取的帖子';
    }
    await freshPostList();
  };

  // Scrapes only the threads currently marked as not yet recorded.
  const btnGrubNew = genBtn('抓取尚未记录的', () => grubPosts("th.common>a.pd_not_added.xst,th.new>a.pd_not_added.xst,th.lock>a.pd_not_added.xst"));

  // Scrapes every thread in the list, recorded or not.
  const btnGrubAll = genBtn('抓取所有', () => grubPosts("th.common>a.xst,th.new>a.xst,th.lock>a.xst"));
  141.  
  // Input for a thread id plus the button that scrapes exactly that thread.
  const txtTid = genText();
  const btnGrubOne = genBtn('手动抓取', async () => {
    const tid = Number.parseInt(txtTid.value, 10);
    // `!(tid > 0)` also rejects NaN, which parseInt yields for non-numeric input.
    if (!(tid > 0)) {
      alert('请输入整数 TID');
      return;
    }
    statusTips.textContent = `TID ${tid} 开始抓取`;
    tempIframe.src = `${genUrl(tid)}?utm=114514`;
    // Wait under a string key, the same key type the batch handlers pass from grubTid.
    const result = await waitUnitlDone(String(tid));
    statusTips.textContent = `TID ${tid} ${result}`;

    // There is no list entry tied to a manually typed id, so no marker is
    // touched here; the refresh below re-derives every marker from the backend.
    await freshPostList();
  });
  161.  
  // Opens the backend's Excel export endpoint in a new window.
  const btnExportExcel = genBtn('导出Excel', () => void window.open(`http://${host}:${port}/api/excel`));

  // Opens the backend's BBCode export endpoint in a new window.
  const btnExportBBCode = genBtn('导出BBCode', () => void window.open(`http://${host}:${port}/api/bbcode`));

  // Asks for confirmation, then requests deletion of every stored post.
  const btnResetDB = genBtn('重置数据库(删除所有数据)', async () => {
    if (!confirm('真的要删除所有数据吗?')) {
      return;
    }
    await deleteAllData();
  });

  // Opens the backend's index page in a new window.
  const btnControl = genBtn('在管理面板浏览数据', () => void window.open(`http://${host}:${port}/index.html`));
  179.  
  // The status line is always shown, whatever the backend state.
  panel.appendChild(statusTips);
  panel.appendChild(genHr());

  if (status) {
    // Backend reachable: add the full set of controls, in display order.
    const controls = [
      btnGrubNew, btnGrubAll, genHr(),
      txtTid, btnGrubOne, genHr(),
      btnExportExcel, btnExportBBCode, genHr(),
      btnResetDB, genHr(),
      btnControl, genHr(),
      tempIframe, genBr(),
      tempIframe2, genBr(),
      tempIframe3,
    ];
    for (const control of controls) {
      panel.appendChild(control);
    }

    // Refresh the markers shortly after the "autopbn" element is clicked.
    // The element may be absent, so it must not be dereferenced blindly:
    // a TypeError here would prevent the panel from ever being attached.
    // NOTE(review): the 500 ms delay presumably gives the page time to append
    // the additional rows — confirm.
    document.getElementById('autopbn')?.addEventListener('click', () => {
      setTimeout(() => {
        freshPostList().catch((error) => console.error(error));
      }, 500);
    });

    // Mark which threads are already stored. A failed lookup is logged rather
    // than allowed to abort the script before the panel is attached below.
    try {
      await freshPostList();
    } catch (error) {
      console.error(error);
    }
  } else {
    // Backend unreachable: show a hint and hide the panel after three seconds.
    panel.appendChild(genSpan('请检查软件是否运行以及端口是否被占用'));

    setTimeout(() => {
      panel.style.display = 'none';
    }, 3000);
  }

  document.body.appendChild(panel);
  221.  
  222. } else if (ifNeedGrub()) {//抓取帖子内容
  // Collect the thread's metadata from the page; every field falls back to
  // '获取失败' when the element it is read from cannot be found.
  const tid = grubTid(location.href);
  const post_url = genUrl(tid);
  const post_title = document.getElementById('thread_subject')?.textContent ?? '获取失败';
  const eleAuthor = document.querySelector('div.pi>div.authi>a.xw1');
  const author_nick = eleAuthor?.textContent ?? '获取失败';
  // The author link has the form https://keylol.com/suid-<uid>; strip the prefix.
  const author_uid = eleAuthor?.href.replace('https://keylol.com/suid-', '') ?? '获取失败';
  // The first four characters of the date element's text are dropped.
  const post_date = document.querySelector('div.pti>div.authi>em[id]')?.textContent.substring(4) ?? '获取失败';
  // First post body cell. The attribute selector is now properly closed with
  // `]` instead of relying on the parser's end-of-input recovery.
  const eleContent = document.querySelector('td[id^=postmessage]');
  const nodes = eleContent?.childNodes ?? [];
  // Filled by node2text with one entry per accepted text fragment.
  const contentLines = [];
  233.  
  /**
   * Walks `node` depth-first and appends every accepted text fragment to
   * `contentLines`.
   *
   * Subtrees rooted at I, A, IFRAME, STYLE, SCRIPT and IMG elements, and at
   * DIV elements carrying the `aimg_tip` class, are skipped entirely. A text
   * node is accepted when its trimmed content is longer than two characters
   * and does not contain the no-reprint notice.
   *
   * @param {Node} node - Node to convert.
   * @returns {void}
   */
  function node2text(node) {
    const skippedTags = ['I', 'A', 'IFRAME', 'STYLE', 'SCRIPT', 'IMG'];
    if (skippedTags.includes(node.nodeName)) {
      return;
    }
    if (node.nodeName === 'DIV' && node.classList.contains('aimg_tip')) {
      return;
    }

    if (node.nodeType !== Node.TEXT_NODE) {
      // Element or other container: recurse into the children, if any.
      const children = node.childNodes;
      if (children?.length > 0) {
        for (const child of children) {
          node2text(child);
        }
      }
      return;
    }

    const fragment = node.textContent?.trim();
    if (fragment && fragment.length > 2 && !fragment.includes('未经许可,严禁转载')) {
      contentLines.push(fragment);
    }
  }
  263.  
  // Flatten the post body into newline-separated text.
  for (const node of nodes) {
    node2text(node);
  }
  const content = contentLines.join('\n');

  // Collect the distinct Steam app ids linked anywhere on the page.
  const steamLinks = document.querySelectorAll("a[href^='https://store.steampowered.com/'],a[href^='https://steamdb.info/app/']");
  const grubAppid = /app\/(\d+)\/?/;
  const appIDsSet = new Set();
  for (const ele of steamLinks) {
    const href = ele.href;
    if (href) {
      const appID = Number.parseInt(grubAppid.exec(href)?.[1] ?? '0', 10);
      if (appID > 0) {
        appIDsSet.add(appID);
      }
    }
  }

  const appIDs = [...appIDsSet];
  const bbcodes = [];
  const excels = [];

  // Resolve all game names in parallel; results come back in appIDs order.
  const values = await Promise.all(appIDs.map((appid) => getGameName(appid)));

  values.forEach((value, index) => {
    // A lookup is expected to yield [success, name, appid]; anything else is
    // treated as a failed lookup instead of crashing the destructuring.
    const [succ, rawName] = Array.isArray(value) ? value : [false, undefined];
    const appid = appIDs[index];
    // Failed lookups are shown in brackets, with a generic label when no name is available.
    const name = succ ? rawName : `【${rawName ?? '读取出错'}】`;
    bbcodes.push(`[url=https://store.steampowered.com/app/${appid}/]${name}[/url]`);
    excels.push(`${name} https://store.steampowered.com/app/${appid}/`);
  });

  const game_list = appIDs.join(' | ');
  const game_bbcode = bbcodes.join('\n');
  const game_excel = excels.join('\r\n');
  const data = { tid, post_url, post_title, author_nick, author_uid, post_date, content, game_list, game_bbcode, game_excel };
  console.log(data);
  try {
    // Upload first and only then publish the completion flag: the list page
    // polls this value and reuses the iframe as soon as it appears, so setting
    // it earlier lets this page be navigated away while the upload is pending.
    await savePostData(data);
    GM_setValue(tid, '抓取完成');
  }
  catch (error) {
    // Store a readable message; the list page prints this value in its status line.
    GM_setValue(tid, String(error));
  }
  313. }
  314.  
  /**
   * Re-marks every thread link in the list according to whether the backend
   * already holds that thread.
   *
   * All three marker classes are cleared first; a link then gets `pd_added`
   * or `pd_not_added` together with a matching tooltip.
   * NOTE(review): grubTid yields ids as strings, so the lookup only matches
   * if the backend returns ids of the same type — confirm.
   *
   * @returns {Promise<void>} Rejects when the id list cannot be fetched.
   */
  async function freshPostList() {
    const storedTids = await getPostIds();
    const anchors = treadList.querySelectorAll("th.common>a.xst,th.new>a.xst,th.lock>a.xst");
    anchors.forEach((anchor) => {
      const isStored = storedTids.has(grubTid(anchor.href));

      anchor.classList.remove('pd_not_added', 'pd_added', 'pd_done');
      anchor.classList.add(isStored ? 'pd_added' : 'pd_not_added');
      anchor.title = isStored ? '【已抓取】' : '【未抓取】';
    });
  }
  335.  
  /**
   * Tells whether the current page should be scraped.
   *
   * That is the case when the query string ends with the `utm=114514` marker
   * and the URL contains a thread id. The regex result is returned directly:
   * comparing the boolean from `test()` with `>= 0` is true for both outcomes.
   *
   * @returns {boolean} True when the page was opened for scraping and has a thread id.
   */
  function ifNeedGrub() {
    return location.search.endsWith('utm=114514') && matchTid.test(location.href);
  }
  344.  
  /**
   * Extracts the thread id from a URL.
   * @param {string} url - URL containing a "t<id>" or "tid=<id>" fragment.
   * @returns {?string} The id as a string of digits, or null when the URL has none.
   */
  function grubTid(url) {
    const hit = url.match(matchTid);
    return hit === null ? null : hit[1];
  }
  349.  
  /**
   * Builds the URL of the first page of a thread.
   * @param {string|number} tid - Thread id.
   * @returns {string} URL of the form https://keylol.com/t<tid>-1-1.
   */
  function genUrl(tid) {
    const threadPath = `t${tid}-1-1`;
    return `https://keylol.com/${threadPath}`;
  }
  354.  
  355. //-----------------------------------
  /**
   * Checks whether the local backend is reachable.
   * @returns {Promise<boolean>} True only when the test endpoint answers with
   *   code 666; false on any other answer or when the request fails.
   */
  async function testBackend() {
    // Started outside the try block so that only a failed request maps to false.
    const pending = $http.get(`http://${host}:${port}/api/test`);
    try {
      const reply = await pending;
      return reply?.code === 666;
    } catch {
      return false;
    }
  }
  /**
   * Waits until the scraping page publishes a result for `tid`.
   *
   * The stored value for `tid` is polled; the first truthy value found is
   * deleted from storage and returned. If nothing appears before the
   * deadline, the key is deleted and '操作超时' is returned instead.
   *
   * @param {string|number} tid - Storage key the result is published under.
   * @param {number} [timeout=10000] - Milliseconds to wait before giving up.
   * @param {number} [interval=50] - Milliseconds between two polls.
   * @returns {Promise<*>} The published value, or '操作超时' on timeout. Never rejects.
   */
  function waitUnitlDone(tid, timeout = 10000, interval = 50) {
    return new Promise((resolve) => {
      const deadline = setTimeout(() => {
        clearInterval(poller);
        GM_deleteValue(tid);
        resolve('操作超时');
      }, timeout);

      const poller = setInterval(() => {
        const fin = GM_getValue(tid);
        if (fin) {
          clearInterval(poller);
          // The deadline is a timeout handle, so it is cancelled with clearTimeout.
          clearTimeout(deadline);
          GM_deleteValue(tid);
          resolve(fin);
        }
      }, interval);
    });
  }
  /**
   * Fetches the ids of all threads the backend has already stored.
   * @returns {Promise<Set>} Set of the ids in the answer's `data`; an empty set
   *   (after logging the answer's message) when the answer code is not 0.
   *   Rejects when the request itself fails.
   */
  async function getPostIds() {
    const reply = await $http.get(`http://${host}:${port}/api/posts/ids`);
    if (reply?.code !== 0) {
      console.error(reply?.msg ?? '消息为空');
      return new Set();
    }
    return new Set(reply?.data ?? []);
  }
  /**
   * Uploads one scraped post to the backend as JSON.
   * @param {Object} data - Post record to store.
   * @returns {Promise<boolean>} True when the backend answers with code 0, the
   *   same success code getPostIds checks for; false on any other answer or
   *   when the request fails.
   */
  async function savePostData(data) {
    // Started outside the try block so that only a failed request maps to false.
    const pending = $http.post(`http://${host}:${port}/api/post`, JSON.stringify(data));
    try {
      const response = await pending;
      console.log(response);
      return response?.code === 0;
    } catch (reason) {
      console.log(reason);
      return false;
    }
  }
  /**
   * Asks the backend to delete every stored post.
   * @returns {Promise<boolean>} True when the backend answers with code 0, the
   *   same success code getPostIds checks for; false on any other answer or
   *   when the request fails.
   */
  async function deleteAllData() {
    // Started outside the try block so that only a failed request maps to false.
    const pending = $http.delete(`http://${host}:${port}/api/posts`);
    try {
      const response = await pending;
      console.log(response);
      return response?.code === 0;
    } catch (reason) {
      console.log(reason);
      return false;
    }
  }
  /**
   * Looks up the Simplified-Chinese store name of a Steam app.
   *
   * The promise never rejects and always yields a three-element array, so the
   * caller can destructure every result the same way.
   *
   * @param {number} appid - Steam app id.
   * @returns {Promise<Array>} `[success, name, appid]`. On failure `success`
   *   is false and `name` is the rejection message when that is a string,
   *   otherwise undefined.
   */
  async function getGameName(appid) {
    try {
      const response = await $http.get(`https://store.steampowered.com/api/appdetails?appids=${appid}&l=schinese`);
      // The answer is keyed by app id; `data` is read defensively because it
      // may be absent from the entry.
      const { success, data } = response?.[appid] ?? {};
      return [Boolean(success), data?.name, appid];
    } catch (reason) {
      console.log(reason);
      return [false, typeof reason === 'string' ? reason : undefined, appid];
    }
  }
  453. }, 500);
  454. //-----------------------------------
  /**
   * Promise wrapper around GM_xmlhttpRequest.
   *
   * Every request resolves with the response body once it completes with
   * HTTP status 200, and rejects on timeout, on a transport error, or with
   * '解析出错' for any other status. Option objects passed by callers are
   * copied, never modified.
   */
  class Request {
    /**
     * @param {number} [timeout=3000] - Timeout in milliseconds applied to every request.
     */
    constructor(timeout = 3000) {
      this.timeout = timeout;
    }

    /** GET request; resolves with the `response` field (responseType 'json'). */
    get(url, opt = {}) {
      return this.#baseRequest(url, 'GET', opt, 'json');
    }

    /** GET request; resolves with the `responseXML` field (empty responseType). */
    getHtml(url, opt = {}) {
      return this.#baseRequest(url, 'GET', opt, '');
    }

    /** GET request; resolves with the `responseText` field. */
    getText(url, opt = {}) {
      return this.#baseRequest(url, 'GET', opt, 'text');
    }

    /**
     * POST request with a JSON content type; resolves with the `response` field.
     * @param {string} url - Target URL.
     * @param {string} data - Request body, already serialized.
     * @param {Object} [opt] - Extra request options; its headers are kept,
     *   except that Content-Type is always application/json.
     */
    post(url, data, opt = {}) {
      const headers = { ...opt.headers, 'Content-Type': 'application/json' };
      return this.#baseRequest(url, 'POST', { ...opt, data, headers }, 'json');
    }

    /** DELETE request; resolves with the `response` field (responseType 'json'). */
    delete(url, opt = {}) {
      return this.#baseRequest(url, 'DELETE', opt, 'json');
    }

    /**
     * Issues the request and adapts the callback API to a promise.
     * @param {string} url - Target URL.
     * @param {string} [method='GET'] - HTTP method.
     * @param {Object} [opt] - Extra request options; url, method, responseType,
     *   timeout and the three callbacks always override same-named entries.
     * @param {string} [responseType='json'] - Selects which response field is returned.
     * @returns {Promise<*>} The selected response field.
     */
    #baseRequest(url, method = 'GET', opt = {}, responseType = 'json') {
      const details = { ...opt, url, method, responseType, timeout: this.timeout };
      return new Promise((resolve, reject) => {
        details.ontimeout = reject;
        details.onerror = reject;
        details.onload = ({ readyState, status, response, responseXML, responseText }) => {
          if (readyState === 4 && status === 200) {
            if (responseType === 'json') {
              resolve(response);
            } else if (responseType === 'text') {
              resolve(responseText);
            } else {
              resolve(responseXML);
            }
          } else {
            console.error('网络错误');
            console.log(readyState);
            console.log(status);
            console.log(response);
            reject('解析出错');
          }
        };
        GM_xmlhttpRequest(details);
      });
    }
  }
  // Shared client used by every backend and Steam request in this script.
  const $http = new Request();
  507.  
  508. // Stylesheet for the control panel (.pd_panel), its children, and the status markers (.pd_added / .pd_not_added / .pd_done) shown before thread links
  509. GM_addStyle(`
  510. .pd_div {
  511. vertical-align: middle;
  512. }
  513. .pd_panel {
  514. background: rgba(58, 58, 58, 0.5);
  515. position: fixed;
  516. top: 50%;
  517. right: 0px;
  518. text-align: center;
  519. transform: translate(0px, -50%);
  520. z-index: 100;
  521. padding: 5px;
  522. border-radius: 5px 0 0 5px;
  523. }
  524. .pd_panel > *:not(:last-child) {
  525. margin-right: 5px;
  526. }
  527. .pd_panel > hr {
  528. margin: 5px 0 5px;
  529. }
  530. .pd_panel > span {
  531. color: #fff;
  532. }
  533. .pd_panel > iframe {
  534. width: 200px;
  535. height: 50px;
  536. }
  537. .pd_added::before {
  538. content: "✅";
  539. }
  540. .pd_not_added::before {
  541. content: "❌";
  542. }
  543. .pd_done::before {
  544. content: "🤔";
  545. }
  546. .pd_text {
  547. width: 90px;
  548. text-align: center;
  549. }
  550. `);

QingJ © 2025

镜像随时可能失效,请加Q群300939539或关注我们的公众号极客氢云获取最新地址