SingleFile Pure - Pure html downloader

将当前网页保存为一个纯文本的.html网页文件,不保存二进制

目前為 2023-11-01 提交的版本,檢視 最新版本

  1. // ==UserScript==
  2. // @name SingleFile Pure - Pure html downloader
  3. // @name:zh SingleFile Pure - 保存纯HTML
  4. // @namespace https://gist.github.com/KnIfER
  5. // @version 3
  6. // @description 将当前网页保存为一个纯文本的.html网页文件,不保存二进制
  7. // @description:en Save webpages into one pure html file, without binary data.
  8. // @author PY-DNG
  9. // @license MIT
  10. // @grant GM_registerMenuCommand
  11. // @grant GM_unregisterMenuCommand
  12. // @grant unsafeWindow
  13. // @icon 
  14. // @match *://*/*
  15. // @include *
  16. // @noframes
  17. // ==/UserScript==
  18.  
  19. // based on @PY-DNG https://gf.qytechs.cn/zh-CN/scripts/419798-singlefile-单文件保存网页
  20.  
  21. (function() {
  22. 'use strict';
  /**
   * Write a log line to the console, prefixed with a highlighted "SingleFile" tag.
   * @param {...*} args Values forwarded to console.log after the tag and its style string.
   */
  function debug(...args) {
    const tag = "%c SingleFile: ";
    const tagStyle = "color:#333!important;background:#0FF;";
    console.log(tag, tagStyle, ...args);
  }
  26.  
  // Window to work on (unsafeWindow when it is defined on `window`), document aliases,
  // and the shared state object stored on that window under `_sfpr_bank`.
  var win = window.unsafeWindow || window, doc=document, d=doc
  , bank=win._sfpr_bank;
  // Menu register/unregister default to `debug`, i.e. they only log their arguments.
  var rM=debug, rMd=debug, err=console.error;
  if(!bank) {
  // No bank on the window yet: create it and switch to the real GM_* menu functions.
  bank = win._sfpr_bank = {};
  rM = GM_registerMenuCommand;
  rMd = GM_unregisterMenuCommand;
  } else try{
  // A bank already exists: call the `unreg` hook stored on it (hot-reload path).
  // rM/rMd stay as `debug` in this branch, so no menu command is registered again.
  bank.unreg();
  } catch(e){debug(e)}
  // Publish this instance's uninstall hook on the bank; `unregs` collects the undo callbacks it runs.
  bank.unreg = uninstall;var unregs = [];
  /**
   * Hot-reload hook: run every undo callback collected in `unregs`, in insertion order.
   * @returns {number} Always 1.
   */
  function uninstall() {
    for (const undo of unregs) {
      undo();
    }
    return 1;
  }
  /**
   * Attach an event listener and remember how to detach it again.
   * @param {string} a Event type.
   * @param {Function} b Listener.
   * @param {*} c Third argument passed through to add/removeEventListener.
   * @param {EventTarget} [d] Target to listen on; falls back to `win` when falsy.
   */
  function addEvent(a, b, c, d) {
    const target = d ? d : win;
    target.addEventListener(a, b, c);
    // The matching removal is queued so uninstall() can undo this registration.
    unregs.push(() => {
      target.removeEventListener(a, b, c);
    });
  }
  // UI strings per language. The table is indexed below with the part of
  // navigator.language before the first '-', falling back to 'en'.
  // Only SavePage is read in the visible code; Saving is not referenced.
  const MSG = {
  'zh': {
  SavePage: '保存纯网页',
  Saving: '保存中……'
  },
  'en': {
  SavePage: 'Save pure webpage',
  Saving: 'Saving, please wait……'
  },
  }
  // Names of the capture events: the "...before..." event and its "...after..." counterpart.
  var btn, evtSt="single-file-on-before-capture-request", evtEd=evtSt.replace('before', 'after');
  // Before a capture: detach the '#sf-pure' info button (if present) and remember it.
  addEvent(evtSt, () => {
    btn = doc.getElementById('sf-pure');
    if (btn) btn.remove();
  });
  // After a capture: put the remembered button back into the page.
  addEvent(evtEd, () => {
    if (btn) {
      doc.body.append(btn);
      btn = 0;
    }
  });
  // Primary language subtag, e.g. "zh" from "zh-CN".
  var t = navigator.language;
  if (t.includes('-')) t = t.slice(0, t.indexOf('-'));

  // GUI
  var GT = MSG[t] || MSG['en'], fnMenu, menu;
  /**
   * Menu / hotkey action: generate the snapshot, download it as "<title>.html",
   * fire the "after capture" event and re-register the menu command.
   */
  fnMenu = function() {
    Generate_Single_File({
      onfinish: (FinalHTML) => {
        // Replacer functions are used on purpose: with a plain replacement string,
        // "$&", "$1", "$$" ... inside the page title would be interpreted as
        // replacement patterns and corrupt the file name.
        const fileName = '{Title}.html'
          .replace('{Title}', () => doc.title)
          .replace('{Time}', () => getTime('-', '-'));
        saveTextToFile(FinalHTML, fileName);
        dispatchEvent(new CustomEvent(evtEd));
        rMd(menu);
        menu = rM(GT.SavePage, fnMenu);
      }
    });
  };
  menu = rM(GT.SavePage, fnMenu);
  // Allow other scripts/pages to trigger a save through a custom event.
  addEvent("single-file-pure-save", (e) => {
    fnMenu();
    stop(e);
  });
  // Ctrl+Alt+S hotkey, unless `win.saveAsTaken` is set.
  if (!win.saveAsTaken) {
    addEvent('keydown', (e) => {
      if (e.key === 's' && e.altKey && e.ctrlKey) {
        fnMenu();
        stop(e);
      }
    });
  }
  /**
   * Build an HTML snapshot of the current page as a single string and pass it to
   * `details.onfinish`.
   *
   * The live document is cloned and all edits are made on the clone: scripts, inline
   * event handlers, modulepreload links and CSP meta tags are removed; URLs in links,
   * images, srcset and inline background images are made absolute; canvas contents and
   * form values are recorded in a property map that a small injected script re-applies
   * when the saved file is opened.
   *
   * Changes compared with the previous implementation:
   *  - srcset entries are written back as strings (they used to become "[object Object]");
   *  - canvas snapshots are taken from the live canvas (a cloned canvas has no bitmap);
   *  - a missing live twin of a form field no longer aborts the whole generation;
   *  - anchors are only rewritten when they pass ishttp(), so mailto:/javascript:
   *    links are left untouched;
   *  - "<" is escaped in the embedded JSON so a value containing "</script>" cannot
   *    terminate the injected script;
   *  - links whose rel contains "alternate" are skipped, as the inline comment intends;
   *  - the doctype is emitted so the saved page is not rendered in quirks mode;
   *  - a failing FileReader releases its async task instead of stalling the save.
   *
   * @param {{onfinish: function(string): void}} details Receives the final HTML.
   */
  function Generate_Single_File(details) {
    debug('Generate started...');
    if (!bank.init) {
      dispatchEvent(new CustomEvent("single-file-user-script-init"));
      bank.init = 1;
    }
    // Fired before cloning so the evtSt listener can detach '#sf-pure' from the live page.
    dispatchEvent(new CustomEvent(evtSt));

    // Every modification below happens on this clone, never on the live document.
    const dom = doc.cloneNode(1);

    // Small DOM helpers bound to the clone.
    const $ = (...a) => dom.querySelector(...a);
    const $_ = (...a) => dom.querySelectorAll(...a);
    const $C = (...a) => dom.createElement(...a);
    const $A = (parent, child) => parent.appendChild(child);
    const $R = (e) => (e.parentElement ? e.parentElement.removeChild(e) : null);
    // True for values starting with '/' or 'http'; undefined for empty input.
    function ishttp(s) {
      // !/^[^\/:]*:/.test(s)
      if (s) return s.startsWith('/') || s.startsWith('http');
    }

    // Per-element properties to restore in the saved page, keyed by CSS path.
    const ElmProps = {
      props: {},
      cssMap: new Map(),
      getCssPath(elm) {
        let path = this.cssMap.get(elm);
        if (!path) {
          path = cssPath(elm);
          this.cssMap.set(elm, path);
        }
        return path;
      },
      add(elm, type, value) {
        const path = this.getCssPath(elm);
        const store = this.props[path] || (this.props[path] = []);
        store.push({type: type, value: value});
      },
    };

    // Element of the live document that sits at the same CSS path as a clone element, or null.
    const liveTwin = (cloneElm) => {
      try {
        return doc.querySelector(ElmProps.getCssPath(cloneElm));
      } catch (e) {
        // An unusable selector must not abort the whole generation.
        debug(e);
        return null;
      }
    };

    // Generate info button!
    function about() {
      var t=$C('A');
      t.id = 'sf-pure';
      t.style = 'position:fixed;right:16px;top:16px;width:24px;height:24px;color:#2d2d2d;background-color:#737373;border:2px solid;border-color:#eee;border-radius:16px;z-index:2147483647;opacity:0.7;display:flex;justify-content:center;align-items:center;';
      t.innerHTML = '<svg style=\'width:65%;height:65%;margin-left:1px;\' xmlns="http://www.w3.org/2000/svg"viewBox="0 0 64 64"width="64"height="64"><style>.p{fill:#f0f0f0}</style><path class="p"d="M30 3A3 3 0 1130 21 3 3 0 1130 3ZM16 25 23 29 23 58 16 63 46 63 39 58 39 25Z"/></svg>';
      // Save time (ms since epoch) is stored in `name`; the injected script turns it into the tooltip.
      t.name = Date.now()+'';
      t.title = doc.title;
      t.href = location.ohref || location.href;
      t.target = 'blank';
      return t;
    }

    const AM = new AsyncManager();
    AM.onfinish = function() {
      // Add applyProps script
      var script = $C('script');
      // "<" is escaped so that a stored value containing "</script>" cannot end the script element.
      var propsJson = JSON.stringify(ElmProps.props).replace(/</g, '\\u003c');
      script.textContent = "window.addEventListener('load', function(){"+
        // show info button
        "setTimeout(function(){var btn=document.getElementById('sf-pure');btn.title=new Date(parseInt(btn.name))+'\\n\\n'+btn.title;location.ohref=btn.href;btn.oncontextmenu=function(e){btn.style.display='none';e.preventDefault()}}, 800);"
        +
        // {FUNC}
        "(function(c){var fs={Canvas_DataUrl:function(a,b){var e=new Image(),v=a.getContext('2d');e.onload=function(){v.drawImage(e,0,0)};e.src=b},Input_Value:function(a,b){a.value=b}};for(var i=0,arr=Object.entries(c),t,el;i<arr.length;i++){try{t=arr[i];if(el=document.querySelector(t[0]))for(var p of t[1])fs[p.type](el,p.value)}catch(e){console.error(e)}}}"
        +")("+
        // {PROPS}
        propsJson
        +")})";
      $A(dom.head, script);
      $A(dom.body, about());
      // Generate html; keep the doctype so the saved page does not fall back to quirks mode.
      var doctype = dom.doctype ? new XMLSerializer().serializeToString(dom.doctype) + '\n' : '';
      var FinalHTML = doctype + dom.querySelector('html').outerHTML;
      debug('Generation Complete.', FinalHTML.length);
      details.onfinish(FinalHTML);
    };

    // debug('Setting charset');
    if (doc.characterSet !== 'UTF-8') {
      const meta = $('meta[http-equiv="Content-Type"][content*="charset"]');
      meta && (meta.content = meta.content.replace(/charset\s*=\s*[^;\s]*/i, 'charset=UTF-8'));
    }

    // debug('strip scripts');
    for (const node of $_('script')) {
      $R(node);
    }

    // debug('strip inline scripts');
    const ISKeys = ['onabort', 'onerror', 'onresize', 'onscroll', 'onunload', 'oncancel', 'oncanplay', 'oncanplaythrough', 'onchange', 'onclick', 'onclose', 'oncuechange', 'ondblclick', 'ondrag', 'ondragend', 'ondragenter', 'ondragexit', 'ondragleave', 'ondragover', 'ondragstart', 'ondrop', 'ondurationchange', 'onemptied', 'onended', 'onerror', 'onfocus', 'oninput', 'oninvalid', 'onkeydown', 'onkeypress', 'onkeyup', 'onload', 'onloadeddata', 'onloadedmetadata', 'onloadstart', 'onmousedown', 'onmouseenter', 'onmouseleave', 'onmousemove', 'onmouseout', 'onmouseover', 'onmouseup', 'onmousewheel', 'onpause', 'onplay', 'onplaying', 'onprogress', 'onratechange', 'onreset', 'onresize', 'onscroll', 'onseeked', 'onseeking', 'onselect', 'onshow', 'onstalled', 'onsubmit', 'onsuspend', 'ontimeupdate', 'ontoggle', 'onvolumechange', 'onwaiting', 'onbegin', 'onend', 'onrepeat'];
    for (const node of $_('*')) {
      for (const key of ISKeys) {
        node.removeAttribute(key);
        node[key] = undefined;
      }
    }

    // debug('strip preload scripts');
    for (const node of $_('link[rel*=modulepreload]')) {
      $R(node);
    }

    // debug('strip meta headers');
    for (const node of $_('meta[http-equiv="Content-Security-Policy"]')) {
      $R(node);
    }

    //debug('Resolve style urls');
    for (const node of $_('link[rel*=stylesheet][href]')) {
      resolveStyleLinked(node);
    }
    for (const elm of $_('style')) {
      //debug('style elm=', elm.id, elm)
      resolveStyle(elm.innerText, (style, elm) => (elm.innerHTML = style), elm);
    }

    //debug('Resolve links');
    // Only for links that rel includes one of `accepts`
    // and at the same time does NOT include any of `excludes`.
    const accepts = ['icon', 'apple-touch-icon', 'apple-touch-startup-image', 'prefetch', 'preload', 'prerender', 'manifest', 'stylesheet'];
    const excludes = ['alternate'];
    for (const link of $_('link[href]')) {
      // Only for http[s] links
      if (!link.href) {continue;}
      if (!ishttp(link.href)) {continue;}

      const rels = link.rel.split(/\s+/).filter(Boolean);
      const deal = rels.some((rel) => accepts.includes(rel)) && !rels.some((rel) => excludes.includes(rel));
      if (!deal) {continue;}

      // Save original href to link.ohref
      link.ohref = link.href;

      AM.add();
      requestDataURL(link.href, function(durl, link) {
        link.href = durl;
        // Deal style if links to a stylesheet
        if (rels.includes('stylesheet')) {
          resolveStyleLinked(link);
        }
        AM.finish();
      }, link);
    }
    // Make anchor targets absolute. Non-http(s) schemes (mailto:, javascript:, tel: ...)
    // are skipped so they are never run through the relative-URL resolver.
    for (const anchor of dom.links) {
      if (ishttp(anchor.href)) {
        anchor.href = fullUrl(anchor.href);
      }
    }

    //debug('Resolve image src');
    for (const img of $_('img[src], source[src]')) {
      // Get full src
      // if (img.src.length > 3999) {continue;}
      if (!img.src) {continue;}
      if (!ishttp(img.src)) {continue;}
      img.src = fullUrl(img.src);
    }

    //debug('Resolve image srcset');
    for (const img of $_('img[srcset], source[srcset]')) {
      if (!img.srcset) {continue;}
      const list = img.srcset.split(',');
      for (let i = 0; i < list.length; i++) {
        const srcitem = list[i].trim();
        if (!srcitem || srcitem.length > 3999) {continue;}
        // First token is the URL, the rest are descriptors such as "2x" or "640w".
        const parts = srcitem.split(/\s+/);
        if (!ishttp(parts[0])) {continue;}
        parts[0] = fullUrl(parts[0]);
        // Entries must stay strings: they are joined back into the attribute below.
        list[i] = parts.join(' ');
      }
      img.srcset = list.join(',');
    }

    //debug('Resolve canvas');
    for (const canvas of $_('canvas')) {
      try {
        // The bitmap only exists on the live canvas; the clone is blank.
        const live = liveTwin(canvas);
        if (live) {
          ElmProps.add(canvas, 'Canvas_DataUrl', img2url(live));
        }
      } catch (e) {
        // Best effort: log and continue with the next canvas.
        debug(e);
      }
    }
    debug('Resolve styles', dom);
    for (const styleElm of $_('style')) {
      try {
        if (!styleElm.firstChild) {
          // Empty <style> in the clone: copy the rules of the live sheet, if it has any.
          const oelm = doc.querySelector(ElmProps.getCssPath(styleElm));
          debug('cssRulesX', styleElm, oelm);
          if (oelm && oelm.sheet?.cssRules?.length) {
            let text = '';
            for (const rule of oelm.sheet.cssRules) {
              text += rule.cssText;
              text += '\n';
            }
            //debug('cssRules', text);
            styleElm.innerHTML = text;
          }
        }
      } catch (e) {
        debug(e);
      }
    }

    //debug('Resolve background-images');
    const urlReg = /^\s*url\(\s*['"]?([^\(\)'"]+)['"]?\s*\)\s*$/;
    for (const elm of $_('*')) {
      const bg = elm.style && elm.style.backgroundImage;
      if (!bg || bg.length >= 3999 // CONST.Number.MaxUrlLength
        || bg.lastIndexOf('data:', 10) !== -1) { // skip data: URLs
        continue;
      }
      const m = bg.match(urlReg);
      if (m) { // Get full image url
        elm.style.backgroundImage = 'url('+fullUrl(m[1])+')';
      }
    }

    //debug('Resolve input/textarea/progress values');
    // NOTE(review): this also records the value of password inputs into the saved file — confirm that is intended.
    for (const field of $_('input,textarea,progress')) {
      // Query origin element's value
      const oelm = liveTwin(field);
      // Add to property map
      if (oelm && oelm.value) {
        ElmProps.add(field, 'Input_Value', oelm.value);
      }
    }

    // Get favicon.ico if no icon found
    debug('Resolve favicon.ico');
    if (!$('link[rel*=icon]')) {
      const host = getHost();
      // getHost() yields null when location.href is not an http(s) URL.
      if (host) {
        const icon = $C('link');
        icon.rel = 'icon';
        icon.href = host + 'favicon.ico';
        $A(dom.head, icon);
      }
    }

    // Start generating the finish event
    debug('Waiting for async tasks to be finished');
    AM.finishEvent = true;

    /**
     * Rewrite every url(...) in a style text that passes ishttp() to an absolute URL.
     * `callback(style, ...args)` is invoked once per rewritten URL with the updated text,
     * or once immediately when `style` is empty. It is not invoked when no url(...) is found.
     */
    function resolveStyle(style, callback, args=[]) {
      const argvs = [style].concat(args);
      if (!style) {
        return callback.apply(null, argvs);
      }
      const re = /url\(\s*['"]?([^\(\)'"]+)['"]?\s*\)/;
      const rg = /url\(\s*['"]?([^\(\)'"]+)['"]?\s*\)/g;
      const replace = (durl, urlexp, arg1, arg2, arg3) => {
        // Replace style text; a replacer function keeps "$" sequences in the URL literal.
        const durlexp = 'url("'+durl+'")';
        style = style.replaceAll(urlexp, () => durlexp);
        // Get args
        argvs[0] = style;
        callback.apply(null, argvs);
        AM.finish();
      };

      const all = style.match(rg);
      if (!all) {return;}
      for (const urlexp of all) {
        // Check url
        if (urlexp.length > 3999) {continue;}
        const osrc = urlexp.match(re)[1];
        const baseurl = args instanceof HTMLLinkElement && args.ohref ? args.ohref : location.href;
        if (!ishttp(osrc)) {continue;}
        const src = fullUrl(osrc, baseurl);

        // Request
        AM.add();
        requestDataURL(src, replace, [urlexp].concat(args));
      }
    }

    /**
     * For a <link> whose href is a data: URL, decode the stylesheet, run it through
     * resolveStyle() and store the result back as a new data: URL. Other hrefs are ignored.
     */
    function resolveStyleLinked(link) {
      const durl = link.href;
      if (!durl || !durl.startsWith('data:')) {return;}
      let blob;
      try {
        blob = dataURLToBlob(durl);
      } catch (e) {
        // Undecodable data URL: leave the link as it is.
        debug(e);
        return;
      }
      const reader = new FileReader();
      reader.onload = () => {
        resolveStyle(reader.result, (style, link) => {
          const blob = new Blob([style],{type:"text/css"});
          AM.add();
          blobToDataURL(blob, function(durl, link) {
            //debug('style elm=', link.id, link)
            link.href = durl;
            AM.finish();
          }, link);
        }, link);
        AM.finish();
      };
      // Without this, a failed read would leave the task counter above zero forever.
      reader.onerror = () => {
        debug(reader.error);
        AM.finish();
      };
      AM.add();
      reader.readAsText(blob);
    }
  }
  391.  
  /**
   * Resolve a possibly relative URL against a base and return the absolute URL.
   *
   * Uses the standard URL resolver, so root-relative ("/a.png"), protocol-relative
   * ("//host/a.png"), path-relative ("a.png") and already absolute URLs of any scheme
   * (mailto:, data:, ...) are all handled. The previous string concatenation produced
   * "dir//a.png" for root-relative input and prefixed the base to non-http schemes.
   *
   * @param {string} url URL to resolve; falsy values are returned unchanged.
   * @param {string} [baseurl] Base URL; defaults to location.href.
   * @returns {string} Absolute URL, or `url` unchanged when it cannot be resolved.
   */
  function fullUrl(url, baseurl) {
    if (!url) {
      return url;
    }
    try {
      return new URL(url, baseurl || location.href).href;
    } catch (e) {
      // Unparseable input: hand it back untouched rather than failing the whole save.
      return url;
    }
  }
  404.  
  /**
   * Build a CSS selector path ("html > body > div:nth-of-type(2)") for an element.
   *
   * The walk goes up through parentNode and stops early at the first ancestor (or the
   * element itself) that has an id, which then becomes the start of the path.
   * Ids are passed through CSS.escape so that ids such as "user.name" or "1st" still
   * give a valid selector. The walk also ends cleanly when an element has no parent.
   *
   * @param {Element} el Element to describe.
   * @returns {string|undefined} The selector, or undefined when `el` is not an Element.
   */
  function cssPath(el) {
    if (!(el instanceof Element)) return;
    const path = [];
    let node = el;
    // `node &&` guards detached subtrees, whose topmost parentNode is null.
    while (node && node.nodeType === Node.ELEMENT_NODE) {
      let selector = node.nodeName.toLowerCase();
      if (node.id) {
        path.unshift(selector + '#' + CSS.escape(node.id));
        break;
      }
      // 1-based position among preceding siblings with the same tag name.
      let nth = 1;
      for (let sib = node.previousElementSibling; sib; sib = sib.previousElementSibling) {
        if (sib.nodeName.toLowerCase() === selector) nth++;
      }
      if (nth !== 1) selector += ":nth-of-type(" + nth + ")";
      path.unshift(selector);
      node = node.parentNode;
    }
    return path.join(" > ");
  }
  427.  
  /**
   * Invoke `callback(url, ...args)` synchronously; nothing is downloaded here, the URL
   * itself is handed over. Anything thrown is passed to `err` and not rethrown.
   * @param {string} url URL passed as the first callback argument.
   * @param {Function} callback Receiver of the URL and the extra arguments.
   * @param {*} [args=[]] Extra argument(s): an array is spread, a single value is appended.
   */
  function requestDataURL(url, callback, args=[]) {
    try {
      //debug('requestDataURL::', url, args);
      Reflect.apply(callback, null, [url].concat(args));
    } catch (failure) {
      err(failure);
    }
  }
  435.  
  /**
   * Read a Blob as a data: URL and pass it on as `callback(dataUrl, ...args)`.
   * @param {Blob} blob Blob to encode.
   * @param {Function} callback Called from the reader's load handler.
   * @param {*} [args=[]] Extra argument(s): an array is spread, a single value is appended.
   */
  function blobToDataURL(blob, callback, args=[]) {
    const fileReader = new FileReader();
    fileReader.onload = () => {
      Reflect.apply(callback, null, [fileReader.result].concat(args));
    };
    fileReader.readAsDataURL(blob);
  }
  443.  
  /**
   * Convert a data: URL into a Blob.
   *
   * Handles both forms of data URL:
   *  - "data:<mime>[;param...];base64,<payload>" — payload is base64-decoded to bytes;
   *  - "data:<mime>[;param...],<payload>" — payload is percent-decoded text.
   * The previous version required a ';' in the header and always base64-decoded, so
   * plain data URLs such as "data:text/css,a%7B%7D" made it throw.
   *
   * @param {string} dataurl The data: URL.
   * @returns {Blob} Blob whose type is the media type before the first ';'.
   * @throws {TypeError} When the input is not a data: URL.
   */
  function dataURLToBlob(dataurl) {
    const comma = typeof dataurl === 'string' ? dataurl.indexOf(',') : -1;
    if (comma === -1 || !dataurl.startsWith('data:')) {
      throw new TypeError('dataURLToBlob: not a data URL');
    }
    const header = dataurl.slice('data:'.length, comma);
    // Everything after the FIRST comma is payload; plain-text payloads may contain commas.
    const payload = dataurl.slice(comma + 1);
    const params = header.split(';');
    const mime = params[0];
    const isBase64 = params.slice(1).some((p) => p.trim().toLowerCase() === 'base64');

    if (!isBase64) {
      let text;
      try {
        text = decodeURIComponent(payload);
      } catch (e) {
        // Malformed percent-escapes: keep the payload as written.
        text = payload;
      }
      return new Blob([text], { type: mime });
    }

    const bstr = atob(payload);
    const bytes = new Uint8Array(bstr.length);
    for (let i = 0; i < bstr.length; i++) {
      bytes[i] = bstr.charCodeAt(i);
    }
    return new Blob([bytes], { type: mime });
  }
  455.  
  /**
   * Counter for outstanding tasks with a completion hook.
   *
   * `add()` / `finish()` raise and lower `taskCount` and return the new count.
   * `onfinish` (assigned by the user of the instance) is called when
   *  - `finish()` brings the count to 0 while `finishEvent` is truthy, or
   *  - `finishEvent` is set to a truthy value while the count is 0.
   */
  function AsyncManager() {
    const self = this;
    // Backing store for the `finishEvent` accessor.
    let armed = false;

    // Ongoing task count
    self.taskCount = 0;

    // Whether finish events are generated
    Object.defineProperty(self, 'finishEvent', {
      configurable: true,
      enumerable: true,
      get() {
        return armed;
      },
      set(value) {
        armed = value;
        if (value && self.taskCount === 0 && self.onfinish) {
          self.onfinish();
        }
      },
    });

    // Add one task
    self.add = function () {
      self.taskCount += 1;
      return self.taskCount;
    };

    // Finish one task
    self.finish = function () {
      self.taskCount -= 1;
      if (self.taskCount === 0 && self.finishEvent && self.onfinish) {
        self.onfinish();
      }
      return self.taskCount;
    };
  }
  478.  
  /**
   * Draw an image-like source onto a fresh canvas of the same width/height and
   * return that canvas's data URL.
   * @param {*} img Source passed to drawImage; its `width` and `height` are read.
   * @returns {string} Result of canvas.toDataURL().
   */
  function img2url(img) {
    const canvas = doc.createElement('canvas');
    const ctx = canvas.getContext('2d');
    Object.assign(canvas, { width: img.width, height: img.height });
    ctx.drawImage(img, 0, 0);
    return canvas.toDataURL();
  }
  487.  
  /**
   * Format the current local time like "1970-01-01 00:00:00".
   * A falsy dateSep drops the date part, a falsy timeSep drops the time part;
   * the separating space appears only when both parts are present.
   * @param {string} [dateSep='-'] Separator between year, month and day.
   * @param {string} [timeSep=':'] Separator between hours, minutes and seconds.
   * @returns {string} The formatted timestamp.
   */
  function getTime(dateSep='-', timeSep=':') {
    const now = new Date();
    const datePart = dateSep
      ? [f0(now.getFullYear(), 4), f0(now.getMonth() + 1, 2), f0(now.getDate(), 2)].join(dateSep)
      : '';
    const gap = dateSep && timeSep ? ' ' : '';
    const timePart = timeSep
      ? [f0(now.getHours(), 2), f0(now.getMinutes(), 2), f0(now.getSeconds(), 2)].join(timeSep)
      : '';
    return datePart + gap + timePart;
  }
  /**
   * Left-pad the string form of a number with zeros up to a minimum length.
   * @param {number} number Value to format.
   * @param {number} ln Minimum length of the result.
   * @returns {string} The padded string; longer inputs are returned unshortened.
   */
  function f0(number, ln) {
    // Math.ceil mirrors the old loop bound (`i < ln`) for non-integer lengths.
    return String(number).padStart(Math.ceil(ln), '0');
  }
  504.  
  /**
   * Stop an event from propagating and cancel its default action.
   * Failures (for example a value without these methods) are logged via debug().
   * @param {Event} e Event to stop.
   */
  function stop(e) {
    try {
      e.stopPropagation();
      e.preventDefault();
    } catch (failure) {
      debug(failure);
    }
  }
  511.  
  /**
   * Offer a text as a file download by clicking a temporary <a download> link.
   *
   * The object URL created for the download is revoked afterwards; previously it was
   * never released, so every save kept a copy of the page text alive until the tab closed.
   *
   * @param {string} text File content.
   * @param {string} name Suggested file name.
   */
  function saveTextToFile(text, name) {
    // Delay before the blob URL is released, leaving the browser time to start the download.
    const REVOKE_DELAY_MS = 40000;
    const blob = new Blob([text],{type:"text/plain;charset=utf-8"});
    const url = URL.createObjectURL(blob);
    try {
      const a = doc.createElement('a');
      a.href = url;
      a.download = name;
      a.click();
    } finally {
      setTimeout(() => URL.revokeObjectURL(url), REVOKE_DELAY_MS);
    }
  }
  520.  
  /**
   * Get the origin part of an http(s) URL, including the scheme and the trailing slash
   * (e.g. "https://example.com/").
   *
   * The `url` argument is now actually used (it was ignored in favour of location.href),
   * and the pattern is anchored so a URL embedded in a query string cannot match.
   *
   * @param {string} [url=location.href] URL to inspect.
   * @returns {string|null} The host part, or null when `url` does not start with
   *   "http(s)://<host>/".
   */
  function getHost(url=location.href) {
    const match = url.match(/^https?:\/\/[^\/]+\//);
    return match ? match[0] : match;
  }
  526.  
  527. })();

QingJ © 2025

镜像随时可能失效,请加Q群300939539或关注我们的公众号极客氢云获取最新地址