SingleFile - 单文件保存网页

保存当前页面的全部可见内容到一个.html文件中,包含了所有文字、排版、图像

目前为 2021-05-12 提交的版本。查看 最新版本

// ==UserScript==
// @name         SingleFile - 单文件保存网页
// @namespace    SingleFile
// @version      1.0.2
// @description  保存当前页面的全部可见内容到一个.html文件中,包含了所有文字、排版、图像
// @author       PY-DNG
// @include      *
// @connect      *
// @icon         data:image/gif;base64,R0lGODlhAQABAAAAACH5BAEKAAEALAAAAAABAAEAAAICTAEAOw==
// @grant        GM_xmlhttpRequest
// @grant        GM_registerMenuCommand
// @grant        GM_unregisterMenuCommand
// @grant        GM_info
// ==/UserScript==

// The marker /*-pass*/ flags functionality that is still to be implemented

(function () {
	'use strict';

	// Developer-mode flag. NOTE(review): nothing in the visible file reads it.
	const developer = true;

	// Internal constants
	// Upper bound on simultaneously running GM_xmlhttpRequest calls (passed to GMXHRHook below).
	const NUMBER_MAX_XHR = 20;
	// Menu-command captions: idle state and "save in progress" state.
	const TEXT_SAVEPAGE = '保存此网页';
	const TEXT_SAVING = '保存中...';
	// HTML comment banner prepended to every saved file; the {…} placeholders are
	// filled from the userscript metadata (GM_info) and the current page URL.
	const TEXT_ABOUT = '<!-- Web Page Saved By {SCNM} Ver.{VRSN}, Author {ATNM} -->\n<!-- Page URL: {LINK} -->'
		.replaceAll('{SCNM}', GM_info.script.name)
		.replaceAll('{VRSN}', GM_info.script.version)
		.replaceAll('{ATNM}', GM_info.script.author)
		.replaceAll('{LINK}', location.href);

	// Shared loop counters. `i` is used by removeScripts; `j` is not used in the visible file.
	let i, j;

	// Log verbosity levels: a message is printed when its level is <= g_logLevel.
	let LogLevel = {
		None: 0,
		Error: 1,
		Success: 2,
		Warning: 3,
		Info: 4,
		Elements: 5,
	};
	// Number of messages printed since the console was last cleared (see DoLog).
	let g_logCount = 0;
	// Current verbosity threshold.
	let g_logLevel = LogLevel.Info;

	// Print a colour-tagged message, or a raw object, to the console.
	//
	// level        - one of the LogLevel values; the call is ignored when level > g_logLevel
	//                (or when level is not comparable to a number).
	// msgOrElement - text to print, or any object/element to dump as-is.
	// isElement    - when true (or when level is LogLevel.Elements) the value is passed to
	//                console.log untouched so the console can render it interactively.
	//
	// Side effects: increments g_logCount and clears the console after 512 messages.
	function DoLog(level = LogLevel.Info, msgOrElement, isElement = false) {
		if (!(level <= g_logLevel)) {
			return;
		}

		// Tag text and CSS applied through the console's %c directive.
		let tag = '';
		let css = '';
		switch (level) {
			case LogLevel.Error:
				tag = '[Error]';
				css = 'color:#ff0000';
				break;
			case LogLevel.Success:
				tag = '[Success]';
				css = 'color:#00aa00';
				break;
			case LogLevel.Warning:
				tag = '[Warning]';
				css = 'color:#ffa500';
				break;
			case LogLevel.Info:
				tag = '[Info]';
				css = 'color:#888888';
				break;
			default:
				break;
		}

		if (level !== LogLevel.Elements && !isElement) {
			// The message is passed as a %s argument instead of being concatenated into the
			// format string: logged URLs routinely contain percent-escapes such as "%d0" or
			// "%c3", which the console would otherwise interpret as format directives.
			console.log('%c' + tag + '%s', css, String(msgOrElement));
		} else {
			console.log(msgOrElement);
		}

		// Keep the console from growing without bound during long saves.
		if (++g_logCount > 512) {
			console.clear();
			g_logCount = 0;
		}
	}

	// Install the request throttle: from here on GM_xmlhttpRequest refers to the
	// wrapper created by GMXHRHook, limited to NUMBER_MAX_XHR running requests.
	GMXHRHook(NUMBER_MAX_XHR);

	// Save pipeline. Every step calls nextTask() when it is done, except the last one.
	const taskList = [getDom, removeScripts, dealStyles, dealElements, output];
	// The step most recently started by nextTask(); null before the first run.
	let taskNow = null;

	// Working copy of the page, created by getDom and modified by the later steps.
	let Dom;
	// `saving` is true while a save is running; `cmdID` is the handle of the
	// currently registered menu command.
	let saving = false, cmdID;

	// Register the menu command.
	GUI();

	// Add the "save this page" entry to the userscript menu and remember its
	// handle in cmdID so it can be replaced later.
	function GUI() {
		const commandHandle = GM_registerMenuCommand(TEXT_SAVEPAGE, saveOnclick);
		cmdID = commandHandle;
	}

	// Menu-command handler: start the save pipeline unless one is already running.
	// Returns false when a save is in progress, otherwise nothing.
	function saveOnclick() {
		if (!saving) {
			switchStatus();
			DoLog(LogLevel.Success, 'SingleFile started.');
			nextTask();
			return;
		}
		return false;
	}

	// Flip the `saving` flag and re-register the menu command so that its caption
	// reflects the new state.
	function switchStatus() {
		saving = !saving;

		// Drop the previous entry (if any) before adding the replacement.
		if (cmdID) {
			GM_unregisterMenuCommand(cmdID);
		}

		let caption;
		if (saving) {
			caption = TEXT_SAVING;
		} else {
			caption = TEXT_SAVEPAGE;
		}
		cmdID = GM_registerMenuCommand(caption, saveOnclick);
	}

	// Pipeline step: take a snapshot of the live page by serialising the <html>
	// element and parsing it into a separate document stored in `Dom`, then
	// continue with the next step.
	function getDom() {
		DoLog(LogLevel.Info, 'Getting document...');

		const pageMarkup = document.querySelector('html').outerHTML;
		const parser = new DOMParser();
		Dom = parser.parseFromString(pageMarkup, 'text/html');

		DoLog(LogLevel.Info, Dom, true);
		nextTask();
	}

	// Pipeline step: delete every <script> element from the snapshot document
	// `Dom`, then continue with the next step.
	function removeScripts() {
		DoLog(LogLevel.Info, 'Removing scripts...');

		// querySelectorAll returns a static list, so removing nodes while iterating is safe.
		const scripts = Dom.querySelectorAll('script');
		for (const script of scripts) {
			script.remove();
		}

		// DoLog takes the level first; without it the payload is taken for the
		// level and nothing is printed.
		DoLog(LogLevel.Info, Dom, true);
		DoLog(LogLevel.Info, scripts, true);

		nextTask();
	}

	// Pipeline step: inline external stylesheets.
	//
	// Downloads the text of every <link rel="stylesheet"> that has an href, puts the
	// combined CSS into one <style> element, removes the <link> elements from the
	// snapshot document and then continues with the next step.
	function dealStyles() {
		DoLog(LogLevel.Info, 'Dealing styles...');
		const CSSLinks = Dom.querySelectorAll('link[rel="stylesheet"]');

		// Only links with an href produce a request; counting the others as
		// outstanding would make the countdown below never reach zero.
		const linksToFetch = Array.from(CSSLinks).filter(function(cLink) {return Boolean(cLink.href);});
		// One slot per link so the CSS can be joined in document order (the order
		// matters for the cascade) no matter which response arrives first.
		const styleTexts = linksToFetch.map(function() {return '';});
		let rest = linksToFetch.length;

		// Nothing to download: finish right away instead of waiting forever.
		if (rest === 0) {
			finish();
			return;
		}

		linksToFetch.forEach(function(cLink, index) {
			DoLog(LogLevel.Info, 'Requesting style from ' + cLink.href);
			// requestText appends the extra args to the callback call, which
			// tells addToStyleText the slot this response belongs to.
			requestText(cLink.href, addToStyleText, [index]);
		});

		// Store one downloaded stylesheet; finish once all have arrived.
		function addToStyleText(styleText, index) {
			styleTexts[index] = styleText;
			rest--;
			DoLog(LogLevel.Info, 'Style got. Rest: ' + String(rest));
			if (rest === 0) {
				finish();
			}
		}

		// Insert the combined <style> element, drop the <link> elements, move on.
		function finish() {
			const style = styleTexts.join('\n');

			if (style !== '') {
				const styleEle = Dom.createElement('style');
				// textContent stores the CSS verbatim; no HTML parsing is involved.
				styleEle.textContent = style;
				// Place it before the first existing <style> element when there is one.
				const firstInnerStyle = Dom.querySelector('style');
				if (firstInnerStyle) {
					firstInnerStyle.parentElement.insertBefore(styleEle, firstInnerStyle);
				} else {
					Dom.head.appendChild(styleEle);
				}
			}

			// Remove link elements
			for (const link of CSSLinks) {
				link.remove();
			}

			nextTask();
		}
	}

	// Pipeline step: embed images.
	//
	// Walks every element of the snapshot document, replaces the src of <img>
	// elements and http(s) background-image URLs with data: URLs, and continues
	// with the next step once every element has been handled.
	function dealElements() {
		DoLog(LogLevel.Info, 'dealing elements...');
		const allEles = Dom.querySelectorAll('*');
		let restElesCount = allEles.length;

		// With no elements the countdown would never fire, so move on directly.
		if (restElesCount === 0) {
			nextTask();
			return;
		}

		for (const element of allEles) {
			dealElement(element);
		}

		// Per-element chain: <img> source first, then the background image.
		function dealElement(element) {
			DoLog(LogLevel.Info, element, true);

			dealImg(element);
		}

		// Inline the src of an <img> unless it is empty or already a data: URL.
		function dealImg(element) {
			const nextDealingTask = function() {dealBackgroundImg(element);};
			const needsDownload = element.tagName === 'IMG' &&
				element.src !== '' &&
				!element.src.startsWith('data:');

			if (!needsDownload) {
				nextDealingTask();
				return;
			}

			requestImageURL(element.src, function(dataURL) {
				element.src = dataURL;
				// How should <canvas> be handled? /*-pass*/
				nextDealingTask();
			});
		}

		// Inline every http(s) url("...") layer of the computed background-image.
		// NOTE(review): `element` belongs to the parsed snapshot, not to the
		// rendered page; whether getComputedStyle reports stylesheet-defined
		// backgrounds for such elements should be confirmed in the target browsers.
		function dealBackgroundImg(element) {
			const cStyle = getComputedStyle(element);
			const backgroundImage = (cStyle && cStyle['background-image']) || '';

			// Each layer is matched separately and the match stops at the closing
			// quote (escaped characters are skipped), so several layers are never
			// merged into one bogus URL.
			const urlMatches = Array.from(backgroundImage.matchAll(/url\("(http(?:[^"\\]|\\.)+)"\)/g));
			if (urlMatches.length === 0) {
				elementDealed();
				return;
			}

			let propValue = backgroundImage;
			let pendingLayers = urlMatches.length;
			for (const urlMatch of urlMatches) {
				const rawUrl = urlMatch[1];
				const url = rawUrl.replaceAll('\\\\', '\\');
				requestImageURL(url, function(dataURL) {
					// A replacer function keeps "$" sequences in the replacement literal.
					propValue = propValue.replace(rawUrl, function() {return dataURL;});
					pendingLayers--;
					if (pendingLayers === 0) {
						element.style['background-image'] = propValue;
						elementDealed();
					}
				});
			}
		}

		// Count one element as finished; start the next step after the last one.
		function elementDealed() {
			restElesCount--;
			DoLog(LogLevel.Info, 'element dealed, rest: ' + String(restElesCount) + ' elements')
			if (restElesCount === 0) {
				nextTask();
			}
		}
	}

	// Final pipeline step: serialise the processed snapshot and offer it as a
	// download named "SingleFile - <page title>.html", then switch the menu
	// command back to its idle state.
	function output() {
		DoLog(LogLevel.Success, 'SingleFile finished.');
		DoLog(LogLevel.Success, Dom, true);

		// The snapshot is built from the <html> element alone, so the doctype of
		// the live page has to be carried over separately; without it the saved
		// file is rendered in quirks mode.
		const doctypeText = document.doctype ?
			new XMLSerializer().serializeToString(document.doctype) + '\n' :
			'';

		const outputText = doctypeText + TEXT_ABOUT + '\n\n' + Dom.documentElement.outerHTML;
		saveTextToFile(outputText, 'SingleFile - ' + document.title + '.html');
		switchStatus();
	}

	// Start the pipeline step that follows `taskNow` in `taskList`.
	//
	// The first call (taskNow === null) starts the first step. When the previous
	// run ended on the last step, the next call wraps around and starts the first
	// step again, so the page can be saved more than once; resetting the pointer
	// without running anything would leave the script stuck in the "saving" state.
	function nextTask() {
		if (taskList.length === 0) {
			return;
		}
		const funcIndex = taskNow ? taskList.indexOf(taskNow) : -1;
		const nextIndex = (funcIndex + 1) % taskList.length;
		taskNow = taskList[nextIndex];
		taskNow();
	}

	// Download `url` as text and pass the result to `callback`.
	//
	// url      - address to fetch with a GET request.
	// callback - called as callback(text, ...args). On a network error or a
	//            timeout it is called with an empty string, so that callers
	//            counting outstanding requests are never left waiting.
	// args     - optional extra arguments appended to the callback call.
	function requestText(url, callback, args=[]) {
		const deliver = function(text) {
			callback.apply(null, [text].concat(args));
		};
		const deliverEmpty = function() {
			deliver('');
		};

		GM_xmlhttpRequest({
			method:       'GET',
			url:          url,
			responseType: 'text',
			onload:       function(response) {
				deliver(response.responseText);
			},
			onerror:      deliverEmpty,
			ontimeout:    deliverEmpty
		});
	}

	// Download `url` as a blob and pass its data: URL to `callback`.
	//
	// url      - address of the image to fetch with a GET request.
	// callback - called as callback(dataURL, ...args). When the request fails,
	//            times out, or the response cannot be read, it is called with the
	//            original `url` instead, so the caller keeps the remote reference
	//            and its completion counting still advances.
	// args     - optional extra arguments appended to the callback call.
	function requestImageURL(url, callback, args=[]) {
		const deliver = function(resultURL) {
			callback.apply(null, [resultURL].concat(args));
		};
		const keepOriginal = function() {
			deliver(url);
		};

		GM_xmlhttpRequest({
			method:       'GET',
			url:          url,
			responseType: 'blob',
			onload:       function(response) {
				blobToDataURI(response.response, deliver, keepOriginal);
			},
			onerror:      keepOriginal,
			ontimeout:    keepOriginal
		});

		// Read `blob` as a data: URL; onDone receives the URL, onFail is called
		// when the blob cannot be read.
		function blobToDataURI(blob, onDone, onFail) {
			const reader = new FileReader();
			reader.onload = function (e) {
				onDone(e.target.result);
			};
			reader.onerror = function () {
				onFail();
			};
			try {
				reader.readAsDataURL(blob);
			} catch (err) {
				// readAsDataURL throws when the response body is not a Blob.
				onFail();
			}
		}
	}

	// GM_XHR HOOK: replaces GM_xmlhttpRequest with a wrapper that keeps the number
	// of simultaneously running requests at or below maxXHR; further requests wait
	// in a queue and are started as running ones end (load, error, timeout, abort).
	//
	// The wrapper returns an abort function for the request (also reachable as its
	// own `.abort` property, mirroring the object shape returned by the native API).
	// The abort function returns true when it removed a waiting request or aborted
	// a running one, and false when the request had already ended.
	// A request without details or without a url is not made; the wrapper returns false.
	// Aborting a request that is still waiting or already ended fires NO onabort event.
	// The onload/onerror/ontimeout/onabort members of the details object passed to
	// the wrapper are replaced by wrappers that call the original handlers.
	// Requires: function delItem(){...} & function uniqueIDMaker(){...}
	function GMXHRHook(maxXHR=5) {
		const GM_XHR = GM_xmlhttpRequest;
		const getID = uniqueIDMaker();
		// Requests waiting for a free slot / requests currently running.
		let todoList = [], ongoingList = [];
		GM_xmlhttpRequest = safeGMxhr;

		function safeGMxhr(...args) {
			// Get an id for this request, arrange a request object for it.
			const request = {id: getID(), args: args, aborter: null};

			// Stop invalid requests before touching their details object
			if (!validCheck(request)) {
				return false;
			}

			// Make sure the slot is released whenever the request ends
			dealEndingEvents(request);

			// Queue it; it starts immediately when a slot is free
			todoList.push(request);
			checkXHR();
			return makeAbortFunc(request.id);
		}

		// Wrap the four ending events so each one frees the slot and starts the
		// next waiting request before calling the handler supplied by the caller.
		function dealEndingEvents(request) {
			const e = request.args[0];

			for (const eventName of ['onload', 'onerror', 'ontimeout', 'onabort']) {
				const original = e[eventName];
				e[eventName] = function() {
					reqFinish(request.id);
					checkXHR();
					if (typeof original === 'function') {
						original.apply(null, arguments);
					}
				};
			}
		}

		// Check if the request is invalid
		function validCheck(request) {
			const e = request.args[0];
			return Boolean(e && e.url);
		}

		// Start one request from todoList if a slot is free.
		// Returns the started request object, or false when nothing was started.
		function checkXHR() {
			if (ongoingList.length >= maxXHR) {return false;}
			if (todoList.length === 0) {return false;}
			const req = todoList.shift();
			// Register the request before starting it, so that an ending event
			// fired during the call below still finds and releases it.
			ongoingList.push(req);
			req.aborter = GM_XHR.apply(null, req.args);
			return req;
		}

		// Make a function that aborts a certain request
		function makeAbortFunc(id) {
			const abortRequest = function() {
				// Still waiting: just take it out of the queue.
				const waitingIndex = todoList.findIndex(function(req) {return req.id === id;});
				if (waitingIndex !== -1) {
					// delItem returns a new array, so the result must be stored.
					todoList = delItem(todoList, waitingIndex);
					return true;
				}

				// Running now: abort it and free the slot.
				const running = ongoingList.find(function(req) {return req.id === id;});
				if (running) {
					const aborter = running.aborter;
					// The native call returns an object carrying abort(); a bare
					// function is accepted as well.
					if (typeof aborter === 'function') {
						aborter();
					} else if (aborter && typeof aborter.abort === 'function') {
						aborter.abort();
					}
					reqFinish(id);
					checkXHR();
					return true;
				}

				// Oh no, this request is already finished...
				return false;
			};
			abortRequest.abort = abortRequest;
			return abortRequest;
		}

		// Remove a certain request from ongoingList.
		// Returns true when it was found, false otherwise.
		function reqFinish(id) {
			const index = ongoingList.findIndex(function(req) {return req.id === id;});
			if (index === -1) {
				return false;
			}
			ongoingList = delItem(ongoingList, index);
			return true;
		}
	}

	// Build a copy of `arr` without the element at position `delIndex`.
	// The input array itself is left untouched, so callers must use the return value.
	function delItem(arr, delIndex) {
		const head = arr.slice(0, delIndex);
		const tail = arr.slice(delIndex+1);
		return head.concat(tail);
	}

	// Create an ID generator: every call of the returned function yields the next
	// integer, starting at 1. Each generator keeps its own counter.
	function uniqueIDMaker() {
		let lastIssued = 0;
		return function makeID() {
			lastIssued += 1;
			return lastIssued;
		};
	}

	// Offer `text` to the user as a file download called `name`.
	//
	// text - file content.
	// name - suggested file name.
	function saveTextToFile(text, name) {
		const blob = new Blob([text],{type:"text/plain;charset=utf-8"});
		const url = URL.createObjectURL(blob);
		const a = document.createElement('a');
		a.href = url;
		a.download = name;
		a.click();

		// The object URL keeps the blob alive until it is revoked. Release it
		// after a generous delay rather than immediately, so the download has
		// time to start.
		setTimeout(function() {
			URL.revokeObjectURL(url);
		}, 40000);
	}
})();

QingJ © 2025

镜像随时可能失效,请加Q群300939539或关注我们的公众号极客氢云获取最新地址