// ==UserScript==
// @name MiddleMan Sandbox
// @namespace Itsnotlupus Industries
// @description A test script for the middleman library. See https://gf.qytechs.cn/en/scripts/472943-itsnotlupus-middleman for details.
// @author Itsnotlupus
// @version 1.4.1
// @license MIT
// @run-at document-start
// @match *://*/*
// @require https://gf.qytechs.cn/scripts/468394-itsnotlupus-tiny-utilities/code/utils.js
// @require https://gf.qytechs.cn/scripts/472943-itsnotlupus-middleman/code/middleman.js
// @grant unsafeWindow
// ==/UserScript==
// standard Web APIs eshint doesn't know about
/* global DecompressionStream */
// things defined from @require scripts
/* global crel, logGroup, middleMan */
// If you see some sites complaining loudly about TrustedHTML issues caused by the hook below,
// you may choose to uncomment the next line to make them go away. Be wary, this has.. implications.
// unsafeWindow.trustedTypes.createPolicy('default', {createHTML: (string, sink) => string})
// adapted from https://www.bram.us/2022/02/13/log-images-to-the-devtools-console-with-console-image/
// tweaked because Chrome now only accepts data: URIs as background urls in console.
async function blobToImageLog(blob, scale = 1) {
const src = URL.createObjectURL(blob);
try {
let {target: img, target: { width, height } } = await new Promise((onload, onerror) =>crel('img', { src, onload, onerror }));
const canvas = crel("canvas", { width, height });
canvas.getContext('2d').drawImage(img, 0, 0);
width *= scale; height *= scale;
return ["%c .", `font-size:1px;padding:${~~(height/2)}px ${~~(width/2)}px;background:url("${canvas.toDataURL()}");background-size:${width}px ${height}px;color: transparent;`];
} catch {
return ["Invalid image", blob];
} finally {
URL.revokeObjectURL(src);
}
}
function hexdump(buffer, blockSize = 16) {
const lines = [];
const array = new Uint8Array(buffer);
for (let i = 0; i < array.length; i += blockSize) {
const addr = i.toString(16).padStart(4, '0');
let hex = '';
let chars = '';
for (let j = 0; j < blockSize ; j++) {
const v = array[i+j];
if (j%16==0) { hex += ' '; chars += ' '; }
hex += ' ' + (v!=null ? v.toString(16).padStart(2, '0') : ' ');
chars += v!=null ? v<32?'.':String.fromCharCode(v) : ' ';
}
lines.push(addr + ' ' + hex + ' ' + chars);
}
return lines.join('\n');
}
const urlParamsToObject = params => [...new URLSearchParams(params).entries()].reduce((obj, [key, val])=>((obj[key] ? !Array.isArray(obj[key])?obj[key] = [obj[key],val]:obj[key].push(val):obj[key]=val),obj),{});
const domainFromHostname = str => str.split('.').reduceRight((domain, chunk)=> domain.length<7&&chunk!='www' ? domain=chunk+'.'+domain : domain, '').slice(0,-1);
/**
* Inspect the object passed and try to derive the most
* immediately usable data representation from its body.
*
* @param {Request|Resource} r
* @returns {{type: 'text'|'json'|'doc'|'image'|'binary', operations: string[], payload: any}}
*/
async function autoParseBody(r) {
const unzip = (r,encoding="gzip") => new Response(r.body.pipeThrough(new DecompressionStream(encoding)));
const toJSON = str => { try { return JSON.parse(str); } catch {} };
const toText = buffer => new TextDecoder(charset).decode(buffer);
const toArray = obj => Array.isArray(obj) ? obj : Object.keys(obj).reduce((a,k)=>((a[k]=obj[k]),a),[]);
const isBinary = async blob => { try {new TextDecoder(charset, {fatal:true}).decode(await blob.arrayBuffer());return false} catch (e) { return true}};
const isArrayShaped = obj => Array.isArray(obj) || Object.keys(obj).every(key => key==parseInt(key));
const isArrayOfBytes = arr => arr.every(value => (value & 255) == value);
const isURLEncoded = str => /^([a-z0-9_.~-]|%[0-9a-f]{2})+=([a-z0-9_.~-]|%[0-9a-f]{2})*(&([a-z0-9_.~-]|%[0-9a-f]{2})+=([a-z0-9_.~-]|%[0-9a-f]{2})*)*$/i.test(str);
const isPerhapsURLEncoded = str => /[&%]/.test(str) || /^[a-z0-9_-]+=[a-z0-9_-]+$/g.test(str);
const mayBeHTML = str => /<\/\s*html\s*>/i.test(str);
const mayBeXML = str => /<[a-z]+.*?(>.*?<\/[a-z]+>|\/>)/i.test(str);
const contentType = r.headers.get('content-type')?.split(';')[0] ?? '';
const charset = r.headers.get('content-type')?.match(/charset=(?<charset>[^()<>@,;:\"/[\]?.=\s]*)/i)?.groups?.charset ?? "utf-8";
const encoding = r.headers.get('content-encoding');
let ops = [];
// 1. unzip any compressed content.
if (r instanceof Request && ['gzip', 'deflate'].includes(encoding)) {
// A web app went out of its way to compress a Request payload. cool.
r = unzip(r, encoding);
ops.push(encoding);
}
let body, type;
// devour the body, leaving only a blob behind. j/k. we cloned it so you can still grab a working response in the console.
const blob = await r.clone().blob();
// 2. get rid of binary formats: images.
if (contentType.startsWith("image/")) {
return {
type: 'image',
operations: ops.concat('raw'),
payload: blob
};
}
// 3. get rid of other binary formats.
if (await isBinary(blob)) {
return {
type: 'binary',
operations: ops.concat('raw'),
payload: blob
};
}
// 4. from here on, everything is text-based. more or less.
async function decodeText(text, operations, hint = '') {
// explicit url-encoded content, with a guardrail for mis-typed payloads
if (hint == "application/x-www-form-urlencoded" && isPerhapsURLEncoded(text)) {
const obj = urlParamsToObject(text);
operations.push('urlparams');
return await decodeJSON(obj, operations);
}
// explicit json content
if (hint.includes('json')) {
// dumb loop to skip over security-minded folks that add junk characters at the beginning of their json payloads.
for (let i=0;i<10;i++) {
const obj = toJSON(text.slice(i));
if (obj !== undefined) {
operations.push('json');
return await decodeJSON(obj, operations);
}
}
}
// explicit html or xml content
if (hint.includes('html') || hint.includes('xml')) {
try {
const doc = new DOMParser().parseFromString(text, hint);
operations.push(hint.includes('html')?'html':'xml');
return {
type: 'doc',
operations,
payload: doc
}
} catch {}
}
// implicit json content
if (text[0]=='[' || text[0]=='{') { // "1" is not an interesting JSON content.
// dumb loop to skip over security-minded folks that add junk characters at the beginning of their json payloads.
for (let i=0;i<10;i++) {
const obj = toJSON(text.slice(i));
if (obj !== undefined) {
operations.push('json');
return await decodeJSON(obj, operations);
}
}
}
// implicit HTML content
if (mayBeHTML(text)) {
try {
let node = new DOMParser().parseFromString(text, 'text/html');
if (node.childElementCount ==1) node = node.firstChild;
operations.push('html');
return {
type: 'doc',
operations,
payload: node
}
} catch {}
}
if (mayBeXML(text)) {
try {
let node = new DOMParser().parseFromString(text, 'text/xml');
if (node.childElementCount ==1) node = node.firstChild;
operations.push('xml');
return {
type: 'doc',
operations,
payload: node
}
} catch {}
}
// implicit url-encoded content
if (isURLEncoded(text) && isPerhapsURLEncoded(text)) {
const obj = urlParamsToObject(text);
operations.push('urlparams');
return await decodeJSON(obj, operations);
}
// implicit base64 of non-empty US ASCII strings
if (text.length) {
try {
const decoded = atob(unescape(text.replace(/_/g,'/').replace(/-/g,'+'))); // handles URI-escaped strings, as well as "web-safe" base64.
if (/^[0x0d0x0a0x20-0x7f]*$/.test(decoded)) { // but only keep ascii results.
operations.push('base64');
return {
type: 'base64',
operations,
payload: decoded
}
}
} catch {}
}
// sometimes a chunk of text is just a chunk of text.
return {
type: 'text',
operations,
payload: text
};
}
async function decodeJSON(obj, operations) {
if (obj) {
// 1. is our object an array?
if (isArrayShaped(obj)) {
const array = toArray(obj);
// 1.1 is our array an array of bytes
if (array.length> 10 && isArrayOfBytes(array)) {
let buffer = Uint8Array.from(array).buffer;
//operations.push('binary');
// how high are the odds of ever seeing this in the wild? The answer may surprise you (youtube/log_event)
if (buffer.byteLength > 10 && new DataView(buffer).getInt16() == 0x1f8b) { // gzip magic number
buffer = await unzip(new Response(buffer)).arrayBuffer();
operations.push('gzip');
const text = toText(buffer);
operations.push('text');
return await decodeText(text, operations);
}
}
}
// 2. dig into the object fields. XXX this might be a terrible idea.
if (typeof obj == 'object') {
const sub_ops = Object.assign([], { toString() { return `[ ${this.join()} ]`; }});
// XXX this messes with `operations` a lot. tweak how operations track things.
await Promise.all(Object.keys(obj).map(async key => obj[key] = typeof obj[key] == 'string' ? (await decodeText(obj[key], sub_ops)).payload : obj[key] )); //(await decodeJSON(obj[key], sub_ops)).payload));
if (sub_ops.length) operations.push(sub_ops);
}
}
return {
type: 'json',
operations,
payload: obj
};
}
if (blob.size == 0) {
return { type: 'empty', operations: ['empty'], payload: '' }
}
const text = toText(await blob.arrayBuffer()); // this is charset aware, unlike r.text().
return await decodeText(text, ['text'], contentType);
}
// logging hook. tries to show what's going on, decoding bodies in potentially convoluted ways.
const logHook = async (req, res, err) => {
// used to prefix an object in the console.
function QueryString(obj) { Object.assign(this, obj); }
function Body(obj) { return typeof obj == 'string' || obj instanceof Blob ? obj : Object.assign(this, obj); }
async function logHalf(r) {
const t = Date.now();
const headers = [...r.headers.entries()].map(a=>a.join(": ")).join('\n');
const { type, operations, payload } = await autoParseBody(r);
let body, size;
switch (type) {
case 'image':
size = payload.size;
body = await blobToImageLog(payload);
break;
case 'empty':
case 'text':
size = payload.length;
body = payload;
break;
case 'json':
size = JSON.stringify(payload).length;
body = payload;
break;
case 'doc':
size = new XMLSerializer().serializeToString(payload).length;
body = payload;
break;
case 'binary':
// body = hexdump(await payload.arrayBuffer(), 32); // expensive, and not really useful
size = payload.size;
body = payload;
break;
}
const method = r.method ?? 'GET';
return { size, type, method, ops: operations.join(' => '), headers, body, cost: Date.now()-t };
};
const url = new URL(req.url);
const short = domainFromHostname(url.hostname) + url.pathname;
const reqObj = await logHalf(req);
const query = await logHalf(new Response(url.searchParams, { headers: { 'content-type': 'application/x-www-form-urlencoded' }}));
const type = reqObj.type == 'empty' ? query.type : reqObj.type;
const size = reqObj.type == 'empty' ? query.size : reqObj.size;
const ops = reqObj.type == 'empty' ? query.ops : reqObj.ops;
const opsCount = ops.split(' => ').length;
logGroup("Request " + reqObj.method + ' ' + short + ' '+size+'B ['+type+'] ('+opsCount+')', (reqObj.cost+query.cost)+"ms - "+ops, reqObj.headers, req, new QueryString(query.body), typeof reqObj.body !== "json" ? reqObj.body : new Body(reqObj.body));
if (res) {
const resObj = await logHalf(res);
const resOpsCount = resObj.ops.split(' => ').length;
logGroup("Response " + resObj.method + ' ' + short + ' '+resObj.size+'B ['+resObj.type+'] ('+resOpsCount+')', resObj.cost+"ms - "+resObj.ops, resObj.headers, res, typeof resObj.body !== "json" ? resObj.body : new Body(resObj.body));
} else {
logGroup("Response " + reqObj.method + ' ' + short + " error: "+err.message, err);
}
};
// The actual middleman call: Snoop into everything, log all requests and responses.
middleMan.addHook("*", {
responseHandler: logHook
});