URL Encoder

URL encode and decode for non-utf8 encodings

此脚本不应直接安装,它是一个供其他脚本使用的外部库。如果您需要使用该库,请在脚本元属性加入:// @require https://update.gf.qytechs.cn/scripts/471280/1247074/URL%20Encoder.js

/* eslint-disable no-multi-spaces */

// ==UserScript==
// @name URL Encoder
// @namespace URL-Encoder
// @version 0.2.2
// @description URL encode and decode for non-utf8 encodings
// @author EtherDream, PY-DNG
// @license MIT
// ==/UserScript==

  /**
   * $URL - percent-encoding and decoding of text in the legacy Big5 and GBK
   * character encodings, which `encodeURIComponent` / `decodeURIComponent`
   * (UTF-8 only) cannot handle.
   *
   * Shape of the exported object:
   *   $URL.big5.encode(str)        -> percent-encoded string ("%A4%A4")
   *   $URL.big5.decode(url)        -> decoded text
   *   $URL.big5.encodeBuffer(str)  -> Uint8Array of the encoded bytes
   *   $URL.gbk.{encode,decode,encodeBuffer}   same, for GBK
   *   $URL.encode / $URL.decode / $URL.encodeBuffer
   *       getters returning the functions that match `document.characterSet`;
   *       they throw a TypeError when the document is neither Big5 nor GBK.
   *
   * The encode tables are derived lazily from the platform's TextDecoder, so
   * the exact set of encodable characters is whatever that decoder maps
   * inside the byte ranges listed below.
   */
  let $URL = (function () {
    'use strict';

    // Each entry is [leadBegin, leadEnd, trailBegin, trailEnd], all inclusive.
    // https://en.wikipedia.org/wiki/Big5
    const BIG5_RANGES = [
      [0xA1, 0xF9, 0x40, 0x7E],
      [0xA1, 0xF9, 0xA1, 0xFE],
    ];
    // https://en.wikipedia.org/wiki/GBK_(character_encoding)#Encoding
    const GBK_RANGES = [
      [0xA1, 0xA9, 0xA1, 0xFE],
      [0xB0, 0xF7, 0xA1, 0xFE],
      [0x81, 0xA0, 0x40, 0xFE],
      [0xAA, 0xFE, 0x40, 0xA0],
      [0xA8, 0xA9, 0x40, 0xA0],
      [0xAA, 0xAF, 0xA1, 0xFE],
      [0xF8, 0xFE, 0xA1, 0xFE],
      [0xA1, 0xA7, 0x40, 0xA0],
    ];

    const REPLACEMENT_CHARACTER = 0xFFFD;
    const EURO_SIGN = 0x20AC;
    const HEX_PAIR = /^[0-9a-fA-F]{2}$/;

    // Lazily built Map<codePoint, number[]> encode tables.
    let big5Table;
    let gbkTable;

    /**
     * Builds a code point -> byte sequence table by decoding every two-byte
     * sequence in `ranges` with the platform decoder for `label`.
     *
     * Each pair is decoded on its own so that a sequence the decoder rejects
     * (or expands to several characters) cannot shift the mapping of the
     * pairs that follow it, and so that the result does not depend on the
     * host byte order. Pairs are visited trail-byte-major, range by range;
     * when one code point is produced by several pairs, the last one visited
     * wins.
     *
     * @param {string} label - TextDecoder encoding label.
     * @param {number[][]} ranges - [leadBegin, leadEnd, trailBegin, trailEnd] rows.
     * @returns {Map<number, number[]>} Encode table.
     */
    function buildEncodeTable(label, ranges) {
      const decoder = new TextDecoder(label);
      const pair = new Uint8Array(2);
      const table = new Map();

      for (const [leadBegin, leadEnd, trailBegin, trailEnd] of ranges) {
        for (let trail = trailBegin; trail <= trailEnd; trail++) {
          if (trail === 0x7F) {
            continue; // 0x7F is never a trail byte
          }
          for (let lead = leadBegin; lead <= leadEnd; lead++) {
            pair[0] = lead;
            pair[1] = trail;
            const chars = [...decoder.decode(pair)];
            if (chars.length !== 1) {
              continue; // rejected pair (U+FFFD + re-read trail byte) or multi-character output
            }
            const codePoint = chars[0].codePointAt(0);
            if (codePoint === REPLACEMENT_CHARACTER) {
              continue; // unmapped pair; U+FFFD itself must not become encodable
            }
            table.set(codePoint, [lead, trail]);
          }
        }
      }
      return table;
    }

    /**
     * Encodes text to bytes using an encode table.
     *
     * ASCII is passed through unchanged. Characters missing from the table
     * are written as the ASCII bytes of a decimal numeric character
     * reference (`&#NNNN;`).
     *
     * @param {*} str - Text to encode; coerced with String().
     * @param {Map<number, number[]>} table - Encode table.
     * @returns {number[]} Encoded bytes.
     */
    function encodeString(str, table) {
      const bytes = [];
      for (const char of String(str)) {
        const codePoint = char.codePointAt(0);
        if (codePoint < 0x80) {
          bytes.push(codePoint);
          continue;
        }
        const encoded = table.get(codePoint);
        if (encoded !== undefined) {
          bytes.push(...encoded);
          continue;
        }
        const reference = `&#${codePoint};`;
        for (let k = 0; k < reference.length; k++) {
          bytes.push(reference.charCodeAt(k));
        }
      }
      return bytes;
    }

    /**
     * Encodes text as Big5 bytes.
     * @param {*} str - Text to encode.
     * @returns {number[]} Encoded bytes.
     */
    function str2big5(str) {
      if (!big5Table) {
        big5Table = buildEncodeTable('big5', BIG5_RANGES);
      }
      return encodeString(str, big5Table);
    }

    /**
     * Encodes text as GBK bytes.
     * @param {*} str - Text to encode.
     * @returns {number[]} Encoded bytes.
     */
    function str2gbk(str) {
      if (!gbkTable) {
        gbkTable = buildEncodeTable('gbk', GBK_RANGES);
        // Code Page 936 has a single-byte euro sign at 0x80. The override is
        // applied on top of the decoded table so it takes effect even when
        // the decoder also maps a two-byte sequence to U+20AC.
        gbkTable.set(EURO_SIGN, [0x80]);
      }
      return encodeString(str, gbkTable);
    }

    /**
     * Percent-encodes every byte as an upper-case `%XX` triplet.
     * @param {Iterable<number>} buf - Bytes (plain array or typed array).
     * @returns {string} Percent-encoded string.
     */
    function arr2url(buf) {
      // Array.from (not buf.map) so a typed array yields strings, not numbers.
      return Array.from(buf, byte => '%' + byte.toString(16).padStart(2, '0').toUpperCase()).join('');
    }

    /**
     * Copies a byte list into a Uint8Array.
     * @param {Iterable<number>} arr - Bytes.
     * @returns {Uint8Array} Byte buffer.
     */
    function arr2buf(arr) {
      return Uint8Array.from(arr);
    }

    /**
     * Decodes a percent-encoded string whose bytes are in `encoding`.
     *
     * `%XX` escapes and literal ASCII characters are treated as bytes and
     * decoded together, so a multi-byte character may mix both forms. A `%`
     * that is not followed by two hex digits is kept as a literal `%`.
     * Literal non-ASCII characters are already text and are copied to the
     * output unchanged.
     *
     * @param {*} url - Percent-encoded input; coerced with String().
     * @param {string} encoding - TextDecoder encoding label.
     * @returns {string} Decoded text.
     */
    function decodeURL(url, encoding) {
      const decoder = new TextDecoder(encoding);
      const input = String(url);
      let result = '';
      let pending = [];

      const flush = () => {
        if (pending.length > 0) {
          result += decoder.decode(Uint8Array.from(pending));
          pending = [];
        }
      };

      for (let i = 0; i < input.length; i++) {
        const unit = input.charCodeAt(i);
        const hex = input.slice(i + 1, i + 3);
        if (unit === 0x25 && HEX_PAIR.test(hex)) {
          pending.push(Number.parseInt(hex, 16));
          i += 2;
        } else if (unit < 0x80) {
          pending.push(unit);
        } else {
          // Not representable as a single byte: emit as-is (both halves of a
          // surrogate pair take this path consecutively, so they stay paired).
          flush();
          result += input[i];
        }
      }
      flush();
      return result;
    }

    // Lower-cased encoding name of the current document; empty when there is
    // no `document` (the explicit big5/gbk codecs still work in that case).
    const docEncoding = typeof document === 'undefined' ? '' : document.characterSet.toLowerCase();

    /**
     * Returns the codec matching the document encoding.
     * @returns {{encode: Function, decode: Function, encodeBuffer: Function}}
     * @throws {TypeError} When the document encoding is neither big5 nor gbk.
     */
    function documentCodec() {
      if (docEncoding === 'big5' || docEncoding === 'gbk') {
        return encoder[docEncoding];
      }
      throw new TypeError(`URL Encoder: unsupported document encoding "${docEncoding}" (expected "big5" or "gbk")`);
    }

    const encoder = {
      big5: {
        encode: str => arr2url(str2big5(str)),
        decode: url => decodeURL(url, 'big5'),
        encodeBuffer: str => arr2buf(str2big5(str)),
      },
      gbk: {
        encode: str => arr2url(str2gbk(str)),
        decode: url => decodeURL(url, 'gbk'),
        encodeBuffer: str => arr2buf(str2gbk(str)),
      },
      get encode() { return documentCodec().encode; },
      get decode() { return documentCodec().decode; },
      get encodeBuffer() { return documentCodec().encodeBuffer; },
    };
    return encoder;
  })();

QingJ © 2025

镜像随时可能失效,请加Q群300939539或关注我们的公众号极客氢云获取最新地址