// ==UserScript==
// @name CleanURLs
// @namespace i2p.schimon.cleanurl
// @description Remove tracking parameters and redirect to original URL. This Userscript uses the URL Interface instead of RegEx.
// @homepageURL https://gf.qytechs.cn/en/scripts/465933-clean-url-improved
// @supportURL https://gf.qytechs.cn/en/scripts/465933-clean-url-improved/feedback
// @copyright 2023, Schimon Jehudah (http://schimon.i2p)
// @license MIT; https://opensource.org/licenses/MIT
// @grant none
// @run-at document-end
// @match *://*/*
// @version 23.06.19
// @icon 
// ==/UserScript==
/*
FIXME
fix comparison of safe and unsafe
safe and unsafe are the same
https://addons.palemoon.org/addon/bamboo-feed-reader/
https://addons.palemoon.org/?component=download&id=%7Bb2e69492-2358-071a-7056-24ad0c3defb1%7D&version=2.3.2
safe and unsafe are the same (encoding difference @ vs %40)
https://addons.palemoon.org/addon/inforss-reloaded/
https://addons.palemoon.org/?component=download&id=inforss-reloaded@addons.palemoon.org&version=2.3.1.0
https://addons.palemoon.org/?component=download&id=inforss-reloaded%40addons.palemoon.org&version=2.3.1.0
*/
/*
Simple version of this Userscript
let url = new URL(location.href);
if (url.hash || url.search) {
location.href = url.origin + url.pathname
};
*/
// Check whether HTML; otherwise, exit.
// NOTE The test is "the document has no doctype node", so an HTML page that
// was served without a <!DOCTYPE> declaration is skipped as well.
// NOTE(review): a top-level return is only valid when the script body is
// wrapped in a function — presumably done by the userscript manager; confirm.
//if (!document.contentType == 'text/html')
if (document.doctype == null) return;
//let point = [];
// Used as both the tag name and the id of the hover pop-up element
// (see createButton).
const namespace = 'i2p.schimon.cleanurl';
// List of url parameters
// Query parameters whose value may itself be a URL. The mouseover handler
// reads each of them from the hovered link and, when the value parses as a
// URL, offers it as an "Extracted URL" item in the pop-up.
const urls = [
'redirect',
'ref',
'source',
'src',
'url',
'utm_source'];
// List of reserved parameters
// Parameters that are kept when hrefDataHandler() builds the "Safe link":
// any parameter of the hovered link that is not listed here is left out of
// that link. The trailing comment of each entry notes where or for what the
// parameter was seen.
const whitelist = [
'art', // article
'action', // wiki
'bill', // law
'c', // cdn
'category', // id
'code', // code
'component', // addons.palemoon.org
'content', // id
'dark', // yorik.uncreated.net
'date', // date
'days', // wiki
'district', // house.mo.gov
'exp_time', // cdn
'expires', // cdn
'ezimgfmt', // cdn image processor
'feedformat', // wiki
'fid', // mybb
'file_host', // cdn
'filename', // filename
'for', // cdn
'format', // file type
'guid', // guid
'hash', // cdn
'hidebots', // wiki
'hl', // language
'id', // id
'ie', // character encoding
'ip', // ip address
'item_class', // greasyfork
'item_id', // greasyfork
'jid', // jabber id (xmpp)
'key', // cdn
'limit', // wiki
'lang', // language
'language', // language
'library', // oujs
'locale', // locale
'lr', // cdn
'lra', // cdn
'member', // xmb forum
'mobileaction', // wiki
'news_id', // post
'order', // bugzilla
'orderBy', // oujs
'orderDir', // oujs
'p', // search query / page number
'page', // mybb
'preferencesReturnUrl', // return url
'product', // bugzilla
'q', // search query
'query', // search query
'query_format', // bugzilla
//'referer', // signin <-- provided pathname contains login (log-in) or signin (sign-in)
'resolution', // bugzilla
'return_to', // signin
's', // search query
'search', // search query
'show_all_versions', // greasyfork
'sign', // cdn
'signature', // cdn
'sort', // greasyfork
'speed', // cdn
'start_time', // media playback
'state', // cdn
'__switch_theme', // theme (theanarchistlibrary.org)
'tag', // id
'tid', // mybb
'title', // send (share) links and wiki
'type', // file type
//'url', // url <-- not whitelisted nor blacklisted
'utf8', // encoding
'urlversion', // wiki
'v', // video
'version', // greasyfork
//'_x_tr_sl', // translate online service
//'_x_tr_tl=', // translate online service
//'_x_tr_hl=', // translate online service
//'_x_tr_pto', // translate online service
//'_x_tr_hist', // translate online service
'year' // year
];
// List of useless hash
// Fragment prefixes treated as tracking data. Matching is by prefix: a URL
// whose fragment starts with '#' followed by one of these entries has its
// fragment removed (modifyURL for the address bar, cleanLink for page links).
const hash = [
'back-url',
'intcid',
'niche-',
//'searchinput',
'src'];
// List of useless parameters
// Query parameters that are removed from the address bar (modifyURL), from
// the links of the page (cleanLink) and from the "Clean link" variant offered
// in the hover pop-up (hrefDataHandler). Names are matched exactly and
// case-sensitively, hence both 'Partner' and 'partner'.
// Commented-out entries are parameters that are deliberately NOT removed
// (some of them are noted to break specific sites).
// Each name must appear only once: the link scanner runs one pass per entry.
const blacklist = [
'ad',
'ad_medium',
'ad_name',
'ad_pvid',
'ad_sub',
//'ad_tags',
'advertising-id',
//'aem_p4p_detail',
'af',
'aff',
'aff_fcid',
'aff_fsk',
'aff_platform',
'aff_trace_key',
'affparams',
'afSmartRedirect',
'afftrack',
//'aid',
'algo_exp_id',
'algo_pvid',
'ar',
//'ascsubtag',
//'asc_contentid',
'asgtbndr',
'atc',
'ats',
'autostart',
//'b64e', // breaks yandex
'bizType',
//'block',
'bta',
'businessType',
'campaign',
'campaignId',
//'__cf_chl_rt_tk',
'cid',
'ck',
//'clickid',
//'client_id',
//'cm_ven',
'content-id',
'crid',
'cst',
'cts',
'curPageLogUid',
//'data', // breaks yandex
//'dchild',
//'dclid',
'deals-widget',
'dicbo',
//'dt',
'edd',
'edm_click_module',
//'ei',
//'embed',
'_encoding',
//'etext', // breaks yandex
'eventSource',
'fbclid',
'feature',
'forced_click',
//'fr',
'frs',
//'from', // breaks yandex
'_ga',
'ga_order',
'ga_search_query',
'ga_search_type',
'ga_view_type',
'gatewayAdapt',
//'gclid',
//'gclsrc',
'gh_jid',
'gps-id',
//'gs_lcp',
'gt',
'guccounter',
'hdtime',
'ICID',
'ico',
'ig_rid',
//'idzone',
//'iflsig',
'irclickid',
//'irgwc',
//'irpid',
'itid',
//'itok',
//'katds_labels',
//'keywords',
'keyno',
'l10n',
'linkCode',
'mc',
'mid',
'mp',
'nats',
'nci',
'obOrigUrl',
'offer_id',
'optout',
'oq',
'organic_search_click',
'pa',
'Partner',
'partner',
'partner_id',
'pcampaignid',
'pd_rd_i',
'pd_rd_r',
'pd_rd_w',
'pd_rd_wg',
'pdp_npi',
'pf_rd_i',
'pf_rd_m',
'pf_rd_p',
'pf_rd_r',
'pf_rd_s',
'pf_rd_t',
'pg',
'PHPSESSID',
'pk_campaign',
'pdp_ext_f',
'pkey',
'platform',
'plkey',
'pqr',
'pr',
'pro',
'prod',
'promo',
'promocode',
'promoid',
'psc',
'psprogram',
'pvid',
'qid',
//'r',
'realDomain',
'recruiter_id',
'redirect',
'ref',
'ref_',
'ref_src',
'refcode',
'referrer',
'refinements',
'reftag',
'rowan_id1',
'rowan_msg_id',
//'sCh',
'sclient',
'scm',
'scm_id',
'scm-url',
//'sd',
'sh',
'shareId',
'showVariations',
'si',
'sid',
'___SID',
//'site_id',
'sk',
'smid',
'social_params',
'source',
'sourceId',
'sp_csd',
'spLa',
'spm',
'spreadType',
//'sprefix',
'sr',
'src',
'_src',
'src_cmp',
'src_player',
'src_src',
'srcSns',
'su',
'_t',
//'tag',
'tcampaign',
'td',
'terminal_id',
//'text',
'th', // Sometimes restored after page load
//'title',
'tracelog',
'traffic_id',
'traffic_type',
'tt',
'uact',
'ug_edm_item_id',
//'utm1',
//'utm2',
//'utm3',
//'utm4',
//'utm5',
//'utm6',
//'utm7',
//'utm8',
//'utm9',
'utm_campaign',
'utm_content',
'utm_medium',
'utm_source',
'utm_term',
'uuid',
//'utype',
//'ve',
//'ved',
//'zone'
];
// URL Indexers
// Parameter names grouped under the heading above.
// NOTE(review): no code visible in this file reads paraIDX — presumably a
// per-category grouping kept for future use; confirm before removing.
const paraIDX = [
'algo_exp_id',
'algo_pvid',
'b64e',
'cst',
'cts',
'data',
'ei',
//'etext',
'from',
'iflsig',
'gbv',
'gs_lcp',
'hdtime',
'keyno',
'l10n',
'mc',
'oq',
//'q',
'sei',
'sclient',
'sign',
'source',
'state',
//'text',
'uact',
'uuid',
'ved'];
// Market Places
// Parameter names grouped under the heading above.
// NOTE(review): no code visible in this file reads paraMKT — presumably a
// per-category grouping kept for future use; confirm before removing.
const paraMKT = [
'___SID',
'_t',
'ad_pvid',
'af',
'aff_fsk',
'aff_platform',
'aff_trace_key',
'afSmartRedirect',
'bizType',
'businessType',
'ck',
'content-id',
'crid',
'curPageLogUid',
'deals-widget',
'edm_click_module',
'gatewayAdapt',
'gps-id',
'keywords',
'pd_rd_i',
'pd_rd_r',
'pd_rd_w',
'pd_rd_wg',
'pdp_npi',
'pf_rd_i',
'pf_rd_m',
'pf_rd_p',
'pf_rd_r',
'pf_rd_s',
'pf_rd_t',
'platform',
'pdp_ext_f',
'ref_',
'refinements',
'rowan_id1',
'rowan_msg_id',
'scm',
'scm_id',
'scm-url',
'shareId',
//'showVariations',
'sk',
'smid',
'social_params',
'spLa',
'spm',
'spreadType',
'sr',
'srcSns',
'terminal_id',
'th', // Sometimes restored after page load
'tracelog',
'tt',
'ug_edm_item_id'];
// IL
// NOTE(review): the meaning of "IL" is not evident from the code —
// presumably a region or site group; confirm.
// NOTE(review): no code visible in this file reads paraIL.
const paraIL = [
'dicbo',
'obOrigUrl'];
// General
// Parameter names grouped under the heading above (affiliate, promotion,
// referrer and utm_* names).
// NOTE(review): no code visible in this file reads paraWWW — presumably a
// per-category grouping kept for future use; confirm before removing.
const paraWWW = [
'aff',
'promo',
'promoid',
'ref',
'utm_campaign',
'utm_content',
'utm_medium',
'utm_source',
'utm_term'];
// For URL of the Address bar
// Check and modify page address
// TODO Add bar and ask to clean address bar
/**
 * Clean the URL shown in the address bar.
 *
 * Removes every blacklisted query parameter and, when the fragment starts
 * with one of the prefixes listed in `hash`, the fragment as well. The page
 * is not reloaded; only the current history entry is rewritten.
 */
(function modifyURL() {
  const url = new URL(location.href);
  let isDirty = false;
  // has() rather than a truthy get(): a tracking parameter that carries an
  // empty value ("?fbclid=") has to be removed as well.
  for (const name of blacklist) {
    if (url.searchParams.has(name)) {
      url.searchParams.delete(name);
      isDirty = true;
    }
  }
  // Drop the fragment only when it is a known tracking fragment, so that an
  // ordinary anchor ("#section") survives the removal of a query parameter.
  if (hash.some((prefix) => url.hash.startsWith('#' + prefix))) {
    url.hash = '';
    isDirty = true;
  }
  if (isDirty) {
    // replaceState instead of pushState: the uncleaned address must not stay
    // behind as an extra history entry that the Back button returns to.
    window.history.replaceState(window.history.state, '', url.href);
  }
})();
/**
 * Remember the original address of every link that carries a query string.
 *
 * The address is stored in the 'href-data' attribute of the link, which the
 * mouseover handler later reads to build the pop-up. Only links whose
 * protocol is in the allow-list below are marked.
 */
(function scanAllURLs() {
  // NOTE Consider BitTorrent Magnet links
  // removing trackers would need a warning about
  // private torrents, if torrent is not public (dht-enabled)
  const allowedProtocols = [
    'finger:', 'freenet:', 'gemini:', 'gopher:',
    'wap:', 'ipfs:', 'https:', 'ftps:', 'http:', 'ftp:'];
  for (const link of document.links) {
    let url;
    try {
      url = new URL(link.href);
    } catch {
      // The href of this link does not parse as a URL; skip it so that a
      // single malformed link cannot abort the scan (and the whole script).
      continue;
    }
    if (url.search && allowedProtocols.includes(url.protocol)) {
      //if (url.search || url.hash) {
      link.setAttribute('href-data', link.href);
    }
  }
})();
/**
 * Pass every link of the document through cleanLink(): once for each
 * fragment prefix in `hash`, then once for each parameter in `blacklist`.
 */
(function scanBadURLs() {
  // TODO callback, Mutation Observer, and Event Listener
  for (const anchor of document.links) {
    for (const prefix of hash) {
      cleanLink(anchor, prefix, 'hash');
    }
    for (const parameter of blacklist) {
      cleanLink(anchor, parameter, 'para');
    }
  }
})();
// TODO Add an Event Listener
/**
 * Remove one tracking item from a hyperlink, in place.
 *
 * The address is rebuilt from the parsed URL itself (not from
 * origin + pathname + search), so the fragment, credentials and
 * non-hierarchical schemes such as "mailto:" are preserved.
 *
 * @param {{href: string}} link - Link whose `href` is read and, when the
 *   target is present, rewritten.
 * @param {string} target - Fragment prefix (type 'hash') or query parameter
 *   name (type 'para').
 * @param {string} type - 'hash' or 'para'; any other value leaves the link
 *   untouched.
 */
function cleanLink(link, target, type) {
  let url;
  try {
    url = new URL(link.href);
  } catch {
    // The href does not parse as a URL: nothing to clean, leave it as is.
    return;
  }
  switch (type) {
    case 'hash':
      if (url.hash.startsWith('#' + target)) {
        url.hash = '';
        link.href = url.href;
      }
      break;
    case 'para':
      // has() rather than a truthy get(): an empty-valued tracking
      // parameter ("?ref=") is removed too.
      if (url.searchParams.has(target)) {
        url.searchParams.delete(target);
        link.href = url.href;
      }
      break;
  }
}
// TODO Hunt (for any) links within attributes using getAttributeNames()[i]
// Event Listener
// TODO Scan 'e.target.childNodes' until 'href-data' (link) is found
/**
 * Parse a query-parameter value that may itself be a URL.
 *
 * @param {string} value - Raw parameter value.
 * @returns {URL|null} The parsed URL, or null when the value is not a URL.
 */
function parseEmbeddedURL(value) {
  try {
    return new URL(value);
  } catch {
    // Not an absolute URL; try the scheme-less guess below.
  }
  if (value.includes('.')) { // NOTE It is a guess
    try {
      return new URL('http:' + value);
    } catch {
      // Not a URL after all.
    }
  }
  return null;
}

/**
 * Normalize an address so that two spellings of the same link compare equal.
 *
 * Sorting the parameters also re-serializes the query string, which removes
 * differences in parameter order and in percent-encoding ("@" vs "%40").
 *
 * @param {string} href - Address to normalize.
 * @returns {string} Normalized address, or the input as a string when it
 *   does not parse as a URL.
 */
function canonicalHref(href) {
  try {
    const url = new URL(href);
    url.searchParams.sort();
    return url.href;
  } catch {
    return String(href);
  }
}

/**
 * Show a pop-up with alternative versions of a hovered link.
 *
 * Only elements carrying the 'href-data' attribute are handled. The pop-up
 * offers, without duplicates: the base link, the link with whitelisted
 * parameters, the link without blacklisted parameters and the original link,
 * preceded by any URL found inside the parameters listed in `urls`. When
 * there is nothing worth offering, the 'href-data' attribute is removed so
 * that the link is not examined again.
 */
document.body.addEventListener("mouseover", function(e) { // mouseover works with keyboard too
  const target = e.target;
  const rawHref = target && target.getAttribute
    ? target.getAttribute('href-data')
    : null;
  if (!rawHref) return;

  // The pop-up for this very link is already displayed.
  const shownOriginal = document.getElementById('url-original');
  if (shownOriginal && shownOriginal.href === rawHref) return;

  let hrefData;
  try {
    hrefData = new URL(rawHref);
  } catch {
    // The attribute does not hold a parsable URL; stop examining this link.
    target.removeAttribute('href-data');
    return;
  }

  const previous = document.getElementById(namespace);
  if (previous) {
    previous.remove();
  }

  const selectionItem = createButton(e.pageX, e.pageY, rawHref);
  const baseButton = purgeURL(hrefData);
  selectionItem.append(baseButton);

  // Offer each variant only once. Variants are compared in normalized form;
  // comparing raw addresses showed "safe" and "unsafe" links that differed
  // only in encoding or parameter order. Because the original link comes
  // last, it is left out whenever it equals the purged (or any other) link.
  const offered = new Set([canonicalHref(baseButton.href)]);
  for (const type of ['whitelist', 'blacklist', 'original']) {
    const button = purgeURL(hrefData, type);
    const key = canonicalHref(button.href);
    if (!offered.has(key)) {
      offered.add(key);
      selectionItem.append(button);
    }
  }

  // Check for URLs
  for (const name of urls) {
    const value = hrefData.searchParams.get(name);
    if (!value) continue;
    const embedded = parseEmbeddedURL(value);
    if (embedded) {
      selectionItem.prepend(extractURL(embedded));
    }
  }

  // do not add element, if url has only whitelisted parameters and no potential url
  // add element, only if a potential url or non-whitelisted parameter was found
  const isWorthShowing = ['url-extracted', 'url-original', 'url-purged']
    .some((id) => selectionItem.querySelector('#' + id));
  if (isWorthShowing) {
    document.body.append(selectionItem);
    return;
  }
  // Nothing to offer for this link; do not examine it again.
  target.removeAttribute('href-data');
});
/**
 * Build the floating container of the hover pop-up.
 *
 * The element uses `namespace` as both its tag name and its id. It is placed
 * 45px to the right of and 65px above the given point, is dimmed until the
 * pointer enters it, and removes itself when the pointer leaves it.
 *
 * @param {number} x - Horizontal page coordinate of the pointer.
 * @param {number} y - Vertical page coordinate of the pointer.
 * @param {string} url - Not used by this function.
 * @returns {HTMLElement} The (not yet attached) container element.
 */
function createButton(x, y, url) {
  const popup = document.createElement(namespace);
  popup.id = namespace;
  // 'all' comes first so that the reset does not wipe the values after it.
  Object.assign(popup.style, {
    all: 'unset',
    // position
    position: 'absolute',
    left: x+45 + 'px',
    top: y-65 + 'px',
    // appearance
    fontFamily: 'none', // emoji
    background: '#333',
    borderRadius: '5%',
    padding: '3px',
    zIndex: 10000,
    filter: 'brightness(0.7)',
    // center character
    justifyContent: 'center',
    alignItems: 'center',
    display: 'flex',
    // disable selection marks
    userSelect: 'none',
    cursor: 'default',
  });
  // Full brightness while hovered.
  popup.onmouseover = () => {
    popup.style.filter = 'unset';
  };
  // TODO Wait a few seconds
  popup.onmouseleave = () => { // onmouseout
    popup.remove();
  };
  return popup;
}
/**
 * Build the pop-up item that links to a URL found inside a query parameter.
 *
 * @param {URL|string} url - Address the item points to.
 * @returns {HTMLAnchorElement} Anchor with id 'url-extracted'.
 */
function extractURL(url) {
  const anchor = document.createElement('a');
  Object.assign(anchor, {
    textContent: '🔗', // 🧧 🏷️ 🔖
    title: 'Extracted URL',
    id: 'url-extracted',
  });
  // 'all' comes first so that the reset does not wipe the values after it.
  Object.assign(anchor.style, {
    all: 'unset',
    outline: 'none',
    height: '15px',
    width: '15px',
    padding: '3px',
    margin: '3px',
    lineHeight: 'normal', // initial
  });
  anchor.href = url;
  return anchor;
}
// TODO Use icons (with shapes) for cases when color is not optimal
/**
 * Build one round, colored pop-up item that links to a variant of a URL.
 *
 * @param {URL} url - Parsed address of the hovered link.
 * @param {string} [listType] - Variant to build:
 *   'blacklist' – link without blacklisted parameters (id 'url-purged');
 *   'original'  – unchanged link (id 'url-original');
 *   'whitelist' – link with whitelisted parameters only (id 'url-known');
 *   anything else – link without query and fragment (id 'url-base').
 * @returns {HTMLAnchorElement} The (not yet attached) item.
 */
function purgeURL(url, listType) {
  // The color is local to each item: the hover handler of the 'original'
  // item closes over it and must not see the color of a later item.
  let itemColor, itemTitle, itemId, resUrl;
  const item = document.createElement('a');
  item.style.all = 'unset';
  switch (listType) {
    case 'blacklist':
      itemColor = 'yellow';
      //itemTextContent = '🟡';
      itemTitle = 'Clean link'; // Purged URL
      itemId = 'url-purged';
      resUrl = hrefDataHandler(url, blacklist);
      break;
    case 'original': // TODO dbclick (double-click)
      itemColor = 'orangered';
      //itemTextContent = '🔴';
      itemTitle = 'Unsafe link'; // Original URL
      itemId = 'url-original';
      resUrl = url;
      item.style.cursor = `not-allowed`; // no-drop
      item.onmouseenter = () => {
        item.style.filter = `drop-shadow(2px 4px 6px ${itemColor})`;
      };
      item.onmouseout = () => {
        item.style.filter = 'unset';
      };
      break;
    case 'whitelist':
      itemColor = 'lawngreen';
      //itemTextContent = '🟢';
      itemTitle = 'Safe link'; // Link with whitelisted parameters
      itemId = 'url-known';
      resUrl = hrefDataHandler(url, whitelist);
      break;
    default: {
      itemColor = 'antiquewhite';
      //itemTextContent = '⚪';
      itemTitle = 'Base link'; // Link without parameters
      itemId = 'url-base';
      // Strip a copy of the URL instead of joining origin and pathname: the
      // origin of a non-special scheme (gemini:, ipfs:, ...) is the string
      // "null". Credentials are cleared to match origin + pathname.
      const baseUrl = new URL(url.href);
      baseUrl.username = '';
      baseUrl.password = '';
      baseUrl.search = '';
      baseUrl.hash = '';
      resUrl = baseUrl.href;
      break;
    }
  }
  item.id = itemId;
  item.title = itemTitle;
  item.style.background = itemColor;
  //item.textContent = itemTextContent;
  item.style.borderRadius = '50%';
  item.style.outline = 'none';
  item.style.height = '15px';
  item.style.width = '15px';
  item.style.padding = '3px';
  item.style.margin = '3px';
  item.href = resUrl;
  return item;
}
/**
 * Build a filtered copy of a URL; the URL passed in is not modified.
 *
 * The parameters of the copy are sorted by name first, so that results for
 * the same set of parameters are identical and can be compared.
 *
 * @param {URL} url - Parsed address to filter.
 * @param {string[]} listType - The `whitelist` array or the `blacklist`
 *   array itself (compared by identity).
 *   `whitelist`: keep only whitelisted parameters — including repeated and
 *   empty-valued ones — and drop credentials and fragment.
 *   `blacklist`: remove every blacklisted parameter.
 *   Anything else: only sort the parameters.
 * @returns {URL} The filtered copy.
 */
function hrefDataHandler(url, listType) {
  const sorted = new URL(url.href);
  sorted.searchParams.sort();
  switch (listType) {
    case whitelist: {
      const allowed = new Set(whitelist);
      // Strip a copy instead of joining origin and pathname: the origin of
      // a non-special scheme (gemini:, ipfs:, ...) is the string "null",
      // which does not parse. Credentials are cleared to match
      // origin + pathname.
      const safe = new URL(sorted.href);
      safe.username = '';
      safe.password = '';
      safe.search = '';
      safe.hash = '';
      // Walk the (sorted) parameters of the link rather than the whitelist,
      // so that order, repeated names and empty values are preserved.
      for (const [name, value] of sorted.searchParams) {
        if (allowed.has(name)) {
          safe.searchParams.append(name, value);
        }
      }
      return safe;
    }
    case blacklist:
      // delete() ignores absent names and also removes empty-valued ones.
      for (const name of blacklist) {
        sorted.searchParams.delete(name);
      }
      return sorted;
    default:
      return sorted;
  }
}