// ==UserScript==
// @name eBay hide duplicate results updated
// @description easy toggle to show or hide bunches of repeated results in eBay searches
// @namespace [email protected]
// @include http://*search.ebay.co.uk/*
// @include http://*search.ebay.com/*
// @include http://*shop.ebay.co.uk/*
// @include http://*shop.ebay.com/*
// @include http://www.ebay.com/sch/*
// @version 1.1
// ==/UserScript==
/*
Changes:
1.1 Implemented a slightly more sophisticated matching algorithm:
convert the descriptions to lower case; if one description is
contained in the other then they match. Otherwise split the
descriptions into words and discard repeated words; if the
resulting sets of words differ by no more than 20% of the words
then they match.
Description:
This is a version of znerp's eBay hide duplicate results script from
userscripts.org that works on http://www.ebay.com/sch/. I don't know
if it still works on the other urls that are included or even if they
still exist.
It has been extended slightly to allow for almost identical
descriptions to still count as a match and to report the number of
duplicates next to the plus/minus icons.
There was no license specified on the original code so I assume that
znerp meant to dedicate his version to the public domain. This
version is therefore also dedicated to the public domain.
[email protected]
TODO:
- Make duplicate detection better by tokenizing the description and
removing common tokens then run an edit distance on what remains.
*/
// Prefix put in front of this script's console messages so they can
// be picked out of the browser console.
var TAG = 'ebhdru: ';
console.log(TAG + 'start');
try {
// Image source for the "plus" (collapsed) toggle icon; assigned to
// img.src by the handlers below.
// NOTE(review): this looks like the URL-encoded tail of a base64 PNG
// with no leading "data:image/png;base64,..." part -- confirm the value
// has not been truncated.
var plus = "%2FwD%2FA" +
"P83WBt9AAAAZElEQVR4nL2SUQ6AMAhDH2Q38v430DPhh4YsyHCLif0jK21hiJkBIgdvMNsAnWQ7TWCfYTtacKzl" +
"70jp8yhn3lBguaH1RYjhZT%2FeNwdXurTTvf08tKP4xOXT0Poins5aBwhs4ATOOiHsGI5R2gAAAABJRU5ErkJgg" +
"g%3D%3D"
// Image source for the "minus" (expanded) toggle icon.
// NOTE(review): same apparent truncation as `plus` -- confirm.
var minus = "%2FwD%2F" +
"AP83WBt9AAAACXBIWXMAAA7EAAAOxAGVKw4bAAAARklEQVR4nGP8%2F%2F8%2FAwMDIyMjAyEAUQmjiAMkKYYCh" +
"Evw64a7mYlUG2ivAQHw%2BAFZimQbWIi0hHwb6BlKxAJSEx8T8XogygC6eSP5CxYWpwAAAABJRU5ErkJggg%3D%" +
"3D"
var thisResult, nextResult
// Dispatch on the host name: a host containing "search" or "shop" gets
// its dedicated handler (checked in that order); any other host is
// handled by handleOtherPages.
if (/search/.test(location.host)) {
    console.log(TAG + 'found search');
    handlePagesNamedSearch();
} else if (/shop/.test(location.host)) {
    console.log(TAG + 'found shop');
    handlePagesNamedShop();
} else {
    console.log(TAG + 'found other');
    handleOtherPages();
}
// Handler for hosts whose name contains "search".
//
// Walks the result title links in document order. Whenever a title is
// identical to the one before it, the later result's table row is
// hidden, tagged znerp="hidden" and its heading gets " <<duplicate>>"
// appended. The first (visible) row of each run gets a plus/minus icon
// that toggles the rows hidden behind it. Finally every result row that
// contains a <script> element is removed from the table.
function handlePagesNamedSearch(){
    console.log(TAG + 'handlePagesNamedSearch');

    // title link -> h3 -> td -> tr
    function rowOf(link) {
        return link.parentNode.parentNode.parentNode;
    }

    // Move every consecutive following row that is in state `from` to
    // state `to`, applying `display`. Stops at the first row in another
    // state or at the end of the table; element siblings are used so
    // whitespace text nodes between rows cannot break the walk.
    function switchFollowingRows(row, from, to, display) {
        var next = row.nextElementSibling;
        while (next && next.getAttribute("znerp") === from) {
            next.style.display = display;
            next.setAttribute("znerp", to);
            next = next.nextElementSibling;
        }
    }

    // Put a toggle icon at the start of the cell holding `link`.
    function addToggle(link) {
        var cell = link.parentNode.parentNode;
        var row = cell.parentNode;
        // The expanded state is kept here instead of being derived from
        // img.src, which the browser may hand back in normalised form.
        var expanded = false;
        var icon = document.createElement("img");
        icon.src = plus;
        icon.setAttribute("style", "padding: 3px; cursor: pointer;");
        cell.insertBefore(icon, cell.firstChild);
        icon.addEventListener(
            'click',
            function() {
                expanded = !expanded;
                icon.src = expanded ? minus : plus;
                if (expanded) {
                    switchFollowingRows(row, "hidden", "showing", "");
                } else {
                    switchFollowingRows(row, "showing", "hidden", "none");
                }
            },
            true);
    }

    // ORDERED snapshot: neighbours are compared, so the links must be
    // in document order.
    var allResults = document.evaluate('//table[@class="ebItemlist single"]/tbody/tr[contains(@class,"single")]/td[@class="ebcTtl"]/h3/a',
        document,
        null,
        XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,
        null);
    for (var i = 0; i < allResults.snapshotLength - 1; i++) {
        var current = allResults.snapshotItem(i);
        var following = allResults.snapshotItem(i + 1);
        if (current.textContent !== following.textContent) {
            continue;
        }
        var followingRow = rowOf(following);
        followingRow.style.display = "none";
        followingRow.setAttribute("znerp", "hidden");
        following.parentNode.appendChild(document.createTextNode(" <<duplicate>>"));
        // Only the first row of a run is still visible; it carries the icon.
        if (rowOf(current).style.display != "none") {
            addToggle(current);
        }
    }

    var scripts = document.evaluate('//table[@class="ebItemlist single"]/tbody/tr/td/script',
        document,
        null,
        XPathResult.UNORDERED_NODE_SNAPSHOT_TYPE,
        null);
    for (var j = scripts.snapshotLength - 1; j >= 0; j--) {
        var scriptRow = scripts.snapshotItem(j).parentNode.parentNode;
        // A row holding two scripts is already detached the second time round.
        if (scriptRow.parentNode) {
            scriptRow.parentNode.removeChild(scriptRow);
        }
    }
}
// Handler for hosts whose name contains "shop".
//
// Walks the result title links in document order. Whenever a title is
// identical to the one before it, the later result's table AND the node
// directly after that table are hidden and tagged znerp="hidden", and
// " <<duplicate>>" is appended next to the title. The first (visible)
// result of each run gets a plus/minus icon that toggles the results
// hidden behind it.
function handlePagesNamedShop(){
    console.log(TAG + 'handlePagesNamedShop');

    // title link -> div.ttl -> td -> tr -> tbody -> table
    function tableOf(link) {
        return link.parentNode.parentNode.parentNode.parentNode.parentNode;
    }

    // Apply state/display to a result table and to the node directly
    // after it (the pair is always hidden and shown together).
    function setPair(table, state, display) {
        table.style.display = display;
        table.setAttribute("znerp", state);
        var companion = table.nextSibling;
        if (companion && companion.nodeType === 1) {
            companion.style.display = display;
            companion.setAttribute("znerp", state);
        }
    }

    // The next result table is reached by stepping three siblings on.
    // NOTE(review): the stride of three is taken unchanged from the
    // original code and depends on the page markup -- confirm.
    function nextResultTable(table) {
        var node = table;
        for (var hop = 0; hop < 3 && node; hop++) {
            node = node.nextSibling;
        }
        return node;
    }

    // Move every consecutive following result that is in state `from`
    // to state `to`; stops at the first other node or at the end of the
    // sibling list instead of throwing.
    function switchFollowing(table, from, to, display) {
        var next = nextResultTable(table);
        while (next && next.nodeType === 1 && next.getAttribute("znerp") === from) {
            setPair(next, to, display);
            next = nextResultTable(next);
        }
    }

    // Put a toggle icon at the start of the cell holding `link`.
    function addToggle(link) {
        var cell = link.parentNode.parentNode;
        var table = tableOf(link);
        // The expanded state is kept here instead of being derived from
        // img.src, which the browser may hand back in normalised form.
        var expanded = false;
        var icon = document.createElement("img");
        icon.src = plus;
        icon.setAttribute("style", "padding: 3px; cursor: pointer;");
        cell.insertBefore(icon, cell.firstChild);
        icon.addEventListener(
            'click',
            function() {
                expanded = !expanded;
                icon.src = expanded ? minus : plus;
                if (expanded) {
                    switchFollowing(table, "hidden", "showing", "");
                } else {
                    switchFollowing(table, "showing", "hidden", "none");
                }
            },
            true);
    }

    // ORDERED snapshot: neighbours are compared, so the links must be
    // in document order.
    var allResults = document.evaluate('//div[contains(@class,"lview")]/table[@class="nol"]/tbody/tr/td[@class="details"]/div[@class="ttl"]/a',
        document,
        null,
        XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,
        null);
    for (var i = 0; i < allResults.snapshotLength - 1; i++) {
        var current = allResults.snapshotItem(i);
        var following = allResults.snapshotItem(i + 1);
        if (current.textContent !== following.textContent) {
            continue;
        }
        setPair(tableOf(following), "hidden", "none");
        following.parentNode.appendChild(document.createTextNode(" <<duplicate>>"));
        // Only the first result of a run is still visible; it carries the icon.
        if (tableOf(current).style.display != "none") {
            addToggle(current);
        }
    }
}
// Compare each item description with the one that follows; if they
// are very similar (see tokenMatch) hide the second one. The first item
// of each run of duplicates stays visible and gets a plus icon with a
// click handler that toggles the hidden items, plus an "<n> duplicates"
// label written in front of the icon once the run is complete.
function handleOtherPages(){
    console.log(TAG + 'handleOtherPages');
    // ORDERED snapshot: neighbours are compared, so the links must be
    // in document order.
    var allResults = document.evaluate('//a[contains(@class,"vip")]',
        document,
        null,
        XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,
        null);
    var duplicateCount = 0;
    // Icon of the run currently being counted.
    var groupIcon = null;

    // Write the pending count in front of the current run's icon and
    // start counting afresh.
    function flushDuplicateCount() {
        if (duplicateCount !== 0 && groupIcon && groupIcon.parentNode) {
            var label = document.createTextNode(duplicateCount + ' duplicates');
            groupIcon.parentNode.insertBefore(label, groupIcon);
        }
        duplicateCount = 0;
    }

    for (var i = 0; i < allResults.snapshotLength - 1; i++) {
        var current = allResults.snapshotItem(i);
        var following = allResults.snapshotItem(i + 1);
        console.log(TAG + 'handleOtherPages: ' + i);
        if (!tokenMatch(current.textContent, following.textContent)) {
            console.log(TAG + ' not matched');
            flushDuplicateCount();
            continue;
        }
        console.log(TAG + ' matched');
        // link -> heading -> list item
        var currentItem = current.parentNode.parentNode;
        var followingHeading = following.parentNode;
        var followingItem = followingHeading.parentNode;
        followingItem.style.display = "none";
        followingItem.setAttribute("znerp", "hidden");
        duplicateCount++;
        followingHeading.appendChild(document.createTextNode(" <<duplicate " + duplicateCount +">>"));
        // Only the first item of a run is still visible; it carries the icon.
        if (currentItem.style.display != "none") {
            groupIcon = addIcon(currentItem);
        }
    }
    // A run that reaches the last result has no following non-match to
    // trigger the label, so write it here.
    flushDuplicateCount();

    var scripts = document.evaluate('//table[@class="ebItemlist single"]/tbody/tr/td/script',
        document,
        null,
        XPathResult.UNORDERED_NODE_SNAPSHOT_TYPE,
        null);
    for (var j = scripts.snapshotLength - 1; j >= 0; j--) {
        var scriptRow = scripts.snapshotItem(j).parentNode.parentNode;
        // A row holding two scripts is already detached the second time round.
        if (scriptRow.parentNode) {
            scriptRow.parentNode.removeChild(scriptRow);
        }
    }
}
// A slightly fuzzy match. At the moment it simply lops off the first
// two and last two characters of each string and checks whether what
// remains is identical. The leading and trailing characters are ignored
// because vendors often add spurious characters there to distinguish
// otherwise identical descriptions. Should be replaced with a more
// sophisticated measure which also includes the price.
function match(s1, s2) {
    console.log(TAG + 'match ');
    console.log(TAG + 's1: ' + s1);
    console.log(TAG + 's2: ' + s2);
    var cores = [s1, s2].map(function (text) {
        return text.substring(2, text.length - 2);
    });
    console.log(TAG + 'a1: ' + cores[0]);
    console.log(TAG + 'a2: ' + cores[1]);
    return cores[0] === cores[1];
}
// Decide whether two descriptions are similar enough to be duplicates.
//
// Both descriptions are lower-cased and trimmed, then they match when:
//   1. they are identical, or
//   2. one is wholly contained in the other, or
//   3. one's set of distinct tokens is a subset of the other's, or
//   4. each set has at most maxDiffFraction * (size of the larger set)
//      tokens that are missing from the other set.
//
// s1, s2          - the descriptions to compare (strings).
// maxDiffFraction - optional; fraction of the larger token set that may
//                   differ. Defaults to 0.2, e.g. ten distinct tokens
//                   allow two tokens difference.
// Returns true for a match, false otherwise.
function tokenMatch(s1, s2, maxDiffFraction = 0.2) {
    console.log(TAG + 'tokenMatch: <' + s1 +'> <' + s2 + '>');
    var lc1 = s1.toLowerCase().trim();
    var lc2 = s2.toLowerCase().trim();
    if (lc1 === lc2) {
        console.log(TAG + 'tokenMatch matched lc');
        return true;
    }
    // An empty string is "included" in every string, so a blank
    // description would otherwise match whatever sits next to it.
    if (lc1 === '' || lc2 === '') {
        return false;
    }
    // If one description is wholly contained in another
    if (lc1.includes(lc2) || lc2.includes(lc1)) {
        console.log(TAG + 'tokenMatch matched includes');
        return true;
    }
    // Get Sets of tokens and what is unique to each side
    var t1 = tokenise(lc1);
    var t2 = tokenise(lc2);
    var onlyIn1 = setDifference(t1, t2);
    var onlyIn2 = setDifference(t2, t1);
    if (onlyIn1.size === 0 || onlyIn2.size === 0) {
        // One set of tokens is a subset of the other or they are
        // identical.
        return true;
    }
    var maxAllowedDiff = maxDiffFraction * Math.max(t1.size, t2.size);
    console.log(TAG + 'tokenMatch diff: ' + onlyIn1.size + ' ' + onlyIn2.size + ' allowed ' + maxAllowedDiff);
    // Both sides must stay within the allowed number of differing tokens
    if (onlyIn1.size <= maxAllowedDiff && onlyIn2.size <= maxAllowedDiff) {
        console.log(TAG + 'tokenMatch matched set diff');
        return true;
    }
    // Doesn't match.
    return false;
}
// Split a description into its set of distinct tokens: trim it, then
// split on runs of whitespace.
//
// s - the description (string).
// Returns a Set of the tokens; the Set is empty when s is empty or
// contains only whitespace.
var whiteSpace = /\s+/g;
function tokenise(s){
    console.log(TAG + 'tokenise ' + s);
    var trimmed = s.trim();
    // Splitting '' would yield [''], i.e. one bogus empty token.
    var tokens = trimmed === '' ? [] : trimmed.split(whiteSpace);
    console.log(TAG + 'tokenise tokens: ' + tokens);
    var tokenSet = new Set(tokens);
    console.log(TAG + 'tokenise set ' + tokenSet.size);
    return tokenSet;
}
// Fallback adapted from
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Set.
// Returns a new Set holding the elements of setB that are also in this
// Set. It is installed only when the engine does not already provide
// Set.prototype.intersection, so a built-in implementation is never
// overwritten.
if (typeof Set.prototype.intersection !== 'function') {
    Set.prototype.intersection = function(setB) {
        var intersection = new Set();
        for (var elem of setB) {
            if (this.has(elem)) {
                intersection.add(elem);
            }
        }
        return intersection;
    };
}
// Return a new Set holding the elements of setA that do not occur in
// setB. Neither argument is modified.
function setDifference(setA, setB) {
    console.log(TAG + ' difference');
    var remaining = new Set(setA);
    [...setB].forEach(function (member) {
        remaining.delete(member);
    });
    console.log(TAG + ' difference');
    return remaining;
}
// Add the plus icon to the beginning of the specified element and
// attach a click event handler to reveal or hide the duplicates that
// follow it.
//
// thisListItem - the element that receives the icon as its first child.
// Returns the newly created <img> element.
function addIcon(thisListItem) {
    var icon = document.createElement("img");
    // The expanded state is tracked here rather than by comparing
    // img.src with `plus`: the browser may hand src back in normalised
    // (absolute-URL) form, in which case the comparison never succeeds.
    var expanded = false;
    icon.src = plus;
    icon.setAttribute("style", "padding: 3px; cursor: pointer;");
    thisListItem.insertBefore(icon, thisListItem.firstChild);
    icon.addEventListener(
        'click',
        function() {
            var listItem = this.parentNode;
            expanded = !expanded;
            if (expanded) {
                this.src = minus;
                // hideItems() is the function that reveals the
                // duplicates; the helper names are the wrong way round.
                hideItems(listItem);
            } else {
                this.src = plus;
                showItems(listItem);
            }
        },
        true);
    return icon;
}
// Reveal the run of hidden duplicates that follows listItem.
// NOTE: despite its name this function SHOWS items: every consecutive
// following element tagged znerp="hidden" gets its display reset and is
// re-tagged znerp="showing".
//
// Element siblings are walked and the end of the list is checked, so
// neither a text node between items nor a run that ends the list causes
// a TypeError.
function hideItems(listItem) {
    var next = listItem.nextElementSibling;
    while (next && next.getAttribute("znerp") === "hidden") {
        next.style.display = "";
        next.setAttribute("znerp", "showing");
        next = next.nextElementSibling;
    }
}
// Hide again the run of revealed duplicates that follows listItem.
// NOTE: despite its name this function HIDES items: every consecutive
// following element tagged znerp="showing" gets display "none" and is
// re-tagged znerp="hidden".
//
// Element siblings are walked and the end of the list is checked, so
// neither a text node between items nor a run that ends the list causes
// a TypeError.
function showItems(listItem) {
    var next = listItem.nextElementSibling;
    while (next && next.getAttribute("znerp") === "showing") {
        next.style.display = "none";
        next.setAttribute("znerp", "hidden");
        next = next.nextElementSibling;
    }
}
// Script settings and their built-in defaults.
var configuration = {minimumSimilarity: 0.95};
// Overwrite each entry of `configuration` with the value stored via
// GM_getValue, converted to a number. The current value is passed (as a
// string) as the fallback, and it is also kept when the stored value
// does not convert to a number.
// NOTE(review): the metadata block at the top of this script declares
// no @grant -- confirm GM_getValue is available before calling this.
function getConfiguration() {
    for (var key in configuration) {
        var stored = Number(GM_getValue(key, configuration[key] + ''));
        if (!Number.isNaN(stored)) {
            configuration[key] = stored;
        }
    }
}
// Store every entry of `configuration` via GM_setValue, each value
// converted to a string.
// NOTE(review): the metadata block at the top of this script declares
// no @grant -- confirm GM_setValue is available before calling this.
function saveConfiguration() {
    // `key` is declared locally so the loop does not create a global.
    for (var key in configuration) {
        GM_setValue(key, configuration[key] + "");
    }
}
} catch (ex){
console.log("error: " + ex.message);
}
console.log(TAG + 'finish');