// ==UserScript==
// @name Text Frequency Analyzer with ASIN Page Support
// @namespace http://tampermonkey.net/
// @version 1.10
// @description Extract text from specific spans based on page URL, remove stop words, and do word frequency analysis.
// @author Your Name
// @match https://www.amazon.com/gp/bestsellers*
// @match https://www.amazon.com/*/dp/*
// @match https://www.amazon.com/dp/*
// @match https://www.amazon.com/s?k=*
// @match https://www.amazon.com/s?*
// @license MIT
// ==/UserScript==
(function () {
'use strict';
// Words that getWords() filters out when stop-word removal is enabled.
// NOTE: this list is abbreviated; the remaining stop words were omitted.
const stopWords = new Set([
  'with',
  'of',
  'for',
  'and',
  'at',
  'if',
  'to',
  'on',
  'by',
  'from',
  'as',
  'than',
  'too',
  // ... other stop words omitted
  'now',
]);
/**
 * Collect the text to analyze from the current page, chosen by page URL.
 *
 * - URL contains `/gp/bestsellers`: elements with the
 *   `_cDEzb_p13n-sc-css-line-clamp-3_g3dy1` class.
 * - Search results (path ends in `/s`, or URL contains `/s?k=`): elements
 *   matching the two `.a-color-base.a-text-normal` size variants.
 * - URL contains `/dp/`: `#productTitle` followed by every element matching
 *   `.a-unordered-list.a-vertical.a-spacing-mini .a-spacing-mini`.
 *
 * On any other page an alert is shown and an empty string is returned.
 *
 * @returns {string} The `innerText` of the selected elements joined by single
 *   spaces and trimmed; '' when nothing matched or the page is unsupported.
 */
function extractText() {
  const url = window.location.href;

  // innerText of every element matching the selector, in document order.
  const textsOf = selector =>
    Array.from(document.querySelectorAll(selector), el => el.innerText);

  // The script is matched on /s?* as well as /s?k=*, so a search page is
  // recognised by its path; requiring the literal "/s?k=" rejected search URLs
  // whose first query parameter is not "k" (e.g. /s?i=electronics&k=usb).
  const isSearchPage = new URL(url).pathname.endsWith('/s') || url.includes('/s?k=');

  let pieces;
  if (url.includes('/gp/bestsellers')) {
    pieces = textsOf('._cDEzb_p13n-sc-css-line-clamp-3_g3dy1');
  } else if (isSearchPage) {
    pieces = textsOf('.a-size-base-plus.a-color-base.a-text-normal, .a-size-medium.a-color-base.a-text-normal');
  } else if (url.includes('/dp/')) {
    const titleElement = document.getElementById('productTitle');
    pieces = [
      titleElement ? titleElement.innerText : '',
      ...textsOf('.a-unordered-list.a-vertical.a-spacing-mini .a-spacing-mini'),
    ];
  } else {
    alert('This script is not configured for this page.');
    return '';
  }

  return pieces.join(' ').trim();
}
/**
 * Normalize raw text before it is split into words.
 *
 * Lower-cases the input, removes every character that is not a-z, 0-9,
 * whitespace, or one of the symbols / " ' . - and collapses each whitespace run
 * into a single space.
 *
 * @param {string} text - Raw text.
 * @returns {string} The normalized, trimmed text.
 */
function cleanText(text) {
  const lowered = text.toLowerCase();
  // Keep only the allowed symbols; everything else is deleted outright.
  const stripped = lowered.replace(/[^a-z0-9\s\/"'.-]/g, '');
  const collapsed = stripped.replace(/\s+/g, ' ');
  return collapsed.trim();
}
/**
 * Split text into normalized words.
 *
 * The text is passed through cleanText() and split on whitespace; empty tokens
 * are discarded.
 *
 * @param {string} text - Raw text.
 * @param {boolean} [removeStopWords=true] - When truthy, words contained in
 *   `stopWords` are dropped from the result.
 * @returns {string[]} The words in their original order.
 */
function getWords(text, removeStopWords = true) {
  const tokens = cleanText(text)
    .split(/\s+/)
    .filter(token => token !== '');

  if (!removeStopWords) {
    return tokens;
  }
  return tokens.filter(token => !stopWords.has(token));
}
/**
 * Count how often each run of `n` consecutive words occurs and return the most
 * frequent ones.
 *
 * @param {string[]} words - Words in reading order.
 * @param {number} n - Phrase length in words; must be a positive integer.
 * @param {number} [limit=10] - Maximum number of entries to return.
 * @returns {Array<[string, number]>} `[phrase, count]` pairs sorted by
 *   descending count. Phrases with equal counts keep first-occurrence order.
 *   Empty when `n` is not a positive integer or exceeds `words.length`.
 */
function countFrequencies(words, n, limit = 10) {
  // A zero/negative/fractional window has no meaningful phrases; without this
  // guard n = 0 would count the empty string words.length + 1 times.
  if (!Number.isInteger(n) || n < 1) {
    return [];
  }

  const freqMap = new Map();
  for (let start = 0; start + n <= words.length; start++) {
    const phrase = words.slice(start, start + n).join(' ');
    freqMap.set(phrase, (freqMap.get(phrase) || 0) + 1);
  }

  return Array.from(freqMap.entries())
    .sort((a, b) => b[1] - a[1])
    .slice(0, Math.max(0, limit));
}
/**
 * Undo earlier highlighting by replacing every `span.highlight` element with a
 * plain text node holding its text.
 *
 * The span is swapped for a real text node instead of assigning its text to
 * `outerHTML`, so the text is never re-parsed as HTML. The parent is then
 * normalized so the adjacent text nodes left behind merge back together.
 */
function removePreviousHighlights() {
  // Only <span> elements are unwrapped: that is what highlightText() inserts,
  // and it lowers the chance of touching a page element that happens to use
  // the same class name.
  document.querySelectorAll('span.highlight').forEach(el => {
    const parent = el.parentNode;
    el.replaceWith(document.createTextNode(el.textContent));
    if (parent) {
      parent.normalize();
    }
  });
}
/**
 * Highlight every case-insensitive occurrence of `phrase` in the elements the
 * analyzer reads from, after clearing any earlier highlights.
 *
 * On URLs containing `/dp/` the targets are `#productTitle` and the elements
 * matching `.a-unordered-list.a-vertical.a-spacing-mini .a-spacing-mini`; on
 * every other page they are the best-seller and search-result title classes.
 *
 * Matching is done on text nodes only. Running the regex over `innerHTML`
 * could match inside tag names, attributes or entities and corrupt the markup,
 * and it also rebuilt (and so detached) every child element.
 *
 * @param {string} phrase - Space-separated words to highlight. Any run of
 *   whitespace in the page text is accepted between the words.
 */
function highlightText(phrase) {
  removePreviousHighlights(); // Remove previous highlights

  const words = String(phrase).trim().split(/\s+/).filter(Boolean);
  if (words.length === 0) {
    // An empty pattern would match at every position; nothing to highlight.
    return;
  }
  const pattern = words
    .map(word => word.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'))
    .join('\\s+');
  const regex = new RegExp(pattern, 'gi');

  let roots;
  if (window.location.href.includes('/dp/')) {
    roots = Array.from(
      document.querySelectorAll('.a-unordered-list.a-vertical.a-spacing-mini .a-spacing-mini')
    );
    const titleElement = document.getElementById('productTitle');
    if (titleElement) {
      roots.unshift(titleElement);
    }
  } else {
    roots = Array.from(document.querySelectorAll([
      '._cDEzb_p13n-sc-css-line-clamp-3_g3dy1',
      '.a-size-base-plus.a-color-base.a-text-normal',
      '.a-size-medium.a-color-base.a-text-normal',
    ].join(', ')));
  }

  roots.forEach(root => wrapMatchesInTextNodes(root, regex));
}

// Wrap each regex match found in the text nodes under `root` in a
// <span class="highlight">, leaving all element structure untouched.
function wrapMatchesInTextNodes(root, regex) {
  // Collect first: replacing nodes while the walker is live would disturb it.
  const walker = document.createTreeWalker(root, NodeFilter.SHOW_TEXT);
  const textNodes = [];
  while (walker.nextNode()) {
    textNodes.push(walker.currentNode);
  }

  textNodes.forEach(node => {
    const parent = node.parentNode;
    if (!parent) {
      return;
    }
    // Roots can be nested inside one another; skip text that an outer root's
    // pass has already wrapped so it is not highlighted twice.
    if (parent.classList && parent.classList.contains('highlight')) {
      return;
    }

    const text = node.nodeValue;
    regex.lastIndex = 0; // global regexes keep state between exec() calls
    let match = regex.exec(text);
    if (!match) {
      return;
    }

    const fragment = document.createDocumentFragment();
    let cursor = 0;
    while (match) {
      if (match.index > cursor) {
        fragment.appendChild(document.createTextNode(text.slice(cursor, match.index)));
      }
      const mark = document.createElement('span');
      mark.className = 'highlight';
      mark.textContent = match[0];
      fragment.appendChild(mark);
      cursor = match.index + match[0].length;
      match = regex.exec(text);
    }
    if (cursor < text.length) {
      fragment.appendChild(document.createTextNode(text.slice(cursor)));
    }
    parent.replaceChild(fragment, node);
  });
}
/**
 * Render the analysis results in a fixed panel at the top-right of the page.
 *
 * Any panel left from an earlier run is removed first, so repeated analyses
 * replace the panel instead of stacking new ones on top of the old.
 * Clicking a list entry highlights that phrase on the page via highlightText().
 *
 * @param {Array<[string, Array<[string, number]>]>} results - One
 *   `[sectionLabel, entries]` pair per section, where `entries` is a list of
 *   `[phrase, count]` pairs.
 */
function displayResults(results) {
  const PANEL_ID = 'text-frequency-analyzer-results';

  const previousPanel = document.getElementById(PANEL_ID);
  if (previousPanel) {
    previousPanel.remove();
  }

  const resultDiv = document.createElement('div');
  resultDiv.id = PANEL_ID;
  Object.assign(resultDiv.style, {
    position: 'fixed',
    top: '10px',
    right: '10px',
    backgroundColor: 'white',
    border: '1px solid black',
    padding: '10px',
    zIndex: '10000',
    maxHeight: '90vh',
    overflowY: 'auto',
  });

  const heading = document.createElement('h2');
  heading.textContent = 'Word Frequency Analysis';
  resultDiv.appendChild(heading);

  results.forEach(([label, data]) => {
    const title = document.createElement('h3');
    title.textContent = label;
    resultDiv.appendChild(title);

    const list = document.createElement('ul');
    data.forEach(([phrase, count]) => {
      const listItem = document.createElement('li');
      listItem.textContent = `${phrase}: ${count}`;
      listItem.style.cursor = 'pointer'; // signal that the entry is clickable
      listItem.addEventListener('click', () => highlightText(phrase)); // bind click handler
      list.appendChild(listItem);
    });
    resultDiv.appendChild(list);
  });

  document.body.appendChild(resultDiv);
}
/**
 * Click handler for the analyze button: extract the page text, compute the most
 * frequent single words and 2-, 3- and 4-word phrases, and show them.
 *
 * Single words are counted with stop words removed; phrases are counted over
 * the full word sequence. Alerts and stops when no text was extracted.
 */
function analyzeText() {
  const pageText = extractText();
  if (!pageText) {
    alert('No text found in the specified spans.');
    return;
  }

  const filteredWords = getWords(pageText);
  const allWords = getWords(pageText, false);

  // One entry per result section: its heading, word source and phrase length.
  const sections = [
    { label: 'Top 10 Single Words', words: filteredWords, size: 1 },
    { label: 'Top 10 Two-Word Phrases', words: allWords, size: 2 },
    { label: 'Top 10 Three-Word Phrases', words: allWords, size: 3 },
    { label: 'Top 10 Four-Word Phrases', words: allWords, size: 4 },
  ];

  displayResults(
    sections.map(({ label, words, size }) => [label, countFrequencies(words, size)])
  );
}
// Inject the CSS rule that styles highlighted matches.
const highlightStyle = document.createElement('style');
highlightStyle.innerHTML = [
  '',
  '.highlight {',
  'background-color: yellow;',
  'font-weight: bold;',
  '}',
  '',
].join('\n');
document.head.appendChild(highlightStyle);

// Floating button at the bottom-right of the page that starts the analysis.
const analyzeButton = document.createElement('button');
analyzeButton.textContent = 'Analyze Text Frequency';
Object.assign(analyzeButton.style, {
  position: 'fixed',
  bottom: '10px',
  right: '10px',
  zIndex: '10000',
  padding: '10px 20px',
  backgroundColor: '#007bff',
  color: 'white',
  border: 'none',
  borderRadius: '5px',
  cursor: 'pointer',
});
analyzeButton.addEventListener('click', analyzeText);
document.body.appendChild(analyzeButton);
})();