您需要先安装一个扩展,例如 篡改猴、Greasemonkey 或 暴力猴,之后才能安装此脚本。
您需要先安装一个扩展,例如 篡改猴 或 暴力猴,之后才能安装此脚本。
您需要先安装一个扩展,例如 篡改猴 或 暴力猴,之后才能安装此脚本。
您需要先安装一个扩展,例如 篡改猴 或 Userscripts,之后才能安装此脚本。
您需要先安装一款用户脚本管理器扩展,例如 Tampermonkey,才能安装此脚本。
您需要先安装用户脚本管理器扩展后才能安装此脚本。
Extract text from specific spans based on page URL, remove stop words, and do word frequency analysis.
当前为
// ==UserScript==
// @name         Text Frequency Analyzer with ASIN Page Support
// @namespace    http://tampermonkey.net/
// @version      1.9
// @description  Extract text from specific spans based on page URL, remove stop words, and do word frequency analysis.
// @author       Your Name
// @match        https://www.amazon.com/gp/bestsellers*
// @match        https://www.amazon.com/*/dp/*
// @match        https://www.amazon.com/dp/*
// @match        https://www.amazon.com/s?k=*
// @match        https://www.amazon.com/s?*
// @license      MIT
// ==/UserScript==

(function () {
  'use strict';

  /** English stop words excluded from the single-word frequency table. */
  const stopWords = new Set([
    'i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', 'your',
    'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she',
    'her', 'hers', 'herself', 'it', 'its', 'itself', 'they', 'them', 'their',
    'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that',
    'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being',
    'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an',
    'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of',
    'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into',
    'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from',
    'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again',
    'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how',
    'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some',
    'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', 'too',
    'very', 's', 't', 'can', 'will', 'just', 'don', 'should', 'now',
  ]);

  /** CSS selectors for the text-bearing elements of each supported page type. */
  const SELECTORS = {
    bestsellers: '._cDEzb_p13n-sc-css-line-clamp-3_g3dy1',
    search:
      '.a-size-base-plus.a-color-base.a-text-normal, .a-size-medium.a-color-base.a-text-normal',
    productBullets: '.a-unordered-list.a-vertical.a-spacing-mini .a-spacing-mini',
  };

  /** DOM id of the results panel; used to replace an existing panel on re-run. */
  const RESULT_PANEL_ID = 'tfa-results-panel';

  /** Matches tokens that contain at least one letter or digit. */
  const HAS_ALPHANUMERIC = /[a-z0-9]/;

  /**
   * Joins the `innerText` of every element with single spaces.
   *
   * @param {Iterable<HTMLElement>} elements - Elements to read.
   * @returns {string} The trimmed, space-joined text ('' when there are no elements).
   */
  function joinInnerText(elements) {
    return Array.from(elements, (el) => el.innerText).join(' ').trim();
  }

  /**
   * Collects the text to analyze for the current page type.
   *
   * The page type is decided from `location.pathname` rather than the full
   * URL, so query-string contents cannot select the wrong branch and every
   * search URL (`/s?...`) is recognized regardless of parameter order.
   *
   * @returns {string|null} The collected text ('' when nothing matched), or
   *   `null` when the page type is unsupported (the user has already been
   *   alerted in that case).
   */
  function extractText() {
    const path = window.location.pathname;

    if (path.includes('/gp/bestsellers')) {
      return joinInnerText(document.querySelectorAll(SELECTORS.bestsellers));
    }

    if (path === '/s') {
      return joinInnerText(document.querySelectorAll(SELECTORS.search));
    }

    if (path.includes('/dp/')) {
      // ASIN product page: product title followed by the feature bullets.
      const titleElement = document.getElementById('productTitle');
      const parts = Array.from(
        document.querySelectorAll(SELECTORS.productBullets),
        (el) => el.innerText
      );
      if (titleElement) {
        parts.unshift(titleElement.innerText);
      }
      return parts.join(' ').trim();
    }

    alert('This script is not configured for this page.');
    return null;
  }

  /**
   * Lower-cases text and strips every character except letters, digits,
   * whitespace and the symbols / " ' . - (kept because they carry meaning in
   * product titles, e.g. sizes and hyphenated terms).
   *
   * @param {string} text - Raw text.
   * @returns {string} Cleaned text with runs of whitespace collapsed to one space.
   */
  function cleanText(text) {
    return text
      .toLowerCase()
      .replace(/[^a-z0-9\s\/"'.-]/g, '')
      .replace(/\s+/g, ' ')
      .trim();
  }

  /**
   * Splits text into cleaned word tokens.
   *
   * Tokens made only of retained punctuation (for example the "-" separator
   * in "Bottle - 12 oz") are dropped so they are not counted as words.
   *
   * @param {string} text - Raw text.
   * @param {boolean} [removeStopWords=true] - Whether to drop stop words.
   * @returns {string[]} The word tokens in document order.
   */
  function getWords(text, removeStopWords = true) {
    const words = cleanText(text)
      .split(/\s+/)
      .filter((word) => HAS_ALPHANUMERIC.test(word));

    return removeStopWords ? words.filter((word) => !stopWords.has(word)) : words;
  }

  /**
   * Counts how often each run of `n` consecutive words occurs.
   *
   * @param {string[]} words - Word tokens in document order.
   * @param {number} n - Phrase length in words.
   * @param {number} [limit=10] - Maximum number of entries to return.
   * @returns {Array<[string, number]>} `[phrase, count]` pairs sorted by
   *   descending count; empty when `n` is less than 1 or exceeds the word count.
   */
  function countFrequencies(words, n, limit = 10) {
    if (n < 1) {
      return [];
    }

    const freqMap = new Map();
    for (let i = 0; i <= words.length - n; i++) {
      const phrase = words.slice(i, i + n).join(' ');
      freqMap.set(phrase, (freqMap.get(phrase) || 0) + 1);
    }

    return Array.from(freqMap.entries())
      .sort((a, b) => b[1] - a[1])
      .slice(0, limit);
  }

  /**
   * Renders the analysis in a fixed panel at the top-right of the page.
   * A panel left over from a previous run is removed first, so repeated
   * clicks do not stack panels on top of each other.
   *
   * @param {Array<[string, Array<[string, number]>]>} results - Pairs of
   *   section label and that section's `[phrase, count]` rows.
   * @returns {void}
   */
  function displayResults(results) {
    const previousPanel = document.getElementById(RESULT_PANEL_ID);
    if (previousPanel) {
      previousPanel.remove();
    }

    const resultDiv = document.createElement('div');
    resultDiv.id = RESULT_PANEL_ID;
    Object.assign(resultDiv.style, {
      position: 'fixed',
      top: '10px',
      right: '10px',
      backgroundColor: 'white',
      border: '1px solid black',
      padding: '10px',
      zIndex: '10000',
      maxHeight: '90vh',
      overflowY: 'auto',
    });

    const closeButton = document.createElement('button');
    closeButton.textContent = 'Close';
    closeButton.style.float = 'right';
    closeButton.style.cursor = 'pointer';
    closeButton.addEventListener('click', () => resultDiv.remove());
    resultDiv.appendChild(closeButton);

    const heading = document.createElement('h2');
    heading.textContent = 'Word Frequency Analysis';
    resultDiv.appendChild(heading);

    results.forEach(([label, data]) => {
      const title = document.createElement('h3');
      title.textContent = label;
      resultDiv.appendChild(title);

      const list = document.createElement('ul');
      data.forEach(([phrase, count]) => {
        const listItem = document.createElement('li');
        // textContent, not innerHTML: the phrase comes from page content.
        listItem.textContent = `${phrase}: ${count}`;
        list.appendChild(listItem);
      });
      resultDiv.appendChild(list);
    });

    document.body.appendChild(resultDiv);
  }

  /**
   * Click handler: extracts the page text, computes the frequency tables
   * and shows them.
   *
   * @returns {void}
   */
  function analyzeText() {
    const text = extractText();
    if (text === null) {
      // Unsupported page; extractText already told the user.
      return;
    }
    if (!text) {
      alert('No text found in the specified spans.');
      return;
    }

    const wordsForSingle = getWords(text); // stop words removed
    const wordsForPhrases = getWords(text, false); // stop words kept so phrases read naturally

    displayResults([
      ['Top 10 Single Words', countFrequencies(wordsForSingle, 1)],
      ['Top 10 Two-Word Phrases', countFrequencies(wordsForPhrases, 2)],
      ['Top 10 Three-Word Phrases', countFrequencies(wordsForPhrases, 3)],
      ['Top 10 Four-Word Phrases', countFrequencies(wordsForPhrases, 4)],
    ]);
  }

  // Add a floating button to the page that triggers the analysis.
  const analyzeButton = document.createElement('button');
  analyzeButton.textContent = 'Analyze Text Frequency';
  Object.assign(analyzeButton.style, {
    position: 'fixed',
    bottom: '10px',
    right: '10px',
    zIndex: '10000',
    padding: '10px 20px',
    backgroundColor: '#007bff',
    color: 'white',
    border: 'none',
    borderRadius: '5px',
    cursor: 'pointer',
  });
  analyzeButton.addEventListener('click', analyzeText);
  document.body.appendChild(analyzeButton);
})();
QingJ © 2025
镜像随时可能失效,请加Q群300939539或关注我们的公众号极客氢云获取最新地址