Text Frequency Analyzer with ASIN Page Support

Extract text from specific spans based on page URL, remove stop words, and do word frequency analysis.

目前為 2024-08-23 提交的版本,檢視 最新版本

// ==UserScript==
// @name         Text Frequency Analyzer with ASIN Page Support
// @namespace    http://tampermonkey.net/
// @version      1.10
// @description  Extract text from specific spans based on page URL, remove stop words, and do word frequency analysis.
// @author       Your Name
// @match        https://www.amazon.com/gp/bestsellers*
// @match        https://www.amazon.com/*/dp/*
// @match        https://www.amazon.com/dp/*
// @match        https://www.amazon.com/s?k=*
// @match        https://www.amazon.com/s?*

// @license MIT
// ==/UserScript==

(function () {
    'use strict';

    // Common English function words that are excluded from the single-word
    // frequency count. The published list is abbreviated: the author omitted
    // the remaining stop words between 'too' and 'now'.
    const stopWords = new Set(
        'with of for and at if to on by from as than too now'.split(' ')
    );

    /**
     * Collect the text to analyze from the current page, chosen by page URL.
     *
     * - Best-seller pages (`/gp/bestsellers`): the line-clamped title elements.
     * - Search pages (`/s?...`): the result title elements.
     * - Product pages (`/dp/`): the product title plus the bullet-list items.
     *
     * On any other page an alert is shown and an empty string is returned.
     *
     * @returns {string} The collected text, space-joined and trimmed.
     */
    function extractText() {
        const url = window.location.href;

        // Join the rendered text of a list of elements with single spaces.
        const joinText = elements =>
            Array.from(elements, el => el.innerText).join(' ').trim();

        if (url.includes('/gp/bestsellers')) {
            return joinText(document.querySelectorAll('._cDEzb_p13n-sc-css-line-clamp-3_g3dy1'));
        }

        // The script is matched on every `/s?*` URL, so accept any search
        // query string, not only those whose first parameter is `k`.
        if (url.includes('/s?')) {
            return joinText(document.querySelectorAll('.a-size-base-plus.a-color-base.a-text-normal, .a-size-medium.a-color-base.a-text-normal'));
        }

        if (url.includes('/dp/')) {
            const titleElement = document.getElementById('productTitle');
            const miniElements = document.querySelectorAll('.a-unordered-list.a-vertical.a-spacing-mini .a-spacing-mini');
            // A missing title contributes an empty string; trim() removes the
            // leading separator that results from it.
            const parts = [titleElement ? titleElement.innerText : ''];
            miniElements.forEach(el => {
                parts.push(el.innerText);
            });
            return parts.join(' ').trim();
        }

        alert('This script is not configured for this page.');
        return '';
    }

    /**
     * Normalize text for tokenizing: lowercase it, strip unwanted characters,
     * and collapse whitespace.
     *
     * @param {string} text - Raw text.
     * @returns {string} Lowercased text with single spaces and no leading or
     *     trailing whitespace.
     */
    function cleanText(text) {
        const lowered = text.toLowerCase();
        // Keep only a-z, digits, whitespace and the symbols / " ' . -
        const stripped = lowered.replace(/[^a-z0-9\s\/"'.-]/g, '');
        const collapsed = stripped.replace(/\s+/g, ' ');
        return collapsed.trim();
    }

    /**
     * Split text into cleaned word tokens.
     *
     * @param {string} text - Raw text; it is passed through cleanText first.
     * @param {boolean} [removeStopWords=true] - When true, tokens found in
     *     the stopWords set are dropped.
     * @returns {string[]} The tokens in their original order.
     */
    function getWords(text, removeStopWords = true) {
        const tokens = cleanText(text)
            .split(/\s+/)
            .filter(token => token.length > 0); // drop the '' produced by empty input

        return removeStopWords
            ? tokens.filter(token => !stopWords.has(token))
            : tokens;
    }

    /**
     * Count how often each run of `n` consecutive words occurs and return the
     * most frequent ones.
     *
     * @param {string[]} words - Tokens in document order.
     * @param {number} n - Phrase length in words; must be a positive integer.
     * @param {number} [limit=10] - Maximum number of entries to return.
     * @returns {Array<[string, number]>} `[phrase, count]` pairs sorted by
     *     descending count; ties keep first-occurrence order. Empty when `n`
     *     is not a positive integer or exceeds the number of words.
     */
    function countFrequencies(words, n, limit = 10) {
        // A non-positive n would otherwise count empty-string "phrases".
        if (!Number.isInteger(n) || n < 1) {
            return [];
        }

        const freqMap = new Map();
        for (let i = 0; i + n <= words.length; i++) {
            const phrase = words.slice(i, i + n).join(' ');
            freqMap.set(phrase, (freqMap.get(phrase) || 0) + 1);
        }

        return Array.from(freqMap.entries())
            .sort((a, b) => b[1] - a[1])
            .slice(0, Math.max(0, limit));
    }

    /**
     * Remove every `.highlight` wrapper from the document, leaving its text
     * in place.
     *
     * Each wrapper is replaced by a real text node rather than by assigning
     * its text to `outerHTML`, so characters such as `<` or `&` in the text
     * are never re-parsed as markup.
     */
    function removePreviousHighlights() {
        document.querySelectorAll('.highlight').forEach(el => {
            const parent = el.parentNode;
            el.replaceWith(document.createTextNode(el.textContent));
            if (parent) {
                // Merge the text fragments left behind into one text node so
                // later phrase searches see contiguous text again.
                parent.normalize();
            }
        });
    }

    /**
     * Wrap every match of `regex` found in the text nodes under `root` in a
     * `<span class="highlight">`.
     *
     * Only text nodes are modified, so tag names, attributes and character
     * entities in the markup cannot be matched, and existing child elements
     * are kept rather than re-created.
     *
     * @param {Node} root - Element whose descendant text is searched.
     * @param {RegExp} regex - Global regular expression to match.
     */
    function highlightMatchesInTextNodes(root, regex) {
        const walker = document.createTreeWalker(root, NodeFilter.SHOW_TEXT);
        const textNodes = [];
        // Collect first: replacing nodes during the walk would disturb it.
        while (walker.nextNode()) {
            textNodes.push(walker.currentNode);
        }

        textNodes.forEach(node => {
            const parent = node.parentElement;
            // Detached nodes cannot be replaced; text already inside a
            // highlight (when matched elements overlap) must not be wrapped twice.
            if (!parent || parent.closest('.highlight')) {
                return;
            }

            const text = node.nodeValue;
            const fragment = document.createDocumentFragment();
            let cursor = 0;
            let found = false;
            let match;

            regex.lastIndex = 0; // the regex is global and reused across nodes
            while ((match = regex.exec(text)) !== null) {
                if (match[0].length === 0) {
                    regex.lastIndex++; // never loop forever on an empty match
                    continue;
                }
                found = true;
                if (match.index > cursor) {
                    fragment.appendChild(document.createTextNode(text.slice(cursor, match.index)));
                }
                const mark = document.createElement('span');
                mark.className = 'highlight';
                mark.textContent = match[0];
                fragment.appendChild(mark);
                cursor = match.index + match[0].length;
            }

            if (!found) {
                return;
            }
            if (cursor < text.length) {
                fragment.appendChild(document.createTextNode(text.slice(cursor)));
            }
            parent.replaceChild(fragment, node);
        });
    }

    /**
     * Highlight every case-insensitive occurrence of `phrase` in the elements
     * the analysis reads from, after clearing any previous highlights.
     *
     * @param {string} phrase - Word or space-separated phrase to highlight.
     */
    function highlightText(phrase) {
        removePreviousHighlights(); // Remove previous highlights

        if (!phrase) {
            return; // an empty pattern would match at every position
        }

        // Escape regex metacharacters, then let each space match any run of
        // whitespace: phrases are built from whitespace-collapsed text, while
        // the page's text nodes may contain newlines or repeated spaces.
        const pattern = phrase
            .replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
            .replace(/ /g, '\\s+');
        const regex = new RegExp(pattern, 'gi');

        const url = window.location.href;
        const targets = [];

        if (url.includes('/dp/')) {
            const titleElement = document.getElementById('productTitle');
            if (titleElement) {
                targets.push(titleElement);
            }
            document
                .querySelectorAll('.a-unordered-list.a-vertical.a-spacing-mini .a-spacing-mini')
                .forEach(el => targets.push(el));
        } else {
            const classesToSearch = [
                '_cDEzb_p13n-sc-css-line-clamp-3_g3dy1',
                'a-size-base-plus.a-color-base.a-text-normal',
                'a-size-medium.a-color-base.a-text-normal'
            ];

            classesToSearch.forEach(className => {
                document.querySelectorAll(`.${className}`).forEach(span => targets.push(span));
            });
        }

        targets.forEach(target => highlightMatchesInTextNodes(target, regex));
    }

    /**
     * Render the analysis results in a fixed panel at the top-right corner of
     * the page.
     *
     * Any panel left by an earlier call is removed first, so repeated
     * analyses replace the panel instead of stacking new ones on top.
     * Clicking a list entry highlights that phrase on the page.
     *
     * @param {Array<[string, Array<[string, number]>]>} results - Pairs of
     *     section label and that section's `[phrase, count]` entries.
     */
    function displayResults(results) {
        const panelId = 'text-frequency-analyzer-results';
        const previousPanel = document.getElementById(panelId);
        if (previousPanel) {
            previousPanel.remove();
        }

        const resultDiv = document.createElement('div');
        resultDiv.id = panelId;
        Object.assign(resultDiv.style, {
            position: 'fixed',
            top: '10px',
            right: '10px',
            backgroundColor: 'white',
            border: '1px solid black',
            padding: '10px',
            zIndex: '10000',
            maxHeight: '90vh',
            overflowY: 'auto'
        });

        const heading = document.createElement('h2');
        heading.textContent = 'Word Frequency Analysis';
        resultDiv.appendChild(heading);

        results.forEach(([label, data]) => {
            const title = document.createElement('h3');
            title.textContent = label;
            resultDiv.appendChild(title);

            const list = document.createElement('ul');
            data.forEach(([phrase, count]) => {
                const listItem = document.createElement('li');
                listItem.textContent = `${phrase}: ${count}`;
                // Clicking an entry highlights the phrase on the page.
                listItem.addEventListener('click', () => highlightText(phrase));
                list.appendChild(listItem);
            });
            resultDiv.appendChild(list);
        });

        document.body.appendChild(resultDiv);
    }

    /**
     * Click handler for the analyze button: extract the page text, compute
     * the top single words and the top 2-, 3- and 4-word phrases, and show
     * them.
     *
     * Single words are counted with stop words removed; phrases are counted
     * over the full word sequence. If no text was extracted, an alert is
     * shown and nothing is displayed.
     */
    function analyzeText() {
        const pageText = extractText();
        if (!pageText) {
            alert('No text found in the specified spans.');
            return;
        }

        const filteredWords = getWords(pageText);
        const allWords = getWords(pageText, false);

        // One row per report section: label, word list to scan, phrase length.
        const sections = [
            { label: 'Top 10 Single Words', source: filteredWords, size: 1 },
            { label: 'Top 10 Two-Word Phrases', source: allWords, size: 2 },
            { label: 'Top 10 Three-Word Phrases', source: allWords, size: 3 },
            { label: 'Top 10 Four-Word Phrases', source: allWords, size: 4 }
        ];

        const report = sections.map(
            ({ label, source, size }) => [label, countFrequencies(source, size)]
        );

        displayResults(report);
    }

    // Inject the stylesheet rule that styles elements carrying the
    // `highlight` class (yellow background, bold text).
    const highlightStyle = Object.assign(document.createElement('style'), {
        innerHTML: `
        .highlight {
            background-color: yellow;
            font-weight: bold;
        }
    `
    });
    document.head.appendChild(highlightStyle);

    // Floating button, fixed to the bottom-right corner of the page, that
    // runs the analysis when clicked.
    const analyzeButton = document.createElement('button');
    analyzeButton.textContent = 'Analyze Text Frequency';
    Object.assign(analyzeButton.style, {
        position: 'fixed',
        bottom: '10px',
        right: '10px',
        zIndex: '10000',
        padding: '10px 20px',
        backgroundColor: '#007bff',
        color: 'white',
        border: 'none',
        borderRadius: '5px',
        cursor: 'pointer'
    });
    analyzeButton.addEventListener('click', analyzeText);
    document.body.appendChild(analyzeButton);
})();

QingJ © 2025

镜像随时可能失效,请加Q群300939539或关注我们的公众号极客氢云获取最新地址