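// Any Hackernews Link Utils
//
// Utility functions for Any Hackernews Link
// This is the version submitted on 2025-01-24; see the latest version.
// This script should not be installed directly. It is an external library intended for use by other scripts; to use it, add the meta directive: // @require https://update.gf.qytechs.cn/scripts/524693/1525919/Any%20Hackernews%20Link%20Utils.js
// Ask a question, post a review, or report this script.
// Wrap lines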
// ==UserScript==
// @name         Any Hackernews Link Utils
// @namespace    http://tampermonkey.net/
// @version      0.1.0
// @description  Utility functions for Any Hackernews Link
// @author       RoCry
// @grant        GM_xmlhttpRequest
// @connect      hn.algolia.com
// @license      MIT
// ==/UserScript==

/**
 * Configuration
 */
const CONFIG = {
    // Hostname fragments to skip at runtime; these cover cases the
    // userscript's @exclude rules could not express.
    IGNORED_DOMAINS: [
        'gmail.com', 'accounts.google.com', 'accounts.youtube.com',
        'signin.', 'login.', 'auth.', 'oauth.',
    ],

    // Path / query fragments whose presence marks a URL as a search page.
    SEARCH_PATTERNS: [
        '/search', '/webhp', '/results',
        '?q=', '?query=', '?search=', '?s='
    ],

    // Query parameters stripped from a URL while normalizing it.
    TRACKING_PARAMS: [
        'utm_source', 'utm_medium', 'utm_campaign', 'utm_term', 'utm_content',
        'fbclid', 'gclid', '_ga',
        'ref', 'source'
    ],

    // Content counts as English when at least this share of its characters is ASCII.
    MIN_ASCII_RATIO: 0.9,

    // How many leading characters of the page text are sampled for language detection.
    CHARS_TO_CHECK: 300
};

/**
 * URL Utilities
 */
const URLUtils = {
    /**
     * Decide whether a URL should be skipped, either because its hostname
     * contains one of CONFIG.IGNORED_DOMAINS or because its path / query
     * contains one of CONFIG.SEARCH_PATTERNS.
     * @param {string} url - URL to check
     * @returns {boolean} - True if the URL should be ignored; false otherwise,
     *                      including when the URL cannot be parsed
     */
    shouldIgnoreUrl(url) {
        try {
            const { hostname, pathname, search } = new URL(url);

            // Substring match: an entry such as 'login.' matches any hostname containing it
            if (CONFIG.IGNORED_DOMAINS.some((domain) => hostname.includes(domain))) {
                return true;
            }

            // Search pages are recognised by fragments in either the path or the query string
            return CONFIG.SEARCH_PATTERNS.some(
                (pattern) => pathname.includes(pattern) || search.includes(pattern)
            );
        } catch (e) {
            console.error('Error checking URL:', e);
            return false;
        }
    },

    /**
     * Normalize a URL: drop tracking parameters and the `tab` parameter,
     * collapse GitHub tree/blob links to the repository root, map arXiv PDF
     * links to their abstract page, remove the hash and the trailing slash.
     * @param {string} url - URL to normalize
     * @returns {string} - Normalized URL, or the input unchanged if it cannot be parsed
     */
    normalizeUrl(url) {
        try {
            const urlObj = new URL(url);

            // Remove tracking parameters
            for (const param of CONFIG.TRACKING_PARAMS) {
                urlObj.searchParams.delete(param);
            }

            // Remove special parameter for all hosts
            // https://github.com/HackerNews/API?tab=readme-ov-file -> https://github.com/HackerNews/API
            urlObj.searchParams.delete('tab');

            // GitHub: /user/repo/tree/... and /user/repo/blob/... collapse to /user/repo
            if (urlObj.hostname === 'github.com') {
                const [username, repo, section] = urlObj.pathname.split('/').filter(Boolean);
                if (section === 'tree' || section === 'blob') {
                    urlObj.pathname = `/${username}/${repo}`;
                }
            }

            // arXiv: point PDF links at the abstract page
            // https://arxiv.org/pdf/1706.03762     -> https://arxiv.org/abs/1706.03762
            // https://arxiv.org/pdf/1706.03762.pdf -> https://arxiv.org/abs/1706.03762
            if (urlObj.hostname === 'arxiv.org') {
                const isPdfLink = urlObj.pathname.startsWith('/pdf/');
                let arxivPath = urlObj.pathname.replace('/pdf/', '/abs/');
                if (isPdfLink) {
                    // The abstract page has no ".pdf" suffix, so drop it to reach the same form
                    arxivPath = arxivPath.replace(/\.pdf$/i, '');
                }
                urlObj.pathname = arxivPath;
            }

            // Remove hash
            urlObj.hash = '';

            // With a query string present the serialized URL ends with the query,
            // so the path's trailing slash has to be removed on the path itself
            // (/blog/?page=2 -> /blog?page=2). The root path "/" is left alone.
            if (urlObj.search !== '' && urlObj.pathname.length > 1 && urlObj.pathname.endsWith('/')) {
                urlObj.pathname = urlObj.pathname.slice(0, -1);
            }

            // Remove trailing slash for consistency
            let normalizedUrl = urlObj.toString();
            if (normalizedUrl.endsWith('/')) {
                normalizedUrl = normalizedUrl.slice(0, -1);
            }

            return normalizedUrl;
        } catch (e) {
            console.error('Error normalizing URL:', e);
            return url;
        }
    },

    /**
     * Compare two URLs for equality after normalization. The protocol is not
     * compared, hostname and path are compared case-insensitively, and query
     * parameters are compared independent of their order.
     * @param {string} url1 - First URL
     * @param {string} url2 - Second URL
     * @returns {boolean} - True if URLs match; false if either is missing or unparsable
     */
    urlsMatch(url1, url2) {
        // A missing URL can never match; bail out quietly instead of logging parse errors
        if (!url1 || !url2) {
            return false;
        }

        try {
            const [first, second] = [url1, url2].map((raw) => {
                const parsed = new URL(this.normalizeUrl(raw));
                // Sort so that ?a=1&b=2 and ?b=2&a=1 compare equal
                parsed.searchParams.sort();
                return parsed;
            });

            return first.hostname.toLowerCase() === second.hostname.toLowerCase() &&
                   first.pathname.toLowerCase() === second.pathname.toLowerCase() &&
                   first.searchParams.toString() === second.searchParams.toString();
        } catch (e) {
            console.error('Error comparing URLs:', e);
            return false;
        }
    }
};

/**
 * Content Utilities
 */
const ContentUtils = {
    /**
     * Check if text is primarily English by checking its ASCII ratio.
     *
     * The first CONFIG.CHARS_TO_CHECK characters are sampled, whitespace and
     * common punctuation are discarded, and the share of remaining characters
     * with a code unit <= 127 is compared against CONFIG.MIN_ASCII_RATIO.
     *
     * @param {string} [text] - Text to analyze. When omitted (or not a string),
     *                          the document title plus the text of the first
     *                          three <p> elements is used.
     * @returns {boolean} - True if content is likely English; also true when
     *                      there is nothing to analyze or an error occurs
     */
    isEnglishContent(text) {
        try {
            let sourceText = text;
            if (typeof sourceText !== 'string') {
                // Get text from title and first paragraphs of the current page
                const title = document.title || '';
                const firstParagraphs = Array.from(document.getElementsByTagName('p'))
                    .slice(0, 3)
                    .map((p) => p.textContent)
                    .join(' ');
                sourceText = title + ' ' + firstParagraphs;
            }

            const textToAnalyze = sourceText
                .slice(0, CONFIG.CHARS_TO_CHECK)
                .replace(/\s+/g, ' ')
                .trim();

            if (!textToAnalyze) return true; // If no text found, assume English

            // Only characters other than spaces and common punctuation take part in the ratio
            const significantChars = textToAnalyze.replace(/[\s\.,\-_'"!?()]/g, '');
            const totalChars = significantChars.length;

            if (totalChars === 0) return true;

            // Counted per UTF-16 code unit, matching how totalChars is measured
            const asciiChars = significantChars
                .split('')
                .filter((char) => char.charCodeAt(0) <= 127).length;

            const asciiRatio = asciiChars / totalChars;
            console.log('🈂️ ASCII Ratio:', asciiRatio.toFixed(2));

            return asciiRatio >= CONFIG.MIN_ASCII_RATIO;
        } catch (e) {
            console.error('Error checking content language:', e);
            return true; // Default to allowing English in case of error
        }
    }
};

/**
 * HackerNews API Handler
 */
const HNApi = {
    /**
     * Look a URL up through the Algolia Hacker News search endpoint,
     * restricting the search to the `url` attribute.
     * @param {string} normalizedUrl - URL to search for
     * @param {Function} updateUI - Callback receiving the result object, or null
     *                              when nothing matched or the request failed
     */
    checkHackerNews(normalizedUrl, updateUI) {
        const encodedQuery = encodeURIComponent(normalizedUrl);

        const request = {
            method: 'GET',
            url: `https://hn.algolia.com/api/v1/search?query=${encodedQuery}&restrictSearchableAttributes=url`,
            onload: (response) => {
                this.handleApiResponse(response, normalizedUrl, updateUI);
            },
            onerror: (error) => {
                console.error('Error fetching from HN API:', error);
                updateUI(null);
            }
        };

        GM_xmlhttpRequest(request);
    },

    /**
     * Parse the search response, keep the hits whose URL matches the one that
     * was searched for, and report the highest-scoring hit through updateUI.
     * @param {Object} response - API response; its `responseText` is parsed as JSON
     * @param {string} normalizedUrl - The normalized URL that was searched for
     * @param {Function} updateUI - Callback receiving the result object, or null
     *                              when no hit matches or parsing fails
     */
    handleApiResponse(response, normalizedUrl, updateUI) {
        // Highest points first; hits without points count as 0
        const byPointsDescending = (a, b) => (b.points || 0) - (a.points || 0);

        try {
            const { hits } = JSON.parse(response.responseText);
            const candidates = hits.filter((hit) => URLUtils.urlsMatch(hit.url, normalizedUrl));

            if (candidates.length > 0) {
                const [best] = candidates.sort(byPointsDescending);
                const result = {
                    title: best.title,
                    points: best.points || 0,
                    comments: best.num_comments || 0,
                    link: `https://news.ycombinator.com/item?id=${best.objectID}`,
                    posted: new Date(best.created_at).toLocaleDateString()
                };

                console.log('📰 Found on Hacker News:', result);
                updateUI(result);
            } else {
                console.log('🔍 URL not found on Hacker News');
                updateUI(null);
            }
        } catch (e) {
            console.error('Error parsing HN API response:', e);
            updateUI(null);
        }
    }
};
// Welcome to the Greasy Fork mirror