Any Hackernews Link Utils

Utility functions for Any Hackernews Link

当前为 2025-01-24 提交的版本,查看 最新版本

此脚本不应直接安装。它是供其他脚本使用的外部库,要使用该库请加入元指令 // @require https://update.gf.qytechs.cn/scripts/524693/1525919/Any%20Hackernews%20Link%20Utils.js

您需要先安装一个扩展,例如 篡改猴Greasemonkey暴力猴,之后才能安装此脚本。

You will need to install an extension such as Tampermonkey to install this script.

您需要先安装一个扩展,例如 篡改猴暴力猴,之后才能安装此脚本。

您需要先安装一个扩展,例如 篡改猴Userscripts ,之后才能安装此脚本。

您需要先安装一款用户脚本管理器扩展,例如 Tampermonkey,才能安装此脚本。

您需要先安装用户脚本管理器扩展后才能安装此脚本。

(我已经安装了用户脚本管理器,让我安装!)

您需要先安装一款用户样式管理器扩展,比如 Stylus,才能安装此样式。

您需要先安装一款用户样式管理器扩展,比如 Stylus,才能安装此样式。

您需要先安装一款用户样式管理器扩展,比如 Stylus,才能安装此样式。

您需要先安装一款用户样式管理器扩展后才能安装此样式。

您需要先安装一款用户样式管理器扩展后才能安装此样式。

您需要先安装一款用户样式管理器扩展后才能安装此样式。

(我已经安装了用户样式管理器,让我安装!)

// ==UserScript==
// @name         Any Hackernews Link Utils
// @namespace    http://tampermonkey.net/
// @version      0.1.0
// @description  Utility functions for Any Hackernews Link
// @author       RoCry
// @grant        GM_xmlhttpRequest
// @connect      hn.algolia.com
// @license      MIT
// ==/UserScript==

/**
 * Configuration
 */
const CONFIG = {
    // Additional domains to ignore that couldn't be handled by @exclude
    IGNORED_DOMAINS: [
        'gmail.com',
        'accounts.google.com',
        'accounts.youtube.com',
        'signin.',
        'login.',
        'auth.',
        'oauth.',
    ],

    // Patterns that indicate a search page
    SEARCH_PATTERNS: [
        '/search',
        '/webhp',
        '/results',
        '?q=',
        '?query=',
        '?search=',
        '?s='
    ],

    // URL parameters to remove during normalization
    TRACKING_PARAMS: [
        'utm_source',
        'utm_medium',
        'utm_campaign',
        'utm_term',
        'utm_content',
        'fbclid',
        'gclid',
        '_ga',
        'ref',
        'source'
    ],

    // Minimum ratio of ASCII characters to consider content as English
    MIN_ASCII_RATIO: 0.9,
    
    // Number of characters to check for language detection
    CHARS_TO_CHECK: 300
};

/**
 * URL Utilities
 */
const URLUtils = {
    /**
     * Check if a URL should be ignored based on domain or search patterns
     * @param {string} url - URL to check
     * @returns {boolean} - True if URL should be ignored
     */
    shouldIgnoreUrl(url) {
        try {
            const urlObj = new URL(url);
            
            // Check remaining ignored domains
            if (CONFIG.IGNORED_DOMAINS.some(domain => urlObj.hostname.includes(domain))) {
                return true;
            }

            // Check if it's a search page
            if (CONFIG.SEARCH_PATTERNS.some(pattern => 
                urlObj.pathname.includes(pattern) || urlObj.search.includes(pattern))) {
                return true;
            }

            return false;
        } catch (e) {
            console.error('Error checking URL:', e);
            return false;
        }
    },

    /**
     * Normalize URL by removing tracking parameters and standardizing format
     * @param {string} url - URL to normalize
     * @returns {string} - Normalized URL
     */
    normalizeUrl(url) {
        try {
            const urlObj = new URL(url);
            
            // Remove tracking parameters
            CONFIG.TRACKING_PARAMS.forEach(param => urlObj.searchParams.delete(param));
            
            // Remove sepecial parameter for all hosts
            // https://github.com/HackerNews/API?tab=readme-ov-file -> https://github.com/HackerNews/API
            urlObj.searchParams.delete('tab');

            // Handle GitHub repository paths
            if (urlObj.hostname === 'github.com') {
                // Split path into segments
                const pathSegments = urlObj.pathname.split('/').filter(Boolean);
                
                // Only process if we have at least username/repo
                if (pathSegments.length >= 2) {
                    const [username, repo, ...rest] = pathSegments;
                    
                    // If path contains tree/master, blob/master, or similar, remove them
                    if (rest.length > 0 && (rest[0] === 'tree' || rest[0] === 'blob')) {
                        urlObj.pathname = `/${username}/${repo}`;
                    }
                }
            }
            // for arxiv
            // https://arxiv.org/pdf/1706.03762 -> https://arxiv.org/abs/1706.03762
            if (urlObj.hostname === 'arxiv.org') {
                urlObj.pathname = urlObj.pathname.replace('/pdf/', '/abs/');
            }

            // Remove hash
            urlObj.hash = '';
            
            // Remove trailing slash for consistency
            let normalizedUrl = urlObj.toString();
            if (normalizedUrl.endsWith('/')) {
                normalizedUrl = normalizedUrl.slice(0, -1);
            }
            
            return normalizedUrl;
        } catch (e) {
            console.error('Error normalizing URL:', e);
            return url;
        }
    },

    /**
     * Compare two URLs for equality after normalization
     * @param {string} url1 - First URL
     * @param {string} url2 - Second URL
     * @returns {boolean} - True if URLs match
     */
    urlsMatch(url1, url2) {
        try {
            const u1 = new URL(this.normalizeUrl(url1));
            const u2 = new URL(this.normalizeUrl(url2));
            
            return u1.hostname.toLowerCase() === u2.hostname.toLowerCase() &&
                   u1.pathname.toLowerCase() === u2.pathname.toLowerCase() &&
                   u1.search === u2.search;
        } catch (e) {
            console.error('Error comparing URLs:', e);
            return false;
        }
    }
};

/**
 * Content Utilities
 */
const ContentUtils = {
    /**
     * Check if text is primarily English by checking ASCII ratio
     * @param {string} text - Text to analyze
     * @returns {boolean} - True if content is likely English
     */
    isEnglishContent() {
        try {
            // Get text from title and first paragraph or relevant content
            const title = document.title || '';
            const firstParagraphs = Array.from(document.getElementsByTagName('p'))
                .slice(0, 3)
                .map(p => p.textContent)
                .join(' ');
            
            const textToAnalyze = (title + ' ' + firstParagraphs)
                .slice(0, CONFIG.CHARS_TO_CHECK)
                .replace(/\s+/g, ' ')
                .trim();

            if (!textToAnalyze) return true; // If no text found, assume English

            // Count ASCII characters (excluding spaces and common punctuation)
            const asciiChars = textToAnalyze.replace(/[\s\.,\-_'"!?()]/g, '')
                .split('')
                .filter(char => char.charCodeAt(0) <= 127).length;
            
            const totalChars = textToAnalyze.replace(/[\s\.,\-_'"!?()]/g, '').length;
            
            if (totalChars === 0) return true;
            
            const asciiRatio = asciiChars / totalChars;
            console.log('🈂️ ASCII Ratio:', asciiRatio.toFixed(2));
            
            return asciiRatio >= CONFIG.MIN_ASCII_RATIO;
        } catch (e) {
            console.error('Error checking content language:', e);
            return true; // Default to allowing English in case of error
        }
    }
};

/**
 * HackerNews API Handler
 */
const HNApi = {
    /**
     * Search for a URL on HackerNews
     * @param {string} normalizedUrl - URL to search for
     * @param {Function} updateUI - Callback function to update UI with results
     */
    checkHackerNews(normalizedUrl, updateUI) {
        const apiUrl = `https://hn.algolia.com/api/v1/search?query=${encodeURIComponent(normalizedUrl)}&restrictSearchableAttributes=url`;
        
        GM_xmlhttpRequest({
            method: 'GET',
            url: apiUrl,
            onload: (response) => this.handleApiResponse(response, normalizedUrl, updateUI),
            onerror: (error) => {
                console.error('Error fetching from HN API:', error);
                updateUI(null);
            }
        });
    },

    /**
     * Handle the API response
     * @param {Object} response - API response
     * @param {string} normalizedUrl - Original normalized URL
     * @param {Function} updateUI - Callback function to update UI with results
     */
    handleApiResponse(response, normalizedUrl, updateUI) {
        try {
            const data = JSON.parse(response.responseText);
            const matchingHits = data.hits.filter(hit => URLUtils.urlsMatch(hit.url, normalizedUrl));
            
            if (matchingHits.length === 0) {
                console.log('🔍 URL not found on Hacker News');
                updateUI(null);
                return;
            }

            const topHit = matchingHits.sort((a, b) => (b.points || 0) - (a.points || 0))[0];
            const result = {
                title: topHit.title,
                points: topHit.points || 0,
                comments: topHit.num_comments || 0,
                link: `https://news.ycombinator.com/item?id=${topHit.objectID}`,
                posted: new Date(topHit.created_at).toLocaleDateString()
            };

            console.log('📰 Found on Hacker News:', result);
            updateUI(result);
        } catch (e) {
            console.error('Error parsing HN API response:', e);
            updateUI(null);
        }
    }
};