mxz_crawler

B站评论区原神玩家纯度检测

目前为 2024-05-27 提交的版本。查看 最新版本

// ==UserScript==
// @name         mxz_crawler
// @namespace    www.cber.ltd
// @version      0.1.0
// @description  B站评论区原神玩家纯度检测
// @author       Tom
// @match        https://www.bilibili.com/video/*
// @match        https://t.bilibili.com/*
// @match        https://space.bilibili.com/*
// @match        https://www.bilibili.com/read/*
// @match        https://www.bilibili.com/opus/*
// @icon         https://static.hdslb.com/images/favicon.ico
// @connect      bilibili.com
// @grant        GM_xmlhttpRequest
// @license MIT
// @run-at document-end
// ==/UserScript==

// TODO: 如果已经有相同的人在不同的地方留言,应该直接给他加上level: DONE
// TODO: 如何解决验证的问题?调低每个人需要的动态数量? : DONE
// TODO: 取名!发帖!
// TODO: 翻页有bug,在一个评论区下翻页无法显示新的: DONE
// TODO: 加入缓存机制? 存在 localStorage 里? user-id: level, timestamp (设置30天): DONE
// TODO: 仙的tag?
// TODO: 在个人空间主页分析?更加详细的分析?在评论区粗略分析?
// TODO: 结合关注列表进行分析?看过的主播?

// Generation counter for crawl passes. The polling loop increments it every time the
// comment list changes; a running pass compares its own number against this value and
// stops as soon as they differ.
var thread_number = 0;
// Keywords looked for in a user's dynamics; each dynamic containing at least one of them
// counts once towards that user's score.
const mxz_tags = ["原神", "原宝", "崩坏", "星铁", "星穹铁道", "米哈游", "芙芙", "提瓦特", "旅行者", "派蒙", "稻妻", "枫丹", "蒙德", "璃月", "尘歌壶"];

/**
 * Collects every string that contains at least one Chinese character
 * (U+4E00..U+9FA5) from an arbitrarily nested value.
 *
 * Strings are returned in depth-first order, following the own enumerable
 * keys of each object/array. Non-string primitives and null are ignored.
 *
 * @param {*} obj - Value to scan (string, object, array, or anything else).
 * @returns {string[]} The matching strings, in traversal order.
 */
function extractStringsWithChineseFromObject(obj) {
    const hasChinese = /[\u4e00-\u9fa5]/;
    const found = [];

    const visit = (node) => {
        if (typeof node === 'string') {
            if (hasChinese.test(node)) {
                found.push(node);
            }
            return;
        }
        if (node === null || typeof node !== 'object') {
            return;
        }
        // Object.keys yields exactly the own enumerable string keys.
        Object.keys(node).forEach((key) => visit(node[key]));
    };

    visit(obj);
    return found;
}


/**
 * Picks one User-Agent string at random from a fixed pool.
 *
 * Every entry is a single-line string: a header value must not contain CR/LF,
 * so the pool carries no trailing line breaks. Each distinct User-Agent is
 * listed once so all of them are equally likely.
 *
 * @returns {string} A User-Agent header value.
 */
function getRandomUserAgent() {
    const userAgents = [
        "Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Mobile Safari/537.36",
        "Mozilla/5.0 (iPhone; CPU iPhone OS 17_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) EdgiOS/121.0.2277.107 Version/17.0 Mobile/15E148 Safari/604.1",
        "Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Mobile Safari/537.36 EdgA/121.0.0.0",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36 Edg/121.0.0.0",
        "Mozilla/5.0 (iPhone; CPU iPhone OS 17_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Mobile/15E148 Safari/604.1 OPX/2.1.0",
    ];
    const randomIndex = Math.floor(Math.random() * userAgents.length);
    return userAgents[randomIndex];
}

/**
 * Counts how many items mention at least one keyword.
 *
 * Each item (one dynamic) contributes at most 1 to the result, no matter how
 * many keywords it contains or how often they occur. Only the strings
 * returned by extractStringsWithChineseFromObject(item) are inspected.
 *
 * @param {Iterable<*>} items - The dynamics to scan.
 * @param {Iterable<string>} keywords - Keywords to look for.
 * @returns {number} Number of items containing any keyword.
 */
function getKeywordCount(items, keywords) {
    // Non-string entries (null, undefined, ...) never match.
    const mentionsAnyKeyword = (text) =>
        typeof text === 'string' && [...keywords].some((keyword) => text.includes(keyword));

    let matchedItems = 0;
    for (const item of items) {
        const texts = extractStringsWithChineseFromObject(item);
        if ([...texts].some(mentionsAnyKeyword)) {
            matchedItems += 1;
        }
    }
    return matchedItems;
}

/**
 * Waits for a random whole number of milliseconds in [minDelay, maxDelay].
 *
 * @param {number} minDelay - Lower bound in milliseconds (inclusive).
 * @param {number} maxDelay - Upper bound in milliseconds (inclusive).
 * @returns {Promise<void>} Resolves once the timer fires.
 */
function randomDelay(minDelay, maxDelay) {
    const span = maxDelay - minDelay + 1;
    const waitMs = Math.floor(Math.random() * span) + minDelay;
    return new Promise((resolve) => {
        setTimeout(resolve, waitMs);
    });
}

/**
 * Extracts the user id that belongs to a comment's user element.
 *
 * Lookup order:
 *   1. the element's `data-user-id` attribute;
 *   2. the `href` of its first child: the numeric path segment after
 *      `space.bilibili.com/` when the link has that shape, otherwise all
 *      digits found in the href.
 *
 * @param {Element} html - The user element of a comment.
 * @returns {?string} The uid, or null when neither lookup yields one.
 */
function get_uid(html) {
    const datasetUid = html.dataset?.['userId'];
    if (datasetUid) {
        return datasetUid;
    }

    // Fall back to the profile link held by the first child.
    try {
        const href = html.children[0]['href'];
        // Prefer the id right after the host: stripping every non-digit from the
        // whole URL would also glue digits from a query string or fragment
        // (e.g. "?spm_id_from=333.999") onto the uid.
        const spaceLink = /space\.bilibili\.com\/(\d+)/.exec(href);
        const userId = spaceLink ? spaceLink[1] : href.replace(/[^\d]/g, "");
        if (userId) {
            return userId;
        }
    } catch (error) {
        console.error("Failed to get userId from children[0].href", error);
    }

    // Both lookups failed.
    return null;
}

/**
 * Collects the user elements of all comments currently in the document.
 *
 * The presence of a `fixed-header` element selects the lookup: when present,
 * `user-name` and `sub-user-name` elements are gathered and ordered by their
 * position in the document; otherwise the `user` elements are used.
 *
 * Both branches return a Set snapshot, so callers can rely on `.size` and on
 * iteration regardless of which layout is active.
 *
 * @returns {Set<Element>} The user elements, in document order.
 */
function get_comment_list() {
    const is_new = document.getElementsByClassName('fixed-header').length !== 0;
    if (!is_new) {
        // Copy the live HTMLCollection so the result has the same shape as below.
        return new Set(document.getElementsByClassName('user'));
    }

    const allComments = [
        ...document.getElementsByClassName('user-name'),
        ...document.getElementsByClassName('sub-user-name'),
    ];
    // Interleave top-level names and reply names in the order they appear on the page.
    allComments.sort((a, b) => (a.compareDocumentPosition(b) & Node.DOCUMENT_POSITION_FOLLOWING ? -1 : 1));
    return new Set(allComments);
}

/**
 * Runs one crawl pass over the current comment list.
 *
 * The list is walked twice: first with `load_local_only = true`, so users with
 * cached results are labelled immediately, then with `load_local_only = false`
 * for the users that need network requests. The pass stops as soon as
 * `thread_number` no longer equals `current_thread_number`, i.e. a newer pass
 * has been started.
 *
 * A failure while processing one user is logged and does not abort the pass.
 *
 * @param {number} current_thread_number - Generation number of this pass.
 * @returns {Promise<void>}
 */
async function readCommentListHtml(current_thread_number) {
    console.log(`starting crawling...`);

    // Snapshot into an array: works for a Set as well as for an HTMLCollection
    // (which has no `.size`) and keeps both passes on the same elements.
    const comment_list = Array.from(get_comment_list());
    console.log("comment_list.size = " + comment_list.length);

    for (const load_local_only of [true, false]) {
        for (const html of comment_list) {
            if (current_thread_number !== thread_number) return;
            try {
                await updateUserHtml(html, current_thread_number, load_local_only);
            } catch (error) {
                // Keep going: one broken user must not cancel everyone after it.
                console.error("Failed to analyze a comment author", error);
            }
        }
    }
}

/**
 * Maps a keyword-hit count to a level and its display tag.
 *
 * Counts up to 10 map directly to the level of the same number. Above that the
 * level is 11 and rises at the thresholds 15, 20, 25, 35, 50, 75 and 100, up
 * to a maximum of 18.
 *
 * @param {number} count - Number of dynamics that matched a keyword.
 * @param {number} total_count - Number of dynamics inspected (currently unused).
 * @returns {[number, string]} The level and its tag, e.g. [12, "LV.12"].
 */
function analyze(count, total_count) {
    // "LV.0" .. "LV.18"
    const tags = Array.from({ length: 19 }, (_, index) => `LV.${index}`);

    if (count <= 10) {
        return [count, tags[count]];
    }

    // [minimum count, level], highest first so the first hit wins.
    const ladder = [
        [100, 18],
        [75, 17],
        [50, 16],
        [35, 15],
        [25, 14],
        [20, 13],
        [15, 12],
    ];
    const rung = ladder.find(([minimum]) => count >= minimum);
    const level = rung ? rung[1] : 11;
    return [level, tags[level]];
}

/**
 * Returns the CSS color used to render a level tag.
 *
 * Levels are grouped into bands: up to 4, up to 8, up to 12, up to 15, and
 * everything above.
 *
 * @param {number} level - Level as produced by analyze().
 * @returns {string} An `rgb(...)` color string.
 */
function getColorFromLevel(level) {
    // [highest level of the band, color]
    const bands = [
        [4, "rgb(84,93,101)"],
        [8, "rgb(94,228,65)"],
        [12, "rgb(28,71,209)"],
        [15, "rgb(156,7,234)"],
    ];
    for (const [ceiling, color] of bands) {
        if (level <= ceiling) {
            return color;
        }
    }
    return "rgb(243,137,6)";
}


/**
 * Shows the level computed from `count` next to a user element.
 *
 * Reuses the element's existing `b.analyze-result` badge when there is one;
 * otherwise a new badge is created, styled and appended to `html`.
 *
 * @param {Element} html - The user element of a comment.
 * @param {number} count - Number of dynamics that matched a keyword.
 * @param {number} total_count - Number of dynamics inspected.
 * @returns {void}
 */
function updateHtmlWithCount(html, count, total_count) {
    const [level, tag] = analyze(count, total_count);

    let badge = html.querySelector('b.analyze-result');
    const isNewBadge = !badge;
    if (isNewBadge) {
        badge = document.createElement('b');
        badge.className = 'analyze-result';
    }

    badge.innerHTML = tag;
    badge.style.color = getColorFromLevel(level);
    badge.style.fontWeight = '900';  // bold
    badge.style.fontSize = '120%';   // 120% of the inherited size

    if (isNewBadge) {
        html.appendChild(badge);
    }
}


/**
 * Reads the per-uid result cache from localStorage.
 *
 * @param {string} storageKey - localStorage key holding the JSON-encoded cache.
 * @returns {Object} The cache object; empty when nothing usable is stored.
 */
function loadUidCache(storageKey) {
    try {
        const parsed = JSON.parse(localStorage.getItem(storageKey));
        return parsed !== null && typeof parsed === 'object' ? parsed : {};
    } catch (error) {
        console.error(`Ignoring unreadable cache under "${storageKey}"`, error);
        return {};
    }
}

/**
 * Performs one GET request through GM_xmlhttpRequest and parses the JSON body.
 *
 * Never rejects: transport errors, timeouts, aborts, non-200 statuses and
 * unparsable bodies are logged and reported as null.
 *
 * @param {string} url - Request URL.
 * @param {Object} headers - Request headers.
 * @returns {Promise<?Object>} The parsed response body, or null on failure.
 */
function requestDynamicPage(url, headers) {
    return new Promise((resolve) => {
        GM_xmlhttpRequest({
            method: 'GET',
            url: url,
            headers: headers,
            timeout: 15000,  // without a timeout a stalled request would block the crawl forever
            onload: function (res) {
                if (res.status !== 200) {
                    console.log(`Request failed: ${res.status} ${res.statusText}`);
                    resolve(null);
                    return;
                }
                try {
                    resolve(JSON.parse(res.response));
                } catch (error) {
                    // A throw inside this callback would otherwise leave the promise pending.
                    console.error("Response is not valid JSON", error);
                    resolve(null);
                }
            },
            onerror: function (error) {
                console.error(error);
                resolve(null);
            },
            ontimeout: function () {
                console.log(`Request timed out: ${url}`);
                resolve(null);
            },
            onabort: function () {
                console.log(`Request aborted: ${url}`);
                resolve(null);
            }
        });
    });
}

/**
 * Analyzes one comment author and shows the resulting level on `html`.
 *
 * A result cached in localStorage (key `uidData`) that is younger than 30 days
 * is used directly. Otherwise, unless `load_local_only` is set, the user's
 * dynamics are fetched page by page and scanned for the keywords in `mxz_tags`
 * until there are no more pages, more than 222 dynamics were inspected, or more
 * than 100 matched. Only a crawl that ran to one of those ends is cached and
 * marked with the `analyze-done` class; a crawl interrupted by a newer pass or
 * by repeated request failures stores nothing, so it is retried later instead
 * of freezing a partial count for 30 days.
 *
 * @param {Element} html - The user element of a comment.
 * @param {number} current_thread_number - Generation number of the calling pass;
 *     the crawl stops once it differs from `thread_number`.
 * @param {boolean} load_local_only - When true, everything that needs an HTTP
 *     request is skipped.
 * @returns {Promise<void>}
 */
async function updateUserHtml(html, current_thread_number, load_local_only) {
    const findBadge = () => html.querySelector('b.analyze-result');
    const markDone = () => {
        const badge = findBadge();
        if (badge) badge.classList.add('analyze-done');
    };

    const existingB = findBadge();
    if (existingB && existingB.classList.contains('analyze-done')) return;  // already analyzed

    const uid = get_uid(html);
    if (!uid) return;  // nothing to query, and never cache under the key "null"

    const localStorageKey = 'uidData';
    const DAY30 = 30 * 24 * 60 * 60 * 1000;
    const now = Date.now();

    const cachedData = loadUidCache(localStorageKey)[uid];
    if (cachedData && now - cachedData.updated_timestamp < DAY30) {
        updateHtmlWithCount(html, cachedData["count"], cachedData["total_count"]);
        markDone();
        return;
    }
    if (load_local_only) return;

    const headers = {
        'authority': 'api.bilibili.com',
        'method': 'GET',
        'path': `/x/polymer/web-dynamic/v1/feed/space?offset=&host_mid=${uid}&timezone_offset=420&platform=web&features=itemOpusStyle,listOnlyfans,opusBigCover,onlyfansVote&web_location=333.999`,
        'scheme': 'https',
        'Accept': '*/*',
        'Accept-Encoding': 'gzip, deflate, br, zstd',
        'Accept-Language': 'en-US,en;q=0.9',
        'Origin': 'https://space.bilibili.com',
        'Priority': 'u=1, i',
        'Referer': `https://space.bilibili.com/${uid}/dynamic`,
        'Sec-Ch-Ua': '"Chromium";v="124", "Google Chrome";v="124", "Not-A.Brand";v="99"',
        'Sec-Ch-Ua-Mobile': '?0',
        'Sec-Ch-Ua-Platform': '"Windows"',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-site',
        'User-Agent': getRandomUserAgent()
    };

    // Give up on this user after this many failed requests in a row.
    const MAX_CONSECUTIVE_FAILURES = 5;
    let count = 0;
    let total_count = 0;
    let offset = "";
    let has_more = true;
    let minDelay = 200;
    let maxDelay = 500;
    let consecutiveFailures = 0;

    while (has_more && total_count <= 222 && count <= 100
        && consecutiveFailures < MAX_CONSECUTIVE_FAILURES
        && current_thread_number === thread_number) {
        const url = `https://api.bilibili.com/x/polymer/web-dynamic/v1/feed/space?offset=${offset}&host_mid=${uid}&timezone_offset=420&platform=web&features=itemOpusStyle,listOnlyfans,opusBigCover,onlyfansVote&web_location=333.999`;
        await randomDelay(minDelay, maxDelay);
        const data = await requestDynamicPage(url, headers);

        if (data && data.code === 0 && data.data) {
            const items = Array.isArray(data.data.items) ? data.data.items : [];
            consecutiveFailures = 0;
            // Slow down after the first successful page.
            minDelay = 700;
            maxDelay = 1800;
            count += getKeywordCount(items, mxz_tags);
            total_count += items.length;
            has_more = Boolean(data.data.has_more);
            offset = data.data.offset ?? "";

            console.log(`uid = ${uid}, count = ${count}, total_count = ${total_count}, thread_number = ${current_thread_number}`);
            updateHtmlWithCount(html, count, total_count);
        } else {
            consecutiveFailures++;
            if (data) {
                // The API answered but refused the request: back off before retrying.
                console.log(`Request success with status 200, but code is ${data.code}, minDelay = ${minDelay}, maxDelay = ${maxDelay}`);
                minDelay *= 3;
                maxDelay *= 3;
            }
        }
    }

    // The loop can also end because a newer pass took over or requests kept failing;
    // in that case the counts are partial and must not be persisted.
    const finished = !has_more || total_count > 222 || count > 100;
    if (!finished) return;

    // Re-read right before writing so entries stored meanwhile are not overwritten.
    const uidDataMap = loadUidCache(localStorageKey);
    uidDataMap[uid] = { "updated_timestamp": now, "count": count, "total_count": total_count };
    try {
        localStorage.setItem(localStorageKey, JSON.stringify(uidDataMap));
    } catch (error) {
        console.error("Failed to persist analysis cache", error);
    }
    markDone();
}

/**
 * Computes the SHA-256 digest of a string's UTF-8 encoding.
 *
 * @param {string} str - Text to hash.
 * @returns {Promise<string>} The digest as 64 lowercase hex characters.
 */
async function computeHash(str) {
    const digest = await crypto.subtle.digest('SHA-256', new TextEncoder().encode(str));
    let hex = '';
    for (const byte of new Uint8Array(digest)) {
        hex += byte.toString(16).padStart(2, '0');
    }
    return hex;
}

// Entry point: polls the page every 4 seconds and starts a new crawl pass whenever
// the set of comment authors has changed.
(function () {
    let lastCommentListSize = 0;
    let lastCommentListHash = "";

    setInterval(async () => {
        try {
            const commentList = get_comment_list();
            const currentSize = commentList.size;

            // Fingerprint of the authors currently shown, in page order.
            const userIds = Array.from(commentList, (element) => get_uid(element));
            const commentListHash = await computeHash(JSON.stringify(userIds));

            const changed = currentSize !== lastCommentListSize || lastCommentListHash !== commentListHash;
            lastCommentListSize = currentSize;
            lastCommentListHash = commentListHash;
            if (!changed) return;

            // Bumping the counter makes any pass that is still running stop.
            thread_number++;
            // Deliberately not awaited so polling continues while the pass runs;
            // failures are reported instead of becoming unhandled rejections.
            readCommentListHtml(thread_number).catch((error) => {
                console.error("Crawl pass failed", error);
            });
        } catch (error) {
            // A failed poll must not surface as an unhandled rejection; the next tick retries.
            console.error("Failed to inspect the comment list", error);
        }
    }, 4000);
})();

QingJ © 2025

镜像随时可能失效,请加Q群300939539或关注我们的公众号极客氢云获取最新地址