// ==UserScript==
// @name mxz_crawler
// @namespace www.cber.ltd
// @version 0.1.0
// @description B站评论区原神玩家纯度检测
// @author Tom
// @match https://www.bilibili.com/video/*
// @match https://t.bilibili.com/*
// @match https://space.bilibili.com/*
// @match https://www.bilibili.com/read/*
// @match https://www.bilibili.com/opus/*
// @icon https://static.hdslb.com/images/favicon.ico
// @connect bilibili.com
// @grant GM_xmlhttpRequest
// @license MIT
// @run-at document-end
// ==/UserScript==
// TODO: 如果已经有相同的人在不同的地方留言,应该直接给他加上level: DONE
// TODO: 如何解决验证的问题?调低每个人需要的动态数量? : DONE
// TODO: 取名!发帖!
// TODO: 翻页有bug,在一个评论区下翻页无法显示新的: DONE
// TODO: 加入缓存机制? 存在 localStorage 里? user-id: level, timestamp (设置30天): DONE
// TODO: 仙的tag?
// TODO: 在个人空间主页分析?更加详细的分析?在评论区粗略分析?
// TODO: 结合关注列表进行分析?看过的主播?
// Generation counter for crawl passes. The polling loop at the bottom of this file
// increments it whenever the comment list changes; a running pass compares the number
// it was started with against this value and stops once they differ.
var thread_number = 0;
// Keywords (Genshin Impact / miHoYo related terms) that are substring-matched against
// the text of each dynamic when counting how many of a user's posts mention them.
const mxz_tags = ["原神", "原宝", "崩坏", "星铁", "星穹铁道", "米哈游", "芙芙", "提瓦特", "旅行者", "派蒙", "稻妻", "枫丹", "蒙德", "璃月", "尘歌壶"];
/**
 * Collects every string that contains at least one Chinese character
 * (code points U+4E00 through U+9FA5) from anywhere inside a value.
 *
 * Strings are gathered depth-first, following own enumerable properties in
 * their natural key order; inherited properties are ignored.
 *
 * @param {*} obj - A string, an object/array to walk, or anything else (ignored).
 * @returns {string[]} The matching strings in visiting order.
 */
function extractStringsWithChineseFromObject(obj) {
  const hasChinese = /[\u4e00-\u9fa5]/;
  const found = [];

  const visit = (node) => {
    if (typeof node === 'string') {
      if (hasChinese.test(node)) {
        found.push(node);
      }
      return;
    }
    if (node === null || typeof node !== 'object') {
      return;
    }
    // Object.keys yields exactly the own enumerable string keys, in order.
    for (const key of Object.keys(node)) {
      visit(node[key]);
    }
  };

  visit(obj);
  return found;
}
/**
 * Picks one User-Agent string uniformly at random from a fixed list.
 *
 * Every entry must be a valid single-line HTTP header value: a line break
 * inside a header value is illegal and can make the request fail or be
 * rejected, so none of the literals may contain "\r" or "\n".
 *
 * @returns {string} A browser User-Agent string.
 */
function getRandomUserAgent() {
  const userAgents = [
    "Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Mobile Safari/537.36",
    "Mozilla/5.0 (iPhone; CPU iPhone OS 17_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) EdgiOS/121.0.2277.107 Version/17.0 Mobile/15E148 Safari/604.1",
    "Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Mobile Safari/537.36 EdgA/121.0.0.0",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
    // Intentionally kept as a second copy of the first entry so the original weighting is unchanged.
    "Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Mobile Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36 Edg/121.0.0.0",
    "Mozilla/5.0 (iPhone; CPU iPhone OS 17_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Mobile/15E148 Safari/604.1 OPX/2.1.0"
  ];
  const randomIndex = Math.floor(Math.random() * userAgents.length);
  return userAgents[randomIndex];
}
/**
 * Counts how many items mention at least one of the given keywords.
 *
 * Each item contributes at most 1 to the result, no matter how many of its
 * strings or how many keywords match. The strings of an item are obtained
 * with extractStringsWithChineseFromObject.
 *
 * @param {Iterable<*>} items - The items to inspect (one per dynamic).
 * @param {Iterable<string>} keywords - Substrings to look for.
 * @returns {number} Number of items with at least one keyword hit.
 */
function getKeywordCount(items, keywords) {
  // True when `text` is a string that contains any keyword; non-strings never match.
  const mentionsKeyword = (text) => {
    if (typeof text !== 'string') {
      return false;
    }
    for (const keyword of keywords) {
      if (text.includes(keyword)) {
        return true;
      }
    }
    return false;
  };

  let matchedItems = 0;
  for (const item of items) {
    const texts = extractStringsWithChineseFromObject(item);
    if (texts.some((text) => mentionsKeyword(text))) {
      matchedItems += 1;
    }
  }
  return matchedItems;
}
/**
 * Waits for a random whole number of milliseconds in [minDelay, maxDelay].
 *
 * @param {number} minDelay - Shortest wait in milliseconds (inclusive).
 * @param {number} maxDelay - Longest wait in milliseconds (inclusive).
 * @returns {Promise<undefined>} Resolves once the timer fires.
 */
function randomDelay(minDelay, maxDelay) {
  return new Promise((resolve) => {
    const span = maxDelay - minDelay + 1;
    const waitMs = Math.floor(Math.random() * span) + minDelay;
    setTimeout(resolve, waitMs);
  });
}
/**
 * Reads the user id attached to a commenter element.
 *
 * The `data-user-id` attribute is preferred. When it is absent or empty, the
 * id is taken from the `href` of the element's first child by stripping every
 * non-digit character.
 *
 * @param {HTMLElement} html - The commenter element.
 * @returns {?string} The user id, or null when neither source yields one.
 */
function get_uid(html) {
  const idFromDataset = html.dataset.userId;
  if (idFromDataset) {
    return idFromDataset;
  }

  // Fallback: derive the id from the profile link of the first child.
  try {
    const link = html.children[0].href;
    const digitsOnly = link.replace(/[^\d]/g, "");
    if (digitsOnly) {
      return digitsOnly;
    }
  } catch (error) {
    console.error("Failed to get userId from children[0].href", error);
  }

  // Neither lookup produced an id.
  return null;
}
/**
 * Collects the commenter elements currently present on the page.
 *
 * The page is treated as the "new" layout when an element with class
 * `fixed-header` exists; in that case elements with class `user-name` and
 * `sub-user-name` are gathered and ordered by document position. Otherwise
 * the elements with class `user` are used.
 *
 * The result is always a Set snapshot, so callers can rely on `.size` and on
 * a stable iteration order in both layouts (a raw HTMLCollection is live and
 * has `.length` but no `.size`).
 *
 * @returns {Set<Element>} Commenter elements in document order, without duplicates.
 */
function get_comment_list() {
  const is_new = document.getElementsByClassName('fixed-header').length !== 0;
  if (!is_new) {
    return new Set(Array.from(document.getElementsByClassName('user')));
  }

  const nameElements = [
    ...Array.from(document.getElementsByClassName('user-name')),
    ...Array.from(document.getElementsByClassName('sub-user-name')),
  ];
  // Interleave top-level and reply authors in the order they appear on the page.
  nameElements.sort((a, b) => {
    if (a === b) {
      return 0;
    }
    return (a.compareDocumentPosition(b) & Node.DOCUMENT_POSITION_FOLLOWING) ? -1 : 1;
  });
  return new Set(nameElements);
}
/**
 * Runs one crawl pass over the current comment list.
 *
 * The list is walked twice: first with `load_local_only = true`, so every
 * commenter whose result is already cached gets a badge immediately, then
 * with `load_local_only = false` to fetch the remaining ones. The pass stops
 * as soon as `thread_number` no longer equals `current_thread_number`, i.e.
 * a newer pass has been started.
 *
 * A failure while processing one commenter is logged and the pass moves on;
 * the caller does not await this function, so letting the error escape would
 * end the pass as an unhandled rejection.
 *
 * @param {number} current_thread_number - Generation number of this pass.
 * @returns {Promise<void>}
 */
async function readCommentListHtml(current_thread_number) {
  console.log(`starting crawling...`);
  const comment_list = get_comment_list();
  // Works for a Set (.size) as well as for an array-like collection (.length).
  const commentCount = comment_list.size ?? comment_list.length;
  console.log("comment_list.size = " + commentCount);
  if (commentCount === 0) return;

  for (const load_local_only of [true, false]) {
    for (const html of comment_list) {
      if (current_thread_number !== thread_number) return;
      try {
        await updateUserHtml(html, current_thread_number, load_local_only);
      } catch (error) {
        console.error("Failed to analyze a commenter element", error);
      }
    }
  }
}
/**
 * Maps a keyword-hit count to a level and its display tag.
 *
 * Counts up to 10 map one-to-one to their level. Above that, the level is
 * taken from a threshold table: 11 by default, then 12 from 15 hits, 13 from
 * 20, 14 from 25, 15 from 35, 16 from 50, 17 from 75 and 18 from 100.
 *
 * @param {number} count - Number of items that mentioned a keyword.
 * @param {number} total_count - Number of items inspected (currently unused).
 * @returns {[number, string]} The level and its "LV.n" tag.
 */
function analyze(count, total_count) {
  const tags = [
    "LV.0", "LV.1", "LV.2", "LV.3", "LV.4",
    "LV.5", "LV.6", "LV.7", "LV.8", "LV.9",
    "LV.10", "LV.11", "LV.12", "LV.13", "LV.14",
    "LV.15", "LV.16", "LV.17", "LV.18",
  ];

  if (count <= 10) {
    return [count, tags[count]];
  }

  // [minimum count, level], highest threshold first so the first hit wins.
  const thresholds = [
    [100, 18],
    [75, 17],
    [50, 16],
    [35, 15],
    [25, 14],
    [20, 13],
    [15, 12],
  ];
  const reached = thresholds.find(([minimum]) => count >= minimum);
  const level = reached ? reached[1] : 11;
  return [level, tags[level]];
}
/**
 * Chooses the badge colour for a level.
 *
 * @param {number} level - Level as returned by analyze.
 * @returns {string} A CSS `rgb(...)` colour: one per band (up to 4, 8, 12, 15),
 *   and a final colour for anything above 15.
 */
function getColorFromLevel(level) {
  // [highest level of the band, colour], lowest band first.
  const bands = [
    [4, "rgb(84,93,101)"],
    [8, "rgb(94,228,65)"],
    [12, "rgb(28,71,209)"],
    [15, "rgb(156,7,234)"],
  ];
  for (const [upperBound, color] of bands) {
    if (level <= upperBound) {
      return color;
    }
  }
  return "rgb(243,137,6)";
}
/**
 * Shows the level badge for a commenter.
 *
 * The level and tag come from analyze(count, total_count). An existing
 * `<b class="analyze-result">` inside `html` is updated in place; otherwise a
 * new one is created and appended.
 *
 * @param {Element} html - The commenter element that hosts the badge.
 * @param {number} count - Number of items that mentioned a keyword.
 * @param {number} total_count - Number of items inspected.
 */
function updateHtmlWithCount(html, count, total_count) {
  const [level, tag] = analyze(count, total_count);

  let badge = html.querySelector('b.analyze-result');
  const isNewBadge = !badge;
  if (isNewBadge) {
    badge = document.createElement('b');
    badge.className = 'analyze-result';
  }

  badge.innerHTML = tag;
  badge.style.color = getColorFromLevel(level);
  badge.style.fontWeight = '900'; // bold
  badge.style.fontSize = '120%'; // 120% of the surrounding font size

  if (isNewBadge) {
    html.appendChild(badge);
  }
}
/**
 * Computes and displays the level badge for one commenter element.
 *
 * Results are cached in localStorage under the key 'uidData' as
 * `{ [uid]: { updated_timestamp, count, total_count } }` and reused for 30 days.
 * Without a fresh cache entry the user's dynamics are fetched page by page
 * until there are no more pages, more than 222 items were inspected, more
 * than 100 keyword hits were found, or a newer crawl pass has been started.
 *
 * The result is written to the cache, and the badge is marked with the class
 * `analyze-done`, only when it is authoritative: it came from the cache or
 * the crawl ran to one of its regular stop conditions. A crawl interrupted by
 * a newer pass leaves its partial badge unmarked and uncached so that the
 * user is analyzed again later.
 *
 * @param {Element} html - The commenter element.
 * @param {number} current_thread_number - Generation number of the calling pass.
 * @param {boolean} load_local_only - When true, only the cache is consulted and
 *   no HTTP request is made.
 * @returns {Promise<void>} Rejects when a request fails at the network level or
 *   a response cannot be processed.
 */
async function updateUserHtml(html, current_thread_number, load_local_only) {
  const findBadge = () => html.querySelector('b.analyze-result');

  const existingB = findBadge();
  if (existingB && existingB.classList.contains('analyze-done')) return; // already analyzed

  const uid = get_uid(html);
  // Without an id there is nothing to look up; querying or caching "null" would be meaningless.
  if (!uid) return;

  const localStorageKey = 'uidData';
  const DAY30 = 30 * 24 * 60 * 60 * 1000;
  const readCache = () => JSON.parse(localStorage.getItem(localStorageKey)) || {};
  const now = Date.now();
  const currentData = readCache()[uid];

  if (currentData && now - currentData.updated_timestamp < DAY30) {
    // Fresh cache entry: use it as is.
    updateHtmlWithCount(html, currentData["count"], currentData["total_count"]);
  } else if (load_local_only) {
    // Nothing cached and no network allowed: leave any partial badge unmarked.
    return;
  } else {
    const headers = {
      'authority': 'api.bilibili.com',
      'method': 'GET',
      'path': `/x/polymer/web-dynamic/v1/feed/space?offset=&host_mid=${uid}&timezone_offset=420&platform=web&features=itemOpusStyle,listOnlyfans,opusBigCover,onlyfansVote&web_location=333.999`,
      'scheme': 'https',
      'Accept': '*/*',
      'Accept-Encoding': 'gzip, deflate, br, zstd',
      'Accept-Language': 'en-US,en;q=0.9',
      'Origin': 'https://space.bilibili.com',
      'Priority': 'u=1, i',
      'Referer': `https://space.bilibili.com/${uid}/dynamic`,
      'Sec-Ch-Ua': '"Chromium";v="124", "Google Chrome";v="124", "Not-A.Brand";v="99"',
      'Sec-Ch-Ua-Mobile': '?0',
      'Sec-Ch-Ua-Platform': '"Windows"',
      'Sec-Fetch-Dest': 'empty',
      'Sec-Fetch-Mode': 'cors',
      'Sec-Fetch-Site': 'same-site',
      'User-Agent': getRandomUserAgent()
    };

    let count = 0;
    let total_count = 0;
    let offset = "";
    let has_more = true;
    let minDelay = 200;
    let maxDelay = 500;
    // True while none of the regular stop conditions has been reached.
    const crawlUnfinished = () => has_more && total_count <= 222 && count <= 100;

    while (crawlUnfinished() && current_thread_number === thread_number) {
      const url = `https://api.bilibili.com/x/polymer/web-dynamic/v1/feed/space?offset=${offset}&host_mid=${uid}&timezone_offset=420&platform=web&features=itemOpusStyle,listOnlyfans,opusBigCover,onlyfansVote&web_location=333.999`;
      await randomDelay(minDelay, maxDelay);
      await new Promise((resolve, reject) => {
        GM_xmlhttpRequest({
          method: 'GET',
          url: url,
          headers: headers,
          onload: function (res) {
            // Any exception in here must settle the promise, otherwise the pass would hang forever.
            try {
              if (res.status === 200) {
                const data = JSON.parse(res.response);
                if (data.code === 0) {
                  const items = data.data.items ?? [];
                  minDelay = 700;
                  maxDelay = 1800;
                  count += getKeywordCount(items, mxz_tags);
                  total_count += items.length;
                  has_more = data.data.has_more;
                  offset = data.data.offset;
                  console.log(`uid = ${uid}, count = ${count}, total_count = ${total_count}, thread_number = ${current_thread_number}`);
                  updateHtmlWithCount(html, count, total_count);
                } else {
                  console.log(`Request success with status 200, but code is ${data.code}, minDelay = ${minDelay}, maxDelay = ${maxDelay}`);
                  minDelay *= 3;
                  maxDelay *= 3;
                }
              } else {
                console.log(`Request failed: ${res.status} ${res.statusText}`);
                // Back off on HTTP errors as well instead of retrying at full speed.
                minDelay *= 3;
                maxDelay *= 3;
              }
              resolve(1);
            } catch (error) {
              reject(error);
            }
          },
          onerror: function (error) {
            console.error(error);
            reject(error);
          }
        });
      });
    }

    // Interrupted by a newer pass: the counts are partial, so neither cache nor finalize them.
    if (crawlUnfinished()) return;

    // Re-read the map so entries stored while this crawl was running are not overwritten.
    const uidDataMap = readCache();
    uidDataMap[uid] = { "updated_timestamp": now, "count": count, "total_count": total_count };
    localStorage.setItem(localStorageKey, JSON.stringify(uidDataMap));
  }

  const badge = findBadge();
  if (badge) badge.classList.add('analyze-done'); // result is final for this element
}
/**
 * Computes the SHA-256 digest of a string's UTF-8 encoding.
 *
 * @param {string} str - The text to hash.
 * @returns {Promise<string>} The digest as 64 lowercase hexadecimal characters.
 */
async function computeHash(str) {
  const utf8Bytes = new TextEncoder().encode(str);
  const digest = await crypto.subtle.digest('SHA-256', utf8Bytes);
  let hex = '';
  for (const byte of new Uint8Array(digest)) {
    hex += byte.toString(16).padStart(2, '0');
  }
  return hex;
}
// Entry point: every 4 seconds, fingerprint the current comment list (its size plus a
// SHA-256 hash of the commenters' user ids). When the fingerprint differs from the
// previous tick, bump thread_number (which tells any running pass to stop) and start
// a new crawl pass.
(function () {
  let lastCommentListSize = 0;
  let lastCommentListHash = "";
  setInterval(async () => {
    try {
      const commentList = get_comment_list();
      // Works for a Set (.size) as well as for an array-like collection (.length).
      const currentSize = commentList.size ?? commentList.length;
      const userIds = Array.from(commentList, (element) => get_uid(element));
      const commentListHash = await computeHash(JSON.stringify(userIds));
      if (currentSize !== lastCommentListSize || lastCommentListHash !== commentListHash) {
        lastCommentListSize = currentSize;
        thread_number++;
        // Deliberately not awaited: the pass runs in the background across many ticks.
        // Its failures are logged here so they do not surface as unhandled rejections.
        readCommentListHtml(thread_number).catch((error) => {
          console.error("Crawl pass failed", error);
        });
      }
      lastCommentListHash = commentListHash;
    } catch (error) {
      // Keep polling even if one tick fails (e.g. hashing is unavailable).
      console.error("Comment list polling failed", error);
    }
  }, 4000);
})();