// ==UserScript==
// @name 信息抓取
// @namespace http://tampermonkey.net/
// @version 0.0.2
// @description 文章、笔记信息抓取
// @author dabennn
// @match https://*.xiaohongshu.com/explore/*
// @match https://*.douyin.com/video/*
// @match https://baijiahao.baidu.com/*
// @match https://mp.weixin.qq.com/*
// @match https://*.sohu.com/a/*
// @match https://*.toutiao.com/article/*
// @match https://*.toutiao.com/w/*
// @icon https://www.google.com/s2/favicons?sz=64&domain=github.com
// @license MIT
// @grant GM_notification
// @grant unsafeWindow
// ==/UserScript==
(function() {
'use strict';
function formatTimestamp(timestamp) {
const date = new Date(timestamp);
const year = date.getFullYear();
const month = String(date.getMonth() + 1).padStart(2, '0');
const day = String(date.getDate()).padStart(2, '0');
const hours = String(date.getHours()).padStart(2, '0');
const minutes = String(date.getMinutes()).padStart(2, '0');
const seconds = String(date.getSeconds()).padStart(2, '0');
return `${year}-${month}-${day} ${hours}:${minutes}:${seconds}`;
}
function getElementText(domSelector) {
const el = document.querySelector(domSelector);
return el ? el.textContent.trim() : '';
}
function getElementTextByXPath(xpath) {
const el = document.evaluate(xpath, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
return el ? el.textContent.trim() : '';
}
function createInfoModal(infoText, copyText) {
// 创建一个用于显示信息的div元素作为浮窗
const infoDiv = document.createElement('div');
infoDiv.style.position = 'fixed';
infoDiv.style.top = '0';
infoDiv.style.right = '0';
infoDiv.style.backgroundColor = 'rgba(255, 255, 255, 0.8)';
infoDiv.style.padding = '10px';
infoDiv.style.border = '1px solid gray';
infoDiv.style.zIndex = '9999';
// 创建关闭按钮元素
const closeButton = document.createElement('span');
closeButton.id = '__close_btn__';
closeButton.textContent = '×';
closeButton.style.cursor = 'pointer';
closeButton.style.float = 'right';
closeButton.style.fontSize = '18px';
closeButton.style.color = 'red';
// 创建复制按钮元素
const copyButton = document.createElement('span');
copyButton.id = '__copy__';
copyButton.textContent = '复制';
copyButton.style.cursor = 'pointer';
copyButton.style.float = 'right';
copyButton.style.marginRight = '5px';
copyButton.style.fontSize = '18px';
copyButton.style.color = 'blue';
// 先将关闭按钮添加到浮窗中
infoDiv.appendChild(closeButton);
// 再将复制按钮添加到浮窗中
infoDiv.appendChild(copyButton);
// 将信息拼接成字符串并设置到浮窗的innerHTML中
infoDiv.innerHTML += infoText;
// 将浮窗添加到页面的body元素中
document.body.appendChild(infoDiv);
document.querySelector('#__close_btn__').addEventListener('click', function() {
infoDiv.parentNode.removeChild(infoDiv);
});
document.querySelector('#__copy__').onclick = function() {
navigator.clipboard.writeText(copyText).then(function() {
GM_notification('已复制到剪贴板');
}).catch(function(err) {
GM_notification('复制失败');
console.error('复制失败:', err);
});
};
}
function createFloatButton() {
const button = document.createElement('div')
button.textContent = '信息抓取'
button.id = '__float_btn__'
button.style.position = 'fixed'
button.style.top = '150px'
button.style.right = '0'
button.style.zIndex = '9999'
button.style.backgroundColor = 'rgba(255, 100, 100, 0.9)'
button.style.color = '#fff'
button.style.borderRadius = '4px'
button.style.padding = '10px'
button.style.cursor = 'pointer'
// 关闭按钮
const closeButton = document.createElement('span')
closeButton.textContent = '×';
closeButton.style.position = 'absolute'
closeButton.style.top = '-3px'
closeButton.style.right = '2px'
closeButton.style.fontSize = '16px'
closeButton.style.lineHeight = '1'
button.appendChild(closeButton)
closeButton.addEventListener('click', (e) => {
e.stopPropagation()
button.parentNode.removeChild(button)
})
document.body.appendChild(button)
}
const formatTextInfo = (texts) => texts.join('<br>').replace(/undefined/g, '未获取到')
const formatNumInfo = (texts) => texts.join(' ').replace(/undefined/g, '0')
const getCopyTime = (time) => time.replace(/[年月]/g, '/').replace(/日/, '')
const getInfoTexts = () => {
let textInfo = ''
let copyInfo = ''
if (window.location.href.includes('xiaohongshu.com/explore')) {
const state = unsafeWindow.__INITIAL_STATE__
const title = getElementText('.note-content .title')
const author = getElementText('.username')
let time = ''
try {
time = formatTimestamp(state.note.noteDetailMap[state.note.firstNoteId.value].note.lastUpdateTime)
} catch (e) {
console.error(e)
}
const wordCount = getElementText('.note-content').length
const likeNum = getElementText('.interact-container .like-wrapper .count')
const commentNum = getElementText('.interact-container .chat-wrapper .count')
const collectNum = getElementText('.interact-container .collect-wrapper .count')
textInfo = formatTextInfo([
`标题:${title}`,
`作者:${author}`,
`发布时间:${time}`,
`字数:${wordCount}`,
`点赞数:${likeNum}`,
`评论数:${commentNum}`,
`收藏数:${collectNum}`,
])
copyInfo = [
getCopyTime(time),
'小红书',
author,
title,
window.location.href,
formatNumInfo([`点赞${likeNum}`, `评论${commentNum}`, `收藏${collectNum}`]),
wordCount
].join('\t')
} else if (window.location.href.includes('douyin.com/video')) {
const title= getElementText('h1 span span + span span span span span span')
const author= getElementTextByXPath('//*[@id="douyin-right-container"]/div[2]/div/div/div[1]/div[4]/div/div[1]/div[2]/a/div/span/span/span/span/span/span')
const time= getElementTextByXPath('//*[@id="douyin-right-container"]/div[2]/div/div/div[1]/div[3]/div/div[2]/div[2]/span/text()[2]')
const duration= getElementText('.time-duration')
const likeNum= getElementTextByXPath('//*[@id="douyin-right-container"]/div[2]/div/div/div[1]/div[3]/div/div[2]/div[1]/div[1]/span')
const commentNum= getElementTextByXPath('//*[@id="douyin-right-container"]/div[2]/div/div/div[1]/div[3]/div/div[2]/div[1]/div[2]/span')
const collectNum= getElementTextByXPath('//*[@id="douyin-right-container"]/div[2]/div/div/div[1]/div[3]/div/div[2]/div[1]/div[3]/span')
const shareNum= getElementTextByXPath('//*[@id="douyin-right-container"]/div[2]/div/div/div[1]/div[3]/div/div[2]/div[1]/div[4]/span')
textInfo = formatTextInfo([
`标题:${title}`,
`作者:${author}`,
`发布时间:${time}`,
`时长:${duration}`,
`点赞数:${likeNum}`,
`评论数:${commentNum}`,
`收藏数:${collectNum}`,
`分享数:${shareNum}`,
])
copyInfo = [
getCopyTime(time),
'抖音视频',
author,
title,
window.location.href,
formatNumInfo([`点赞${likeNum}`, `评论${commentNum}`, `收藏${collectNum}`, `分享${shareNum}`]),
duration
].join('\t')
} else if (window.location.href.includes('sohu.com/a')) {
const title = getElementText('h1')
const author = getElementText('#user-info h4 a')
const time = getElementText('#news-time')
const wordCount = getElementText('mp-editor').length
const readNum = getElementText('.read-num em')
const likeNum = getElementText('.like-c .count')
const commentNum = getElementText('.comment-count')
const collectNum = getElementText('.collection-c .count')
const shareNum = getElementText('.share-c .count')
textInfo = formatTextInfo([
`标题:${title}`,
`作者:${author}`,
`发布时间:${time}`,
`字数:${wordCount}`,
`阅读数:${readNum}`,
`点赞数:${likeNum}`,
`评论数:${commentNum}`,
`收藏数:${collectNum}`,
`分享数:${shareNum}`,
])
copyInfo = [
getCopyTime(time),
'搜狐',
author,
title,
window.location.href,
formatNumInfo([`阅读${readNum}`, `点赞${likeNum}`, `评论${commentNum}`, `收藏${collectNum}`, `分享${shareNum}`]),
wordCount
].join('\t')
} else if (window.location.href.includes('toutiao.com/article')) {
const title= getElementText('h1')
const author= getElementText('.article-meta .name')
const time= getElementText('.article-meta span')
const wordCount= getElementText('.tt-article-content').length
const likeNum= getElementText('.detail-like span')
const commentNum= getElementText('.detail-interaction-comment span')
const collectNum= getElementText('.detail-interaction-collect span')
textInfo = formatTextInfo([
`标题:${title}`,
`作者:${author}`,
`发布时间:${time}`,
`字数:${wordCount}`,
`点赞数:${likeNum}`,
`评论数:${commentNum}`,
`收藏数:${collectNum}`,
])
copyInfo = [
getCopyTime(time),
'今日头条',
author,
title,
window.location.href,
formatNumInfo([`点赞${likeNum}`, `评论${commentNum}`, `收藏${collectNum}`]),
wordCount
].join('\t')
} else if (window.location.href.includes('toutiao.com/w')) {
const title= getElementText('h1')
const author= getElementText('.desc .name')
const time= getElementText('.abstract .time')
const wordCount= getElementText('article').length
const likeNum= getElementText('.detail-like span')
const commentNum= getElementText('.detail-interaction-comment span')
const collectNum= getElementText('.detail-interaction-collect span')
textInfo = formatTextInfo([
`标题:${title}`,
`作者:${author}`,
`发布时间:${time}`,
`字数:${wordCount}`,
`点赞数:${likeNum}`,
`评论数:${commentNum}`,
`收藏数:${collectNum}`,
])
copyInfo = [
getCopyTime(time),
'今日头条',
author,
title,
window.location.href,
formatNumInfo([`点赞${likeNum}`, `评论${commentNum}`, `收藏${collectNum}`]),
wordCount
].join('\t')
} else if (window.location.href.includes('baijiahao.baidu.com')) {
const title = getElementText('#header div')
const author = getElementText('#header [data-testid=author-name]')
const time = getElementText('#header [data-testid=updatetime]')
const wordCount = getElementText('[data-testid=article]').length
const likeNum = getElementText('[data-testid=like-btn] .interact-desc')
const commentNum = getElementText('[data-testid=comment-btn] .interact-desc')
const collectNum = getElementText('[data-testid=favor-btn] .interact-desc')
const shareNum = getElementText('[data-testid=share-btn] .interact-desc')
textInfo = formatTextInfo([
`标题:${title}`,
`作者:${author}`,
`发布时间:${time}`,
`字数:${wordCount}`,
`点赞数:${likeNum}`,
`评论数:${commentNum}`,
`收藏数:${collectNum}`,
`分享数:${shareNum}`,
])
copyInfo = [
getCopyTime(time),
'百度',
author,
title,
window.location.href,
formatNumInfo([`点赞${likeNum}`, `评论${commentNum}`, `收藏${collectNum}`, `分享${shareNum}`]),
wordCount
].join('\t')
} else if (window.location.href.includes('mp.weixin.qq.com')) {
const title = getElementText('h1')
const author = getElementText('#js_name')
const time = getElementText('#publish_time')
const wordCount = getElementText('#js_content').length
textInfo = formatTextInfo([
`标题:${title}`,
`作者:${author}`,
`发布时间:${time}`,
`字数:${wordCount}`,
])
copyInfo = [
getCopyTime(time),
'公众号',
author,
title,
window.location.href,
formatNumInfo([`点赞`, `评论`, `收藏`, `分享`]),
wordCount
].join('\t')
}
return {
textInfo,
copyInfo
}
}
window.addEventListener('load', function() {
createFloatButton()
document.querySelector('#__float_btn__').addEventListener('click', () => {
const { textInfo, copyInfo } = getInfoTexts()
createInfoModal(textInfo, copyInfo);
})
});
})();