信息抓取

文章、笔记信息抓取

目前为 2024-12-30 提交的版本。查看 最新版本

// ==UserScript==
// @name         信息抓取
// @namespace    http://tampermonkey.net/
// @version      0.0.2
// @description  文章、笔记信息抓取
// @author       dabennn
// @match        https://*.xiaohongshu.com/explore/*
// @match        https://*.douyin.com/video/*
// @match        https://baijiahao.baidu.com/*
// @match        https://mp.weixin.qq.com/*
// @match        https://*.sohu.com/a/*
// @match        https://*.toutiao.com/article/*
// @match        https://*.toutiao.com/w/*
// @icon         https://www.google.com/s2/favicons?sz=64&domain=github.com
// @license      MIT
// @grant        GM_notification
// @grant        unsafeWindow
// ==/UserScript==

(function() {
  'use strict';
  function formatTimestamp(timestamp) {
    const date = new Date(timestamp);
    const year = date.getFullYear();
    const month = String(date.getMonth() + 1).padStart(2, '0');
    const day = String(date.getDate()).padStart(2, '0');
    const hours = String(date.getHours()).padStart(2, '0');
    const minutes = String(date.getMinutes()).padStart(2, '0');
    const seconds = String(date.getSeconds()).padStart(2, '0');
    return `${year}-${month}-${day} ${hours}:${minutes}:${seconds}`;
  }
  function getElementText(domSelector) {
    const el = document.querySelector(domSelector);
    return el ? el.textContent.trim() : '';
  }
  function getElementTextByXPath(xpath) {
    const el = document.evaluate(xpath, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
    return el ? el.textContent.trim() : '';
  }
  function createInfoModal(infoText, copyText) {
    // 创建一个用于显示信息的div元素作为浮窗
    const infoDiv = document.createElement('div');
    infoDiv.style.position = 'fixed';
    infoDiv.style.top = '0';
    infoDiv.style.right = '0';
    infoDiv.style.backgroundColor = 'rgba(255, 255, 255, 0.8)';
    infoDiv.style.padding = '10px';
    infoDiv.style.border = '1px solid gray';
    infoDiv.style.zIndex = '9999';

    // 创建关闭按钮元素
    const closeButton = document.createElement('span');
    closeButton.id = '__close_btn__';
    closeButton.textContent = '×';
    closeButton.style.cursor = 'pointer';
    closeButton.style.float = 'right';
    closeButton.style.fontSize = '18px';
    closeButton.style.color = 'red';

    // 创建复制按钮元素
    const copyButton = document.createElement('span');
    copyButton.id = '__copy__';
    copyButton.textContent = '复制';
    copyButton.style.cursor = 'pointer';
    copyButton.style.float = 'right';
    copyButton.style.marginRight = '5px';
    copyButton.style.fontSize = '18px';
    copyButton.style.color = 'blue';

    // 先将关闭按钮添加到浮窗中
    infoDiv.appendChild(closeButton);
    // 再将复制按钮添加到浮窗中
    infoDiv.appendChild(copyButton);

    // 将信息拼接成字符串并设置到浮窗的innerHTML中
    infoDiv.innerHTML += infoText;

    // 将浮窗添加到页面的body元素中
    document.body.appendChild(infoDiv);

    document.querySelector('#__close_btn__').addEventListener('click', function() {
      infoDiv.parentNode.removeChild(infoDiv);
    });

    document.querySelector('#__copy__').onclick = function() {
      navigator.clipboard.writeText(copyText).then(function() {
        GM_notification('已复制到剪贴板');
      }).catch(function(err) {
        GM_notification('复制失败');
        console.error('复制失败:', err);
      });
    };
  }
  function createFloatButton() {
    const button = document.createElement('div')
    button.textContent = '信息抓取'
    button.id = '__float_btn__'
    button.style.position = 'fixed'
    button.style.top = '150px'
    button.style.right = '0'
    button.style.zIndex = '9999'
    button.style.backgroundColor = 'rgba(255, 100, 100, 0.9)'
    button.style.color = '#fff'
    button.style.borderRadius = '4px'
    button.style.padding = '10px'
    button.style.cursor = 'pointer'

    // 关闭按钮
    const closeButton = document.createElement('span')
    closeButton.textContent = '×';
    closeButton.style.position = 'absolute'
    closeButton.style.top = '-3px'
    closeButton.style.right = '2px'
    closeButton.style.fontSize = '16px'
    closeButton.style.lineHeight = '1'

    button.appendChild(closeButton)
    closeButton.addEventListener('click', (e) => {
      e.stopPropagation()
      button.parentNode.removeChild(button)
    })
    document.body.appendChild(button)
  }
  const formatTextInfo = (texts) => texts.join('<br>').replace(/undefined/g, '未获取到')
  const formatNumInfo = (texts) => texts.join(' ').replace(/undefined/g, '0')
  const getCopyTime = (time) => time.replace(/[年月]/g, '/').replace(/日/, '')
  const getInfoTexts = () => {
    let textInfo = ''
    let copyInfo = ''
    if (window.location.href.includes('xiaohongshu.com/explore')) {
      const state = unsafeWindow.__INITIAL_STATE__
      const title = getElementText('.note-content .title')
      const author = getElementText('.username')
      let time = ''
      try {
        time = formatTimestamp(state.note.noteDetailMap[state.note.firstNoteId.value].note.lastUpdateTime)
      } catch (e) {
        console.error(e)
      }
      const wordCount = getElementText('.note-content').length
      const likeNum = getElementText('.interact-container .like-wrapper .count')
      const commentNum = getElementText('.interact-container .chat-wrapper .count')
      const collectNum = getElementText('.interact-container .collect-wrapper .count')
      textInfo = formatTextInfo([
        `标题:${title}`,
        `作者:${author}`,
        `发布时间:${time}`,
        `字数:${wordCount}`,
        `点赞数:${likeNum}`,
        `评论数:${commentNum}`,
        `收藏数:${collectNum}`,
      ])
      copyInfo = [
        getCopyTime(time),
        '小红书',
        author,
        title,
        window.location.href,
        formatNumInfo([`点赞${likeNum}`, `评论${commentNum}`, `收藏${collectNum}`]),
        wordCount
      ].join('\t')
    } else if (window.location.href.includes('douyin.com/video')) {
      const title= getElementText('h1 span span + span span span span span span')
      const author= getElementTextByXPath('//*[@id="douyin-right-container"]/div[2]/div/div/div[1]/div[4]/div/div[1]/div[2]/a/div/span/span/span/span/span/span')
      const time= getElementTextByXPath('//*[@id="douyin-right-container"]/div[2]/div/div/div[1]/div[3]/div/div[2]/div[2]/span/text()[2]')
      const duration= getElementText('.time-duration')
      const likeNum= getElementTextByXPath('//*[@id="douyin-right-container"]/div[2]/div/div/div[1]/div[3]/div/div[2]/div[1]/div[1]/span')
      const commentNum= getElementTextByXPath('//*[@id="douyin-right-container"]/div[2]/div/div/div[1]/div[3]/div/div[2]/div[1]/div[2]/span')
      const collectNum= getElementTextByXPath('//*[@id="douyin-right-container"]/div[2]/div/div/div[1]/div[3]/div/div[2]/div[1]/div[3]/span')
      const shareNum= getElementTextByXPath('//*[@id="douyin-right-container"]/div[2]/div/div/div[1]/div[3]/div/div[2]/div[1]/div[4]/span')
      textInfo = formatTextInfo([
        `标题:${title}`,
        `作者:${author}`,
        `发布时间:${time}`,
        `时长:${duration}`,
        `点赞数:${likeNum}`,
        `评论数:${commentNum}`,
        `收藏数:${collectNum}`,
        `分享数:${shareNum}`,
      ])
      copyInfo = [
        getCopyTime(time),
        '抖音视频',
        author,
        title,
        window.location.href,
        formatNumInfo([`点赞${likeNum}`, `评论${commentNum}`, `收藏${collectNum}`, `分享${shareNum}`]),
        duration
      ].join('\t')
    } else if (window.location.href.includes('sohu.com/a')) {
      const title = getElementText('h1')
      const author = getElementText('#user-info h4 a')
      const time = getElementText('#news-time')
      const wordCount = getElementText('mp-editor').length
      const readNum = getElementText('.read-num em')
      const likeNum = getElementText('.like-c .count')
      const commentNum = getElementText('.comment-count')
      const collectNum = getElementText('.collection-c .count')
      const shareNum = getElementText('.share-c .count')
      textInfo = formatTextInfo([
        `标题:${title}`,
        `作者:${author}`,
        `发布时间:${time}`,
        `字数:${wordCount}`,
        `阅读数:${readNum}`,
        `点赞数:${likeNum}`,
        `评论数:${commentNum}`,
        `收藏数:${collectNum}`,
        `分享数:${shareNum}`,
      ])
      copyInfo = [
        getCopyTime(time),
        '搜狐',
        author,
        title,
        window.location.href,
        formatNumInfo([`阅读${readNum}`, `点赞${likeNum}`, `评论${commentNum}`, `收藏${collectNum}`, `分享${shareNum}`]),
        wordCount
      ].join('\t')
    } else if (window.location.href.includes('toutiao.com/article')) {
      const title= getElementText('h1')
      const author= getElementText('.article-meta .name')
      const time= getElementText('.article-meta span')
      const wordCount= getElementText('.tt-article-content').length
      const likeNum= getElementText('.detail-like span')
      const commentNum= getElementText('.detail-interaction-comment span')
      const collectNum= getElementText('.detail-interaction-collect span')
      textInfo = formatTextInfo([
        `标题:${title}`,
        `作者:${author}`,
        `发布时间:${time}`,
        `字数:${wordCount}`,
        `点赞数:${likeNum}`,
        `评论数:${commentNum}`,
        `收藏数:${collectNum}`,
      ])
      copyInfo = [
        getCopyTime(time),
        '今日头条',
        author,
        title,
        window.location.href,
        formatNumInfo([`点赞${likeNum}`, `评论${commentNum}`, `收藏${collectNum}`]),
        wordCount
      ].join('\t')
    } else if (window.location.href.includes('toutiao.com/w')) {
      const title= getElementText('h1')
      const author= getElementText('.desc .name')
      const time= getElementText('.abstract .time')
      const wordCount= getElementText('article').length
      const likeNum= getElementText('.detail-like span')
      const commentNum= getElementText('.detail-interaction-comment span')
      const collectNum= getElementText('.detail-interaction-collect span')
      textInfo = formatTextInfo([
        `标题:${title}`,
        `作者:${author}`,
        `发布时间:${time}`,
        `字数:${wordCount}`,
        `点赞数:${likeNum}`,
        `评论数:${commentNum}`,
        `收藏数:${collectNum}`,
      ])
      copyInfo = [
        getCopyTime(time),
        '今日头条',
        author,
        title,
        window.location.href,
        formatNumInfo([`点赞${likeNum}`, `评论${commentNum}`, `收藏${collectNum}`]),
        wordCount
      ].join('\t')
    } else if (window.location.href.includes('baijiahao.baidu.com')) {
      const title = getElementText('#header div')
      const author = getElementText('#header [data-testid=author-name]')
      const time = getElementText('#header [data-testid=updatetime]')
      const wordCount = getElementText('[data-testid=article]').length
      const likeNum = getElementText('[data-testid=like-btn] .interact-desc')
      const commentNum = getElementText('[data-testid=comment-btn] .interact-desc')
      const collectNum = getElementText('[data-testid=favor-btn] .interact-desc')
      const shareNum = getElementText('[data-testid=share-btn] .interact-desc')
      textInfo = formatTextInfo([
        `标题:${title}`,
        `作者:${author}`,
        `发布时间:${time}`,
        `字数:${wordCount}`,
        `点赞数:${likeNum}`,
        `评论数:${commentNum}`,
        `收藏数:${collectNum}`,
        `分享数:${shareNum}`,
      ])
      copyInfo = [
        getCopyTime(time),
        '百度',
        author,
        title,
        window.location.href,
        formatNumInfo([`点赞${likeNum}`, `评论${commentNum}`, `收藏${collectNum}`, `分享${shareNum}`]),
        wordCount
      ].join('\t')
    } else if (window.location.href.includes('mp.weixin.qq.com')) {
      const title = getElementText('h1')
      const author = getElementText('#js_name')
      const time = getElementText('#publish_time')
      const wordCount = getElementText('#js_content').length
      textInfo = formatTextInfo([
        `标题:${title}`,
        `作者:${author}`,
        `发布时间:${time}`,
        `字数:${wordCount}`,
      ])
      copyInfo = [
        getCopyTime(time),
        '公众号',
        author,
        title,
        window.location.href,
        formatNumInfo([`点赞`, `评论`, `收藏`, `分享`]),
        wordCount
      ].join('\t')
    }
    return {
      textInfo,
      copyInfo
    }
  }

  window.addEventListener('load', function() {
    createFloatButton()
    document.querySelector('#__float_btn__').addEventListener('click', () => {
      const { textInfo, copyInfo } = getInfoTexts()
      createInfoModal(textInfo, copyInfo);
    })
  });
})();

QingJ © 2025

镜像随时可能失效,请加Q群300939539或关注我们的公众号极客氢云获取最新地址