信息抓取

文章、笔记信息抓取

// ==UserScript==
// @name         信息抓取
// @namespace    http://tampermonkey.net/
// @version      0.0.4
// @description  文章、笔记信息抓取
// @author       dabennn
// @match        https://*.xiaohongshu.com/explore/*
// @match        https://*.douyin.com/video/*
// @match        https://baijiahao.baidu.com/*
// @match        https://mp.weixin.qq.com/*
// @match        https://*.sohu.com/a/*
// @match        https://*.toutiao.com/article/*
// @match        https://*.toutiao.com/w/*
// @icon         https://www.google.com/s2/favicons?sz=64&domain=github.com
// @license      MIT
// @grant        GM_notification
// @grant        unsafeWindow
// @run-at       document-start
// ==/UserScript==

(function () {
  "use strict";
  /**
   * Format a timestamp as a local-time "YYYY-MM-DD HH:mm:ss" string.
   *
   * @param {number|string|Date} timestamp - Any value accepted by the Date
   *   constructor (the caller passes a millisecond epoch value).
   * @returns {string} The formatted local date-time, or "" when the value is
   *   missing or does not produce a valid date.
   */
  function formatTimestamp(timestamp) {
    // null would silently become 1970-01-01; treat it as "no timestamp".
    if (timestamp == null) {
      return "";
    }
    const date = new Date(timestamp);
    // An invalid Date would otherwise render as "NaN-NaN-NaN NaN:NaN:NaN".
    if (Number.isNaN(date.getTime())) {
      return "";
    }
    const pad = (value) => String(value).padStart(2, "0");
    const datePart = [
      date.getFullYear(),
      pad(date.getMonth() + 1), // getMonth() is zero-based
      pad(date.getDate()),
    ].join("-");
    const timePart = [
      pad(date.getHours()),
      pad(date.getMinutes()),
      pad(date.getSeconds()),
    ].join(":");
    return `${datePart} ${timePart}`;
  }
  /**
   * Read the trimmed text of the first element matching a CSS selector.
   *
   * @param {string} domSelector - CSS selector passed to document.querySelector.
   * @returns {string} The element's trimmed textContent, or "" when nothing matches.
   */
  function getElementText(domSelector) {
    const match = document.querySelector(domSelector);
    if (!match) {
      return "";
    }
    return match.textContent.trim();
  }
  /**
   * Read the trimmed text of the first node matching an XPath expression,
   * evaluated against the whole document.
   *
   * @param {string} xpath - XPath expression to evaluate.
   * @returns {string} The node's trimmed textContent, or "" when nothing matches.
   */
  function getElementTextByXPath(xpath) {
    const lookup = document.evaluate(
      xpath,
      document,
      null,
      XPathResult.FIRST_ORDERED_NODE_TYPE,
      null
    );
    const node = lookup.singleNodeValue;
    if (!node) {
      return "";
    }
    return node.textContent.trim();
  }
  /**
   * Show a floating panel in the top-right corner of the page containing the
   * scraped info, a close button and a copy-to-clipboard button.
   *
   * @param {string} infoText - Lines to display, separated by "<br>". Every
   *   line is rendered as plain text, so markup inside scraped values is shown
   *   literally instead of being interpreted as HTML.
   * @param {string} copyText - Text written to the clipboard when the copy
   *   button is clicked.
   */
  function createInfoModal(infoText, copyText) {
    // Container for the floating panel.
    const infoDiv = document.createElement("div");
    Object.assign(infoDiv.style, {
      position: "fixed",
      top: "0",
      right: "0",
      backgroundColor: "rgba(255, 255, 255, 0.8)",
      padding: "10px",
      border: "1px solid gray",
      zIndex: "9999",
    });

    // Close button.
    const closeButton = document.createElement("span");
    closeButton.id = "__close_btn__";
    closeButton.textContent = "×";
    Object.assign(closeButton.style, {
      cursor: "pointer",
      float: "right",
      fontSize: "18px",
      color: "red",
    });

    // Copy button.
    const copyButton = document.createElement("span");
    copyButton.id = "__copy__";
    copyButton.textContent = "复制";
    Object.assign(copyButton.style, {
      cursor: "pointer",
      float: "right",
      marginRight: "5px",
      fontSize: "18px",
      color: "blue",
    });

    // Handlers are bound to the elements themselves rather than looked up by
    // id: when several panels are open the ids are duplicated and a global
    // lookup would always hit the first panel's buttons.
    closeButton.addEventListener("click", function () {
      if (infoDiv.parentNode) {
        infoDiv.parentNode.removeChild(infoDiv);
      }
    });
    copyButton.addEventListener("click", function () {
      navigator.clipboard
        .writeText(copyText)
        .then(function () {
          GM_notification("已复制到剪贴板");
        })
        .catch(function (err) {
          GM_notification("复制失败");
          console.error("复制失败:", err);
        });
    });

    // Close button first, then the copy button (both float right).
    infoDiv.appendChild(closeButton);
    infoDiv.appendChild(copyButton);

    // The text comes from the visited page, so it is inserted as text nodes
    // (with real <br> elements between lines) and never parsed as HTML.
    String(infoText)
      .split(/<br\s*\/?>/i)
      .forEach(function (line, index) {
        if (index > 0) {
          infoDiv.appendChild(document.createElement("br"));
        }
        infoDiv.appendChild(document.createTextNode(line));
      });

    document.body.appendChild(infoDiv);
  }
  /**
   * Add the fixed-position "信息抓取" trigger button (id "__float_btn__") to
   * the page body, together with a small "×" that removes the button again.
   */
  function createFloatButton() {
    const floatBtn = document.createElement("div");
    floatBtn.textContent = "信息抓取";
    floatBtn.id = "__float_btn__";
    Object.assign(floatBtn.style, {
      position: "fixed",
      top: "150px",
      right: "0",
      zIndex: "9999",
      backgroundColor: "rgba(255, 100, 100, 0.9)",
      color: "#fff",
      borderRadius: "4px",
      padding: "10px",
      cursor: "pointer",
    });

    // Dismiss control placed in the button's top-right corner.
    const dismiss = document.createElement("span");
    dismiss.textContent = "×";
    Object.assign(dismiss.style, {
      position: "absolute",
      top: "-3px",
      right: "2px",
      fontSize: "16px",
      lineHeight: "1",
    });
    dismiss.addEventListener("click", (event) => {
      // Keep the click from also reaching handlers on the button itself.
      event.stopPropagation();
      floatBtn.parentNode.removeChild(floatBtn);
    });

    floatBtn.appendChild(dismiss);
    document.body.appendChild(floatBtn);
  }
  /**
   * Join display lines with "<br>" and replace every occurrence of the text
   * "undefined" with "未获取到".
   *
   * @param {string[]} texts - Lines to show in the info panel.
   * @returns {string} The joined text.
   */
  const formatTextInfo = (texts) => {
    const joined = texts.join("<br>");
    return joined.replace(/undefined/g, "未获取到");
  };
  /**
   * Join counter labels with single spaces and replace every occurrence of
   * the text "undefined" with "0".
   *
   * @param {string[]} texts - Counter labels such as "点赞12".
   * @returns {string} The joined text.
   */
  const formatNumInfo = (texts) => {
    const joined = texts.join(" ");
    return joined.replace(/undefined/g, "0");
  };
  /**
   * Keep a scraped counter only when it contains at least one digit.
   *
   * @param {*} num - Scraped counter text (e.g. "12", "1.2万", "点赞").
   * @returns {*} `num` unchanged when it contains a digit, otherwise the number 0.
   */
  const formatCopyNum = (num) => {
    const hasDigit = /\d/.exec(num) !== null;
    if (hasDigit) {
      return num;
    }
    return 0;
  };
  /**
   * Rewrite a Chinese-style date for the clipboard row: every "年" and "月"
   * becomes "/", and the first "日" is removed.
   *
   * @param {string} time - Scraped time text, e.g. "2024年1月2日 10:00".
   * @returns {string} The rewritten time text.
   */
  const getCopyTime = (time) => {
    const withSlashes = time.replace(/[年月]/g, "/");
    return withSlashes.replace(/日/, "");
  };
  /**
   * Scrape the metadata of the current page, choosing the selectors by which
   * supported site the page URL contains.
   *
   * @returns {{textInfo: string, copyInfo: string}} `textInfo` holds the
   *   labelled fields joined for display; `copyInfo` holds one tab-separated
   *   row (time, platform, author, title, URL, counters, word count or
   *   duration). Both are "" when the URL matches none of the known sites.
   */
  const getInfoTexts = () => {
    const pageUrl = window.location.href;
    const onPage = (fragment) => pageUrl.includes(fragment);

    // Builds the tab-separated clipboard row shared by every site.
    const toCopyRow = (time, platform, author, title, counters, size) =>
      [
        getCopyTime(time),
        platform,
        author,
        title,
        pageUrl,
        formatNumInfo(counters),
        size,
      ].join("\t");

    if (onPage("xiaohongshu.com/explore")) {
      const state = unsafeWindow.__INITIAL_STATE__;
      const title = getElementText(".note-content .title");
      const author = getElementText(".username");
      let time = "";
      try {
        // The update time is read from the page's initial state object
        // rather than from the DOM.
        const { noteDetailMap, firstNoteId } = state.note;
        time = formatTimestamp(
          noteDetailMap[firstNoteId.value].note.lastUpdateTime
        );
      } catch (e) {
        console.error(e);
      }
      const wordCount = getElementText(".note-content").length;
      const readCounter = (kind) =>
        getElementText(`.interact-container .${kind}-wrapper .count`);
      const likeNum = readCounter("like");
      const commentNum = readCounter("chat");
      const collectNum = readCounter("collect");
      return {
        textInfo: formatTextInfo([
          `标题:${title}`,
          `作者:${author}`,
          `发布时间:${time}`,
          `字数:${wordCount}`,
          `点赞数:${likeNum}`,
          `评论数:${commentNum}`,
          `收藏数:${collectNum}`,
        ]),
        copyInfo: toCopyRow(
          time,
          "小红书",
          author,
          title,
          [
            `点赞${formatCopyNum(likeNum)}`,
            `评论${formatCopyNum(commentNum)}`,
            `收藏${formatCopyNum(collectNum)}`,
          ],
          wordCount
        ),
      };
    }

    if (onPage("douyin.com/video")) {
      // All XPath lookups share this prefix inside the right-hand container.
      const base = '//*[@id="douyin-right-container"]/div[2]/div/div/div[1]';
      const infoBar = `${base}/div[3]/div/div[2]`;
      const readCounter = (position) =>
        getElementTextByXPath(`${infoBar}/div[1]/div[${position}]/span`);

      const title = getElementText(
        "h1 span span + span span span span span span"
      );
      const author = getElementTextByXPath(
        `${base}/div[4]/div/div[1]/div[2]/a/div/span/span/span/span/span/span`
      );
      const time = getElementTextByXPath(`${infoBar}/div[2]/span/text()[2]`);
      const duration = getElementText(".time-duration");
      const likeNum = readCounter(1);
      const commentNum = readCounter(2);
      const collectNum = readCounter(3);
      const shareNum = readCounter(4);
      return {
        textInfo: formatTextInfo([
          `标题:${title}`,
          `作者:${author}`,
          `发布时间:${time}`,
          `时长:${duration}`,
          `点赞数:${likeNum}`,
          `评论数:${commentNum}`,
          `收藏数:${collectNum}`,
          `分享数:${shareNum}`,
        ]),
        copyInfo: toCopyRow(
          time,
          "抖音视频",
          author,
          title,
          [
            `点赞${formatCopyNum(likeNum)}`,
            `评论${formatCopyNum(commentNum)}`,
            `收藏${formatCopyNum(collectNum)}`,
            `分享${formatCopyNum(shareNum)}`,
          ],
          duration
        ),
      };
    }

    if (onPage("sohu.com/a")) {
      const title = getElementText("h1");
      const author = getElementText("#user-info h4 a");
      const time = getElementText("#news-time");
      const wordCount = getElementText("mp-editor").length;
      const readNum = getElementText(".read-num em");
      const likeNum = getElementText(".like-c .count");
      const commentNum = getElementText(".comment-count");
      const collectNum = getElementText(".collection-c .count");
      const shareNum = getElementText(".share-c .count");
      return {
        textInfo: formatTextInfo([
          `标题:${title}`,
          `作者:${author}`,
          `发布时间:${time}`,
          `字数:${wordCount}`,
          `阅读数:${readNum}`,
          `点赞数:${likeNum}`,
          `评论数:${commentNum}`,
          `收藏数:${collectNum}`,
          `分享数:${shareNum}`,
        ]),
        copyInfo: toCopyRow(
          time,
          "搜狐",
          author,
          title,
          [
            `阅读${formatCopyNum(readNum)}`,
            `点赞${formatCopyNum(likeNum)}`,
            `评论${formatCopyNum(commentNum)}`,
            `收藏${formatCopyNum(collectNum)}`,
            `分享${formatCopyNum(shareNum)}`,
          ],
          wordCount
        ),
      };
    }

    // Toutiao articles and micro-posts differ only in where the author,
    // time and body text live; the interaction counters are the same.
    const isToutiaoArticle = onPage("toutiao.com/article");
    if (isToutiaoArticle || onPage("toutiao.com/w")) {
      const title = getElementText("h1");
      const author = getElementText(
        isToutiaoArticle ? ".article-meta .name" : ".desc .name"
      );
      const time = getElementText(
        isToutiaoArticle ? ".article-meta span" : ".abstract .time"
      );
      const wordCount = getElementText(
        isToutiaoArticle ? ".tt-article-content" : "article"
      ).length;
      const likeNum = getElementText(".detail-like span");
      const commentNum = getElementText(".detail-interaction-comment span");
      const collectNum = getElementText(".detail-interaction-collect span");
      return {
        textInfo: formatTextInfo([
          `标题:${title}`,
          `作者:${author}`,
          `发布时间:${time}`,
          `字数:${wordCount}`,
          `点赞数:${likeNum}`,
          `评论数:${commentNum}`,
          `收藏数:${collectNum}`,
        ]),
        copyInfo: toCopyRow(
          time,
          "今日头条",
          author,
          title,
          [
            `点赞${formatCopyNum(likeNum)}`,
            `评论${formatCopyNum(commentNum)}`,
            `收藏${formatCopyNum(collectNum)}`,
          ],
          wordCount
        ),
      };
    }

    if (onPage("baijiahao.baidu.com")) {
      const readCounter = (name) =>
        getElementText(`[data-testid=${name}-btn] .interact-desc`);
      const title = getElementText("#header div");
      const author = getElementText("#header [data-testid=author-name]");
      const time = getElementText("#header [data-testid=updatetime]");
      const wordCount = getElementText("[data-testid=article]").length;
      const likeNum = readCounter("like");
      const commentNum = readCounter("comment");
      const collectNum = readCounter("favor");
      const shareNum = readCounter("share");
      return {
        textInfo: formatTextInfo([
          `标题:${title}`,
          `作者:${author}`,
          `发布时间:${time}`,
          `字数:${wordCount}`,
          `点赞数:${likeNum}`,
          `评论数:${commentNum}`,
          `收藏数:${collectNum}`,
          `分享数:${shareNum}`,
        ]),
        copyInfo: toCopyRow(
          time,
          "百度",
          author,
          title,
          [
            `点赞${formatCopyNum(likeNum)}`,
            `评论${formatCopyNum(commentNum)}`,
            `收藏${formatCopyNum(collectNum)}`,
            `分享${formatCopyNum(shareNum)}`,
          ],
          wordCount
        ),
      };
    }

    if (onPage("mp.weixin.qq.com")) {
      const title = getElementText("h1");
      const author = getElementText("#js_name");
      const time = getElementText("#publish_time");
      const wordCount = getElementText("#js_content").length;
      return {
        textInfo: formatTextInfo([
          `标题:${title}`,
          `作者:${author}`,
          `发布时间:${time}`,
          `字数:${wordCount}`,
        ]),
        // No counters are scraped here; only the bare labels are emitted.
        copyInfo: toCopyRow(
          time,
          "公众号",
          author,
          title,
          [`点赞`, `转发`, `喜欢`, `评论`],
          wordCount
        ),
      };
    }

    // Unsupported page: nothing to show or copy.
    return {
      textInfo: "",
      copyInfo: "",
    };
  };

  // Once the page has finished loading, add the floating trigger button and
  // make each click on it scrape the page and open the info panel.
  const onPageLoaded = () => {
    createFloatButton();
    const trigger = document.querySelector("#__float_btn__");
    trigger.addEventListener("click", () => {
      const info = getInfoTexts();
      createInfoModal(info.textInfo, info.copyInfo);
    });
  };
  window.addEventListener("load", onPageLoaded);
})();

QingJ © 2025

镜像随时可能失效,请加Q群300939539或关注我们的公众号极客氢云获取最新地址