Ximalaya Opus Data Collector

自动采集喜马拉雅“个人中心”专辑“播放”和“订阅”数据。

  1. // ==UserScript==
  2. // @name Ximalaya Opus Data Collector
  3. // @namespace http://tampermonkey.net/
  4. // @version 2.9
  5. // @description 自动采集喜马拉雅“个人中心”专辑“播放”和“订阅”数据。
  6. // @author You
  7. // @match https://studio.ximalaya.com/opus
  8. // @grant none
  9. // @license MIT
  10. // ==/UserScript==
  11. (function() {
  12. 'use strict';
  13.  
  // Shared script state, read and written by the functions below.
  // True while a collection run is in progress; the stop button sets it to
  // false, which ends the paging loop in collectRangePages.
  let isCollecting = false;
  // Rows gathered during the current run (one object per album).
  let allData = [];
  // Handle of the pending "next loop" timer set by
  // checkAndTriggerNextCollection; cleared by the stop button.
  let timeoutId = null;
  17.  
  /**
   * Builds the floating control panel and attaches it to the page:
   *   - a "stop" button,
   *   - a page-range row (start page, end page, "start collecting" button),
   *   - a loop row (enable checkbox and interval in minutes).
   *
   * Element ids (`startPage`, `endPage`, `loopEnabled`, `intervalMinutes`)
   * are looked up by other functions in this script and must stay stable.
   *
   * Fixes over the previous version:
   *   - the stop button also cancels a pending loop timer when no run is active;
   *   - starting a run cancels a pending loop timer, so it cannot fire (and
   *     reload the iframe) in the middle of that run;
   *   - a run that ended because the user pressed stop neither schedules the
   *     next loop nor shows a "collection failed" alert.
   */
  function createControls() {
    // Grey "badge" label shared by every caption in the panel.
    const makeLabel = (text, marginRight) => {
      const label = document.createElement('label');
      label.textContent = text;
      label.style.marginRight = marginRight;
      label.style.backgroundColor = '#666';
      label.style.color = 'white';
      label.style.padding = '2px 5px';
      return label;
    };

    // Narrow numeric input; `max` and `marginRight` are only set when given.
    const makeNumberInput = ({ id, value, min, max, marginRight }) => {
      const input = document.createElement('input');
      input.type = 'number';
      input.value = value;
      input.min = min;
      if (max !== undefined) {
        input.max = max;
      }
      input.id = id;
      input.style.width = '60px';
      if (marginRight !== undefined) {
        input.style.marginRight = marginRight;
      }
      input.style.backgroundColor = '#666';
      input.style.color = 'white';
      return input;
    };

    // Fixed-position container docked to the right edge of the viewport.
    const makeDockedContainer = (top) => {
      const container = document.createElement('div');
      container.style.position = 'fixed';
      container.style.top = top;
      container.style.right = '0px';
      container.style.zIndex = '9999';
      return container;
    };

    // Cancels the scheduled loop run, if any. Returns true when a timer was pending.
    const cancelPendingLoop = () => {
      if (!timeoutId) {
        return false;
      }
      clearTimeout(timeoutId);
      timeoutId = null;
      return true;
    };

    // --- Stop button -------------------------------------------------------
    const stopButton = createButton('停止采集', '210px', '#dc3545', () => {
      const loopCancelled = cancelPendingLoop();
      if (!isCollecting) {
        // Idle: the only thing that could be stopped is a waiting loop timer.
        alert(loopCancelled ? '已取消等待中的循环采集!' : '当前未在采集数据!');
        return;
      }
      isCollecting = false;
      stopButton.disabled = true;
      stopButton.textContent = '已停止';
      saveAndDownloadCurrentData();
      alert('采集已停止,当前数据已保存为文件!');
      returnToFirstPage();
      // Re-arm the button and drop the already-saved rows after a short pause.
      setTimeout(() => {
        stopButton.disabled = false;
        stopButton.textContent = '停止采集';
        allData = [];
      }, 2000);
    });
    document.body.appendChild(stopButton);

    // --- Page-range row ----------------------------------------------------
    const rangeContainer = makeDockedContainer('260px');
    const startLabel = makeLabel('起始页: ', '5px');
    const startInput = makeNumberInput({ id: 'startPage', value: '1', min: '1', marginRight: '10px' });
    const endLabel = makeLabel('结束页: ', '5px');
    const endInput = makeNumberInput({ id: 'endPage', value: '57', min: '1', max: '100', marginRight: '0px' });

    const rangeButton = createButton('开始采集', '150px', '#17a2b8', () => {
      const startInputEl = document.getElementById('startPage');
      const endInputEl = document.getElementById('endPage');
      if (!startInputEl || !endInputEl) {
        alert('输入框未找到,请刷新页面重试!');
        return;
      }
      const start = parseInt(startInputEl.value, 10);
      const end = parseInt(endInputEl.value, 10);
      if (Number.isNaN(start) || Number.isNaN(end) || start < 1 || end > 100 || start > end) {
        alert('输入的页码无效!请确保起始页大于等于 1,结束页小于等于 100,且起始页小于等于结束页。');
        return;
      }
      if (isCollecting) {
        alert('正在采集中,请先停止当前任务!');
        return;
      }

      // A run is starting now, so a still-pending loop timer must not fire during it.
      cancelPendingLoop();

      isCollecting = true;
      allData = [];
      rangeButton.textContent = '正在采集';
      rangeButton.disabled = true;
      stopButton.disabled = false;

      // Restores the idle UI state; returns true when the run was ended by the stop button.
      const finishRun = () => {
        const stoppedByUser = !isCollecting;
        isCollecting = false;
        rangeButton.textContent = '开始采集';
        rangeButton.disabled = false;
        stopButton.disabled = false;
        return stoppedByUser;
      };

      collectRangePages(start, end).then(() => {
        const stoppedByUser = finishRun();
        if (!stoppedByUser) {
          checkAndTriggerNextCollection();
        }
      }).catch((err) => {
        const stoppedByUser = finishRun();
        console.error(err);
        if (!stoppedByUser) {
          alert(`采集失败: ${err.message}`);
        }
      });
    });

    rangeContainer.appendChild(startLabel);
    rangeContainer.appendChild(startInput);
    rangeContainer.appendChild(endLabel);
    rangeContainer.appendChild(endInput);
    rangeContainer.appendChild(rangeButton);
    document.body.appendChild(rangeContainer);

    // --- Loop row ----------------------------------------------------------
    const loopContainer = makeDockedContainer('300px');

    const loopCheckbox = document.createElement('input');
    loopCheckbox.type = 'checkbox';
    loopCheckbox.id = 'loopEnabled';
    loopCheckbox.checked = false;
    const loopLabel = makeLabel(' 启用循环采集', '10px');
    loopLabel.htmlFor = 'loopEnabled';

    const intervalLabel = makeLabel('循环间隔 (分钟): ', '5px');
    const intervalInput = makeNumberInput({ id: 'intervalMinutes', value: '30', min: '1' });

    loopContainer.appendChild(loopCheckbox);
    loopContainer.appendChild(loopLabel);
    loopContainer.appendChild(intervalLabel);
    loopContainer.appendChild(intervalInput);
    document.body.appendChild(loopContainer);
  }
  168.  
  /**
   * Creates a fixed-position button docked to the right edge of the viewport.
   *
   * @param {string} text - Caption shown on the button.
   * @param {string} top - CSS `top` offset, e.g. '150px'.
   * @param {string} color - CSS background colour.
   * @param {Function} onClick - Click handler.
   * @returns {HTMLButtonElement} The button; the caller attaches it to the DOM.
   */
  function createButton(text, top, color, onClick) {
    const element = document.createElement('button');
    element.textContent = text;
    Object.assign(element.style, {
      position: 'fixed',
      top,
      right: '0px',
      zIndex: '9999',
      padding: '8px 16px',
      backgroundColor: color,
      color: 'white',
      border: 'none',
      borderRadius: '4px',
      cursor: 'pointer',
    });
    element.addEventListener('click', onClick);
    return element;
  }
  186.  
  187.  
  /**
   * Waits until the `#contentWrapper` iframe exposes a document with a body.
   *
   * The iframe is checked once immediately and then every 100 ms.
   *
   * @param {number} [timeout=3000] - Maximum time to wait, in milliseconds.
   * @returns {Promise<Document>} Resolves with the iframe's document.
   *   Rejects with an Error when the iframe element is missing or when the
   *   document is still not available after `timeout` milliseconds.
   */
  function waitForIframe(timeout = 3000) {
    return new Promise((resolve, reject) => {
      const iframe = document.getElementById('contentWrapper');
      if (!iframe) {
        reject(new Error('未找到 iframe 元素'));
        return;
      }

      // Re-read contentDocument on every check: it is replaced when the frame reloads.
      const readyDocument = () => {
        const doc = iframe.contentDocument;
        return doc && doc.body ? doc : null;
      };

      // Already loaded: no need to wait for the first poll tick.
      const available = readyDocument();
      if (available) {
        resolve(available);
        return;
      }

      // Local timer handles; named so they do not shadow the script-level `timeoutId`.
      let pollTimer = null;
      let deadlineTimer = null;

      pollTimer = setInterval(() => {
        const doc = readyDocument();
        if (doc) {
          clearInterval(pollTimer);
          clearTimeout(deadlineTimer);
          resolve(doc);
        }
      }, 100);

      deadlineTimer = setTimeout(() => {
        clearInterval(pollTimer);
        // Report the timeout that was actually used, not a hard-coded 3 seconds.
        reject(new Error(`等待 iframe 加载超时(${timeout / 1000}秒)`));
      }, timeout);
    });
  }
  212.  
  /**
   * Collects album rows page by page and downloads them as one CSV.
   *
   * Flow: reload the iframe, then repeat "scrape the current page -> wait ->
   * click the pagination 'next' button -> wait" until `endPage` is reached,
   * the pagination reports the last page, or the user presses stop
   * (`isCollecting` becomes false). Rows whose play count is '0' are dropped.
   *
   * Fixes over the previous version:
   *   - a page that fails to scrape no longer bumps the page counter without
   *     clicking "next" (which desynchronised labels from the real page and
   *     let the loop spin without any delay);
   *   - rows already collected are saved even when the final page fails;
   *   - a run ended by the stop button resolves quietly instead of throwing
   *     the "no data" error (the stop handler saves the rows itself).
   *
   * NOTE(review): this function never navigates to `startPage`; whatever page
   * the iframe shows after the reload is labelled `startPage`. Confirm whether
   * a start page other than 1 is expected to skip ahead.
   *
   * @param {number} startPage - Label of the first page to collect.
   * @param {number} endPage - Label of the last page to collect (inclusive).
   * @returns {Promise<void>}
   * @throws {Error} When the run finished without collecting any row, or when
   *   the iframe cannot be obtained.
   */
  async function collectRangePages(startPage, endPage) {
    console.log(`开始采集从第 ${startPage} 页到第 ${endPage} 页的数据...`);
    allData = [];

    const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

    // Reload the iframe content only; the script state in the outer page survives.
    await refreshIframe();
    const iframeDoc = await waitForIframe(3000);

    let currentPage = startPage;
    while (currentPage <= endPage && isCollecting) {
      console.log(`正在采集第 ${currentPage} 页...`);
      try {
        const pageData = await collectPageData(iframeDoc, currentPage);
        // Skip albums that report zero plays.
        allData = allData.concat(pageData.filter((item) => item.plays !== '0'));
      } catch (err) {
        // Best effort: log the failure and still try to move on to the next page.
        console.warn(`第 ${currentPage} 页采集失败: ${err.message},继续下一页`);
      }

      // Give the page time to settle before touching the pagination.
      await pause(3000);

      if (!isCollecting || currentPage === endPage) {
        break;
      }

      const nextButton = iframeDoc.querySelector('.ant-pagination-next:not(.ant-pagination-disabled)');
      const paginationItems = iframeDoc.querySelectorAll('.ant-pagination-item');
      const lastPageItem = Array.from(paginationItems).pop();
      const isLastPage = !nextButton || nextButton.getAttribute('aria-disabled') === 'true' ||
        (lastPageItem && lastPageItem.textContent.trim() === currentPage.toString());
      if (isLastPage) {
        break;
      }

      nextButton.click();
      currentPage += 1;

      // Wait for the page flip to render.
      await pause(3000);
    }

    if (!isCollecting) {
      // Stopped by the user: the stop handler already saved the rows.
      return;
    }

    if (allData.length === 0) {
      throw new Error('未采集到任何数据,请确保页面已加载专辑列表!');
    }

    saveToCsv(allData, 'all');
    returnToFirstPage();
  }
  275.  
  /**
   * Scrapes every album entry currently rendered in the iframe document.
   *
   * @param {Document} iframeDoc - Document of the content iframe.
   * @param {number} pageNumber - Page label, used only in log and error messages.
   * @returns {Promise<Array<{title: string, episodes: string, plays: string,
   *   subscribes: string, time: string}>>} One row per album entry; fields
   *   that cannot be found keep their defaults ('未知专辑' / '0').
   * @throws {Error} When the document contains no album entries.
   */
  async function collectPageData(iframeDoc, pageNumber) {
    console.log(`采集第 ${pageNumber} 页数据...`);
    const albumNodes = iframeDoc.querySelectorAll('[class*="AlbumItem_listItem"]');
    console.log(`找到 ${albumNodes.length} 个专辑项`);

    if (albumNodes.length === 0) {
      throw new Error(`第 ${pageNumber} 页未找到专辑数据!`);
    }

    // One timestamp shared by every row of this page; slashes and the first
    // space of the zh-CN formatted date are replaced with dashes.
    const stampOptions = {
      year: 'numeric',
      month: '2-digit',
      day: '2-digit',
      hour: '2-digit',
      minute: '2-digit'
    };
    const stamp = new Date()
      .toLocaleString('zh-CN', stampOptions)
      .replace(/\//g, '-')
      .replace(' ', '-');

    return Array.from(albumNodes, (node, position) => {
      // Title: strip the first "公开" marker; fall back when nothing is left.
      const titleNode = node.querySelector('.AlbumItem_title__1q92X, [class*="title"]');
      const title = titleNode
        ? (titleNode.textContent.trim().replace('公开', '') || '未知专辑')
        : '未知专辑';

      // Episode count: keep digits only.
      const episodeNode = node.querySelector('.AlbumItem_number__2lJS9 span, [class*="number"] span');
      const episodes = episodeNode
        ? (episodeNode.textContent.replace(/[^\d]/g, '').trim() || '0')
        : '0';

      // Plays and subscriptions: the last matching span wins for each field.
      let plays = '0';
      let subscribes = '0';
      const statNodes = node.querySelectorAll('.AlbumItem_data__upkkX span, [class*="data"] span');
      for (const statNode of statNodes) {
        const label = statNode.textContent.trim();
        if (label.includes('播放')) {
          plays = convertNumericFormat(label) || '0';
        } else if (label.includes('订阅')) {
          subscribes = convertNumericFormat(label) || '0';
        }
      }

      console.log(`采集第 ${pageNumber} 页的第 ${position + 1} 个专辑: ${title}, 集数: ${episodes}, 播放: ${plays}, 订阅: ${subscribes}, 时间: ${stamp}`);
      return { title, episodes, plays, subscribes, time: stamp };
    });
  }
  330.  
  /**
   * Converts a count such as "2.64万", "1.2亿" or "播放 1,234" to a plain
   * integer string ("26400", "120000000", "1234").
   *
   * The first number found in `text` is used. Thousands separators (ASCII or
   * full-width commas) are ignored and whitespace between the number and its
   * unit is tolerated.
   *
   * @param {string} text - Raw text containing the number.
   * @returns {string} The rounded integer as a string; '0' when `text` is
   *   empty or contains no usable number.
   */
  function convertNumericFormat(text) {
    if (!text) return '0';

    // Drop thousands separators so "1,234" is read as 1234 rather than 1.
    const normalized = String(text).replace(/[,,]/g, '');
    const match = normalized.match(/(\d*\.?\d+)\s*(万|亿)?/);
    if (!match) return '0';

    const value = parseFloat(match[1]);
    if (!Number.isFinite(value)) return '0';

    const unit = match[2];
    let multiplier = 1;
    if (unit === '亿') {
      multiplier = 100000000;
    } else if (unit === '万') {
      multiplier = 10000;
    }
    // Round to absorb floating-point noise, e.g. 2.64 * 10000 = 26400.000000000004.
    return Math.round(value * multiplier).toString();
  }
  346.  
  /**
   * Downloads the rows collected so far as a CSV file.
   *
   * Does nothing when no rows have been collected. The CSV generation is
   * delegated to saveToCsv, which this function previously duplicated line
   * for line.
   */
  function saveAndDownloadCurrentData() {
    if (allData.length === 0) return;
    saveToCsv(allData, 'all');
  }
  371.  
  /**
   * Serialises rows to CSV and triggers a browser download named
   * `中心数据<timestamp>.csv`.
   *
   * The content starts with a UTF-8 byte-order mark, which the original
   * comment says is there to avoid garbled Chinese text. Title and time are
   * quoted; double quotes inside them are doubled so that a title containing
   * `"` cannot break the row.
   *
   * @param {Array<{title: string, episodes: string, plays: string,
   *   subscribes: string, time: string}>} data - Rows to write.
   * @param {*} pageNumber - Unused; kept so existing callers keep working.
   */
  function saveToCsv(data, pageNumber) {
    const quoteField = (value) => `"${String(value).replace(/"/g, '""')}"`;

    const header = '专辑名,集数,播放,订阅,时间\n';
    const lines = data.map((item) =>
      `${quoteField(item.title)},${item.episodes},${item.plays},${item.subscribes},${quoteField(item.time)}\n`);
    const csvContent = `\ufeff${header}${lines.join('')}`;

    const blob = new Blob([csvContent], { type: 'text/csv;charset=utf-8;' });
    // Slashes and the first space of the zh-CN formatted date become dashes.
    const timestamp = new Date().toLocaleString('zh-CN', {
      year: 'numeric',
      month: '2-digit',
      day: '2-digit',
      hour: '2-digit',
      minute: '2-digit'
    }).replace(/\//g, '-').replace(' ', '-');

    const url = URL.createObjectURL(blob);
    const link = document.createElement('a');
    link.setAttribute('href', url);
    link.setAttribute('download', `中心数据${timestamp}.csv`);
    document.body.appendChild(link);
    link.click();
    document.body.removeChild(link);

    // Release the blob once the download has had time to start; without this
    // every export stays referenced until the page is closed.
    setTimeout(() => URL.revokeObjectURL(url), 1000);
  }
  394.  
  /**
   * Clicks the pagination item for page 1 inside the content iframe.
   *
   * Does nothing when the iframe or its document is missing, or when no
   * inactive page-1 item is found.
   */
  function returnToFirstPage() {
    const frame = document.getElementById('contentWrapper');
    if (!frame || !frame.contentDocument) {
      return;
    }
    const pageOneItem = frame.contentDocument.querySelector('.ant-pagination-item-1:not(.ant-pagination-item-active)');
    if (!pageOneItem) {
      return;
    }
    pageOneItem.click();
    console.log('已返回到第 1 页');
  }
  406.  
  /**
   * Reloads the `#contentWrapper` iframe (not the outer page) and waits for
   * its document.
   *
   * After triggering the reload it pauses one second, then waits up to three
   * more seconds via waitForIframe.
   *
   * @returns {Promise<Document|null>} The iframe document, or null when the
   *   wait after the reload fails (the failure is logged, not thrown). When
   *   the iframe element does not exist, the outcome of waitForIframe is
   *   returned as-is.
   */
  async function refreshIframe() {
    const frame = document.getElementById('contentWrapper');
    if (!frame) {
      // No iframe to reload: hand over to waitForIframe directly.
      return waitForIframe(3000);
    }

    frame.contentWindow.location.reload();
    await new Promise((done) => setTimeout(done, 1000));

    try {
      return await waitForIframe(3000);
    } catch (err) {
      // Best effort: the caller continues even when the wait fails.
      console.error('刷新 iframe 后等待超时:', err);
      return null;
    }
  }
  426.  
  /**
   * Schedules the next collection run when loop mode is enabled.
   *
   * Reads the `#loopEnabled` checkbox and the `#intervalMinutes` input. The
   * interval falls back to 30 minutes when the input is empty, not a number,
   * or smaller than 1, so a negative value can no longer schedule an
   * immediate re-run. Any timer that is still pending is replaced, and the
   * shared `timeoutId` is reset once the timer fires so it never refers to a
   * timer that has already run.
   */
  function checkAndTriggerNextCollection() {
    const loopCheckbox = document.getElementById('loopEnabled');
    if (!loopCheckbox || !loopCheckbox.checked) {
      return;
    }

    const intervalInput = document.getElementById('intervalMinutes');
    if (!intervalInput) {
      console.error('循环间隔输入框未找到');
      return;
    }

    const DEFAULT_INTERVAL_MINUTES = 30;
    const parsed = parseInt(intervalInput.value, 10);
    // NaN fails this comparison too, so it also falls back to the default.
    const intervalMinutes = parsed >= 1 ? parsed : DEFAULT_INTERVAL_MINUTES;

    if (timeoutId) {
      clearTimeout(timeoutId);
    }
    timeoutId = setTimeout(() => {
      timeoutId = null;
      refreshPageAndTriggerCollection();
    }, intervalMinutes * 60 * 1000);
  }
  442.  
  /**
   * Reloads the content iframe and then starts a new run by clicking the
   * "start collecting" button.
   *
   * Waits 3 seconds for the iframe to load, then another 5 seconds before
   * the click.
   *
   * The previous lookup, `button[style*="top: 0px"][style*="right: 0px"]`,
   * could never match: createControls gives the start button `top: 150px`,
   * so loop mode never restarted. The button is now found as the only button
   * inside the container that holds the `#startPage` input.
   *
   * @returns {Promise<void>} Resolves after the click attempt, or immediately
   *   when the iframe is missing.
   */
  async function refreshPageAndTriggerCollection() {
    const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

    const iframe = document.getElementById('contentWrapper');
    if (!iframe) {
      console.error('未找到 iframe 元素');
      return;
    }

    iframe.contentWindow.location.reload(); // reloads the iframe only
    await pause(3000); // let the iframe load
    await pause(5000); // extra gap before starting the next run

    // The start button lives in the same container as the start-page input.
    const startInput = document.getElementById('startPage');
    const panel = startInput ? startInput.parentElement : null;
    const rangeButton = panel ? panel.querySelector('button') : null;

    if (rangeButton) {
      rangeButton.click();
      console.log('已模拟按下“开始采集”按钮');
    } else {
      console.error('未找到“开始采集”按钮');
    }
  }
  468.  
  // Add the buttons and inputs once the page has finished loading.
  // A userscript may be injected after the window `load` event has already
  // fired; a listener registered at that point would never run, so the
  // already-loaded case is handled directly.
  if (document.readyState === 'complete') {
    createControls();
  } else {
    window.addEventListener('load', () => {
      createControls();
    }, { once: true });
  }
  473. })();

QingJ © 2025

镜像随时可能失效,请加Q群300939539或关注我们的公众号极客氢云获取最新地址