YouTube同声传译:字幕文本转语音TTS(适用于沉浸式翻译)

将YouTube上的沉浸式翻译双语字幕转换为语音播放,支持更改音色和调整语音速度,支持多语言

  1. // ==UserScript==
  2. // @name YouTube同声传译:字幕文本转语音TTS(适用于沉浸式翻译)
  3. // @namespace http://tampermonkey.net/
  4. // @version 1.12.2
  5. // @description 将YouTube上的沉浸式翻译双语字幕转换为语音播放,支持更改音色和调整语音速度,支持多语言
  6. // @author Sean2333
  7. // @match https://www.youtube.com/*
  8. // @grant GM_setValue
  9. // @grant GM_getValue
  10. // @license MIT
  11. // ==/UserScript==
  12.  
  13. (function () {
  14. 'use strict';
  15.  
  // ---- Speech engine and playback bookkeeping ----
  const synth = window.speechSynthesis;
  let selectedVoice = null; // voice object applied to every new utterance
  let currentUtterance = null; // utterance whose onstart fired and that has not ended yet
  let pendingUtterance = null; // utterance held back while another one is being spoken
  let isWaitingToSpeak = false; // true while speakText has paused the video to let speech finish
  let lastCaptionText = ''; // last caption text that was handed to speakText
  let timeoutIds = []; // timer ids collected so disconnectObservers can clear them

  // ---- Floating panel and drag state ----
  let voiceSelectUI = null;
  let isDragging = false;
  let startX;
  let startY;

  // ---- Settings persisted with GM_getValue / GM_setValue ----
  let isSpeechEnabled = GM_getValue('isSpeechEnabled', true);
  let autoVideoPause = GM_getValue('autoVideoPause', true);
  let followVideoSpeed = GM_getValue('followVideoSpeed', true);
  let customSpeed = GM_getValue('customSpeed', 1.0);
  let speechVolume = GM_getValue('speechVolume', 1.0);
  let selectedVoiceName = GM_getValue('selectedVoiceName', null);
  let isCollapsed = GM_getValue('isCollapsed', false);
  let windowPosX = GM_getValue('windowPosX', null);
  let windowPosY = GM_getValue('windowPosY', null);

  // ---- Caption / navigation tracking ----
  let currentObserver = null; // MutationObserver watching the caption container
  let videoObserver = null; // MutationObserver watching #player-container
  let currentVideoId = null; // value of the "v" query parameter last seen
  let originalPushState = null; // saved history.pushState while it is patched
  let originalReplaceState = null; // saved history.replaceState while it is patched
  41.  
  /**
   * Registers the document-wide Alt+T shortcut that toggles speech playback by
   * clicking the panel's "#speechToggleCheckbox" element.
   */
  function setupShortcuts() {
    document.addEventListener('keydown', (e) => {
      if (!e.altKey) {
        return;
      }

      // `key` may be missing on synthetic keydown events, and some keyboard layouts
      // turn Alt+T into another character, so the physical key (`code`) is accepted too.
      const pressedKey = typeof e.key === 'string' ? e.key.toLowerCase() : '';
      if (pressedKey !== 't' && e.code !== 'KeyT') {
        return;
      }

      const speechToggleCheckbox = document.querySelector('#speechToggleCheckbox');
      if (speechToggleCheckbox) {
        speechToggleCheckbox.click();
        console.log('触发TTS开关快捷键');
      } else {
        console.log('未找到TTS开关元素');
      }
    });
  }
  55.  
  /**
   * Resolves with the list returned by synth.getVoices().
   *
   * If the list is empty on the first call, the promise waits for the
   * `voiceschanged` handler, or for a single retry after one second that only
   * resolves when voices have appeared by then. Whichever path settles first
   * clears the retry timer and detaches the handler it installed.
   *
   * @returns {Promise<Array>} promise resolving with the available voices
   */
  function loadVoices() {
    return new Promise((resolve) => {
      const initialVoices = synth.getVoices();
      if (initialVoices.length !== 0) {
        console.log('成功加载语音列表,共', initialVoices.length, '个语音');
        resolve(initialVoices);
        return;
      }

      console.log('等待语音列表加载...');
      let isSettled = false;
      let retryTimerId = null;

      const settle = (voices) => {
        if (isSettled) {
          return;
        }
        isSettled = true;
        clearTimeout(retryTimerId);
        // Only detach the handler if nobody replaced it in the meantime.
        if (synth.onvoiceschanged === handleVoicesChanged) {
          synth.onvoiceschanged = null;
        }
        resolve(voices);
      };

      function handleVoicesChanged() {
        const voices = synth.getVoices();
        console.log('语音列表加载完成,共', voices.length, '个语音');
        settle(voices);
      }

      synth.onvoiceschanged = handleVoicesChanged;

      retryTimerId = setTimeout(() => {
        const voices = synth.getVoices();
        if (voices.length > 0) {
          console.log('通过重试加载到语音列表,共', voices.length, '个语音');
          settle(voices);
        }
      }, 1000);
      timeoutIds.push(retryTimerId);
    });
  }
  81.  
  /**
   * Builds the floating settings panel, appends it to document.body and wires
   * up all of its event handlers.
   *
   * The panel contains, top to bottom: the speech on/off toggle, the
   * "auto-pause video" toggle with its help tooltip, the searchable voice list
   * with a test button, the volume slider and the speed controls.
   *
   * Every change is written back to the module-level settings and persisted
   * with GM_setValue. Document-level mouse listeners for dragging are
   * registered here as well.
   *
   * @returns {{container: HTMLElement, select: HTMLElement, content: HTMLElement}}
   *   the panel root, the <ul> that holds the voice entries, and the collapsible body
   */
  function createVoiceSelectUI() {
    const EXPANDED_WIDTH = '260px';
    const IDLE_BACKGROUND = 'rgba(255, 255, 255, 0.75)';
    const IDLE_SHADOW = '0 2px 5px rgba(0, 0, 0, 0.15)';

    // Creates an element, then applies DOM properties and inline styles in one go.
    const make = (tag, style = {}, props = {}) => {
      const node = document.createElement(tag);
      Object.assign(node, props);
      Object.assign(node.style, style);
      return node;
    };

    // Shows or hides the voice list and flips the arrow glyph accordingly.
    function updateDropdownState(isOpen) {
      select.style.display = isOpen ? 'block' : 'none';
      dropdownArrow.textContent = isOpen ? '▲' : '▼';
    }

    // ---------------------------------------------------------------- panel frame
    const container = make('div', {
      position: 'fixed',
      top: windowPosY || '10px',
      right: windowPosX || '10px',
      width: EXPANDED_WIDTH,
      background: IDLE_BACKGROUND,
      padding: '10px',
      border: '1px solid rgba(221, 221, 221, 0.8)',
      borderRadius: '5px',
      zIndex: '9999',
      boxShadow: IDLE_SHADOW,
      userSelect: 'none',
      transition: 'all 0.2s',
    }, { className: 'voice-select-container' });

    // The panel is translucent until hovered.
    container.addEventListener('mouseenter', () => {
      container.style.background = 'rgba(255, 255, 255, 0.95)';
      container.style.boxShadow = '0 2px 8px rgba(0, 0, 0, 0.2)';
    });
    container.addEventListener('mouseleave', () => {
      container.style.background = IDLE_BACKGROUND;
      container.style.boxShadow = IDLE_SHADOW;
    });

    const titleBar = make('div', {
      padding: '5px',
      marginBottom: '10px',
      borderBottom: '1px solid #eee',
      display: 'flex',
      justifyContent: 'space-between',
      alignItems: 'center',
      cursor: 'move',
    }, { className: 'title-bar' });

    const title = make('span', {}, { textContent: '字幕语音设置' });

    const toggleButton = make('button', {
      border: 'none',
      background: 'none',
      cursor: 'pointer',
      fontSize: '16px',
      padding: '0 5px',
    }, { textContent: isCollapsed ? '+' : '−' });

    const content = make('div', isCollapsed ? { display: 'none' } : {});

    // ---------------------------------------------------------------- speech toggle
    const speechToggleDiv = make('div', {
      marginBottom: '10px',
      borderBottom: '1px solid #eee',
      paddingBottom: '10px',
    });
    const speechToggleCheckbox = make('input', {}, {
      type: 'checkbox',
      checked: isSpeechEnabled,
      id: 'speechToggleCheckbox',
    });
    const speechToggleLabel = make('label', { marginLeft: '5px' }, {
      textContent: '启用语音播放(Alt+T)',
      htmlFor: 'speechToggleCheckbox',
    });

    // ---------------------------------------------------------------- auto-pause toggle
    const autoVideoPauseDiv = make('div', {
      marginBottom: '10px',
      borderBottom: '1px solid #eee',
      paddingBottom: '10px',
      display: 'flex',
      alignItems: 'center',
      gap: '5px',
    });
    const autoVideoPauseCheckbox = make('input', {}, {
      type: 'checkbox',
      checked: autoVideoPause,
      id: 'autoVideoPauseCheckbox',
    });
    const autoVideoPauseLabel = make('label', { flex: '1' }, {
      textContent: '自动暂停视频,以完整播放语音(推荐开启)',
      htmlFor: 'autoVideoPauseCheckbox',
    });
    const helpIcon = make('span', {
      display: 'inline-flex',
      justifyContent: 'center',
      alignItems: 'center',
      width: '14px',
      height: '14px',
      borderRadius: '50%',
      backgroundColor: '#e0e0e0',
      color: '#666',
      fontSize: '10px',
      cursor: 'help',
      marginLeft: '2px',
    }, { textContent: '?' });
    const tooltip = make('div', {
      position: 'fixed',
      display: 'none',
      backgroundColor: 'rgba(0, 0, 0, 0.8)',
      color: 'white',
      padding: '8px 12px',
      borderRadius: '4px',
      fontSize: '12px',
      width: '220px',
      zIndex: '10000',
      pointerEvents: 'none',
      lineHeight: '1.5',
      boxShadow: '0 2px 8px rgba(0, 0, 0, 0.15)',
    }, {
      textContent: '开启后,当新字幕出现时,如果上一条语音还未播放完,会自动暂停视频等待语音播放完成。这样可以确保每条字幕都被完整朗读。由于文字转语音存在一定延迟,建议开启此选项以获得最佳体验。',
    });
    helpIcon.appendChild(tooltip);

    // The tooltip follows the pointer and flips to the other side near the viewport edge.
    helpIcon.addEventListener('mousemove', (e) => {
      tooltip.style.display = 'block';
      const gap = 10;
      let left = e.clientX + gap;
      let top = e.clientY + gap;
      if (left + tooltip.offsetWidth > window.innerWidth) {
        left = e.clientX - tooltip.offsetWidth - gap;
      }
      if (top + tooltip.offsetHeight > window.innerHeight) {
        top = e.clientY - tooltip.offsetHeight - gap;
      }
      tooltip.style.left = `${left}px`;
      tooltip.style.top = `${top}px`;
    });
    helpIcon.addEventListener('mouseleave', () => {
      tooltip.style.display = 'none';
    });

    const labelWrapper = make('div', {
      display: 'flex',
      alignItems: 'center',
      flex: '1',
    });
    labelWrapper.append(autoVideoPauseLabel, helpIcon);

    // ---------------------------------------------------------------- voice picker
    const voiceDiv = make('div', { marginBottom: '10px', position: 'relative' });
    const voiceLabel = make('div', { marginBottom: '5px' }, {
      textContent: '切换音色(支持多语言,与字幕语言匹配即可):',
    });
    const dropdownContainer = make('div', { position: 'relative', width: '100%' });
    const inputContainer = make('div', { position: 'relative', width: '100%' });
    const searchInput = make('input', {
      width: '100%',
      padding: '5px 25px 5px 8px',
      marginBottom: '5px',
      borderRadius: '3px',
      boxSizing: 'border-box',
    }, { type: 'text', placeholder: '搜索或选择音色...' });
    const dropdownArrow = make('span', {
      position: 'absolute',
      right: '8px',
      top: '50%',
      transform: 'translateY(-50%)',
      color: '#666',
      fontSize: '12px',
      cursor: 'pointer',
      padding: '5px',
    }, { textContent: '▼' });
    const select = make('ul', {
      position: 'absolute',
      width: '100%',
      maxHeight: '200px',
      overflowY: 'auto',
      border: '1px solid #ccc',
      borderRadius: '3px',
      backgroundColor: 'white',
      zIndex: '10000',
      listStyle: 'none',
      padding: '0',
      margin: '0',
      display: 'none',
      boxShadow: '0 2px 4px rgba(0,0,0,0.1)',
    });
    const testButton = make('button', {
      padding: '5px 10px',
      borderRadius: '3px',
      cursor: 'pointer',
      width: '100%',
      marginTop: '5px',
    }, { textContent: '测试音色' });

    // ---------------------------------------------------------------- volume
    const volumeControl = make('div', {
      marginTop: '10px',
      borderTop: '1px solid #eee',
      paddingTop: '10px',
    });
    const volumeLabel = make('div', { marginBottom: '5px' }, { textContent: '音量控制:' });
    const volumeSlider = make('input', { width: '100%', margin: '5px 0' }, {
      type: 'range',
      min: '0',
      max: '1',
      step: '0.1',
      value: speechVolume,
    });
    const volumeValue = make('span', {
      fontSize: '12px',
      color: '#666',
      marginLeft: '5px',
    }, { textContent: `${Math.round(speechVolume * 100)}%` });

    // ---------------------------------------------------------------- speed
    const speedControl = make('div', {
      marginTop: '10px',
      borderTop: '1px solid #eee',
      paddingTop: '10px',
      display: 'flex',
      alignItems: 'center',
      gap: '10px',
    });
    const followSpeedDiv = make('div', { flex: '1' });
    const followSpeedCheckbox = make('input', {}, {
      type: 'checkbox',
      checked: followVideoSpeed,
      id: 'followSpeedCheckbox',
    });
    const followSpeedLabel = make('label', { marginLeft: '5px' }, {
      textContent: '跟随视频倍速',
      htmlFor: 'followSpeedCheckbox',
    });
    const customSpeedDiv = make('div', { flex: '1' });
    const customSpeedLabel = make('div', { marginBottom: '5px' }, { textContent: '自定义倍速:' });
    const customSpeedSelect = make('select', {
      width: '100%',
      padding: '5px',
      borderRadius: '3px',
    });
    for (const speed of [0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0]) {
      const option = make('option', {}, { value: speed, textContent: `${speed}x` });
      if (speed === customSpeed) {
        option.selected = true;
      }
      customSpeedSelect.appendChild(option);
    }

    // Sample sentence per voice language; looked up by full tag, then base language.
    const testPhrases = {
      'zh': '这是一个中文测试语音',
      'zh-CN': '这是一个中文测试语音',
      'zh-TW': '這是一個中文測試語音',
      'zh-HK': '這是一個中文測試語音',
      'en': 'This is a test voice in English',
      'ja': 'これは日本語のテスト音声です',
      'ko': '이것은 한국어 테스트 음성입니다',
      'fr': 'Ceci est un test vocal en français',
      'de': 'Dies ist eine Testsprache auf Deutsch',
      'es': 'Esta es una voz de prueba en español',
      'it': 'Questa è una voce di prova in italiano',
      'ru': 'Это тестовый голос на русском языке',
      'pt': 'Esta é uma voz de teste em português',
      'default': 'This is a test voice', // fallback sample text
    };

    // Enables or disables every control that only makes sense while speech is on.
    // Also run once at start-up so a stored "disabled" setting is reflected in the panel.
    const applyEnabledState = () => {
      const isDisabled = !isSpeechEnabled;
      testButton.disabled = isDisabled;
      followSpeedCheckbox.disabled = isDisabled;
      customSpeedSelect.disabled = isDisabled || followVideoSpeed;
      volumeSlider.disabled = isDisabled;
      autoVideoPauseCheckbox.disabled = isDisabled;
      searchInput.disabled = isDisabled;
      if (isDisabled) {
        // The list is a <ul> and cannot be disabled itself, so just close it.
        updateDropdownState(false);
      }
    };

    // ---------------------------------------------------------------- handlers
    speechToggleCheckbox.onchange = () => {
      isSpeechEnabled = speechToggleCheckbox.checked;
      applyEnabledState();
      GM_setValue('isSpeechEnabled', isSpeechEnabled);

      if (isSpeechEnabled) {
        setupCaptionObserver();
        setupNavigationListeners();
      } else {
        if (synth.speaking) {
          synth.cancel();
        }
        // If the video was paused to wait for speech, let it continue.
        if (isWaitingToSpeak) {
          const video = document.querySelector('video');
          if (video && video.paused) {
            video.play();
          }
          isWaitingToSpeak = false;
        }
        pendingUtterance = null;
        disconnectObservers();
      }

      console.log(`语音播放已${isSpeechEnabled ? '启用' : '禁用'}`);
    };

    autoVideoPauseCheckbox.onchange = () => {
      autoVideoPause = autoVideoPauseCheckbox.checked;
      GM_setValue('autoVideoPause', autoVideoPause);
      console.log(`自动暂停视频已${autoVideoPause ? '启用' : '禁用'}`);
    };

    dropdownArrow.addEventListener('click', (e) => {
      e.stopPropagation();
      if (!isSpeechEnabled) {
        return;
      }
      updateDropdownState(select.style.display === 'none');
    });

    searchInput.addEventListener('click', (e) => {
      e.stopPropagation();
      updateDropdownState(true);
    });

    // Any click that was not stopped by the picker itself closes the list.
    document.addEventListener('click', () => {
      updateDropdownState(false);
    });

    select.addEventListener('click', (e) => {
      e.stopPropagation();
      const clickedOption = e.target;
      if (clickedOption.tagName !== 'LI') {
        return;
      }
      const voiceIndex = Number.parseInt(clickedOption.dataset.value, 10);
      if (Number.isNaN(voiceIndex)) {
        return;
      }
      // Look the voice up in the current list; it may no longer contain this index.
      const voice = synth.getVoices()[voiceIndex];
      if (!voice) {
        updateDropdownState(false);
        return;
      }
      selectedVoice = voice;
      selectedVoiceName = voice.name;
      searchInput.value = clickedOption.textContent;
      GM_setValue('selectedVoiceName', selectedVoiceName);
      updateDropdownState(false);
    });

    searchInput.oninput = () => {
      const searchTerm = searchInput.value.toLowerCase();
      for (const item of Array.from(select.children)) {
        const isMatch = item.textContent.toLowerCase().includes(searchTerm);
        item.style.display = isMatch ? 'block' : 'none';
      }
      updateDropdownState(true);
    };

    volumeSlider.onchange = () => {
      speechVolume = parseFloat(volumeSlider.value);
      volumeValue.textContent = `${Math.round(speechVolume * 100)}%`;
      GM_setValue('speechVolume', speechVolume);
      console.log('音量已设置为:', speechVolume);
    };
    // Live preview of the percentage while dragging; the value is stored on change.
    volumeSlider.oninput = () => {
      volumeValue.textContent = `${Math.round(volumeSlider.value * 100)}%`;
    };

    followSpeedCheckbox.onchange = () => {
      followVideoSpeed = followSpeedCheckbox.checked;
      customSpeedSelect.disabled = followSpeedCheckbox.checked;
      GM_setValue('followVideoSpeed', followVideoSpeed);
      console.log('语音速度模式:', followVideoSpeed ? '跟随视频' : '自定义');
    };

    customSpeedSelect.onchange = () => {
      customSpeed = parseFloat(customSpeedSelect.value);
      GM_setValue('customSpeed', customSpeed);
      console.log('自定义语音速度设置为:', customSpeed);
    };

    testButton.onclick = (e) => {
      e.stopPropagation();
      if (!selectedVoice) {
        return;
      }
      const fullLang = selectedVoice.lang;
      const baseLang = fullLang.split('-')[0];
      // Priority: full language tag > base language > default text.
      const testText = testPhrases[fullLang] || testPhrases[baseLang] || testPhrases['default'];
      console.log(`使用测试文本(${selectedVoice.lang}): ${testText}`);
      speakText(testText, false);
    };

    toggleButton.onclick = (e) => {
      e.stopPropagation();
      isCollapsed = !isCollapsed;

      if (isCollapsed) {
        // Remember the rendered width so it can be restored on expand.
        container.dataset.expandedWidth = `${container.offsetWidth}px`;
        content.style.display = 'none';
        container.style.width = 'auto';
        container.style.minWidth = '100px';
      } else {
        content.style.display = 'block';
        // No remembered width exists when the panel started out collapsed.
        container.style.width = container.dataset.expandedWidth || EXPANDED_WIDTH;
      }

      toggleButton.textContent = isCollapsed ? '+' : '−';
      GM_setValue('isCollapsed', isCollapsed);
    };

    // ---------------------------------------------------------------- assembly
    titleBar.append(title, toggleButton);

    speechToggleDiv.append(speechToggleCheckbox, speechToggleLabel);
    autoVideoPauseDiv.append(autoVideoPauseCheckbox, labelWrapper);

    inputContainer.append(searchInput, dropdownArrow);
    dropdownContainer.append(inputContainer, select, testButton);
    voiceDiv.append(voiceLabel, dropdownContainer);

    volumeControl.append(volumeLabel, volumeSlider, volumeValue);

    followSpeedDiv.append(followSpeedCheckbox, followSpeedLabel);
    customSpeedDiv.append(customSpeedLabel, customSpeedSelect);
    speedControl.append(followSpeedDiv, customSpeedDiv);

    content.append(speechToggleDiv, autoVideoPauseDiv, voiceDiv, volumeControl, speedControl);
    container.append(titleBar, content);

    applyEnabledState();

    if (isCollapsed) {
      container.style.width = 'auto';
      container.style.minWidth = '100px';
    }

    document.body.appendChild(container);

    document.addEventListener('mousedown', dragStart);
    document.addEventListener('mousemove', drag);
    document.addEventListener('mouseup', dragEnd);
    document.addEventListener('mouseleave', dragEnd);

    return { container, select, content };
  }
  636.  
  /**
   * mousedown handler: starts dragging when the press lands on the panel's own
   * title bar and records the pointer offset inside the panel in startX/startY.
   *
   * @param {MouseEvent} e
   */
  function dragStart(e) {
    const target = e.target;
    if (!target || typeof target.closest !== 'function') {
      return;
    }

    // ".title-bar" is a generic class name, so require it to belong to our panel.
    // Otherwise a foreign element with that class would leave `container` null.
    const container = target.closest('.voice-select-container');
    const titleBar = target.closest('.title-bar');
    if (!container || !titleBar || !container.contains(titleBar)) {
      return;
    }

    isDragging = true;

    const rect = container.getBoundingClientRect();
    startX = e.clientX - rect.left;
    startY = e.clientY - rect.top;

    // Disable the CSS transition so the panel tracks the pointer without lag.
    container.style.transition = 'none';
  }
  649.  
  /**
   * mouseup / mouseleave handler: ends an active drag, restores the panel's
   * transition and persists its position (distance from the right edge, and top).
   *
   * @param {MouseEvent} e - unused
   */
  function dragEnd(e) {
    if (!isDragging) {
      return;
    }
    isDragging = false;

    const panel = document.querySelector('.voice-select-container');
    if (!panel) {
      return;
    }
    panel.style.transition = 'all 0.2s';

    const { right, top } = panel.getBoundingClientRect();
    windowPosX = `${window.innerWidth - right}px`;
    windowPosY = `${top}px`;
    GM_setValue('windowPosX', windowPosX);
    GM_setValue('windowPosY', windowPosY);
    console.log('保存浮窗位置:', windowPosX, windowPosY);
  }
  666.  
  /**
   * mousemove handler: while a drag is active, moves the panel with the pointer,
   * limited to the viewport, by updating its `right` and `top` styles.
   *
   * @param {MouseEvent} e
   */
  function drag(e) {
    if (!isDragging) {
      return;
    }
    e.preventDefault();

    const panel = document.querySelector('.voice-select-container');
    if (!panel) {
      return;
    }

    // Same bound order as before: floor at 0 first, then cap at the upper limit.
    const limit = (value, upper) => Math.min(Math.max(0, value), upper);
    const panelWidth = panel.offsetWidth;
    const left = limit(e.clientX - startX, window.innerWidth - panelWidth);
    const top = limit(e.clientY - startY, window.innerHeight - panel.offsetHeight);

    // The panel is anchored from the right edge, so translate left into right.
    panel.style.right = `${window.innerWidth - left - panelWidth}px`;
    panel.style.top = `${top}px`;
    panel.style.left = '';
  }
  687.  
  /**
   * Loads the voices, creates the settings panel on first use, fills the voice
   * list and picks the active voice.
   *
   * When no voice is selected yet the choice falls back, in order, to: the
   * stored voice name, the "Microsoft Xiaoxiao" voice, the first voice whose
   * language contains "zh", and finally the first voice in the list.
   *
   * Failures while building the list are logged instead of being left as an
   * unhandled promise rejection.
   */
  function selectVoice() {
    loadVoices()
      .then((voices) => {
        if (!voiceSelectUI) {
          voiceSelectUI = createVoiceSelectUI();
        }

        const { select, container } = voiceSelectUI;
        const searchInput = container.querySelector('input[type="text"]');

        // Drop any entries from a previous run before rebuilding the list.
        select.textContent = '';

        voices.forEach((voice, index) => {
          const option = document.createElement('li');
          option.dataset.value = String(index);
          option.textContent = `${voice.name} (${voice.lang})`;
          Object.assign(option.style, {
            padding: '8px 10px',
            cursor: 'pointer',
            borderBottom: '1px solid #eee',
          });

          option.addEventListener('mouseover', () => {
            option.style.backgroundColor = '#f0f0f0';
          });
          option.addEventListener('mouseout', () => {
            option.style.backgroundColor = '';
          });
          option.addEventListener('click', () => {
            selectedVoice = voice;
            selectedVoiceName = voice.name;
            searchInput.value = option.textContent;
            GM_setValue('selectedVoiceName', selectedVoiceName);
            select.style.display = 'none';
            console.log('已切换语音到:', selectedVoice.name);
          });

          select.appendChild(option);
        });

        if (!selectedVoice) {
          selectedVoice =
            voices.find((voice) => voice.name === selectedVoiceName) ||
            voices.find(
              (voice) => voice.name === 'Microsoft Xiaoxiao Online (Natural) - Chinese (Mainland)',
            ) ||
            voices.find((voice) => voice.lang.includes('zh')) ||
            voices[0];
        }

        // Show the active voice in the search box; stays empty when no voice exists.
        if (selectedVoice) {
          searchInput.value = `${selectedVoice.name} (${selectedVoice.lang})`;
        }
      })
      .catch((error) => {
        console.error('初始化语音列表失败:', error);
      });
  }
  749.  
  /**
   * Speaks `text` with the selected voice, volume and rate.
   *
   * If nothing is being spoken the utterance starts right away. Otherwise it
   * becomes the single pending utterance, replacing any older pending one, and
   * with auto-pause enabled the video is paused until speech has caught up.
   *
   * When an utterance ends or fails, the pending one (if any) is spoken next;
   * only when nothing is pending is a video paused by this function resumed.
   * A pending utterance is discarded when the failure was caused by a
   * cancellation, so a cancelled queue never plays stale captions.
   *
   * @param {string} text - text to speak; empty text is ignored
   * @param {boolean} [isNewCaption=false] - accepted for callers, currently unused
   */
  function speakText(text, isNewCaption = false) {
    if (!isSpeechEnabled || !text) {
      return;
    }

    const video = document.querySelector('video');

    const utterance = new SpeechSynthesisUtterance(text);
    if (selectedVoice) {
      utterance.voice = selectedVoice;
      utterance.lang = selectedVoice.lang;
    }
    utterance.volume = speechVolume;

    // Speech rate is only valid between 0.1 and 10; video playback rates can exceed that.
    const requestedRate = followVideoSpeed && video ? video.playbackRate : customSpeed;
    utterance.rate = Number.isFinite(requestedRate)
      ? Math.min(10, Math.max(0.1, requestedRate))
      : 1;

    // Resumes the video if it was paused to wait for speech; reports whether it did.
    const resumeVideoIfWaiting = () => {
      if (autoVideoPause && isWaitingToSpeak && video && video.paused) {
        isWaitingToSpeak = false;
        video.play();
        return true;
      }
      return false;
    };

    // Starts the pending utterance, if any; reports whether one was started.
    const speakPendingUtterance = () => {
      if (!pendingUtterance) {
        return false;
      }
      const nextUtterance = pendingUtterance;
      pendingUtterance = null;
      synth.speak(nextUtterance);
      return true;
    };

    utterance.onstart = () => {
      currentUtterance = utterance;
      console.log('开始播放语音:', text);
    };

    utterance.onend = () => {
      console.log('语音播放完成');

      // Only clear the marker if it still points at this utterance.
      if (currentUtterance === utterance) {
        currentUtterance = null;
      }

      if (pendingUtterance) {
        console.log('播放准备好的语音');
        speakPendingUtterance();
      } else if (resumeVideoIfWaiting()) {
        console.log('所有语音播放完成,视频继续播放');
      }
    };

    utterance.onerror = (event) => {
      console.error('语音播放出错:', event);
      if (currentUtterance === utterance) {
        currentUtterance = null;
      }
      if (pendingUtterance === utterance) {
        pendingUtterance = null;
      }

      // A cancellation invalidates everything that was queued behind this utterance.
      const wasCancelled =
        Boolean(event) && (event.error === 'canceled' || event.error === 'interrupted');
      if (wasCancelled) {
        pendingUtterance = null;
      }

      // Otherwise carry on with the queued caption instead of leaving it stranded.
      if (!speakPendingUtterance()) {
        resumeVideoIfWaiting();
      }
    };

    if (synth.speaking) {
      console.log('当前正在播放语音,新语音准备完成:', text);

      if (pendingUtterance) {
        console.log('更新待播放语音');
      }

      if (autoVideoPause && !isWaitingToSpeak && video && !video.paused) {
        video.pause();
        isWaitingToSpeak = true;
        console.log('新语音准备完成,视频暂停等待当前语音完成');
      }

      pendingUtterance = utterance;
    } else {
      // Nothing is speaking, so anything still marked pending is out of date.
      pendingUtterance = null;
      console.log('直接播放语音');
      synth.speak(utterance);
    }
  }
  841.  
  /**
   * Reads the translated caption currently shown by the Immersive Translate
   * caption window.
   *
   * @returns {string} text of all ".target-cue" elements joined by spaces and
   *   trimmed, or '' when the caption window or its shadow root is missing
   */
  function getCaptionText() {
    const captionHost = document.querySelector('#immersive-translate-caption-window');
    if (!captionHost || !captionHost.shadowRoot) {
      return '';
    }

    const cues = captionHost.shadowRoot.querySelectorAll('.target-cue');
    return Array.from(cues, (cue) => `${cue.textContent} `).join('').trim();
  }
  855.  
  /**
   * Attaches a MutationObserver to the first <div> inside the caption window's
   * shadow root and speaks every caption text that differs from the previous one.
   *
   * The caption window is looked up immediately and then up to ten more times at
   * one-second intervals. Once found, any previous observer is disconnected and
   * the speech state is reset before the new observer is installed. A caption
   * that is already visible is spoken straight away.
   *
   * Does nothing while speech is disabled.
   */
  function setupCaptionObserver() {
    if (!isSpeechEnabled) {
      return;
    }

    const maxRetries = 10;
    let attempts = 0;

    // Schedules another lookup, or logs the give-up message once retries are used up.
    const retryOrGiveUp = (retryMessage, giveUpMessage) => {
      if (attempts >= maxRetries) {
        console.log(giveUpMessage);
        return;
      }
      console.log(retryMessage);
      attempts += 1;
      const timeoutId = setTimeout(locateCaptionRoot, 1000);
      timeoutIds.push(timeoutId);
    };

    // Speaks the caption when it is non-empty and changed since the last one.
    const speakIfChanged = () => {
      const currentText = getCaptionText();
      if (currentText && currentText !== lastCaptionText) {
        lastCaptionText = currentText;
        speakText(currentText, true);
      }
    };

    function locateCaptionRoot() {
      if (!isSpeechEnabled) {
        return;
      }

      const captionHost = document.querySelector('#immersive-translate-caption-window');
      if (!captionHost || !captionHost.shadowRoot) {
        retryOrGiveUp('等待字幕窗口加载,1秒后重试', '达到最大重试次数,放弃寻找字幕窗口');
        return;
      }

      const rootContainer = captionHost.shadowRoot.querySelector('div');
      if (!rootContainer) {
        retryOrGiveUp('未找到字幕容器,1秒后重试', '达到最大重试次数,放弃寻找字幕容器');
        return;
      }

      console.log('找到字幕根容器,开始监听变化');

      if (currentObserver) {
        currentObserver.disconnect();
        console.log('断开旧的字幕观察者连接');
      }

      // Start from a clean slate so nothing from the previous container is replayed.
      lastCaptionText = '';
      pendingUtterance = null;
      if (synth.speaking) {
        synth.cancel();
        console.log('取消当前正在播放的语音');
      }
      isWaitingToSpeak = false;

      currentObserver = new MutationObserver(speakIfChanged);
      currentObserver.observe(rootContainer, {
        childList: true,
        subtree: true,
        characterData: true,
      });
      console.log('新的字幕观察者设置完成');

      // Handle a caption that was already on screen before the observer existed.
      const initialText = getCaptionText();
      if (initialText) {
        lastCaptionText = initialText;
        speakText(initialText, true);
      }
    }

    locateCaptionRoot();
  }
  934.  
  /**
   * Detects that the "v" query parameter differs from the last seen video id.
   *
   * On a change it disconnects the caption observer, cancels ongoing speech and
   * drops any queued utterance so a caption from the previous video is never
   * spoken. After 1.5 seconds it starts looking for the caption window (up to
   * ten tries, one second apart) and calls setupCaptionObserver once found.
   *
   * Does nothing while speech is disabled, when the URL has no video id, or when
   * the id is unchanged.
   */
  function checkForVideoChange() {
    if (!isSpeechEnabled) {
      return;
    }

    const videoId = new URLSearchParams(window.location.search).get('v');
    if (!videoId || videoId === currentVideoId) {
      return;
    }

    console.log('检测到视频切换,从', currentVideoId, '切换到', videoId);
    currentVideoId = videoId;

    if (currentObserver) {
      currentObserver.disconnect();
      console.log('断开旧的字幕观察者连接');
    }

    // A caption queued for the previous video must not be spoken over the new one.
    pendingUtterance = null;
    if (synth.speaking) {
      synth.cancel();
      console.log('取消当前正在播放的语音');
    }

    const maxRetries = 10;
    let retryCount = 0;

    function trySetupObserver() {
      if (!isSpeechEnabled) {
        return;
      }

      if (retryCount >= maxRetries) {
        console.log('达到最大重试次数,放弃设置字幕监听');
        return;
      }

      const immersiveCaptionWindow = document.querySelector('#immersive-translate-caption-window');
      if (immersiveCaptionWindow && immersiveCaptionWindow.shadowRoot) {
        console.log('找到字幕容器,开始设置监听');
        setupCaptionObserver();
      } else {
        console.log(`未找到字幕容器,1秒后重试`);
        retryCount += 1;
        timeoutIds.push(setTimeout(trySetupObserver, 1000));
      }
    }

    // Give the page some time to swap players before the first lookup.
    timeoutIds.push(setTimeout(trySetupObserver, 1500));
  }
  984.  
  /**
   * Installs every hook used to notice that YouTube switched to another video:
   * a MutationObserver on "#player-container", wrappers around
   * history.pushState / history.replaceState, and listeners for hashchange,
   * popstate, yt-navigate-start and yt-navigate-finish.
   *
   * The untouched history methods are stored in originalPushState /
   * originalReplaceState so disconnectObservers can restore them.
   *
   * Does nothing while speech is disabled. Calling it again while the hooks are
   * still installed is a no-op, so the saved originals are never overwritten by
   * the wrappers themselves.
   */
  function setupNavigationListeners() {
    if (!isSpeechEnabled) {
      return;
    }
    if (videoObserver) {
      // Hooks are already in place; disconnectObservers resets this marker.
      return;
    }

    videoObserver = new MutationObserver((mutations) => {
      // One check per batch is enough; repeated checks for the same id do nothing.
      if (mutations.some((mutation) => mutation.type === 'childList')) {
        checkForVideoChange();
      }
    });

    const playerContainer = document.querySelector('#player-container');
    if (playerContainer) {
      videoObserver.observe(playerContainer, {
        childList: true,
        subtree: true,
      });
    }

    // The wrappers close over local references, so they keep working even after
    // the module-level originals have been reset to null.
    const nativePushState = history.pushState;
    originalPushState = nativePushState;
    history.pushState = function (...args) {
      const result = nativePushState.apply(history, args);
      checkForVideoChange();
      return result;
    };

    const nativeReplaceState = history.replaceState;
    originalReplaceState = nativeReplaceState;
    history.replaceState = function (...args) {
      const result = nativeReplaceState.apply(history, args);
      checkForVideoChange();
      return result;
    };

    window.addEventListener('hashchange', checkForVideoChange);
    window.addEventListener('popstate', checkForVideoChange);

    window.addEventListener('yt-navigate-start', onNavigateStart);
    window.addEventListener('yt-navigate-finish', onNavigateFinish);
  }
  1028.  
  /**
   * "yt-navigate-start" handler: re-checks the current video id while speech is enabled.
   */
  function onNavigateStart() {
    if (!isSpeechEnabled) {
      return;
    }
    console.log('YouTube导航开始');
    checkForVideoChange();
  }
  1035.  
  /**
   * "yt-navigate-finish" handler: re-checks the current video id while speech is enabled.
   */
  function onNavigateFinish() {
    if (!isSpeechEnabled) {
      return;
    }
    console.log('YouTube导航完成');
    checkForVideoChange();
  }
  1042.  
  /**
   * Undoes everything setupCaptionObserver and setupNavigationListeners installed:
   * disconnects both observers, removes the window listeners, restores the saved
   * history methods and clears every timer recorded in timeoutIds.
   */
  function disconnectObservers() {
    if (currentObserver) {
      currentObserver.disconnect();
      currentObserver = null;
      console.log('已断开字幕观察者');
    }

    if (videoObserver) {
      videoObserver.disconnect();
      videoObserver = null;
      console.log('已断开视频观察者');
    }

    const navigationHandlers = [
      ['hashchange', checkForVideoChange],
      ['popstate', checkForVideoChange],
      ['yt-navigate-start', onNavigateStart],
      ['yt-navigate-finish', onNavigateFinish],
    ];
    for (const [eventName, handler] of navigationHandlers) {
      window.removeEventListener(eventName, handler);
    }

    // Put back the history methods that were saved when they were wrapped.
    if (originalPushState) {
      history.pushState = originalPushState;
      originalPushState = null;
    }
    if (originalReplaceState) {
      history.replaceState = originalReplaceState;
      originalReplaceState = null;
    }

    for (const id of timeoutIds) {
      clearTimeout(id);
    }
    timeoutIds = [];
  }
  1074.  
  /**
   * Tears the script down: removes the drag and resize listeners, disconnects
   * all observers and navigation hooks, and stops any speech in progress.
   */
  function cleanup() {
    const dragHandlers = [
      ['mousedown', dragStart],
      ['mousemove', drag],
      ['mouseup', dragEnd],
      ['mouseleave', dragEnd],
    ];
    for (const [eventName, handler] of dragHandlers) {
      document.removeEventListener(eventName, handler);
    }

    window.removeEventListener('resize', onWindowResize);

    disconnectObservers();

    if (synth.speaking) {
      synth.cancel();
    }
  }
  1089.  
  /**
   * resize handler: keeps the panel's top edge between 0 and the lowest position
   * at which the panel still fits into the viewport height.
   */
  function onWindowResize() {
    const panel = document.querySelector('.voice-select-container');
    if (!panel) {
      return;
    }

    const { top } = panel.getBoundingClientRect();
    const lowestTop = window.innerHeight - panel.offsetHeight;
    const clampedTop = Math.min(Math.max(0, top), lowestTop);
    panel.style.top = `${clampedTop}px`;
  }
  1100.  
  /**
   * Starts the script one second after the page has finished loading: builds
   * the panel, registers the shortcut and, when speech is enabled, installs the
   * caption observer and navigation hooks and records the initial video id.
   */
  function initializeAfterLoad() {
    console.log('页面加载完成,开始初始化脚本');
    setTimeout(() => {
      selectVoice();
      setupShortcuts();

      if (isSpeechEnabled) {
        setupCaptionObserver();
        setupNavigationListeners();

        currentVideoId = new URLSearchParams(window.location.search).get('v');
        console.log('初始视频ID:', currentVideoId);
      }
    }, 1000);
  }

  // The script may be injected after "load" has already fired; in that case the
  // listener would never run, so initialise directly.
  if (document.readyState === 'complete') {
    initializeAfterLoad();
  } else {
    window.addEventListener('load', initializeAfterLoad, { once: true });
  }

  // "pagehide" replaces the deprecated "unload" event. When the page is only being
  // put into the back/forward cache, keep the hooks and just silence the speech.
  window.addEventListener('pagehide', (event) => {
    if (event.persisted) {
      if (synth.speaking) {
        synth.cancel();
      }
      return;
    }
    cleanup();
  });

  window.addEventListener('resize', onWindowResize);
  1120.  
  1121. })();

QingJ © 2025

镜像随时可能失效,请加Q群300939539或关注我们的公众号极客氢云获取最新地址