// Improved Claude Voice Interface
//
// Voice interface for Claude with TTS functionality, speech recognition, auto-reading, and drag positioning.
//
// 目前为 2025-02-27 提交的版本。查看最新版本。
// (This is the version submitted on 2025-02-27; see the latest version.)

// ==UserScript==
// @name         Improved Claude Voice Interface
// @namespace    http://tampermonkey.net/
// @version      0.5
// @description  Voice interface for Claude with TTS functionality, speech recognition, auto-reading and drag positioning
// @author       You
// @match        https://claude.ai/chat/*
// @grant        none
// @license      MIT
// ==/UserScript==

(function() {
    'use strict';

    // Configuration
    // Frozen so these shared settings cannot be changed by accident at runtime;
    // nothing in this script writes to them.
    const config = Object.freeze({
        // CSS height of the READ and LISTEN buttons
        buttonHeight: '50px',
        // Button colours while speech output / speech input is active
        activeBackgroundColor: '#4a86e8',
        activeTextColor: 'white',
        // Button colours while idle
        inactiveBackgroundColor: '#6c757d',
        inactiveTextColor: '#e0e0e0',
        // Assigned to SpeechSynthesisUtterance.rate when a message is read
        readRateMultiplier: 1.0,
        // When true, message polling is started during initialization
        autoReadNewMessages: true,
        // Polling period for new messages, in milliseconds
        checkInterval: 1000,
        // Delay, in milliseconds, between detecting a finished message and reading it
        readingDelay: 700
    });

    // ---- Mutable runtime state shared by the functions below ----

    // Text-to-speech
    let isSpeaking = false;            // set when an utterance starts, cleared by resetSpeechState()
    let currentMessageElement = null;  // element most recently passed to readMessage()

    // Speech-to-text
    let isRecording = false;           // toggled by the LISTEN button
    let recognition = null;            // SpeechRecognition instance; stays null when unsupported

    // Auto-read polling
    let lastMessageId = null;          // identifier of the last message the poller recorded
    let checkIntervalId = null;        // handle returned by setInterval() for the poller
    let processingMessage = false;     // true while a delayed read is pending

    // Drag positioning of the voice bar
    let isDragging = false;
    let startY = 0;                    // pointer Y coordinate when the drag began
    let startBottom = 0;               // bar's bottom offset (px) when the drag began

    // Wait for the page to fully load, then build the interface after a further
    // 1.5 s (presumably to let Claude's own UI finish rendering).
    // A userscript can be injected after the window "load" event has already
    // fired; a load listener registered at that point would never run, so the
    // document's readyState is checked first.
    function scheduleInitialization() {
        setTimeout(initializeVoiceInterface, 1500);
    }

    if (document.readyState === 'complete') {
        scheduleInitialization();
    } else {
        window.addEventListener('load', scheduleInitialization);
    }

    /**
     * Builds the voice bar (READ button, LISTEN button, drag handle), attaches
     * it to the page and wires up all behaviour: button clicks, dragging,
     * speech recognition and, when enabled in the config, message polling.
     */
    function initializeVoiceInterface() {
        console.log('Claude Voice Interface: Initializing...');

        // Stylesheet for the word that is currently being spoken (can be customized)
        const highlightCss = document.createElement('style');
        highlightCss.textContent = `
          text-reader.active-highlight {
          background-color: yellow;
        }`;
        document.head.appendChild(highlightCss);

        // Both action buttons share the same look; only id and label differ
        const makeActionButton = (id, markup) => {
            const button = document.createElement('button');
            button.id = id;
            button.innerHTML = markup;
            Object.assign(button.style, {
                width: '48%',
                height: config.buttonHeight,
                borderRadius: '8px',
                border: 'none',
                backgroundColor: config.inactiveBackgroundColor,
                color: config.inactiveTextColor,
                cursor: 'pointer',
                display: 'flex',
                justifyContent: 'center',
                alignItems: 'center',
                transition: 'all 0.3s',
                fontWeight: 'bold',
            });
            return button;
        };

        // The bar itself: fixed to the bottom edge, at the last saved offset if any
        const bar = document.createElement('div');
        bar.id = 'claude-voice-bar';
        Object.assign(bar.style, {
            width: '100%',
            display: 'flex',
            justifyContent: 'space-between',
            alignItems: 'center',
            boxSizing: 'border-box',
            padding: '10px',
            backgroundColor: '#f8f9fa',
            borderTop: '1px solid #dee2e6',
            position: 'fixed',
            bottom: localStorage.getItem('claudeVoiceBarPosition') || '0',
            left: '0',
            zIndex: '10000', // high z-index to stay above other elements
        });

        // Left button: read / pause
        const readButton = makeActionButton(
            'claude-speak-button',
            `${createPlayIcon()} <span style="margin-left: 8px;">READ</span>`
        );

        // Right button: speech-to-text
        const listenButton = makeActionButton(
            'claude-record-button',
            `${createEmptyCircleIcon()} <span style="margin-left: 8px;">LISTEN</span>`
        );

        // Handle used to drag the bar up and down
        const grip = document.createElement('div');
        grip.id = 'claude-voice-drag-handle';
        grip.innerHTML = createDragIcon();
        Object.assign(grip.style, {
            width: '50px',
            left: '50%',
            cursor: 'ns-resize',
            display: 'flex',
            justifyContent: 'center',
            alignItems: 'center',
        });

        // Assemble and attach directly to <body>
        for (const part of [readButton, listenButton, grip]) {
            bar.appendChild(part);
        }
        document.body.appendChild(bar);

        console.log('Claude Voice Interface: Interface added to page');

        // Behaviour
        readButton.addEventListener('click', toggleSpeaking);
        listenButton.addEventListener('click', toggleRecording);
        setupDragHandlers(bar, grip);
        initializeSpeechRecognition();

        // Auto-read only when enabled in the configuration
        if (config.autoReadNewMessages) {
            startMessageMonitoring();
        }
    }

    /**
     * Creates the shared SpeechRecognition instance, when the browser provides
     * one, and installs its handlers. Each recognized phrase is capitalized,
     * terminated with ". " and appended to the `.ProseMirror` editor.
     *
     * When speech recognition is unavailable a warning is logged and
     * `recognition` stays null.
     */
    function initializeSpeechRecognition() {
        const RecognitionCtor = window.SpeechRecognition || window.webkitSpeechRecognition;
        if (!RecognitionCtor) {
            console.warn('Speech Recognition not supported in this browser');
            return;
        }

        recognition = new RecognitionCtor();
        recognition.continuous = true;

        // Start again whenever the engine ends a session while the user is
        // still in listening mode
        recognition.onend = function () {
            if (isRecording) {
                recognition.start();
            }
        };

        // Capitalizes the first character (after one optional leading space)
        // and appends ". ". Blank transcripts yield '' so that they are skipped
        // instead of throwing on a missing first character.
        function formatTranscript(transcript) {
            if (typeof transcript !== 'string' || transcript.trim() === '') {
                return '';
            }
            const firstIndex = transcript[0] === ' ' ? 1 : 0;
            return transcript.slice(0, firstIndex) +
                transcript.charAt(firstIndex).toUpperCase() +
                transcript.slice(firstIndex + 1) + '. ';
        }

        recognition.onresult = function (e) {
            // Only the newest result is appended
            const results = e.results;
            const latest = results[results.length - 1];
            const phrase = formatTranscript(latest && latest[0] ? latest[0].transcript : '');
            if (phrase === '') return;

            const editor = document.querySelector('.ProseMirror');
            if (!editor) {
                console.warn('Claude Voice Interface: Could not find input field');
                return;
            }
            editor.innerText += phrase;
        };

        recognition.onerror = function (event) {
            console.error('Recognition error:', event.error);
        };

        // Enable the record button
        const recordButton = document.getElementById('claude-record-button');
        if (recordButton) {
            recordButton.disabled = false;
            recordButton.style.opacity = '1';
        }
    }

    /**
     * Lets the user drag the voice bar vertically by its handle, with mouse or
     * touch, and stores the final offset in localStorage under
     * 'claudeVoiceBarPosition'.
     *
     * @param {HTMLElement} voiceBar - Bar whose `style.bottom` is adjusted.
     * @param {HTMLElement} dragHandle - Element on which a drag starts.
     */
    function setupDragHandlers(voiceBar, dragHandle) {
        // Mouse events for desktop
        dragHandle.addEventListener('mousedown', function(e) {
            e.preventDefault();
            startDrag(e.clientY);
        });

        document.addEventListener('mousemove', function(e) {
            if (isDragging) {
                doDrag(e.clientY);
            }
        });

        document.addEventListener('mouseup', endDrag);

        // Touch events for mobile
        dragHandle.addEventListener('touchstart', function(e) {
            e.preventDefault();
            startDrag(e.touches[0].clientY);
        });

        document.addEventListener('touchmove', function(e) {
            if (isDragging && e.touches.length > 0) {
                doDrag(e.touches[0].clientY);
            }
        });

        document.addEventListener('touchend', endDrag);
        // An interrupted touch must also end the drag; otherwise later touch
        // moves anywhere on the page would keep moving the bar
        document.addEventListener('touchcancel', endDrag);

        function startDrag(clientY) {
            isDragging = true;
            startY = clientY;
            // style.bottom may be empty or non-numeric (e.g. a stale stored
            // value); fall back to 0 instead of propagating NaN into "NaNpx"
            const parsedBottom = parseInt(voiceBar.style.bottom, 10);
            startBottom = Number.isNaN(parsedBottom) ? 0 : parsedBottom;
        }

        function doDrag(clientY) {
            // Moving the pointer up (smaller Y) raises the bar; never go below 0
            const deltaY = startY - clientY;
            const newBottom = Math.max(0, startBottom + deltaY);
            voiceBar.style.bottom = `${newBottom}px`;
        }

        function endDrag() {
            if (isDragging) {
                isDragging = false;
                // Save position to localStorage
                localStorage.setItem('claudeVoiceBarPosition', voiceBar.style.bottom);
            }
        }
    }

    /**
     * Begins polling for new messages. The identifier of the message that is
     * already on the page (if any) is stored first, then checkForNewMessages
     * is scheduled every `config.checkInterval` milliseconds.
     */
    function startMessageMonitoring() {
        console.log('Claude Voice Interface: Starting message monitoring');

        // Remember what is already there
        const existingMessage = findLatestClaudeMessage();
        const hasExistingMessage = Boolean(existingMessage);
        if (hasExistingMessage) {
            const initialId = getMessageIdentifier(existingMessage);
            lastMessageId = initialId;
            console.log('Claude Voice Interface: Initial message ID:', lastMessageId);
        }

        // Kick off the periodic check and keep its handle
        const period = config.checkInterval;
        checkIntervalId = setInterval(checkForNewMessages, period);
    }

    /**
     * Polling callback: when the latest message differs from the last one that
     * was recorded, is no longer streaming, and nothing is being spoken, the
     * message is read aloud after `config.readingDelay` milliseconds.
     */
    function checkForNewMessages() {
        if (processingMessage) return; // A delayed read is already pending

        const latestMessage = findLatestClaudeMessage();
        if (!latestMessage) return;

        // The identifier is derived from the text, so it changes while the
        // message is still being generated
        const currentId = getMessageIdentifier(latestMessage);
        if (currentId === lastMessageId || isSpeaking) return;

        // Still generating: leave lastMessageId untouched. Recording the
        // identifier here would make the finished message look "already seen"
        // whenever its final text was observed while the streaming flag was
        // still set, and it would then never be read.
        if (latestMessage.dataset.isStreaming === 'true') return;

        console.log('Claude Voice Interface: New message detected');
        lastMessageId = currentId;
        processingMessage = true;

        // Delay reading to make sure the message is complete
        console.log('Claude Voice Interface: Waiting for message to complete...');
        setTimeout(() => {
            console.log('Claude Voice Interface: Reading new message');
            try {
                readMessage(latestMessage);
            } finally {
                // Always release the guard, otherwise one failed read would
                // stop all future polling
                processingMessage = false;
            }
        }, config.readingDelay);
    }

    /**
     * Derives an identifier for a message element from its trimmed text
     * (length and first 20 characters) and its index among its parent's
     * children.
     *
     * @param {Element} element - Message element; must have a parent node.
     * @returns {string} Identifier of the form `<length>-<first 20 chars>-<index>`.
     */
    function getMessageIdentifier(element) {
        const trimmedText = element.textContent.trim();
        const siblingIndex = Array.prototype.indexOf.call(element.parentNode.children, element);
        return [trimmedText.length, trimmedText.slice(0, 20), siblingIndex].join('-');
    }

    /**
     * Click handler of the READ button. Any running speech is cancelled first;
     * then either the latest message is read (when idle) or the speech state
     * is reset and the button relabelled RESUME (when speaking).
     */
    function toggleSpeaking() {
        // Cancelling up front avoids buggy behavior from overlapping speech
        window.speechSynthesis.cancel();

        if (isSpeaking) {
            console.log('Claude Voice Interface: Stopping speech');
            // Speech is cancelled rather than paused; the label is then changed to RESUME
            resetSpeechState();
            document.getElementById('claude-speak-button').innerHTML =
                `${createPlayIcon()} <span style="margin-left: 8px;">RESUME</span>`;
            return;
        }

        console.log('Claude Voice Interface: Starting speech');
        // Read the most recent message, if there is one
        const latest = findLatestClaudeMessage();
        if (latest) {
            readMessage(latest);
        }
    }

    /**
     * Reads a message aloud with the browser's speech synthesis and highlights
     * each word while it is spoken.
     *
     * The message's text nodes are replaced, word by word, by <text-reader>
     * elements. Existing <text-reader> elements are skipped, so a message that
     * was wrapped before is not wrapped again. The text of all <text-reader>
     * elements is concatenated into one utterance, and a per-character lookup
     * maps the utterance's boundary events back to the element to highlight.
     *
     * @param {Element|null} message - Message element to read. Falsy values and
     *     elements without text are logged and ignored.
     */
    function readMessage(message) {
        if (!message) {
            console.log('Claude Voice Interface: No message to read');
            return;
        }

        currentMessageElement = message;

        if (!message.textContent.trim()) {
            console.log('Claude Voice Interface: Message has no text content');
            return;
        }

        // Elements whose content is neither wrapped nor read
        const skippedTags = ['script', 'style', 'noscript', 'svg', 'canvas', 'button', 'text-reader'];

        // Recursively wraps every word of every non-blank text node below `node`
        function wrapWords(node) {
            if (node.nodeType === Node.ELEMENT_NODE) {
                if (skippedTags.includes(node.tagName.toLowerCase())) {
                    return;
                }
                // Iterate over a snapshot: the tree is modified during traversal
                for (const child of Array.from(node.childNodes)) {
                    wrapWords(child);
                }
                return;
            }

            if (node.nodeType !== Node.TEXT_NODE) return;
            if (node.textContent.trim() === '') return;

            // Split on single spaces; every word except the last gets its
            // space back so the concatenated text equals the original
            const words = node.textContent.split(' ');
            const fragment = document.createDocumentFragment();
            words.forEach((word, index) => {
                const wrapper = document.createElement('text-reader');
                wrapper.textContent = index < words.length - 1 ? word + ' ' : word;
                wrapper.setAttribute('data-reader-segment', 'true');
                fragment.appendChild(wrapper);
            });
            node.parentNode.replaceChild(fragment, node);
        }

        // Removes the highlight from whichever word currently carries it
        function clearHighlight() {
            for (const highlighted of Array.from(message.querySelectorAll('.active-highlight'))) {
                highlighted.classList.remove('active-highlight');
            }
        }

        wrapWords(message);

        // Build the utterance text and, for every character position in it,
        // remember which word element that character belongs to
        let text = '';
        const characterMap = [];
        for (const wordEl of Array.from(message.querySelectorAll('text-reader'))) {
            const wordStart = text.length;
            text += wordEl.innerText;
            for (let i = wordStart; i < text.length; i++) {
                characterMap[i] = wordEl;
            }
        }

        // Cancel any ongoing speech first
        window.speechSynthesis.cancel();

        const utterance = new SpeechSynthesisUtterance(text);
        utterance.rate = config.readRateMultiplier;

        // Move the highlight to the word at the reported character position
        utterance.onboundary = function (e) {
            const wordEl = characterMap[e.charIndex];
            if (!wordEl) return; // position outside the mapped text
            if (wordEl.classList.contains('active-highlight')) return;
            clearHighlight();
            wordEl.classList.add('active-highlight');
        };

        utterance.onstart = function() {
            console.log('Claude Voice Interface: Speech started');
            isSpeaking = true;
            const speakButton = document.getElementById('claude-speak-button');
            speakButton.innerHTML = `${createPauseIcon()} <span style="margin-left: 8px;">PAUSE</span>`;
            speakButton.style.backgroundColor = config.activeBackgroundColor;
            speakButton.style.color = config.activeTextColor;
            speakButton.style.fontWeight = 'bold';
        };

        utterance.onend = function() {
            console.log('Claude Voice Interface: Speech ended');
            clearHighlight(); // do not leave the last spoken word highlighted
            resetSpeechState();
        };

        utterance.onerror = function(event) {
            console.error('Claude Voice Interface: Speech synthesis error:', event.error);
            clearHighlight();
            resetSpeechState();
        };

        // Start speaking - slight delay to try and get the boundary event to fire consistently
        setTimeout(() => {
            window.speechSynthesis.speak(utterance);
        }, 50);
    }

    /**
     * Finds the element to read for the most recent message on the page.
     *
     * @returns {Element|null} The `div[data-is-streaming]` inside the last
     *     `div[data-test-render-count]`, or null when there is no such
     *     container or the last container has no such descendant.
     */
    function findLatestClaudeMessage() {
        // Query once and test for emptiness explicitly instead of querying
        // twice and relying on a thrown TypeError for the empty case
        const containers = document.querySelectorAll('div[data-test-render-count]');
        if (containers.length === 0) {
            return null;
        }
        return containers[containers.length - 1].querySelector('div[data-is-streaming]');
    }

    /**
     * Stops any speech output, clears the speaking flag and puts the READ
     * button back into its idle appearance.
     */
    function resetSpeechState() {
        window.speechSynthesis.cancel();
        isSpeaking = false;

        // Idle look: play icon, READ label, inactive colours
        const button = document.getElementById('claude-speak-button');
        button.innerHTML = `${createPlayIcon()} <span style="margin-left: 8px;">READ</span>`;
        Object.assign(button.style, {
            backgroundColor: config.inactiveBackgroundColor,
            color: config.inactiveTextColor,
            fontWeight: 'normal',
        });
    }

    /**
     * Click handler of the LISTEN button: starts speech recognition when idle,
     * stops it when listening. After stopping, the button that contains the
     * send-arrow icon is clicked, if it can be found, to submit the text.
     */
    function toggleRecording() {
        if (!recognition) {
            console.warn('Speech Recognition not initialized');
            alert('Speech Recognition is not supported in this browser');
            return;
        }

        const recordButton = document.getElementById('claude-record-button');

        if (!isRecording) {
            // Start recording
            isRecording = true;

            try {
                recognition.start();

                recordButton.innerHTML = `${createFilledCircleIcon()} <span style="margin-left: 8px;">LISTENING</span>`;
                recordButton.style.backgroundColor = config.activeBackgroundColor;
                recordButton.style.color = config.activeTextColor;
                recordButton.style.fontWeight = 'bold';

                console.log('Claude Voice Interface: Started listening');
            } catch (e) {
                console.error('Error starting recognition:', e);
                isRecording = false;
                alert('Error starting speech recognition: ' + e.message);
            }
            return;
        }

        // Stop recording; the flag is cleared first so the recognition's
        // end handler does not restart it
        isRecording = false;

        try {
            recognition.stop();
        } catch (e) {
            console.error('Error stopping recognition:', e);
        }

        recordButton.innerHTML = `${createEmptyCircleIcon()} <span style="margin-left: 8px;">LISTEN</span>`;
        recordButton.style.backgroundColor = config.inactiveBackgroundColor;
        recordButton.style.color = config.inactiveTextColor;
        recordButton.style.fontWeight = 'normal';

        // Try to click the submit button, which is located through the exact
        // path data of its arrow icon. The lookup fails when that icon is not
        // on the page, so guard it instead of throwing a TypeError.
        const sendIconPath = document.querySelector('button svg path[d="M208.49,120.49a12,12,0,0,1-17,0L140,69V216a12,12,0,0,1-24,0V69L64.49,120.49a12,12,0,0,1-17-17l72-72a12,12,0,0,1,17,0l72,72A12,12,0,0,1,208.49,120.49Z"]');
        const sendButton = sendIconPath ? sendIconPath.closest('button') : null;
        if (sendButton) {
            sendButton.click();
        } else {
            console.warn('Claude Voice Interface: Could not find the send button');
        }
    }

    /**
     * Replaces the content of the chat input with `text` and fires the events
     * a UI framework may need to notice the change.
     *
     * The ProseMirror editor is tried first; otherwise the first visible
     * element matching one of several generic selectors whose placeholder,
     * aria-label or class hints at a message input is used. When nothing is
     * found the user is alerted and the text is logged.
     *
     * @param {string} text - Text to insert; blank text is ignored.
     */
    function insertTextToInput(text) {
        if (!text.trim()) return;

        // Fallback search over generic selectors; returns null when nothing matches
        function findFallbackInput() {
            const potentialSelectors = [
                'textarea',
                '[role="textbox"]',
                '[contenteditable="true"]',
                'input[type="text"]',
                '.chat-input',
                '[class*="promptTextarea"]',
                '[class*="TextInput"]'
            ];
            const messageHint = /[mM]essage/;
            const classHint = /[mM]essage|[iI]nput/;

            for (const selector of potentialSelectors) {
                for (const element of document.querySelectorAll(selector)) {
                    // Skip elements without an offset parent (e.g. hidden ones)
                    if (element.offsetParent === null) continue;

                    // Read the class through getAttribute: `className` is not
                    // a string on every element type
                    const placeholder = element.getAttribute('placeholder') || '';
                    const aria = element.getAttribute('aria-label') || '';
                    const classes = element.getAttribute('class') || '';

                    if (messageHint.test(placeholder) ||
                        messageHint.test(aria) ||
                        classHint.test(classes)) {
                        return element;
                    }
                }
            }
            return null;
        }

        // First, try the specific selector for Claude's input field
        let inputField = document.querySelector('div.ProseMirror[contenteditable="true"]');
        if (inputField) {
            console.log('Found Claude input with ProseMirror class');
        } else {
            console.log('Could not find Claude input with ProseMirror class, trying alternative selectors');
            inputField = findFallbackInput();
        }

        if (!inputField) {
            console.warn('Claude Voice Interface: Could not find input field');
            alert('Could not find input field to insert text. Check console for details.');
            console.error('Failed to find any matching input field. Text was:', text);
            return;
        }

        console.log('Claude Voice Interface: Found input field, inserting text');

        if (inputField.tagName === 'TEXTAREA' || inputField.tagName === 'INPUT') {
            // Form controls: set the value and notify listeners
            inputField.value = text;
            inputField.dispatchEvent(new Event('input', { bubbles: true }));
            inputField.dispatchEvent(new Event('change', { bubbles: true }));
            inputField.focus();
        } else if (inputField.getAttribute('contenteditable') === 'true') {
            // contenteditable elements (including ProseMirror)
            inputField.focus();
            inputField.innerHTML = '';

            // execCommand inserts at the caret of the focused editable element
            document.execCommand('insertText', false, text);

            // Backup when execCommand had no effect. The text is written as
            // plain text, never as markup, so transcript content cannot inject HTML
            if (!inputField.textContent) {
                inputField.textContent = text;
            }

            // Fire several events to make sure the UI notices the change
            inputField.dispatchEvent(new Event('input', { bubbles: true }));
            inputField.dispatchEvent(new Event('change', { bubbles: true }));
            inputField.dispatchEvent(new CustomEvent('input-change', { bubbles: true, detail: { text } }));
        }

        console.log('Text inserted into input field');
    }

    /**
     * Stops speech recognition if it is running, clears the recording flag and
     * puts the LISTEN button back into its idle appearance.
     *
     * Only state that is actually declared in this script is touched: the
     * script runs in strict mode, where reading or assigning an undeclared
     * identifier throws a ReferenceError.
     */
    function resetRecordingState() {
        // Stop recognition if active
        if (recognition && isRecording) {
            try {
                recognition.stop();
            } catch (e) {
                console.error('Error stopping recognition during reset:', e);
            }
        }

        // Reset recording state
        isRecording = false;

        // Reset button appearance
        const recordButton = document.getElementById('claude-record-button');
        if (recordButton) {
            recordButton.innerHTML = `${createEmptyCircleIcon()} <span style="margin-left: 8px;">LISTEN</span>`;
            recordButton.style.backgroundColor = config.inactiveBackgroundColor;
            recordButton.style.color = config.inactiveTextColor;
            recordButton.style.fontWeight = 'normal';
        }
    }

    // Icon creation functions
    /** @returns {string} SVG markup of the 16px play (triangle) icon. */
    function createPlayIcon() {
        const trianglePath = 'M11.596 8.697l-6.363 3.692c-.54.313-1.233-.066-1.233-.697V4.308c0-.63.692-1.01 1.233-.696l6.363 3.692a.802.802 0 0 1 0 1.393z';
        return `<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" viewBox="0 0 16 16">
            <path d="${trianglePath}"/>
        </svg>`;
    }

    /** @returns {string} SVG markup of the 16px pause (two bars) icon. */
    function createPauseIcon() {
        const barsPath = 'M5.5 3.5A1.5 1.5 0 0 1 7 5v6a1.5 1.5 0 0 1-3 0V5a1.5 1.5 0 0 1 1.5-1.5zm5 0A1.5 1.5 0 0 1 12 5v6a1.5 1.5 0 0 1-3 0V5a1.5 1.5 0 0 1 1.5-1.5z';
        return `<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" viewBox="0 0 16 16">
            <path d="${barsPath}"/>
        </svg>`;
    }

    /** @returns {string} SVG markup of the 16px outlined circle shown on the idle LISTEN button. */
    function createEmptyCircleIcon() {
        const ring = '<circle cx="8" cy="8" r="7" stroke="currentColor" stroke-width="1.5" fill="none"/>';
        return `<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" viewBox="0 0 16 16">
            ${ring}
        </svg>`;
    }

    /** @returns {string} SVG markup of the 16px filled circle shown while listening. */
    function createFilledCircleIcon() {
        const dot = '<circle cx="8" cy="8" r="7" fill="currentColor"/>';
        return `<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" viewBox="0 0 16 16">
            ${dot}
        </svg>`;
    }

    /** @returns {string} SVG markup of the two-line grip icon used for the drag handle. */
    function createDragIcon() {
        const gripLines = 'M5 9H13H19M5 15H19';
        return `<svg viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg">
			<g id="SVGRepo_bgCarrier" stroke-width="0"></g>
			<g id="SVGRepo_tracerCarrier" stroke-linecap="round" stroke-linejoin="round"></g>
			<g id="SVGRepo_iconCarrier">
				<path d="${gripLines}" stroke="#878787" stroke-width="2" stroke-linecap="round"></path>
			</g>
		</svg>`;
    }
})();

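// QingJ © 2025
//
// 镜像随时可能失效，请加Q群300939539或关注我们的公众号极客氢云获取最新地址。
// (The mirror may become unavailable at any time; join QQ group 300939539 or follow the official account 极客氢云 for the latest address.)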