// ==UserScript==
// @name JanitorAI - Text to Speech - Built-in/ElevenLabs/GeminiTTS
// @namespace http://tampermonkey.net/
// @version 3.9.5
// @license MIT
// @description Text to Speech (TTS) integration for JanitorAI using built-in voices, ElevenLabs TTS, and Gemini TTS with emotion analysis and audio segmentation.
// @author Zephyr (xzeph__ on Discord)
// @match https://janitorai.com/chats/*
// @icon https://www.google.com/s2/favicons?sz=64&domain=janitorai.com
// @grant GM_xmlhttpRequest
// @grant GM_addStyle
// @connect api.elevenlabs.io
// @connect generativelanguage.googleapis.com
// ==/UserScript==
// ==========================================================
// SECTION A. AUDIO UTILITIES (globals, defined before the main userscript IIFE)
// ----------------------------------------------------------
// - Initializes/retrieves the AudioContext for ElevenLabs
// - Decodes an ArrayBuffer into an AudioBuffer
// - Converts an AudioBuffer to a WAV Blob
// - Converts base64 to an ArrayBuffer
// - Dispatches an event carrying the decoded AudioBuffer
// - Logs AudioBuffer details for debugging
// ==========================================================
// --- ElevenLabs TTS AudioContext and AudioBuffer integration ---
let elevenLabsAudioContext = null;
function getElevenLabsAudioContext() {
if (!elevenLabsAudioContext) {
elevenLabsAudioContext = new (window.AudioContext || window.webkitAudioContext)();
}
return elevenLabsAudioContext;
}
function decodeTTSArrayBuffer(arrayBuffer) {
try {
if (!arrayBuffer || arrayBuffer.byteLength === 0) {
throw new Error("ArrayBuffer is empty or null");
}
const audioContext = getElevenLabsAudioContext();
return audioContext.decodeAudioData(arrayBuffer.slice(0));
} catch (error) {
console.error("❌ Failed to decode ArrayBuffer to AudioBuffer:", error);
throw new Error(`decodeTTSArrayBuffer failed: ${error.message}`);
}
}
function dispatchTTSDecodedAudio(audioBuffer, playbackRate = 1.0, alignment) {
const event = new CustomEvent('ElevenLabsTTSDecodedAudio', {
detail: { audioBuffer, playbackRate, alignment }
});
window.dispatchEvent(event);
}
function logAudioBuffer(audioBuffer) {
if (!(audioBuffer instanceof AudioBuffer)) {
console.error('Provided object is not an AudioBuffer');
return;
}
console.log('AudioBuffer Info:');
console.log('Sample Rate:', audioBuffer.sampleRate);
console.log('Number of Channels:', audioBuffer.numberOfChannels);
console.log('Length (frames):', audioBuffer.length);
console.log('Duration (seconds):', audioBuffer.duration);
for (let i = 0; i < audioBuffer.numberOfChannels; i++) {
console.log(`Channel ${i} Data:`, audioBuffer.getChannelData(i));
}
}
function base64ToArrayBuffer(base64) {
try {
const binaryString = window.atob(base64);
const len = binaryString.length;
const bytes = new Uint8Array(len);
for (let i = 0; i < len; i++) {
bytes[i] = binaryString.charCodeAt(i);
}
return bytes.buffer;
} catch (error) {
console.error("❌ Failed to decode base64 to ArrayBuffer:", error);
throw new Error(`base64ToArrayBuffer failed: ${error.message}`);
}
}
// Converts an AudioBuffer to a WAV Blob (for segmentation/export)
function bufferToWave(abuffer) {
try {
if (!abuffer || !(abuffer instanceof AudioBuffer)) {
throw new Error("Invalid AudioBuffer provided");
}
let numOfChan = abuffer.numberOfChannels,
length = abuffer.length * numOfChan * 2 + 44,
buffer = new ArrayBuffer(length),
view = new DataView(buffer),
channels = [], i, sample,
offset = 0,
pos = 0;
// WAVE header
setUint32(0x46464952); // "RIFF"
setUint32(length - 8); // file length - 8
setUint32(0x45564157); // "WAVE"
setUint32(0x20746d66); // "fmt "
setUint32(16); // length = 16
setUint16(1); // PCM
setUint16(numOfChan);
setUint32(abuffer.sampleRate);
setUint32(abuffer.sampleRate * 2 * numOfChan);
setUint16(numOfChan * 2);
setUint16(16); // 16-bit
setUint32(0x61746164); // "data"
setUint32(length - pos - 4);
// Interleaved sample data
for (i = 0; i < abuffer.numberOfChannels; i++)
channels.push(abuffer.getChannelData(i));
while (pos < length) {
for (i = 0; i < numOfChan; i++) {
sample = Math.max(-1, Math.min(1, channels[i][offset])); // clamp to [-1, 1]
sample = (sample < 0 ? sample * 32768 : sample * 32767) | 0; // scale to signed 16-bit
view.setInt16(pos, sample, true);
pos += 2;
}
offset++;
}
return new Blob([buffer], { type: "audio/wav" });
function setUint16(data) { view.setUint16(pos, data, true); pos += 2; }
function setUint32(data) { view.setUint32(pos, data, true); pos += 4; }
} catch (error) {
console.error("❌ Failed to convert AudioBuffer to WAV:", error);
throw new Error(`bufferToWave failed: ${error.message}`);
}
}
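// Illustrative usage sketch (not called anywhere in this script): decoding a compressed
// TTS response (e.g. MP3) and re-exporting it as a WAV Blob URL. The parameter name
// `ttsArrayBuffer` is a hypothetical placeholder.
async function exampleExportArrayBufferAsWav(ttsArrayBuffer) {
    // Decode the compressed audio into an AudioBuffer using the shared context
    const audioBuffer = await decodeTTSArrayBuffer(ttsArrayBuffer);
    logAudioBuffer(audioBuffer);
    // Re-encode as 16-bit PCM WAV and expose it as an object URL for an <audio> element
    const wavBlob = bufferToWave(audioBuffer);
    return URL.createObjectURL(wavBlob);
}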
// Simple PCM16 mono -> WAV helper for Gemini TTS
function createWavFromPCM(pcmBuffer, rate = 24000, ch = 1, bits = 16) {
const pcmBytes = pcmBuffer.byteLength;
const blockAlign = ch * bits / 8;
const byteRate = rate * blockAlign;
const wav = new ArrayBuffer(44 + pcmBytes);
const view = new DataView(wav);
function writeString(offset, str) {
for (let i = 0; i < str.length; i++) view.setUint8(offset + i, str.charCodeAt(i));
}
writeString(0, 'RIFF');
view.setUint32(4, 36 + pcmBytes, true);
writeString(8, 'WAVE');
writeString(12, 'fmt ');
view.setUint32(16, 16, true);
view.setUint16(20, 1, true);
view.setUint16(22, ch, true);
view.setUint32(24, rate, true);
view.setUint32(28, byteRate, true);
view.setUint16(32, blockAlign, true);
view.setUint16(34, bits, true);
writeString(36, 'data');
view.setUint32(40, pcmBytes, true);
new Uint8Array(wav).set(new Uint8Array(pcmBuffer), 44);
return wav;
}
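// Illustrative sketch (not invoked): turning base64-encoded PCM, as delivered by Gemini TTS,
// into a playable WAV Blob. `base64Pcm` is a hypothetical input; the 24000/1/16 parameters
// simply mirror createWavFromPCM's defaults.
function examplePcmBase64ToWavBlob(base64Pcm) {
    const pcmBuffer = base64ToArrayBuffer(base64Pcm);                 // base64 -> raw PCM bytes
    const wavArrayBuffer = createWavFromPCM(pcmBuffer, 24000, 1, 16); // wrap PCM in a WAV header
    return new Blob([wavArrayBuffer], { type: "audio/wav" });         // playable via <audio> or AudioContext
}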
(function () {
"use strict";
// ==========================================================
// SECTION 0. GLOBAL STATE, SETTINGS RESET, AND CONSTANTS
// ----------------------------------------------------------
// - Resets the old settings format if present
// - Defines common selectors and flags
// ==========================================================
// Legacy settings reset
try {
const settings = JSON.parse(localStorage.getItem("ttsSettings") || "{}");
if (settings.hasOwnProperty('charVoice') || settings.hasOwnProperty('userVoice')) {
console.log('TTS Userscript: Detected old voice setting format. Resetting to defaults.');
localStorage.removeItem("ttsSettings");
}
} catch (e) {
console.error("TTS Userscript: Could not parse settings, resetting to default.", e);
localStorage.removeItem("ttsSettings");
}
// Chat/control selectors
const CHAT_CONTAINER_SELECTOR = '[class^="_messagesMain_"]';
const MESSAGE_CONTAINER_SELECTOR = '[data-testid="virtuoso-item-list"] > div[data-index]';
const BOT_NAME_ICON_SELECTOR = '[class^="_nameIcon_"]';
const LAST_MESSAGE_SWIPE_CONTAINER_SELECTOR = '[class^="_botChoicesContainer_"]';
const SWIPE_SLIDER_SELECTOR = '[class^="_botChoicesSlider_"]';
const MESSAGE_WRAPPER_SELECTOR = 'li[class^="_messageDisplayWrapper_"]';
const MESSAGE_BODY_SELECTOR = '[class^="_messageBody_"]';
const NAME_CONTAINER_SELECTOR = '[class^="_nameContainer_"]';
const EDIT_PANEL_SELECTOR = '[class^="_editPanel_"]';
const CONTROL_PANEL_SELECTOR = '[class^="_controlPanel_"]';
const BOT_NAME_SELECTOR = '[class^="_nameText_"]';
// Last-logged state and Live2D communication
let lastLoggedText = "";
let lastLoggedStatus = "";
let lastLoggedSwipeIndex = -1;
let lastLoggedMessageIndex = -1;
let live2dScriptDetected = false;
// ==========================================================
// SECTION 0.5. GEMINI CONSTANTS AND FUNCTIONS
// ----------------------------------------------------------
// - Gemini API key and endpoint
// - List of supported emotions
// - Function to analyze text with Gemini
// - Function to compute segment timings based on emotion
// ==========================================================
const GEMINI_API_KEY = "YOUR_GEMINI_API_KEY"; // Replace with your real key
const GEMINI_MODEL = "gemini-2.5-flash";
const GEMINI_ENDPOINT = `https://generativelanguage.googleapis.com/v1beta/models/${GEMINI_MODEL}:generateContent?key=${GEMINI_API_KEY}`;
const EMOTION_LIST = [
"Admiration", "Amusement", "Anger", "Annoyance", "Approval", "Caring", "Confusion",
"Curiosity", "Desire", "Disappointment", "Disapproval", "Disgust", "Embarrassment",
"Excitement", "Fear", "Gratitude", "Joy", "Love", "Nervousness", "Neutral",
"Optimism", "Pride", "Realization", "Relief", "Remorse", "Sadness", "Surprise"
];
async function analyzeTextWithGemini(text) {
console.log("🤖 Sending text to Gemini for emotion and action analysis...");
// Build the list of available actions for the prompt
const actionsListText = availableActions.length > 0
? availableActions.join(", ")
: "No actions available";
const prompt = `You are a text analyzer that MUST preserve EVERY SINGLE CHARACTER from the input.
ABSOLUTE REQUIREMENTS - FAILURE TO COMPLY WILL BREAK THE SYSTEM:
1. CHARACTER PRESERVATION (CRITICAL):
✓ Keep EVERY letter (A-Z, a-z)
✓ Keep EVERY number (0-9)
✓ Keep EVERY punctuation mark (. , ! ? ; : - ' " etc.)
✓ Keep EVERY space character (leading, trailing, between words)
✓ Keep EVERY special symbol (@ # $ % & * + = etc.)
✓ Keep EVERY bracket/parenthesis ( ) [ ] { }
✓ Keep EVERY newline character (\\n)
✓ Keep EVERY asterisk (*) for actions
✓ Keep EVERY quotation mark (" ')
✓ Do NOT add, remove, or modify ANY character
✓ Do NOT trim whitespace
✓ Do NOT normalize text
✓ Do NOT fix typos or grammar
2. SEGMENT SPLITTING RULES:
✓ Split text ONLY when there is a SIGNIFICANT change in emotion or a physical action occurs
✓ BE REALISTIC: Most dialogue doesn't need many segments - only split when truly necessary
✓ MERGE CONSECUTIVE SEGMENTS: If two or more segments have the SAME emotion AND the SAME action (or both null), MERGE them into ONE segment
✓ Each segment's "text" must contain a COMPLETE, UNBROKEN portion of the original
✓ DO NOT cut words in half
✓ DO NOT break sentences mid-word
✓ Include complete phrases with their surrounding spaces
✓ When concatenating all segment "text" fields, the result MUST be IDENTICAL to the input
✓ QUALITY OVER QUANTITY: Fewer, meaningful segments are better than many unnecessary ones
3. MERGING EXAMPLES:
❌ BAD (too many segments):
[
{"emotion": "Joy", "action": null, "text": "Hi there! "},
{"emotion": "Joy", "action": null, "text": "I'm so happy to see you! "},
{"emotion": "Joy", "action": null, "text": "How are you?"}
]
✅ GOOD (merged):
[
{"emotion": "Joy", "action": null, "text": "Hi there! I'm so happy to see you! How are you?"}
]
❌ BAD (unnecessary split):
[
{"emotion": "Neutral", "action": null, "text": "I went to the "},
{"emotion": "Neutral", "action": null, "text": "store yesterday."}
]
✅ GOOD (kept together):
[
{"emotion": "Neutral", "action": null, "text": "I went to the store yesterday."}
]
4. VALIDATION CHECK:
Before responding, verify: input_text == segment[0].text + segment[1].text + ... + segment[n].text
If they don't match EXACTLY, you have failed.
RESPONSE FORMAT (JSON only, no other text):
[
{"emotion": "EmotionName", "action": "ActionName or null", "text": "exact text from input"},
...
]
EMOTION LIST (choose from): ${EMOTION_LIST.join(", ")}
ACTION LIST (choose from or use null): ${actionsListText}
EXAMPLE 1 (Simple, one segment):
Input: "Hi there! How are you today?"
Output:
[
{"emotion": "Joy", "action": null, "text": "Hi there! How are you today?"}
]
EXAMPLE 2 (With action):
Input: "Hi there! *waves enthusiastically* How are you today?"
Output:
[
{"emotion": "Joy", "action": null, "text": "Hi there! "},
{"emotion": "Joy", "action": "Wave hand", "text": "*waves enthusiastically* "},
{"emotion": "Curiosity", "action": null, "text": "How are you today?"}
]
EXAMPLE 3 (Emotion change):
Input: "I'm so happy! Wait... what's that noise? Oh no!"
Output:
[
{"emotion": "Joy", "action": null, "text": "I'm so happy! "},
{"emotion": "Curiosity", "action": null, "text": "Wait... what's that noise? "},
{"emotion": "Fear", "action": null, "text": "Oh no!"}
]
IMPORTANT NOTES:
- Notice that ALL spaces, punctuation, and characters are preserved EXACTLY
- Each segment text is COMPLETE and UNBROKEN
- Segments are MERGED when they share the same emotion and action
- Only split when there's a REAL, SIGNIFICANT change
- Concatenating all segments MUST equal the original input EXACTLY
INPUT TEXT TO ANALYZE:
"${text.replace(/"/g, '\\"').replace(/\n/g, '\\n')}"`;
const payload = {
contents: [{ parts: [{ text: prompt }] }],
generationConfig: { response_mime_type: "application/json" },
};
const response = await fetch(GEMINI_ENDPOINT, {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify(payload),
});
if (!response.ok) {
const errorBody = await response.text();
console.error("❌ Gemini API Error Body:", errorBody);
throw new Error(`Gemini API request failed: ${response.status} ${response.statusText}`);
}
const data = await response.json();
console.log("✓ Received response from Gemini:", data);
let emotionQueue;
try {
const responseText = data.candidates?.[0]?.content?.parts?.[0]?.text;
if (!responseText) {
throw new Error("No response text from Gemini");
}
emotionQueue = JSON.parse(responseText);
} catch (parseError) {
console.error("❌ Failed to parse Gemini response:", parseError);
console.warn("⚠️ Defaulting to single neutral segment.");
return [{ emotion: "Neutral", action: null, text: text }];
}
if (!Array.isArray(emotionQueue) || emotionQueue.length === 0) {
console.warn("⚠️ Gemini did not return a valid emotion queue. Defaulting to a single neutral segment.");
return [{ emotion: "Neutral", action: null, text: text }];
}
// Strict validation: ALL characters must be preserved
const reconstructedText = emotionQueue.map(seg => seg.text || '').join('');
if (reconstructedText !== text) {
console.error("❌ CRITICAL VALIDATION FAILURE:");
console.error(" Original length:", text.length);
console.error(" Reconstructed length:", reconstructedText.length);
console.error(" Character difference:", Math.abs(text.length - reconstructedText.length));
// Show character-by-character comparison for debugging
const maxLen = Math.max(text.length, reconstructedText.length);
let firstDiffIndex = -1;
for (let i = 0; i < maxLen; i++) {
if (text[i] !== reconstructedText[i]) {
firstDiffIndex = i;
break;
}
}
if (firstDiffIndex !== -1) {
console.error(" First difference at index:", firstDiffIndex);
console.error(" Expected char:", JSON.stringify(text[firstDiffIndex]));
console.error(" Got char:", JSON.stringify(reconstructedText[firstDiffIndex]));
const contextStart = Math.max(0, firstDiffIndex - 20);
const contextEnd = Math.min(text.length, firstDiffIndex + 20);
console.error(" Context (original):", JSON.stringify(text.substring(contextStart, contextEnd)));
console.error(" Context (reconstructed):", JSON.stringify(reconstructedText.substring(contextStart, contextEnd)));
}
console.warn("⚠️ Falling back to single neutral segment to ensure accuracy.");
return [{ emotion: "Neutral", action: null, text: text }];
}
// Additional validation: check each segment has text
const invalidSegments = emotionQueue.filter(seg => !seg.text || typeof seg.text !== 'string');
if (invalidSegments.length > 0) {
console.error("❌ Found segments with invalid text fields:", invalidSegments);
console.warn("⚠️ Falling back to single neutral segment.");
return [{ emotion: "Neutral", action: null, text: text }];
}
console.log("✅ Gemini response VALIDATED - all characters preserved!");
console.log(`📊 Emotion segments: ${emotionQueue.length}`);
emotionQueue.forEach((seg, i) => {
const actionText = seg.action ? ` [Action: ${seg.action}]` : '';
const preview = seg.text.length > 40 ? seg.text.substring(0, 40) + '...' : seg.text;
console.log(` ${i + 1}. [${seg.emotion}]${actionText} "${preview}" (${seg.text.length} chars)`);
});
return emotionQueue;
}
function calculateSegmentEndTimes(alignment, segments) {
console.log("--- Calculating Segment End Times (Character-Accurate) ---");
const { characters, character_start_times_seconds, character_end_times_seconds } = alignment;
const alignmentText = characters.join('');
const segmentsText = segments.map(s => s.text || '').join('');
console.log("Alignment text length:", alignmentText.length);
console.log("Segments text length:", segmentsText.length);
// CRITICAL: Verify exact character match
if (segmentsText !== alignmentText) {
console.error("❌ CRITICAL ERROR: Segments text does not match alignment text!");
console.error(" This means character preservation failed somewhere in the pipeline.");
console.error(" Alignment text:", JSON.stringify(alignmentText.substring(0, 100)));
console.error(" Segments text:", JSON.stringify(segmentsText.substring(0, 100)));
// Find first mismatch
for (let i = 0; i < Math.max(alignmentText.length, segmentsText.length); i++) {
if (alignmentText[i] !== segmentsText[i]) {
console.error(` First mismatch at index ${i}:`);
console.error(` Expected: ${JSON.stringify(alignmentText[i])}`);
console.error(` Got: ${JSON.stringify(segmentsText[i])}`);
break;
}
}
return [];
}
console.log("✅ Text validation passed - segments match alignment exactly!");
let currentCharIndex = 0;
const segmentTimings = [];
segments.forEach((segment, i) => {
const segmentLength = segment.text.length;
if (segmentLength === 0) {
console.warn(`⚠️ Segment ${i + 1} has zero length, skipping`);
return;
}
const startCharIndex = currentCharIndex;
const endCharIndex = currentCharIndex + segmentLength - 1;
// Safety check: ensure indices are within bounds
if (startCharIndex >= character_start_times_seconds.length ||
endCharIndex >= character_end_times_seconds.length) {
console.error(`❌ Segment ${i + 1} indices out of bounds!`);
console.error(` Start index: ${startCharIndex}, End index: ${endCharIndex}`);
console.error(` Available indices: 0-${character_start_times_seconds.length - 1}`);
return;
}
const startTime = character_start_times_seconds[startCharIndex];
const endTime = character_end_times_seconds[endCharIndex];
const actionText = segment.action ? ` [Action: ${segment.action}]` : '';
const textPreview = segment.text.length > 50 ? segment.text.substring(0, 50) + '...' : segment.text;
console.log(`Segment ${i + 1} [${segment.emotion}]${actionText}:`);
console.log(` Text: "${textPreview}" (${segmentLength} chars)`);
console.log(` Char indices: ${startCharIndex} → ${endCharIndex}`);
console.log(` Time range: ${startTime.toFixed(3)}s → ${endTime.toFixed(3)}s (${(endTime - startTime).toFixed(3)}s duration)`);
segmentTimings.push({
emotion: segment.emotion,
action: segment.action || null,
text: segment.text,
startTime: startTime,
endTime: endTime,
duration: endTime - startTime
});
currentCharIndex += segmentLength;
});
// Final validation
if (currentCharIndex !== alignmentText.length) {
console.error(`❌ Character counting error! Processed ${currentCharIndex} chars but expected ${alignmentText.length}`);
} else {
console.log(`✅ Segment timings calculated successfully for ${segmentTimings.length} segments`);
console.log(`✅ Total characters processed: ${currentCharIndex}`);
}
return segmentTimings;
}
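// Illustrative sketch (not invoked): the `alignment` object consumed above follows the
// per-character timing shape returned by ElevenLabs' "with timestamps" TTS responses.
// The literal values below are made up purely to show the expected structure.
const EXAMPLE_ALIGNMENT = {
    characters: ["H", "i", "!"],
    character_start_times_seconds: [0.00, 0.12, 0.25],
    character_end_times_seconds:   [0.12, 0.25, 0.40]
};
// calculateSegmentEndTimes(EXAMPLE_ALIGNMENT, [{ emotion: "Joy", action: null, text: "Hi!" }])
// would yield a single timing entry spanning 0.00s -> 0.40s.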
async function splitAudioByTimestamps(audioBuffer, segmentTimings) {
console.log('--- Splitting Audio by Emotion Timestamps ---');
console.log(`Audio buffer: ${audioBuffer.duration.toFixed(3)}s, ${audioBuffer.numberOfChannels} channels, ${audioBuffer.sampleRate}Hz`);
const blobs = [];
for (let i = 0; i < segmentTimings.length; i++) {
const timing = segmentTimings[i];
const startOffset = timing.startTime;
const endOffset = timing.endTime;
// Calculate frame positions with proper rounding
const startFrame = Math.floor(startOffset * audioBuffer.sampleRate);
const endFrame = Math.ceil(endOffset * audioBuffer.sampleRate);
const frameCount = endFrame - startFrame;
const actionText = timing.action ? ` [Action: ${timing.action}]` : '';
const textPreview = timing.text.length > 30 ? timing.text.substring(0, 30) + '...' : timing.text;
console.log(`Segment ${i + 1}/${segmentTimings.length} [${timing.emotion}]${actionText}:`);
console.log(` Text: "${textPreview}"`);
console.log(` Time: ${startOffset.toFixed(3)}s → ${endOffset.toFixed(3)}s (${timing.duration.toFixed(3)}s)`);
console.log(` Frames: ${startFrame} → ${endFrame} (${frameCount} frames)`);
if (frameCount <= 0) {
console.error(` ❌ Invalid frame count (${frameCount}), skipping segment`);
continue;
}
if (startFrame >= audioBuffer.length) {
console.error(` ❌ Start frame ${startFrame} exceeds buffer length ${audioBuffer.length}, skipping`);
continue;
}
// Clamp end frame to buffer length
const actualEndFrame = Math.min(endFrame, audioBuffer.length);
const actualFrameCount = actualEndFrame - startFrame;
if (actualFrameCount <= 0) {
console.error(` ❌ Actual frame count after clamping is ${actualFrameCount}, skipping`);
continue;
}
try {
const audioContext = getElevenLabsAudioContext(); // reuse the shared context instead of creating one per segment (browsers cap concurrent AudioContexts)
const partBuffer = audioContext.createBuffer(
audioBuffer.numberOfChannels,
actualFrameCount,
audioBuffer.sampleRate
);
// Copy audio data for each channel with bounds checking
for (let channel = 0; channel < audioBuffer.numberOfChannels; channel++) {
const sourceData = audioBuffer.getChannelData(channel);
const targetData = partBuffer.getChannelData(channel);
for (let j = 0; j < actualFrameCount; j++) {
const sourceIndex = startFrame + j;
if (sourceIndex < sourceData.length) {
targetData[j] = sourceData[sourceIndex];
} else {
targetData[j] = 0; // Silence if we exceed source bounds
}
}
}
const wavBlob = bufferToWave(partBuffer);
blobs.push(wavBlob);
console.log(` ✅ Created WAV segment: ${(wavBlob.size / 1024).toFixed(2)} KB`);
} catch (error) {
console.error(` ❌ Error creating segment ${i + 1}:`, error);
continue;
}
}
console.log(`✅ Successfully created ${blobs.length}/${segmentTimings.length} emotion-based audio segments`);
return blobs;
}
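// Illustrative end-to-end sketch (not invoked): how the helpers above are meant to chain
// together. `mp3ArrayBuffer` and `alignment` are hypothetical inputs standing in for an
// ElevenLabs with-timestamps response; Gemini analysis provides the emotion segments.
async function exampleEmotionSegmentationPipeline(text, mp3ArrayBuffer, alignment) {
    const segments = await analyzeTextWithGemini(text);                  // [{ emotion, action, text }, ...]
    const timings = calculateSegmentEndTimes(alignment, segments);       // character-accurate start/end times
    const audioBuffer = await decodeTTSArrayBuffer(mp3ArrayBuffer);      // full TTS audio
    const wavBlobs = await splitAudioByTimestamps(audioBuffer, timings); // one WAV Blob per segment
    return { segments, timings, wavBlobs };
}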
// ==========================================================
// SECTION 1. Live2D INTEGRATION EVENTS
// ----------------------------------------------------------
// - Live2DScriptReady: detect whether Live2D is active
// - TTSScriptReady: tell Live2D that TTS is ready
// ==========================================================
// Tell Live2D that the TTS script is present and ready
console.log("[TTS] TTS Script initialized. Dispatching 'TTSScriptReady' event...");
const ttsReadyEvent = new CustomEvent('TTSScriptReady', {
detail: {
version: '3.9.5',
capabilities: {
emotionAnalysis: true,
segmentedAudio: true,
elevenLabs: true,
builtInVoices: true
}
}
});
window.dispatchEvent(ttsReadyEvent);
console.log("[TTS] 📢 'TTSScriptReady' event dispatched to Live2D script");
// Live2D signals that it is ready
window.addEventListener("Live2DScriptReady", function () {
if (!live2dScriptDetected) {
live2dScriptDetected = true;
console.log("[TTS] Live2D script detected. TTS will NOT play audio directly when Live2D is active.");
}
});
// Global variable storing the available actions
let availableActions = [];
// Listener that receives the available actions from Live2D
window.addEventListener("Live2DActionsReady", function (event) {
const { emotions, actions, modelName } = event.detail;
availableActions = actions || [];
console.log(`[TTS] Received actions list from Live2D model "${modelName}":`, availableActions);
console.log(`[TTS] Emotions available:`, (emotions || []).length);
});
// ==========================================================
// SECTION 2. BOT/USER MESSAGE DETECTION
// ----------------------------------------------------------
// - Extracts the last finished message and triggers TTS when appropriate
// - Provides utilities for formatting the text to be read
// ==========================================================
// Extracts and processes the last bot message once it has finished
function logMessageStatus() {
const allMessageNodes = document.querySelectorAll(MESSAGE_CONTAINER_SELECTOR);
if (allMessageNodes.length === 0) return;
// Find the last finished bot message
let lastBotMessageContainer = null;
let activeMessageNode = null;
let activeSwipeIndex = 0;
let messageIndex = -1;
for (let i = allMessageNodes.length - 1; i >= 0; i--) {
const node = allMessageNodes[i];
if (node.querySelector(BOT_NAME_ICON_SELECTOR)) {
let candidateNode;
const swipeContainer = node.querySelector(LAST_MESSAGE_SWIPE_CONTAINER_SELECTOR);
if (swipeContainer) {
const slider = swipeContainer.querySelector(SWIPE_SLIDER_SELECTOR);
if (!slider) continue;
const transform = slider.style.transform;
const translateX = transform ? parseFloat(transform.match(/translateX\(([-0-9.]+)%\)/)?.[1] || "0") : 0;
activeSwipeIndex = Math.round(Math.abs(translateX) / 100);
const allSwipes = slider.querySelectorAll(MESSAGE_WRAPPER_SELECTOR);
if (allSwipes.length <= activeSwipeIndex) continue;
candidateNode = allSwipes[activeSwipeIndex];
} else {
candidateNode = node.querySelector(MESSAGE_WRAPPER_SELECTOR);
}
if (!candidateNode) continue;
if (candidateNode.querySelector(EDIT_PANEL_SELECTOR)) continue;
if (!candidateNode.querySelector(CONTROL_PANEL_SELECTOR)) continue;
lastBotMessageContainer = node;
activeMessageNode = candidateNode;
messageIndex = parseInt(node.dataset.index, 10);
break;
}
}
if (!activeMessageNode) return;
const messageText = extractFormattedMessageText(activeMessageNode);
const { processed: processedTTS } = processTTSOutput(messageText);
const status = "Finished";
const shouldLog =
status !== lastLoggedStatus ||
activeSwipeIndex !== lastLoggedSwipeIndex ||
messageIndex !== lastLoggedMessageIndex ||
(status !== "Streaming" && messageText !== lastLoggedText);
if (shouldLog) {
lastLoggedStatus = status;
lastLoggedSwipeIndex = activeSwipeIndex;
lastLoggedMessageIndex = messageIndex;
lastLoggedText = messageText;
console.log("📜 Raw extracted text (Auto):");
console.log(messageText);
console.log("\n🎤 Processed TTS (Auto):");
console.log(processedTTS || "[No TTS output]");
console.log("--------------------");
if (processedTTS) {
playTTS(processedTTS, true); // isBot = true
}
}
}
// Variant that detects the last finished message (bot or user)
function logLastFinishedMessage() {
const allMessageNodes = document.querySelectorAll(MESSAGE_CONTAINER_SELECTOR);
if (allMessageNodes.length === 0) return;
let lastFinishedNode = null;
let messageIndex = -1;
let isBot = false;
for (let i = allMessageNodes.length - 1; i >= 0; i--) {
const node = allMessageNodes[i];
let candidateNode;
if (node.querySelector(BOT_NAME_ICON_SELECTOR)) {
const swipeContainer = node.querySelector(LAST_MESSAGE_SWIPE_CONTAINER_SELECTOR);
if (swipeContainer) {
const slider = swipeContainer.querySelector(SWIPE_SLIDER_SELECTOR);
if (!slider) continue;
const transform = slider.style.transform;
const translateX = transform ? parseFloat(transform.match(/translateX\(([-0-9.]+)%\)/)?.[1] || "0") : 0;
const activeSwipeIndex = Math.round(Math.abs(translateX) / 100);
const allSwipes = slider.querySelectorAll(MESSAGE_WRAPPER_SELECTOR);
if (allSwipes.length <= activeSwipeIndex) continue;
candidateNode = allSwipes[activeSwipeIndex];
} else {
candidateNode = node.querySelector(MESSAGE_WRAPPER_SELECTOR);
}
if (!candidateNode) continue;
if (candidateNode.querySelector(EDIT_PANEL_SELECTOR)) continue;
if (!candidateNode.querySelector(CONTROL_PANEL_SELECTOR)) continue;
lastFinishedNode = candidateNode;
messageIndex = parseInt(node.dataset.index, 10);
isBot = true;
break;
} else {
candidateNode = node.querySelector(MESSAGE_WRAPPER_SELECTOR);
if (!candidateNode) continue;
if (candidateNode.querySelector(EDIT_PANEL_SELECTOR)) continue;
if (!candidateNode.querySelector(CONTROL_PANEL_SELECTOR)) continue;
lastFinishedNode = candidateNode;
messageIndex = parseInt(node.dataset.index, 10);
isBot = false;
break;
}
}
if (!lastFinishedNode) return;
const messageText = extractFormattedMessageText(lastFinishedNode);
const { processed: processedTTS } = processTTSOutput(messageText);
const status = "Finished";
if (
status !== lastLoggedStatus ||
messageIndex !== lastLoggedMessageIndex ||
(status !== "Streaming" && messageText !== lastLoggedText)
) {
lastLoggedStatus = status;
lastLoggedSwipeIndex = -1;
lastLoggedMessageIndex = messageIndex;
lastLoggedText = messageText;
console.log("📜 Raw extracted text (Auto, User+Bot):");
console.log(messageText);
console.log("\n🎤 Processed TTS (Auto, User+Bot):");
console.log(processedTTS || "[No TTS output]");
console.log("--------------------");
if (processedTTS) {
playTTS(processedTTS, isBot);
}
}
}
// Extracts formatted text from the message node (preserves italics, etc.)
function extractFormattedMessageText(messageNode) {
// Find the message text container dynamically
// Structure: _messageBody_ > [_nameContainer_, textContainer]
// The text container is a direct child of _messageBody_ that is NOT _nameContainer_
const messageBody = messageNode.querySelector(MESSAGE_BODY_SELECTOR);
if (!messageBody) return "[No text found]";
// Find the text container: it's a div child of messageBody that is not the name container
let textContainer = null;
for (const child of messageBody.children) {
if (child.tagName === 'DIV' && !child.className.match(/_nameContainer_/)) {
// This should be the text container (has dynamic css-XXXXX class)
textContainer = child;
break;
}
}
if (!textContainer) return "[No text found]";
let result = [];
// Find text blocks - they have class 'css-0' or are direct children with content
const blocks = textContainer.querySelectorAll('[class^="css-"]');
blocks.forEach(block => {
const p = block.querySelector('p');
if (p) {
let line = '';
p.childNodes.forEach(child => {
if (child.nodeType === Node.ELEMENT_NODE) {
if (child.tagName === 'EM') line += '_' + child.textContent + '_';
else if (child.tagName === 'STRONG') line += '**' + child.textContent + '**';
else if (child.tagName === 'CODE') line += '`' + child.textContent + '`';
else line += child.textContent;
} else if (child.nodeType === Node.TEXT_NODE) {
line += child.textContent;
}
});
if (line.trim()) result.push(line.trim());
return;
}
const ul = block.querySelector('ul');
if (ul) {
ul.querySelectorAll('li').forEach(li => result.push('• ' + li.textContent.trim()));
return;
}
const code = block.querySelector('code');
if (code && !p) { result.push('`' + code.textContent.trim() + '`'); return; }
if (!block.textContent.trim()) return;
result.push(block.textContent.trim());
});
return result.length ? result.join('\n') : "[No text found]";
}
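// For reference, the extraction above maps rendered HTML back to lightweight markdown,
// e.g. <em>waves</em> -> "_waves_", <strong>No!</strong> -> "**No!**", <code>x</code> -> "`x`",
// and <li> items -> "• item" lines, all joined with "\n".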
// Cleans/filters the input text according to the user's settings
function processTTSOutput(rawText) {
const settings = JSON.parse(localStorage.getItem("ttsSettings") || "{}");
const provider = settings.provider || 'builtin';
const prefix = provider === 'elevenlabs' ? 'elevenlabs_' : provider === 'gemini' ? 'gemini_' : '';
let processed = rawText;
let needsDelay = false;
// Skip code blocks
if (settings[`${prefix}tts-skip-codeblocks`]) {
const codeblockRegex = /```[\s\S]*?```/g;
if (codeblockRegex.test(processed)) needsDelay = true;
processed = processed.replace(codeblockRegex, "");
const inlineCodeRegex = /`[^`]*`/g;
if (inlineCodeRegex.test(processed)) needsDelay = true;
processed = processed.replace(inlineCodeRegex, "");
} else {
processed = processed.replace(/```([\s\S]*?)```/g, (m, p1) => p1.trim());
processed = processed.replace(/`([^`]*)`/g, (m, p1) => p1);
}
// Skip bullet points
if (settings[`${prefix}tts-skip-bulletpoints`]) {
const lines = processed.split("\n");
let found = false;
processed = lines.filter(line => {
if (/^\s*([•\-*])\s+/.test(line)) { found = true; return false; }
return true;
}).join("\n");
if (found) needsDelay = true;
}
// Asterisks and emphasis
if (settings[`${prefix}tts-ignore-asterisks`]) {
let found = false;
processed = processed.replace(/\*\*[^*\n]+\*\*/g, () => { found = true; return ""; });
processed = processed.replace(/\*[^*\n]+\*/g, () => { found = true; return ""; });
processed = processed.replace(/_[^_\n]+_/g, () => { found = true; return ""; });
if (found) needsDelay = true;
} else {
processed = processed.replace(/\*\*([^*\n]+)\*\*/g, (m, p1) => p1);
processed = processed.replace(/\*([^*\n]+)\*/g, (m, p1) => p1);
processed = processed.replace(/_([^_\n]+)_/g, (m, p1) => p1);
}
// Narrate only double-quoted text
if (settings[`${prefix}tts-only-quotes`]) {
const matches = [];
let match;
const regex = /"([^"]+)"/g;
while ((match = regex.exec(processed)) !== null) matches.push(match[1]);
processed = matches.length > 0 ? matches.join(" ") : "";
}
processed = processed.replace(/\n{2,}/g, "\n").trim();
return { processed, needsDelay };
}
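// Illustrative example (not executed): with "elevenlabs_tts-ignore-asterisks" and
// "elevenlabs_tts-only-quotes" both enabled, processTTSOutput reduces
//   '*She smiles.* "Hello there," she says.'
// to
//   'Hello there,'
// (the asterisk action is stripped first, then only the quoted dialogue is kept).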
// ==========================================================
// SECTION 3. CHAT OBSERVER (enables automatic detection)
// ----------------------------------------------------------
// - Observes DOM changes and calls the appropriate detectors
// - Auto-narrates user messages when enabled
// ==========================================================
function initializeObserver() {
const container = document.querySelector(CHAT_CONTAINER_SELECTOR);
if (container) {
const observer = new MutationObserver(() => {
const settings = JSON.parse(localStorage.getItem("ttsSettings") || "{}");
const provider = settings.provider || 'builtin';
const prefix = provider === 'elevenlabs' ? 'elevenlabs_' : provider === 'gemini' ? 'gemini_' : '';
const ttsEnabled = !!settings[`${prefix}tts-enabled`];
const autoGen = !!settings[`${prefix}tts-auto-gen`];
const narrateUser = !!settings[`${prefix}tts-narrate-user`];
// Only proceed if TTS is enabled and auto-gen is on
if (ttsEnabled && autoGen) {
// If narrate user is enabled, use logLastFinishedMessage (handles both bot and user)
// Otherwise, use logMessageStatus (handles only bot messages)
if (narrateUser) {
logLastFinishedMessage();
} else {
logMessageStatus();
}
}
});
observer.observe(container, {
childList: true, subtree: true, attributes: true, attributeFilter: ['style'],
});
// Initial check
const settings = JSON.parse(localStorage.getItem("ttsSettings") || "{}");
const provider = settings.provider || 'builtin';
const prefix = provider === 'elevenlabs' ? 'elevenlabs_' : provider === 'gemini' ? 'gemini_' : '';
if (settings[`${prefix}tts-enabled`] && settings[`${prefix}tts-auto-gen`]) {
if (settings[`${prefix}tts-narrate-user`]) {
logLastFinishedMessage();
} else {
logMessageStatus();
}
}
} else {
setTimeout(initializeObserver, 1000);
}
}
if (document.readyState === "loading") {
document.addEventListener("DOMContentLoaded", initializeObserver);
} else {
initializeObserver();
}
// ==========================================================
// SECTION 4. BUILT-IN VOICES (Web Speech) AND SETTINGS MENU
// ----------------------------------------------------------
// - Loads built-in voices and provides a test popup
// - CSS and construction of the TTS settings modal
// - Saves settings (Built-in and ElevenLabs)
// ==========================================================
// Built-in voices
let builtinVoices = [];
function loadBuiltinVoices(callback) {
function updateVoices() {
builtinVoices = window.speechSynthesis?.getVoices() || [];
if (typeof callback === "function") callback(builtinVoices);
}
if (!window.speechSynthesis) {
builtinVoices = [];
if (typeof callback === "function") callback([]);
return;
}
window.speechSynthesis.onvoiceschanged = updateVoices;
updateVoices();
}
function showVoicesPopup() {
loadBuiltinVoices(function(voices) {
if (!voices || voices.length === 0) {
alert("No built-in voices available or still loading. Try again in a moment.");
return;
}
let msg = "Available Built-in Voices:\n\n";
voices.forEach((v, i) => { msg += `${i + 1}. ${v.name} (${v.lang})${v.default ? " [default]" : ""}\n`; });
alert(msg);
});
}
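// Illustrative sketch (not invoked): the Web Speech API pattern the "Built-in" provider
// relies on. `voiceName` and `rate` are hypothetical parameters; the script's actual
// playback path (playTTS) applies the saved settings instead.
function exampleSpeakWithBuiltinVoice(text, voiceName, rate = 1.0) {
    const utterance = new SpeechSynthesisUtterance(text);
    const voice = builtinVoices.find(v => v.name === voiceName);
    if (voice) utterance.voice = voice; // otherwise the browser default voice is used
    utterance.rate = rate;              // 0.1 - 2.0, same range as the menu's playback-speed slider
    window.speechSynthesis.speak(utterance);
}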
// Temporary keyboard shortcut: Ctrl+Alt+V to list voices
window.addEventListener("keydown", function(e) {
if (e.ctrlKey && e.altKey && e.key.toLowerCase() === "v") showVoicesPopup();
});
// TTS menu CSS - glassmorphism style (matching Live2D)
const TTS_MENU_CSS = `
/* === GLASSMORPHISM BASE VARIABLES === */
.tts-modal-overlay {
--glass-bg: rgba(18, 18, 22, 0.78);
--glass-bg-light: rgba(30, 30, 36, 0.7);
--glass-border: rgba(255, 255, 255, 0.08);
--glass-border-hover: rgba(176, 196, 222, 0.4);
--accent-primary: rgba(176, 196, 222, 0.9);
--accent-gradient: linear-gradient(135deg, rgba(176, 196, 222, 0.9), rgba(147, 197, 253, 0.8));
--accent-glow: 0 0 15px rgba(176, 196, 222, 0.4);
--accent-glow-strong: 0 0 20px rgba(176, 196, 222, 0.6), 0 0 40px rgba(176, 196, 222, 0.2);
--text-primary: rgba(255, 255, 255, 0.95);
--text-secondary: rgba(200, 200, 220, 0.8);
--text-muted: rgba(160, 160, 180, 0.7);
--blur-amount: 12px;
--radius-sm: 8px;
--radius-md: 15px;
--radius-lg: 20px;
}
/* === MODAL OVERLAY === */
.tts-modal-overlay {
position: fixed; z-index: 9999; inset: 0;
background: rgba(0, 0, 0, 0.6);
backdrop-filter: blur(4px);
-webkit-backdrop-filter: blur(4px);
display: flex; align-items: center; justify-content: center;
animation: ttsFadeIn 0.2s ease-out;
}
@keyframes ttsFadeIn { from { opacity: 0; } to { opacity: 1; } }
@keyframes ttsSlideUp { from { opacity: 0; transform: translateY(20px); } to { opacity: 1; transform: translateY(0); } }
/* === MODAL CONTAINER === */
.tts-modal-container {
background: var(--glass-bg);
backdrop-filter: blur(var(--blur-amount));
-webkit-backdrop-filter: blur(var(--blur-amount));
border-radius: var(--radius-lg);
border: 1px solid var(--glass-border);
box-shadow: 0 8px 32px rgba(0, 0, 0, 0.4), inset 0 1px 0 rgba(255, 255, 255, 0.05);
min-width: 480px; max-width: 95vw; min-height: 320px; max-height: 90vh; padding: 0;
display: flex; flex-direction: column; font-family: 'Segoe UI', system-ui, sans-serif;
animation: ttsSlideUp 0.3s ease-out;
}
/* === HEADER === */
.tts-modal-header {
display: flex; align-items: center; justify-content: space-between;
padding: 20px 28px 16px 28px;
border-bottom: 1px solid var(--glass-border);
}
.tts-modal-title {
font-size: 1.35rem; font-weight: 600;
background: linear-gradient(135deg, #fff 0%, rgba(176, 196, 222, 1) 100%);
-webkit-background-clip: text; -webkit-text-fill-color: transparent;
background-clip: text; margin: 0;
text-shadow: 0 0 30px rgba(176, 196, 222, 0.3);
}
.tts-modal-close {
background: rgba(255, 255, 255, 0.05);
border: 1px solid var(--glass-border);
color: var(--text-secondary);
font-size: 1.2rem; cursor: pointer;
padding: 8px; border-radius: var(--radius-sm);
transition: all 0.2s ease;
display: flex; align-items: center; justify-content: center;
}
.tts-modal-close:hover {
background: rgba(255, 255, 255, 0.1);
border-color: var(--glass-border-hover);
color: var(--text-primary);
box-shadow: var(--accent-glow);
}
/* === BODY === */
.tts-modal-body {
padding: 24px 28px; display: flex; flex-direction: column; gap: 18px;
overflow-y: auto;
scrollbar-width: thin;
scrollbar-color: rgba(176, 196, 222, 0.3) transparent;
}
.tts-modal-body::-webkit-scrollbar { width: 6px; }
.tts-modal-body::-webkit-scrollbar-track { background: transparent; }
.tts-modal-body::-webkit-scrollbar-thumb { background: rgba(176, 196, 222, 0.3); border-radius: 3px; }
.tts-modal-body::-webkit-scrollbar-thumb:hover { background: rgba(176, 196, 222, 0.5); }
/* === CHECKBOXES === */
.tts-checkbox-list { display: flex; flex-direction: column; gap: 12px; margin-bottom: 8px; }
.tts-checkbox-row {
display: flex; align-items: center; gap: 12px;
padding: 10px 14px;
background: rgba(255, 255, 255, 0.02);
border-radius: var(--radius-sm);
border: 1px solid transparent;
transition: all 0.2s ease;
}
.tts-checkbox-row:hover {
background: rgba(255, 255, 255, 0.05);
border-color: var(--glass-border);
}
.tts-checkbox-row label { color: var(--text-secondary); font-size: 0.95rem; cursor: pointer; }
.tts-checkbox-row input[type="checkbox"],
.tts-checkbox {
appearance: none; -webkit-appearance: none;
width: 20px; height: 20px;
background: rgba(255, 255, 255, 0.05);
border: 2px solid rgba(176, 196, 222, 0.3);
border-radius: 6px;
cursor: pointer;
transition: all 0.2s ease;
position: relative;
flex-shrink: 0;
}
.tts-checkbox-row input[type="checkbox"]:checked,
.tts-checkbox:checked {
background: var(--accent-gradient);
border-color: transparent;
box-shadow: var(--accent-glow);
}
.tts-checkbox-row input[type="checkbox"]:checked::after,
.tts-checkbox:checked::after {
content: '✓';
position: absolute;
top: 50%; left: 50%;
transform: translate(-50%, -50%);
color: #1a1a2e;
font-size: 12px;
font-weight: bold;
}
/* === SLIDERS === */
.tts-slider-row {
display: flex; align-items: center; gap: 12px; margin-bottom: 8px;
padding: 10px 14px;
background: rgba(255, 255, 255, 0.02);
border-radius: var(--radius-sm);
border: 1px solid transparent;
transition: all 0.2s ease;
}
.tts-slider-row:hover {
background: rgba(255, 255, 255, 0.05);
border-color: var(--glass-border);
}
.tts-slider-label {
color: var(--text-secondary); font-size: 0.95rem;
margin-right: 8px; min-width: 120px;
}
.tts-slider {
flex: 1; height: 6px;
background: linear-gradient(90deg, rgba(176, 196, 222, 0.2), rgba(176, 196, 222, 0.1));
border-radius: 3px; outline: none; -webkit-appearance: none;
cursor: pointer;
}
.tts-slider::-webkit-slider-thumb {
-webkit-appearance: none; appearance: none;
width: 18px; height: 18px;
background: var(--accent-gradient);
cursor: pointer; border-radius: 50%;
box-shadow: var(--accent-glow);
transition: all 0.2s ease;
}
.tts-slider::-webkit-slider-thumb:hover {
transform: scale(1.1);
box-shadow: var(--accent-glow-strong);
}
.tts-slider::-moz-range-thumb {
width: 18px; height: 18px;
background: var(--accent-gradient);
cursor: pointer; border-radius: 50%; border: none;
box-shadow: var(--accent-glow);
}
.tts-slider-value {
width: 60px; padding: 8px 10px;
border-radius: var(--radius-sm);
border: 1px solid var(--glass-border);
background: rgba(255, 255, 255, 0.05);
color: var(--accent-primary);
font-size: 0.9rem; text-align: center;
font-family: 'JetBrains Mono', monospace;
transition: all 0.2s ease;
}
.tts-slider-value:focus {
border-color: var(--glass-border-hover);
box-shadow: var(--accent-glow);
outline: none;
}
/* === DROPDOWNS === */
.tts-dropdown-row {
display: flex; flex-direction: column; gap: 10px; margin-bottom: 8px;
}
.tts-dropdown-label {
color: var(--text-secondary); font-size: 0.9rem;
font-weight: 500; margin-bottom: 2px;
text-transform: uppercase;
letter-spacing: 0.5px;
}
.tts-dropdown {
padding: 10px 14px; border-radius: var(--radius-sm);
border: 1px solid var(--glass-border);
background: rgba(255, 255, 255, 0.05);
color: var(--text-primary); font-size: 0.95rem;
min-width: 120px; margin-bottom: 2px;
cursor: pointer;
transition: all 0.2s ease;
backdrop-filter: blur(4px);
}
.tts-dropdown:hover, .tts-dropdown:focus {
border-color: var(--glass-border-hover);
background: rgba(255, 255, 255, 0.08);
box-shadow: var(--accent-glow);
outline: none;
}
.tts-dropdown option { background: #1e1f28; color: var(--text-primary); }
.tts-dropdown optgroup { background: #1e1f28; color: var(--text-muted); }
/* === FOOTER === */
.tts-modal-footer {
display: flex; justify-content: flex-end; gap: 12px;
padding: 18px 28px;
border-top: 1px solid var(--glass-border);
background: transparent;
border-radius: 0 0 var(--radius-lg) var(--radius-lg);
}
/* === BUTTONS === */
.tts-modal-btn {
padding: 10px 28px; border-radius: var(--radius-sm); border: none;
font-size: 0.95rem; font-weight: 600; cursor: pointer;
transition: all 0.2s ease;
text-transform: uppercase;
letter-spacing: 0.5px;
}
.tts-modal-btn.cancel {
background: transparent;
border: 1px solid var(--glass-border);
color: var(--text-secondary);
}
.tts-modal-btn.save {
background: var(--accent-gradient);
color: #1a1a2e;
box-shadow: var(--accent-glow);
}
.tts-modal-btn.cancel:hover {
background: rgba(255, 255, 255, 0.1);
border-color: var(--glass-border-hover);
color: var(--text-primary);
}
.tts-modal-btn.save:hover {
box-shadow: var(--accent-glow-strong);
transform: translateY(-1px);
}
/* === API KEY CONTAINER === */
.tts-api-key-container {
display: flex; align-items: stretch; gap: 10px;
}
.tts-api-key-container textarea {
flex-grow: 1; padding: 10px 14px;
border-radius: var(--radius-sm);
border: 1px solid var(--glass-border);
background: rgba(255, 255, 255, 0.05);
color: var(--text-primary);
font-size: 0.95rem; resize: none;
font-family: 'JetBrains Mono', monospace;
height: 42px; line-height: 1.5;
transition: all 0.2s ease;
}
.tts-api-key-container textarea:focus {
border-color: var(--glass-border-hover);
box-shadow: var(--accent-glow);
outline: none;
}
.tts-api-key-container textarea::placeholder {
color: var(--text-muted);
}
.tts-api-key-validate-btn {
padding: 0 20px;
border-radius: var(--radius-sm);
border: 1px solid var(--glass-border);
background: rgba(255, 255, 255, 0.05);
color: var(--text-secondary);
font-size: 0.9rem; font-weight: 500;
cursor: pointer;
transition: all 0.2s ease;
text-transform: uppercase;
letter-spacing: 0.3px;
}
.tts-api-key-validate-btn:hover {
background: rgba(255, 255, 255, 0.1);
border-color: var(--glass-border-hover);
color: var(--text-primary);
box-shadow: var(--accent-glow);
}
.tts-api-key-validate-btn:disabled {
opacity: 0.5;
cursor: not-allowed;
}
.tts-api-key-status {
font-size: 0.85rem; margin-top: 6px; height: 18px;
font-weight: 500;
}
.tts-api-key-status.success { color: #4ade80; text-shadow: 0 0 10px rgba(74, 222, 128, 0.3); }
.tts-api-key-status.error { color: #f87171; text-shadow: 0 0 10px rgba(248, 113, 113, 0.3); }
/* === PROVIDER SECTION DIVIDER === */
.tts-dropdown-row[style*="border-bottom"] {
padding-bottom: 18px !important;
margin-bottom: 0 !important;
border-bottom: 1px solid var(--glass-border) !important;
}
/* === SETTINGS PANELS === */
#tts-settings-builtin,
#tts-settings-elevenlabs,
#tts-settings-gemini {
display: flex;
flex-direction: column;
gap: 12px;
}
/* Remove extra margin from checkbox list when followed by other elements */
.tts-checkbox-list {
margin-bottom: 0 !important;
}
/* === TEXTAREA STYLES (for Gemini style prompt) === */
.tts-dropdown-row textarea {
padding: 10px 14px;
border-radius: var(--radius-sm);
border: 1px solid var(--glass-border);
background: rgba(255, 255, 255, 0.05);
color: var(--text-primary);
font-size: 0.95rem;
resize: none;
font-family: 'Segoe UI', system-ui, sans-serif;
transition: all 0.2s ease;
}
.tts-dropdown-row textarea:focus {
border-color: var(--glass-border-hover);
box-shadow: var(--accent-glow);
outline: none;
}
.tts-dropdown-row textarea::placeholder {
color: var(--text-muted);
}
/* Gemini style row - reduce internal gaps */
#tts-settings-gemini .tts-dropdown-row {
gap: 8px;
margin-bottom: 0;
}
/* Slider rows in settings panels - no extra bottom margin */
#tts-settings-gemini .tts-slider-row,
#tts-settings-elevenlabs .tts-slider-row,
#tts-settings-builtin .tts-slider-row {
margin-bottom: 0;
}
`;
if (!document.getElementById("tts-menu-style")) {
const style = document.createElement("style");
style.id = "tts-menu-style";
style.textContent = TTS_MENU_CSS;
document.head.appendChild(style);
}
const CHECKBOX_OPTIONS = [
{ id: "tts-enabled", label: "Enabled", default: false },
{ id: "tts-narrate-user", label: "Narrate user messages", default: false },
{ id: "tts-auto-gen", label: "Auto Generation", default: false },
{ id: "tts-only-quotes", label: 'Only narrate "quotes"', default: false },
{ id: "tts-ignore-asterisks", label: 'Ignore *text, even "quotes", inside asterisks*', default: false },
{ id: "tts-skip-codeblocks", label: "Skip codeblocks", default: false },
{ id: "tts-skip-bulletpoints", label: "Skip bulletpoints", default: false }
];
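// For reference: each option above is stored in localStorage under "ttsSettings",
// prefixed per provider -- e.g. "tts-enabled" for Built-in, "elevenlabs_tts-enabled"
// for ElevenLabs, "gemini_tts-enabled" for Gemini -- the same prefixing scheme that
// processTTSOutput() and the chat observer read back.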
let elevenLabsVoices = [];
let elevenLabsModels = [];
// Gemini TTS pre-made models and voices (single-speaker only)
const GEMINI_TTS_MODELS = [
{ id: 'gemini-2.5-flash-preview-tts', label: 'Gemini 2.5 Flash (TTS)' },
{ id: 'gemini-2.5-pro-preview-tts', label: 'Gemini 2.5 Pro (TTS)' }
];
const GEMINI_TTS_VOICES = [
{ id: 'Zephyr', label: 'Zephyr -- Bright' },
{ id: 'Puck', label: 'Puck -- Upbeat' },
{ id: 'Charon', label: 'Charon -- Informative' },
{ id: 'Kore', label: 'Kore -- Firm' },
{ id: 'Fenrir', label: 'Fenrir -- Excitable' },
{ id: 'Leda', label: 'Leda -- Youthful' },
{ id: 'Orus', label: 'Orus -- Firm' },
{ id: 'Aoede', label: 'Aoede -- Breezy' },
{ id: 'Callirrhoe', label: 'Callirrhoe -- Easy-going' },
{ id: 'Autonoe', label: 'Autonoe -- Bright' },
{ id: 'Enceladus', label: 'Enceladus -- Breathy' },
{ id: 'Iapetus', label: 'Iapetus -- Clear' },
{ id: 'Umbriel', label: 'Umbriel -- Easy-going' },
{ id: 'Algieba', label: 'Algieba -- Smooth' },
{ id: 'Despina', label: 'Despina -- Smooth' },
{ id: 'Erinome', label: 'Erinome -- Clear' },
{ id: 'Algenib', label: 'Algenib -- Gravelly' },
{ id: 'Rasalgethi', label: 'Rasalgethi -- Informative' },
{ id: 'Laomedeia', label: 'Laomedeia -- Upbeat' },
{ id: 'Achernar', label: 'Achernar -- Soft' },
{ id: 'Alnilam', label: 'Alnilam -- Firm' },
{ id: 'Schedar', label: 'Schedar -- Even' },
{ id: 'Gacrux', label: 'Gacrux -- Mature' },
{ id: 'Pulcherrima', label: 'Pulcherrima -- Forward' },
{ id: 'Achird', label: 'Achird -- Friendly' },
{ id: 'Zubenelgenubi', label: 'Zubenelgenubi -- Casual' },
{ id: 'Vindemiatrix', label: 'Vindemiatrix -- Gentle' },
{ id: 'Sadachbia', label: 'Sadachbia -- Lively' },
{ id: 'Sadaltager', label: 'Sadaltager -- Knowledgeable' },
{ id: 'Sulafat', label: 'Sulafat -- Warm' }
];
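// Illustrative sketch (not invoked): the rough shape of a single-speaker Gemini TTS
// request body for the preview models listed above, per Google's generateContent TTS
// documentation at the time of writing -- verify field names against the current API
// reference. The response is expected to carry base64 PCM in
// candidates[0].content.parts[0].inlineData.data, which createWavFromPCM() can wrap
// into a WAV for playback.
const EXAMPLE_GEMINI_TTS_PAYLOAD = {
    contents: [{ parts: [{ text: "Say cheerfully: have a wonderful day!" }] }],
    generationConfig: {
        responseModalities: ["AUDIO"],
        speechConfig: {
            voiceConfig: { prebuiltVoiceConfig: { voiceName: "Kore" } }
        }
    }
};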
function createTTSMenu() {
const savedSettings = JSON.parse(localStorage.getItem("ttsSettings") || "{}");
const getSetting = (key, def) => (key in savedSettings ? savedSettings[key] : def);
const overlay = document.createElement("div");
overlay.className = "tts-modal-overlay";
overlay.style.display = "none";
const container = document.createElement("div");
container.className = "tts-modal-container";
const header = document.createElement("div");
header.className = "tts-modal-header";
const title = document.createElement("h2");
title.className = "tts-modal-title";
title.textContent = "Text to Speech Settings";
const closeBtn = document.createElement("button");
closeBtn.className = "tts-modal-close";
closeBtn.innerHTML = `<svg width="20" height="20" viewBox="0 0 20 20" fill="none"><path d="M15 5L5 15M5 5l10 10" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"></path></svg>`;
closeBtn.onclick = () => {
stopPreviewAudio();
overlay.style.display = "none";
};
header.appendChild(title);
header.appendChild(closeBtn);
const mainBody = document.createElement("div");
mainBody.className = "tts-modal-body";
// Provider selector
const providerDropdownRow = document.createElement("div");
providerDropdownRow.className = "tts-dropdown-row";
providerDropdownRow.style.paddingBottom = "18px";
providerDropdownRow.style.marginBottom = "0";
providerDropdownRow.style.borderBottom = "1px solid #444";
const providerLabel = document.createElement("label");
providerLabel.className = "tts-dropdown-label";
providerLabel.textContent = "TTS Provider";
const providerSelect = document.createElement("select");
providerSelect.id = "tts-provider-select";
providerSelect.className = "tts-dropdown";
providerSelect.innerHTML = `<option value="builtin">Built-in</option><option value="elevenlabs">ElevenLabs</option><option value="gemini">Gemini TTS (API)</option>`;
providerSelect.value = getSetting("provider", "builtin");
providerDropdownRow.appendChild(providerLabel);
providerDropdownRow.appendChild(providerSelect);
mainBody.appendChild(providerDropdownRow);
// Current bot/user names
let botName = "char";
try {
const botNameElem = document.querySelector('[class^="_nameText_"]');
if (botNameElem && botNameElem.textContent.trim()) botName = botNameElem.textContent.trim();
} catch (e) {}
let userPersona = "User";
try {
const allMessageNodes = document.querySelectorAll('[data-testid="virtuoso-item-list"] > div[data-index]');
for (let i = allMessageNodes.length - 1; i >= 0; i--) {
const node = allMessageNodes[i];
if (!node.querySelector('[class^="_nameIcon_"]')) {
const nameElem = node.querySelector('[class^="_nameText_"]');
if (nameElem && nameElem.textContent.trim()) { userPersona = nameElem.textContent.trim(); break; }
}
}
} catch (e) {}
// Built-in panel
const settingsBuiltIn = document.createElement("div");
settingsBuiltIn.id = "tts-settings-builtin";
settingsBuiltIn.style.display = "flex";
settingsBuiltIn.style.flexDirection = "column";
settingsBuiltIn.style.gap = "18px";
const checkboxListBuiltIn = document.createElement("div");
checkboxListBuiltIn.className = "tts-checkbox-list";
CHECKBOX_OPTIONS.forEach(opt => {
const row = document.createElement("div");
row.className = "tts-checkbox-row";
const cb = document.createElement("input");
cb.type = "checkbox";
cb.id = `builtin-${opt.id}`;
cb.dataset.key = opt.id;
cb.className = "tts-checkbox";
cb.checked = !!getSetting(opt.id, opt.default);
const label = document.createElement("label");
label.htmlFor = cb.id;
label.textContent = opt.label;
row.appendChild(cb);
row.appendChild(label);
checkboxListBuiltIn.appendChild(row);
});
settingsBuiltIn.appendChild(checkboxListBuiltIn);
const sliderRowBuiltIn = document.createElement("div");
sliderRowBuiltIn.className = "tts-slider-row";
const sliderLabelBuiltIn = document.createElement("span");
sliderLabelBuiltIn.className = "tts-slider-label";
sliderLabelBuiltIn.textContent = "Playback speed";
const sliderBuiltIn = document.createElement("input");
sliderBuiltIn.type = "range";
sliderBuiltIn.dataset.key = "playbackSpeed";
sliderBuiltIn.className = "tts-slider";
sliderBuiltIn.min = "0.10";
sliderBuiltIn.max = "2.00";
sliderBuiltIn.step = "0.05";
sliderBuiltIn.value = getSetting("playbackSpeed", "1.00");
const sliderValueBuiltIn = document.createElement("input");
sliderValueBuiltIn.type = "text";
sliderValueBuiltIn.className = "tts-slider-value";
sliderValueBuiltIn.value = sliderBuiltIn.value;
sliderBuiltIn.oninput = () => { sliderValueBuiltIn.value = parseFloat(sliderBuiltIn.value).toFixed(2); };
sliderValueBuiltIn.oninput = () => {
let v = parseFloat(sliderValueBuiltIn.value);
if (!isNaN(v) && v >= 0.1 && v <= 2) sliderBuiltIn.value = v.toFixed(2);
};
sliderRowBuiltIn.appendChild(sliderLabelBuiltIn);
sliderRowBuiltIn.appendChild(sliderBuiltIn);
sliderRowBuiltIn.appendChild(sliderValueBuiltIn);
settingsBuiltIn.appendChild(sliderRowBuiltIn);
const dropdownRowBuiltIn = document.createElement("div");
dropdownRowBuiltIn.className = "tts-dropdown-row";
dropdownRowBuiltIn.innerHTML = `
<label class="tts-dropdown-label">Default voice</label>
<select class="tts-dropdown" data-key="defaultVoice"></select>
<label class="tts-dropdown-label">Voice for "${botName}"</label>
<select class="tts-dropdown" data-key="charVoice_${botName}"></select>
<label class="tts-dropdown-label">Voice for "${userPersona}" (You)</label>
<select class="tts-dropdown" data-key="userVoice_${userPersona}"></select>
`;
loadBuiltinVoices(() => {
const dropdowns = dropdownRowBuiltIn.querySelectorAll('.tts-dropdown');
dropdowns.forEach(dd => {
dd.innerHTML = `<option value="Default">Default</option>`;
builtinVoices.forEach(v => {
const opt = document.createElement("option");
opt.value = v.name;
opt.textContent = `${v.name} (${v.lang})${v.default ? " [default]" : ""}`;
dd.appendChild(opt);
});
const key = dd.dataset.key;
const fallbackKey = key.startsWith('charVoice') || key.startsWith('userVoice') ? 'defaultVoice' : 'Default';
dd.value = getSetting(key, getSetting(fallbackKey, 'Default'));
});
});
settingsBuiltIn.appendChild(dropdownRowBuiltIn);
// ElevenLabs panel
const settingsElevenLabs = document.createElement("div");
settingsElevenLabs.id = "tts-settings-elevenlabs";
settingsElevenLabs.style.display = "none";
settingsElevenLabs.style.flexDirection = "column";
settingsElevenLabs.style.gap = "18px";
// API Key
const apiKeyRow = document.createElement("div");
apiKeyRow.className = "tts-dropdown-row";
apiKeyRow.style.paddingBottom = "18px";
apiKeyRow.style.marginBottom = "0";
apiKeyRow.style.borderBottom = "1px solid #444";
const apiKeyLabel = document.createElement("label");
apiKeyLabel.className = "tts-dropdown-label";
apiKeyLabel.textContent = "ElevenLabs API Key";
const apiKeyContainer = document.createElement("div");
apiKeyContainer.className = 'tts-api-key-container';
const apiKeyInput = document.createElement("textarea");
apiKeyInput.dataset.key = "elevenlabs_apiKey";
apiKeyInput.value = getSetting("elevenlabs_apiKey", "");
apiKeyInput.placeholder = "Enter your API Key";
const validateBtn = document.createElement("button");
validateBtn.type = "button";
validateBtn.className = "tts-api-key-validate-btn";
validateBtn.textContent = "Validate";
const apiKeyStatus = document.createElement("div");
apiKeyStatus.className = "tts-api-key-status";
// Hide/show the API key
let isKeyHidden = true;
const originalKey = apiKeyInput.value;
function maskKey(key) { return key.length > 0 ? '•'.repeat(key.length) : ''; }
if (originalKey) { apiKeyInput.value = maskKey(originalKey); apiKeyInput.dataset.original = originalKey; }
apiKeyInput.addEventListener('focus', () => {
if (isKeyHidden && apiKeyInput.dataset.original) { apiKeyInput.value = apiKeyInput.dataset.original; isKeyHidden = false; }
});
apiKeyInput.addEventListener('blur', () => {
apiKeyInput.dataset.original = apiKeyInput.value;
apiKeyInput.value = maskKey(apiKeyInput.value);
isKeyHidden = true;
});
apiKeyInput.addEventListener('input', () => { apiKeyInput.dataset.original = apiKeyInput.value; });
apiKeyContainer.appendChild(apiKeyInput);
apiKeyContainer.appendChild(validateBtn);
apiKeyRow.appendChild(apiKeyLabel);
apiKeyRow.appendChild(apiKeyContainer);
apiKeyRow.appendChild(apiKeyStatus);
settingsElevenLabs.appendChild(apiKeyRow);
const checkboxListElevenLabs = document.createElement("div");
checkboxListElevenLabs.className = "tts-checkbox-list";
CHECKBOX_OPTIONS.forEach(opt => {
const row = document.createElement("div");
row.className = "tts-checkbox-row";
const cb = document.createElement("input");
cb.type = "checkbox";
cb.id = `elevenlabs-${opt.id}`;
cb.dataset.key = `elevenlabs_${opt.id}`;
cb.className = "tts-checkbox";
cb.checked = !!getSetting(cb.dataset.key, opt.default);
const label = document.createElement("label");
label.htmlFor = cb.id;
label.textContent = opt.label;
row.appendChild(cb);
row.appendChild(label);
checkboxListElevenLabs.appendChild(row);
});
settingsElevenLabs.appendChild(checkboxListElevenLabs);
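// Helper: builds a labeled range slider paired with a text input that mirrors its value.
// - labelText / key: visible label and the ttsSettings key bound through data-key
// - min / max / step / defaultValue: passed to the range input (defaultValue is used when the setting is unset)
// - formatFn: formats the slider value for display (e.g. "1.00" or "50%")
// - parseFn: parses the text input back and returns { isValid, value }; invalid input reverts to the slider value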
function createSlider(labelText, key, min, max, step, defaultValue, formatFn, parseFn) {
const row = document.createElement("div");
row.className = "tts-slider-row";
row.innerHTML = `<span class="tts-slider-label">${labelText}</span>`;
const slider = document.createElement("input");
slider.type = "range";
slider.dataset.key = key;
slider.className = "tts-slider";
slider.min = min; slider.max = max; slider.step = step;
slider.value = getSetting(key, defaultValue);
const valueInput = document.createElement("input");
valueInput.type = "text";
valueInput.className = "tts-slider-value";
valueInput.value = formatFn(slider.value);
slider.oninput = () => { valueInput.value = formatFn(slider.value); };
valueInput.onchange = () => {
const parsed = parseFn(valueInput.value);
if (parsed.isValid) { slider.value = parsed.value; valueInput.value = formatFn(slider.value); }
else { valueInput.value = formatFn(slider.value); }
};
row.appendChild(slider);
row.appendChild(valueInput);
return row;
}
settingsElevenLabs.appendChild(createSlider("Playback speed", "elevenlabs_playbackSpeed", "0.1", "2.0", "0.05", "1.00",
v => parseFloat(v).toFixed(2),
v => { const n = parseFloat(v); return { isValid: !isNaN(n) && n >= 0.1 && n <= 2, value: n.toFixed(2) }; }
));
settingsElevenLabs.appendChild(createSlider("Stability", "elevenlabs_stability", "0", "1", "0.01", "0.50",
v => `${Math.round(v * 100)}%`,
v => { const n = parseInt(v.replace('%','')); return { isValid: !isNaN(n) && n >= 0 && n <= 100, value: (n/100).toFixed(2) }; }
));
settingsElevenLabs.appendChild(createSlider("Similarity Boost", "elevenlabs_similarity", "0", "1", "0.01", "0.75",
v => `${Math.round(v * 100)}%`,
v => { const n = parseInt(v.replace('%','')); return { isValid: !isNaN(n) && n >= 0 && n <= 100, value: (n/100).toFixed(2) }; }
));
settingsElevenLabs.appendChild(createSlider("Style", "elevenlabs_style", "0", "1", "0.01", "0.00",
v => `${Math.round(v * 100)}%`,
v => { const n = parseInt(v.replace('%','')); return { isValid: !isNaN(n) && n >= 0 && n <= 100, value: (n/100).toFixed(2) }; }
));
const speakerBoostRow = document.createElement("div");
speakerBoostRow.className = "tts-checkbox-row";
const speakerBoostCb = document.createElement("input");
speakerBoostCb.type = "checkbox";
speakerBoostCb.id = "elevenlabs-speaker-boost";
speakerBoostCb.dataset.key = "elevenlabs_speaker-boost";
speakerBoostCb.className = "tts-checkbox";
speakerBoostCb.checked = !!getSetting("elevenlabs_speaker-boost", false);
const speakerBoostLabel = document.createElement("label");
speakerBoostLabel.htmlFor = speakerBoostCb.id;
speakerBoostLabel.textContent = "Use Speaker Boost";
speakerBoostRow.appendChild(speakerBoostCb);
speakerBoostRow.appendChild(speakerBoostLabel);
checkboxListElevenLabs.appendChild(speakerBoostRow);
const dropdownRowElevenLabs = document.createElement("div");
dropdownRowElevenLabs.className = "tts-dropdown-row";
dropdownRowElevenLabs.innerHTML = `
<label class="tts-dropdown-label">Model</label>
<select class="tts-dropdown" data-key="elevenlabs_modelId"></select>
<label class="tts-dropdown-label">Default voice</label>
<select class="tts-dropdown" data-key="elevenlabs_defaultVoice"></select>
<label class="tts-dropdown-label">Voice for "${botName}"</label>
<select class="tts-dropdown" data-key="elevenlabs_charVoice_${botName}"></select>
<label class="tts-dropdown-label">Voice for "${userPersona}" (You)</label>
<select class="tts-dropdown" data-key="elevenlabs_userVoice_${userPersona}"></select>
`;
settingsElevenLabs.appendChild(dropdownRowElevenLabs);
// Gemini TTS panel (single-speaker)
const settingsGemini = document.createElement("div");
settingsGemini.id = "tts-settings-gemini";
settingsGemini.style.display = "none";
settingsGemini.style.flexDirection = "column";
settingsGemini.style.gap = "12px";
// Gemini API Key (no validation call, just stored)
const geminiApiRow = document.createElement("div");
geminiApiRow.className = "tts-dropdown-row";
geminiApiRow.style.paddingBottom = "18px";
geminiApiRow.style.marginBottom = "0";
geminiApiRow.style.borderBottom = "1px solid rgba(255, 255, 255, 0.08)";
const geminiApiLabel = document.createElement("label");
geminiApiLabel.className = "tts-dropdown-label";
geminiApiLabel.textContent = "Gemini API Key";
const geminiApiContainer = document.createElement("div");
geminiApiContainer.className = 'tts-api-key-container';
const geminiApiInput = document.createElement("textarea");
geminiApiInput.dataset.key = "gemini_apiKey";
geminiApiInput.value = getSetting("gemini_apiKey", "");
geminiApiInput.placeholder = "Enter your Gemini API Key";
// Hide/show the key (same pattern as ElevenLabs)
let geminiKeyHidden = true;
const geminiOriginalKey = geminiApiInput.value;
function maskGeminiKey(key) { return key.length > 0 ? '•'.repeat(key.length) : ''; }
if (geminiOriginalKey) { geminiApiInput.value = maskGeminiKey(geminiOriginalKey); geminiApiInput.dataset.original = geminiOriginalKey; }
geminiApiInput.addEventListener('focus', () => {
if (geminiKeyHidden && geminiApiInput.dataset.original) { geminiApiInput.value = geminiApiInput.dataset.original; geminiKeyHidden = false; }
});
geminiApiInput.addEventListener('blur', () => {
geminiApiInput.dataset.original = geminiApiInput.value;
geminiApiInput.value = maskGeminiKey(geminiApiInput.value);
geminiKeyHidden = true;
});
geminiApiInput.addEventListener('input', () => { geminiApiInput.dataset.original = geminiApiInput.value; });
geminiApiContainer.appendChild(geminiApiInput);
geminiApiRow.appendChild(geminiApiLabel);
geminiApiRow.appendChild(geminiApiContainer);
settingsGemini.appendChild(geminiApiRow);
// Gemini checkboxes (same options, prefixed)
const checkboxListGemini = document.createElement("div");
checkboxListGemini.className = "tts-checkbox-list";
CHECKBOX_OPTIONS.forEach(opt => {
const row = document.createElement("div");
row.className = "tts-checkbox-row";
const cb = document.createElement("input");
cb.type = "checkbox";
cb.id = `gemini-${opt.id}`;
cb.dataset.key = `gemini_${opt.id}`;
cb.className = "tts-checkbox";
cb.checked = !!getSetting(cb.dataset.key, opt.default);
const label = document.createElement("label");
label.htmlFor = cb.id;
label.textContent = opt.label;
row.appendChild(cb);
row.appendChild(label);
checkboxListGemini.appendChild(row);
});
settingsGemini.appendChild(checkboxListGemini);
// Gemini Static Style toggle + textarea
const geminiStyleRow = document.createElement("div");
geminiStyleRow.className = "tts-dropdown-row";
const geminiStyleCheckboxRow = document.createElement("div");
geminiStyleCheckboxRow.className = "tts-checkbox-row";
const geminiStyleCheckbox = document.createElement("input");
geminiStyleCheckbox.type = "checkbox";
geminiStyleCheckbox.id = "gemini-static-style";
geminiStyleCheckbox.dataset.key = "gemini_static_style_enabled";
geminiStyleCheckbox.className = "tts-checkbox";
geminiStyleCheckbox.checked = !!getSetting("gemini_static_style_enabled", false);
const geminiStyleCheckboxLabel = document.createElement("label");
geminiStyleCheckboxLabel.htmlFor = geminiStyleCheckbox.id;
geminiStyleCheckboxLabel.textContent = "Static Style";
geminiStyleCheckboxRow.appendChild(geminiStyleCheckbox);
geminiStyleCheckboxRow.appendChild(geminiStyleCheckboxLabel);
const geminiStyleTextareaLabel = document.createElement("label");
geminiStyleTextareaLabel.className = "tts-dropdown-label";
geminiStyleTextareaLabel.textContent = "Static style prompt (used for all Gemini TTS when enabled)";
const geminiStyleTextarea = document.createElement("textarea");
geminiStyleTextarea.dataset.key = "gemini_static_style_text";
geminiStyleTextarea.placeholder = "Write the style instructions to prepend before every Gemini TTS generation.";
geminiStyleTextarea.style.minHeight = "80px";
geminiStyleTextarea.style.resize = "none";
geminiStyleTextarea.style.padding = "10px 14px";
geminiStyleTextarea.style.borderRadius = "8px";
geminiStyleTextarea.style.border = "1px solid rgba(255, 255, 255, 0.08)";
geminiStyleTextarea.style.background = "rgba(255, 255, 255, 0.05)";
geminiStyleTextarea.style.color = "rgba(255, 255, 255, 0.95)";
geminiStyleTextarea.style.fontFamily = "'Segoe UI', system-ui, sans-serif";
geminiStyleTextarea.style.fontSize = "0.95rem";
geminiStyleTextarea.value = getSetting("gemini_static_style_text", "");
// Show/hide style textarea depending on toggle
function updateGeminiStyleVisibility() {
geminiStyleTextareaLabel.style.display = geminiStyleCheckbox.checked ? "block" : "none";
geminiStyleTextarea.style.display = geminiStyleCheckbox.checked ? "block" : "none";
}
geminiStyleCheckbox.addEventListener("change", updateGeminiStyleVisibility);
updateGeminiStyleVisibility();
geminiStyleRow.appendChild(geminiStyleCheckboxRow);
geminiStyleRow.appendChild(geminiStyleTextareaLabel);
geminiStyleRow.appendChild(geminiStyleTextarea);
settingsGemini.appendChild(geminiStyleRow);
// Gemini model + voice dropdowns
const dropdownRowGemini = document.createElement("div");
dropdownRowGemini.className = "tts-dropdown-row";
dropdownRowGemini.innerHTML = `
<label class="tts-dropdown-label">Model</label>
<select class="tts-dropdown" data-key="gemini_modelId"></select>
<label class="tts-dropdown-label">Default voice</label>
<select class="tts-dropdown" data-key="gemini_defaultVoice"></select>
<label class="tts-dropdown-label">Voice for "${botName}"</label>
<select class="tts-dropdown" data-key="gemini_charVoice_${botName}"></select>
<label class="tts-dropdown-label">Voice for "${userPersona}" (You)</label>
<select class="tts-dropdown" data-key="gemini_userVoice_${userPersona}"></select>
`;
// Populate Gemini model + voices
const geminiModelSelect = dropdownRowGemini.querySelector('[data-key="gemini_modelId"]');
GEMINI_TTS_MODELS.forEach(m => {
const opt = document.createElement('option');
opt.value = m.id;
opt.textContent = m.label;
geminiModelSelect.appendChild(opt);
});
geminiModelSelect.value = getSetting('gemini_modelId', GEMINI_TTS_MODELS[0]?.id || 'gemini-2.5-flash-preview-tts');
const geminiVoiceDropdowns = dropdownRowGemini.querySelectorAll('[data-key^="gemini_"]');
geminiVoiceDropdowns.forEach(dd => {
if (dd.dataset.key === 'gemini_modelId') return;
dd.innerHTML = `<option value="Default">Default</option>`;
GEMINI_TTS_VOICES.forEach(v => {
const opt = document.createElement('option');
opt.value = v.id;
opt.textContent = v.label;
dd.appendChild(opt);
});
const key = dd.dataset.key;
const fallbackKey = key.includes('charVoice') || key.includes('userVoice') ? 'gemini_defaultVoice' : 'Default';
dd.value = getSetting(key, getSetting(fallbackKey, 'Default'));
});
settingsGemini.appendChild(dropdownRowGemini);
mainBody.appendChild(settingsBuiltIn);
mainBody.appendChild(settingsElevenLabs);
mainBody.appendChild(settingsGemini);
validateBtn.addEventListener('click', async () => {
const key = apiKeyInput.dataset.original || apiKeyInput.value;
if (!key) {
apiKeyStatus.textContent = "API Key is empty.";
apiKeyStatus.className = "tts-api-key-status error";
return;
}
apiKeyStatus.textContent = "Validating...";
apiKeyStatus.className = "tts-api-key-status";
validateBtn.disabled = true;
const validation = await validateElevenLabsKey(key);
apiKeyStatus.textContent = validation.message;
apiKeyStatus.className = `tts-api-key-status ${validation.isValid ? 'success' : 'error'}`;
if (validation.isValid) {
await fetchAndPopulateElevenLabsData(key);
const currentSettings = JSON.parse(localStorage.getItem("ttsSettings") || "{}");
currentSettings.elevenlabs_apiKey = key;
localStorage.setItem("ttsSettings", JSON.stringify(currentSettings));
}
validateBtn.disabled = false;
});
async function fetchAndPopulateElevenLabsData(apiKey) {
try {
const [voices, models] = await Promise.all([
elevenLabsApiRequest({ method: "GET", endpoint: "/v1/voices", apiKey }),
elevenLabsApiRequest({ method: "GET", endpoint: "/v1/models", apiKey })
]);
elevenLabsVoices = voices.voices || [];
elevenLabsModels = models.filter(m => m.can_do_text_to_speech) || [];
// Model
const modelSelect = dropdownRowElevenLabs.querySelector('[data-key="elevenlabs_modelId"]');
modelSelect.innerHTML = '';
elevenLabsModels.forEach(model => {
const option = document.createElement('option');
option.value = model.model_id;
option.textContent = model.name;
modelSelect.appendChild(option);
});
modelSelect.value = getSetting('elevenlabs_modelId', elevenLabsModels[0]?.model_id || '');
// Voices
const dropdownsEL = dropdownRowElevenLabs.querySelectorAll('[data-key^="elevenlabs_"], [data-key*="Voice"]');
dropdownsEL.forEach(dd => {
if(dd.dataset.key === 'elevenlabs_modelId') return;
const currentVal = dd.value;
dd.innerHTML = `<option value="Default">Default</option>`;
const categorized = { 'Premade': [], 'Cloned': [] };
elevenLabsVoices.forEach(v => {
if(v.category === 'premade') categorized.Premade.push(v);
else categorized.Cloned.push(v);
});
Object.keys(categorized).forEach(category => {
const voicesInCategory = categorized[category];
if(voicesInCategory.length > 0){
const optgroup = document.createElement('optgroup');
optgroup.label = `${category} (${voicesInCategory.length})`;
voicesInCategory.forEach(voice => {
const option = document.createElement('option');
option.value = voice.voice_id;
option.textContent = voice.name;
option.dataset.previewUrl = voice.preview_url || '';
optgroup.appendChild(option);
});
dd.appendChild(optgroup);
}
});
const key = dd.dataset.key;
const fallbackKey = key.includes('charVoice') || key.includes('userVoice') ? 'elevenlabs_defaultVoice' : 'Default';
dd.value = getSetting(key, getSetting(fallbackKey, 'Default'));
// Add event listener to play preview when voice changes
dd.addEventListener('change', (event) => {
const selectedOption = event.target.options[event.target.selectedIndex];
const previewUrl = selectedOption.dataset.previewUrl;
if (previewUrl) {
playVoicePreview(previewUrl);
}
});
});
} catch (error) {
console.error("TTS Userscript: Failed to fetch ElevenLabs data:", error);
apiKeyStatus.textContent = `Failed to get voices/models: ${error.message}`;
apiKeyStatus.className = "tts-api-key-status error";
}
}
// Provider switch
providerSelect.onchange = () => {
if (providerSelect.value === 'builtin') {
settingsBuiltIn.style.display = 'flex';
settingsElevenLabs.style.display = 'none';
settingsGemini.style.display = 'none';
} else if (providerSelect.value === 'elevenlabs') {
settingsBuiltIn.style.display = 'none';
settingsElevenLabs.style.display = 'flex';
settingsGemini.style.display = 'none';
const key = getSetting("elevenlabs_apiKey", "");
if(key) fetchAndPopulateElevenLabsData(key);
} else if (providerSelect.value === 'gemini') {
settingsBuiltIn.style.display = 'none';
settingsElevenLabs.style.display = 'none';
settingsGemini.style.display = 'flex';
}
};
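// Run the handler once on the next tick so the correct provider panel is visible when the modal opens.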
setTimeout(() => { providerSelect.onchange(); }, 0);
// Save/Cancel footer
const footer = document.createElement("div");
footer.className = "tts-modal-footer";
const cancelBtn = document.createElement("button");
cancelBtn.className = "tts-modal-btn cancel";
cancelBtn.textContent = "Cancel";
cancelBtn.onclick = () => {
stopPreviewAudio();
overlay.style.display = "none";
};
const saveBtn = document.createElement("button");
saveBtn.className = "tts-modal-btn save";
saveBtn.textContent = "Save Settings";
saveBtn.onclick = () => {
const prevSettings = JSON.parse(localStorage.getItem("ttsSettings") || "{}");
const newSettings = { ...prevSettings, provider: providerSelect.value };
// Built-in & ElevenLabs settings
document.querySelectorAll('#tts-settings-builtin [data-key], #tts-settings-elevenlabs [data-key]').forEach(el => {
const key = el.dataset.key;
if (key === 'elevenlabs_apiKey') {
if (el.dataset.original) newSettings[key] = el.dataset.original;
} else if (el.type === 'checkbox') {
newSettings[key] = el.checked;
} else if (el.type === 'range' || el.classList.contains('tts-slider-value')) {
newSettings[key] = parseFloat(el.value);
} else {
newSettings[key] = el.value;
}
});
// Gemini TTS (API) settings stored separately with gemini_ prefix
document.querySelectorAll('#tts-settings-gemini [data-key]').forEach(el => {
const key = el.dataset.key;
if (!key) return;
if (key === 'gemini_apiKey') {
if (el.dataset.original) newSettings[key] = el.dataset.original;
else newSettings[key] = el.value;
} else if (el.type === 'checkbox') {
newSettings[key] = el.checked;
} else if (el.type === 'range' || el.classList.contains('tts-slider-value')) {
newSettings[key] = parseFloat(el.value);
} else {
newSettings[key] = el.value;
}
});
localStorage.setItem("ttsSettings", JSON.stringify(newSettings));
stopPreviewAudio();
overlay.style.display = "none";
document.querySelectorAll('.temp-btn').forEach(btn => btn.remove());
document.querySelectorAll(CONTROL_PANEL_SELECTOR).forEach(injectTempButton);
};
footer.appendChild(cancelBtn);
footer.appendChild(saveBtn);
container.appendChild(header);
container.appendChild(mainBody);
container.appendChild(footer);
overlay.appendChild(container);
document.body.appendChild(overlay);
return overlay;
}
let ttsMenuOverlay = null;
// ==========================================================
// SECTION 5. APP MENU INJECTION
// ----------------------------------------------------------
// - Adds a "Text to Speech" entry to the JanitorAI menu
// - Opens the settings modal on click
// ==========================================================
const MENU_LIST_SELECTOR = '[class^="_menuList_"]';
const MENU_ITEM_CLASS = '[class^="_menuItem_"]';
const TTS_BUTTON_ID = 'tts-menu-item';
const bodyObserver = new MutationObserver(() => { injectTTSMenuItem(); });
bodyObserver.observe(document.body, { childList: true, subtree: true });
function injectTTSMenuItem() {
const menuList = document.querySelector(MENU_LIST_SELECTOR);
if (!menuList) return;
if (menuList.querySelector(`#${TTS_BUTTON_ID}`)) return;
const btn = document.createElement('button');
btn.type = 'button';
const firstMenuItem = menuList.querySelector(MENU_ITEM_CLASS);
btn.className = firstMenuItem ? firstMenuItem.className : '';
btn.id = TTS_BUTTON_ID;
btn.innerHTML = `
<span class="_menuItemIcon_1fzcr_81">
<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24"
viewBox="0 0 24 24" fill="none" stroke="currentColor"
stroke-width="2" stroke-linecap="round" stroke-linejoin="round"
class="lucide lucide-audio-lines-icon">
<path d="M2 10v3"/><path d="M6 6v11"/><path d="M10 3v18"/>
<path d="M14 8v7"/><path d="M18 5v13"/><path d="M22 10v3"/>
</svg>
</span>
<span class="_menuItemContent_1fzcr_96">Text to Speech</span>
`;
btn.addEventListener('click', function() {
if (ttsMenuOverlay) ttsMenuOverlay.remove();
ttsMenuOverlay = createTTSMenu();
ttsMenuOverlay.style.display = "flex";
});
const menuItems = Array.from(menuList.querySelectorAll(MENU_ITEM_CLASS));
let inserted = false;
for (let i = 0; i < menuItems.length; i++) {
const span = menuItems[i].querySelector('span[class*="_menuItemContent_"]');
if (span && span.textContent.trim() === "Generation Settings") {
if (menuItems[i].nextSibling) {
menuList.insertBefore(btn, menuItems[i].nextSibling);
} else {
menuList.appendChild(btn);
}
inserted = true;
break;
}
}
if (!inserted) menuList.appendChild(btn);
}
// ==========================================================
// SECTION 6. TTS ENGINE (Built-in, ElevenLabs, and Gemini)
// ----------------------------------------------------------
// - Plays TTS according to the selected provider
// - Emits events for Live2D (stop, blobURL, audioBuffer)
// - Segments audio according to EmotionQueue + Alignment (when applicable)
// ==========================================================
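// The 'TTSEmotionSegmentsReady' events dispatched below are meant to be consumed by a separate
// Live2D companion script. A minimal listener sketch (the companion script is not part of this file,
// so the handling shown here is an illustrative assumption):
//
//   window.addEventListener('TTSEmotionSegmentsReady', (e) => {
//     const { segments, totalDuration, sampleRate } = e.detail;
//     segments.forEach(seg => {
//       // seg: { emotion, action, text, blobUrl, startTime, endTime, duration }
//       // e.g. trigger the matching expression/motion, then play seg.blobUrl
//     });
//   });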
let currentUtterance = null;
let currentElevenLabsAudio = null;
let currentPreviewAudio = null; // For voice preview playback
let isPlaying = false; // Track global playing state
function updateAllButtonStates(playing) {
isPlaying = playing;
const svg = playing ? STOP_SVG : PLAY_SVG;
document.querySelectorAll('.temp-btn').forEach(button => {
button.innerHTML = svg;
});
}
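// Rough sketch of the ttsSettings object read below (persisted to localStorage by the Save handler;
// per-character keys are generated dynamically, so this is illustrative rather than exhaustive):
//   {
//     provider: 'builtin' | 'elevenlabs' | 'gemini',
//     'tts-enabled': true, 'tts-narrate-user': false, playbackSpeed: 1, defaultVoice: 'Default',
//     [`charVoice_${botName}`]: '...', [`userVoice_${userPersona}`]: '...',
//     elevenlabs_apiKey: '...', elevenlabs_modelId: '...', elevenlabs_stability: 0.5,
//     gemini_apiKey: '...', gemini_modelId: 'gemini-2.5-flash-preview-tts', gemini_static_style_enabled: false
//   }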
function playTTS(text, isBot) {
const settings = JSON.parse(localStorage.getItem("ttsSettings") || "{}");
const provider = settings.provider || 'builtin';
if (provider === 'elevenlabs') {
playElevenLabsTTS(text, isBot, settings);
} else if (provider === 'gemini') {
playGeminiTTSTTS(text, isBot, settings);
} else {
playBuiltinTTS(text, isBot, settings);
}
}
function stopTTS() {
// Stop Built-in (Web Speech)
if (window.speechSynthesis && window.speechSynthesis.speaking) {
window.speechSynthesis.cancel();
}
// Stop ElevenLabs (the same Audio element is also reused for Gemini playback)
if (currentElevenLabsAudio) {
currentElevenLabsAudio.pause();
currentElevenLabsAudio.src = '';
currentElevenLabsAudio = null;
}
// Update all play/stop buttons
updateAllButtonStates(false);
}
// Voice preview functions
function stopPreviewAudio() {
if (currentPreviewAudio) {
currentPreviewAudio.pause();
currentPreviewAudio.src = '';
currentPreviewAudio = null;
}
}
function playVoicePreview(previewUrl) {
if (!previewUrl) return;
stopPreviewAudio();
currentPreviewAudio = new Audio(previewUrl);
currentPreviewAudio.play().catch(e => console.error("Error playing voice preview:", e));
}
// Gemini TTS (single-speaker, PCM -> WAV in browser)
async function playGeminiTTSTTS(text, isBot, settings) {
const apiKey = settings.gemini_apiKey;
if (!settings['gemini_tts-enabled'] || !apiKey) { stopTTS(); return; }
// If Live2D is active, use emotion analysis (single segment) and send queue to Live2D
if (live2dScriptDetected) {
console.log("🎭 [Gemini] Live2D detected - starting whole-utterance emotion analysis and queue dispatch");
try {
// 1. Process text with the same filters used elsewhere
console.log("📝 [Gemini] Step 1: Processing text with TTS filters...");
const { processed: processedText } = processTTSOutput(text);
if (!processedText || processedText.trim() === '') {
console.warn("⚠️ [Gemini] Processed text is empty after filters, skipping Gemini TTS + Live2D");
return;
}
console.log(`✓ [Gemini] Text processed: ${processedText.length} characters`);
// 2. Resolve speaker names (needed for voice selection in parallel call)
console.log("👤 [Gemini] Step 2: Resolving speaker names...");
try {
const botNameElem = document.querySelector('[class^="_nameText_"]');
if (botNameElem && botNameElem.textContent.trim()) botName = botNameElem.textContent.trim();
} catch (e) {
console.warn("⚠️ [Gemini] Could not get bot name:", e);
}
let userPersona = "User";
try {
const allMessageNodes = document.querySelectorAll('[data-testid="virtuoso-item-list"] > div[data-index]');
for (let i = allMessageNodes.length - 1; i >= 0; i--) {
const node = allMessageNodes[i];
if (!node.querySelector('[class^="_nameIcon_"]')) {
const nameElem = node.querySelector('[class^="_nameText_"]');
if (nameElem && nameElem.textContent.trim()) { userPersona = nameElem.textContent.trim(); break; }
}
}
} catch (e) {
console.warn("⚠️ [Gemini] Could not get user persona:", e);
}
console.log(`✓ [Gemini] Bot: "${botName}", User: "${userPersona}"`);
// 3. Resolve voice
console.log("🎤 [Gemini] Step 3: Resolving voice configuration...");
let voiceName;
if (isBot) voiceName = settings[`gemini_charVoice_${botName}`] || settings.gemini_defaultVoice;
else voiceName = settings[`gemini_userVoice_${userPersona}`] || settings.gemini_defaultVoice;
if (!voiceName || voiceName === 'Default') {
console.error("❌ [Gemini] No Gemini voice selected for this speaker.");
return;
}
console.log(`✓ [Gemini] Using voice: ${voiceName}`);
const modelId = settings.gemini_modelId || 'gemini-2.5-flash-preview-tts';
const playbackSpeed = parseFloat(settings.gemini_playbackSpeed) || 1.0;
// 4. Build final text depending on static style setting
console.log("🎨 [Gemini] Step 4: Building style prompt...");
const staticStyleEnabled = !!settings.gemini_static_style_enabled;
let finalText;
if (staticStyleEnabled && settings.gemini_static_style_text) {
finalText = `${settings.gemini_static_style_text}\n${processedText}`;
} else {
const defaultStyle = `### ROLE AND PERSONA
You are a professional voice actor specializing in realistic, naturalistic roleplay narration.
### VOICE PROFILE
1. **Base Tone:** Low, quiet, and measured. A steady, grounded hum.
2. **Performance Style:** Understated and conversational. Use a "less is more" approach.
3. **Negative Constraints (Crucial):**
- **No Theatrics:** Do not sound exaggerated, melodramatic, or "acty."
- **No Sultry Tone:** Do not use a breathy or seductive voice.
- **No Cartoons:** Avoid high-pitch spikes or anime-style exclamations. Keep it human.
### INSTRUCTIONS
1. **Narrate Everything:** Read every word provided, including the descriptive text (narration) and the dialogue.
2. **Context-Driven Emotion:** You must analyze the narration to understand the character's mental state, but express it subtly.
- **Subtlety is Key:** If the text describes anger, do not shout. Instead, drop your pitch and speak with cold intensity. If the text describes excitement, do not get loud; just quicken your pace slightly.
- **Continuity:** Ensure the narration and the dialogue flow together as one cohesive story, not two separate voices.
### EXAMPLE
**Input:**
*The guard slams his hand against the table, causing the mugs to rattle.*
"I told you already," *he hisses through gritted teeth, leaning in close so no one else hears,* "I don't have the money."
**Required Performance Logic:**
1. **Narration (*The guard slams...*):** Read this calmly but firmly to set the weight of the action.
2. **Dialogue ("I told you already")::** The text says "hisses" and "leaning in close." Do NOT shout. Whisper-talk this line with a tight, sharp intensity.
3. **Narration (*he hisses...*):** Maintain the low, measured storytelling tone.
4. **Dialogue ("I don't have the money")::** Deliver this with finality and a flat, cold tone.
**HERE'S THE TEXT YOU MUST NARRATE FOLLOWING ALL PREVIOUS INSTRUCTIONS, DO NOT NARRATE ANY OTHER TEXT:**
`;
finalText = `${defaultStyle}\n${processedText}`;
}
const body = {
contents: [ { parts: [ { text: finalText } ] } ],
generationConfig: {
responseModalities: ['AUDIO'],
speechConfig: {
voiceConfig: {
prebuiltVoiceConfig: { voiceName }
}
}
}
};
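// Response sketch (as parsed further below): the generateContent call returns
//   { candidates: [ { content: { parts: [ { inlineData: { data: "<base64 16-bit PCM @ 24 kHz, mono>" } } ] } } ] }
// and only the first candidate's inline audio data is used.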
// ⚡ OPTIMIZATION: Execute Gemini emotion analysis and TTS generation in PARALLEL
console.log("⚡ [Gemini] Step 5: Starting PARALLEL execution of emotion analysis and TTS generation...");
updateAllButtonStates(true);
const parallelStartTime = performance.now();
// Create promises for both operations
const emotionAnalysisPromise = analyzeTextWithGemini(processedText)
.then(segments => {
console.log(`✓ [Gemini] Emotion analysis completed (${((performance.now() - parallelStartTime) / 1000).toFixed(2)}s)`);
return segments;
})
.catch(err => {
console.warn("⚠️ [Gemini] Emotion analysis failed, will use Neutral:", err);
return null;
});
const ttsGenerationPromise = new Promise((resolve, reject) => {
GM_xmlhttpRequest({
method: "POST",
url: `https://generativelanguage.googleapis.com/v1beta/models/${modelId}:generateContent`,
headers: {
"Content-Type": "application/json",
"x-goog-api-key": apiKey
},
data: JSON.stringify(body),
onload: async (res) => {
if (res.status < 200 || res.status >= 300) {
reject(new Error(`HTTP ${res.status}: ${res.responseText}`));
return;
}
try {
const json = JSON.parse(res.responseText);
const base64 = json?.candidates?.[0]?.content?.parts?.[0]?.inlineData?.data;
if (!base64) {
reject(new Error("No audio in response"));
return;
}
console.log(`✓ [Gemini] TTS generation completed (${((performance.now() - parallelStartTime) / 1000).toFixed(2)}s)`);
resolve({ base64, json });
} catch (e) {
reject(e);
}
},
onerror: (err) => reject(new Error("TTS request failed: " + err))
});
});
// Wait for BOTH operations to complete in parallel
console.log("⏳ [Gemini] Waiting for parallel operations to complete...");
const [emotionSegments, ttsResult] = await Promise.all([emotionAnalysisPromise, ttsGenerationPromise]);
const parallelTotalTime = ((performance.now() - parallelStartTime) / 1000).toFixed(2);
console.log(`⚡ [Gemini] ✓ PARALLEL execution completed in ${parallelTotalTime}s (vs sequential: would take ~${(parseFloat(parallelTotalTime) * 1.5).toFixed(2)}s)`);
// Process emotion analysis results
const mainSeg = (emotionSegments && emotionSegments[0]) || { emotion: "Neutral", action: null, text: processedText };
const mainEmotion = mainSeg.emotion || "Neutral";
const mainAction = mainSeg.action || null;
console.log(`✓ [Gemini] Main emotion: ${mainEmotion}${mainAction ? `, action: ${mainAction}` : ''}`);
// Process TTS results
try {
const { base64 } = ttsResult;
console.log("🔄 [Gemini] Step 6: Decoding PCM16 audio and wrapping as WAV blob...");
const pcmBuffer = base64ToArrayBuffer(base64);
const wavBuffer = createWavFromPCM(pcmBuffer, 24000, 1, 16);
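// createWavFromPCM (helper defined earlier in this script) is assumed to prepend a standard
// 44-byte RIFF/WAVE header to the raw PCM16 samples so the browser can decode/play them as audio/wav.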
const blob = new Blob([wavBuffer], { type: 'audio/wav' });
const url = URL.createObjectURL(blob);
// Build a single segment queue item
console.log("📦 [Gemini] Building single emotion segment for Live2D...");
const singleSegment = {
emotion: mainEmotion,
action: mainAction,
text: mainSeg.text || processedText,
blobUrl: url,
startTime: 0,
endTime: null,
duration: null
};
const audioElement = new Audio(url);
audioElement.onloadedmetadata = () => {
singleSegment.duration = audioElement.duration;
singleSegment.endTime = audioElement.duration;
console.log("⏱️ [Gemini] Step 7: Audio duration:", audioElement.duration, "seconds");
console.log("📤 [Gemini] Step 8: Dispatching 'TTSEmotionSegmentsReady' to Live2D with a single segment...");
const event = new CustomEvent('TTSEmotionSegmentsReady', {
detail: {
segments: [singleSegment],
totalDuration: audioElement.duration,
sampleRate: 24000
}
});
window.dispatchEvent(event);
console.log("✅ [Gemini] Single-segment emotion queue dispatched to Live2D");
updateAllButtonStates(false);
};
// Don't play directly: Live2D script will control playback using the blobUrl
audioElement.onerror = (e) => {
console.error("❌ [Gemini] Failed to load audio metadata for duration check:", e);
updateAllButtonStates(false);
};
// Force metadata load
audioElement.load();
} catch (error) {
console.error("❌ [Gemini] Error processing audio:", error);
updateAllButtonStates(false);
}
} catch (e) {
console.error("❌ [Gemini] Unexpected error in Live2D flow", e);
updateAllButtonStates(false);
}
// Live2D will handle playback using the blobUrl queue
return;
}
// If Live2D is NOT active, fall back to local playback (original behavior)
stopTTS();
// Resolve speaker names
let botName = "char";
try {
const botNameElem = document.querySelector('[class^="_nameText_"]');
if (botNameElem && botNameElem.textContent.trim()) botName = botNameElem.textContent.trim();
} catch (e) {}
let userPersona = "User";
try {
const allMessageNodes = document.querySelectorAll('[data-testid="virtuoso-item-list"] > div[data-index]');
for (let i = allMessageNodes.length - 1; i >= 0; i--) {
const node = allMessageNodes[i];
if (!node.querySelector('[class^="_nameIcon_"]')) {
const nameElem = node.querySelector('[class^="_nameText_"]');
if (nameElem && nameElem.textContent.trim()) { userPersona = nameElem.textContent.trim(); break; }
}
}
} catch (e) {}
// Resolve voice
let voiceName;
if (isBot) voiceName = settings[`gemini_charVoice_${botName}`] || settings.gemini_defaultVoice;
else voiceName = settings[`gemini_userVoice_${userPersona}`] || settings.gemini_defaultVoice;
if (!voiceName || voiceName === 'Default') {
console.error("TTS Userscript: No Gemini voice selected for this speaker.");
return;
}
const modelId = settings.gemini_modelId || 'gemini-2.5-flash-preview-tts';
const playbackSpeed = parseFloat(settings.gemini_playbackSpeed) || 1.0;
// Build final text depending on static style setting
const staticStyleEnabled = !!settings.gemini_static_style_enabled;
let finalText;
if (staticStyleEnabled && settings.gemini_static_style_text) {
finalText = `${settings.gemini_static_style_text}\n${text}`;
} else {
// Default style prompt when Static Style is disabled or empty
const defaultStyle = `### ROLE AND PERSONA
You are a professional voice actor specializing in realistic, naturalistic roleplay narration.
### VOICE PROFILE
1. **Base Tone:** Low, quiet, and measured. A steady, grounded hum.
2. **Performance Style:** Understated and conversational. Use a "less is more" approach.
3. **Negative Constraints (Crucial):**
- **No Theatrics:** Do not sound exaggerated, melodramatic, or "acty."
- **No Sultry Tone:** Do not use a breathy or seductive voice.
- **No Cartoons:** Avoid high-pitch spikes or anime-style exclamations. Keep it human.
### INSTRUCTIONS
1. **Narrate Everything:** Read every word provided, including the descriptive text (narration) and the dialogue.
2. **Context-Driven Emotion:** You must analyze the narration to understand the character's mental state, but express it subtly.
- **Subtlety is Key:** If the text describes anger, do not shout. Instead, drop your pitch and speak with cold intensity. If the text describes excitement, do not get loud; just quicken your pace slightly.
- **Continuity:** Ensure the narration and the dialogue flow together as one cohesive story, not two separate voices.
### EXAMPLE
**Input:**
*The guard slams his hand against the table, causing the mugs to rattle.*
"I told you already," *he hisses through gritted teeth, leaning in close so no one else hears,* "I don't have the money."
**Required Performance Logic:**
1. **Narration (*The guard slams...*):** Read this calmly but firmly to set the weight of the action.
2. **Dialogue ("I told you already")::** The text says "hisses" and "leaning in close." Do NOT shout. Whisper-talk this line with a tight, sharp intensity.
3. **Narration (*he hisses...*):** Maintain the low, measured storytelling tone.
4. **Dialogue ("I don't have the money")::** Deliver this with finality and a flat, cold tone.
**HERE'S THE TEXT YOU MUST NARRATE FOLLOWING ALL PREVIOUS INSTRUCTIONS, DO NOT NARRATE ANY OTHER TEXT:**
`;
finalText = `${defaultStyle}\n${text}`;
}
const body = {
contents: [ { parts: [ { text: finalText } ] } ],
generationConfig: {
responseModalities: ['AUDIO'],
speechConfig: {
voiceConfig: {
prebuiltVoiceConfig: { voiceName }
}
}
}
};
updateAllButtonStates(true);
try {
GM_xmlhttpRequest({
method: "POST",
url: `https://generativelanguage.googleapis.com/v1beta/models/${modelId}:generateContent`,
headers: {
"Content-Type": "application/json",
"x-goog-api-key": apiKey
},
data: JSON.stringify(body),
onload: async (res) => {
if (res.status < 200 || res.status >= 300) {
console.error("Gemini TTS HTTP error", res.status, res.responseText);
updateAllButtonStates(false);
return;
}
try {
const json = JSON.parse(res.responseText);
const base64 = json?.candidates?.[0]?.content?.parts?.[0]?.inlineData?.data;
if (!base64) {
console.error("Gemini TTS: no audio in response", json);
updateAllButtonStates(false);
return;
}
// PCM16 mono @24kHz by spec
const pcmBuffer = base64ToArrayBuffer(base64);
const wavBuffer = createWavFromPCM(pcmBuffer, 24000, 1, 16);
const blob = new Blob([wavBuffer], { type: 'audio/wav' });
const url = URL.createObjectURL(blob);
currentElevenLabsAudio = new Audio(url); // reuse same audio holder
currentElevenLabsAudio.playbackRate = playbackSpeed;
currentElevenLabsAudio.onended = () => { updateAllButtonStates(false); currentElevenLabsAudio = null; };
currentElevenLabsAudio.onerror = () => { updateAllButtonStates(false); currentElevenLabsAudio = null; };
currentElevenLabsAudio.play();
} catch (e) {
console.error("Gemini TTS parse/play error", e);
updateAllButtonStates(false);
}
},
onerror: (e) => {
console.error("Gemini TTS network error", e);
updateAllButtonStates(false);
}
});
} catch (e) {
console.error("Gemini TTS unexpected error", e);
updateAllButtonStates(false);
}
}
// Built-in (Web Speech)
function playBuiltinTTS(text, isBot, settings) {
if (!settings['tts-enabled']) { stopTTS(); return; }
if (!window.speechSynthesis || !text || typeof text !== 'string') return;
stopTTS();
const utter = new SpeechSynthesisUtterance(text);
currentUtterance = utter;
utter.rate = parseFloat(settings.playbackSpeed) || 1.0;
utter.pitch = 1;
const allVoices = window.speechSynthesis.getVoices();
if (allVoices.length === 0) {
window.speechSynthesis.speak(utter);
updateAllButtonStates(true);
return;
}
let defaultVoice = allVoices.find(v => v.lang === 'en-US' && v.default) || allVoices.find(v => v.lang === 'en-US') || allVoices.find(v => v.lang.startsWith('en')) || allVoices[0];
let botName = "char";
try {
const botNameElem = document.querySelector('[class^="_nameText_"]');
if (botNameElem && botNameElem.textContent.trim()) botName = botNameElem.textContent.trim();
} catch (e) {}
let userPersona = "User";
try {
const allMessageNodes = document.querySelectorAll('[data-testid="virtuoso-item-list"] > div[data-index]');
for (let i = allMessageNodes.length - 1; i >= 0; i--) {
const node = allMessageNodes[i];
if (!node.querySelector('[class^="_nameIcon_"]')) {
const nameElem = node.querySelector('[class^="_nameText_"]');
if (nameElem && nameElem.textContent.trim()) { userPersona = nameElem.textContent.trim(); break; }
}
}
} catch (e) {}
let targetVoiceName = 'Default';
if (isBot) {
targetVoiceName = settings[`charVoice_${botName}`] || settings.defaultVoice || 'Default';
} else {
targetVoiceName = settings[`userVoice_${userPersona}`] || settings.defaultVoice || 'Default';
}
let selectedVoice = (targetVoiceName !== 'Default') ? allVoices.find(v => v.name === targetVoiceName) : null;
utter.voice = selectedVoice || defaultVoice;
utter.onstart = () => { updateAllButtonStates(true); };
utter.onend = () => { updateAllButtonStates(false); };
utter.onerror = () => { updateAllButtonStates(false); };
window.speechSynthesis.speak(utter);
}
// ElevenLabs (with timestamps, WAV generation, and AudioBuffer)
async function playElevenLabsTTS(text, isBot, settings) {
const apiKey = settings.elevenlabs_apiKey;
if (!settings['elevenlabs_tts-enabled'] || !apiKey) { stopTTS(); return; }
// If Live2D is active, use Gemini emotion analysis and audio segmentation
if (live2dScriptDetected) {
console.log("🎭 Live2D script detected - Starting emotion-based audio segmentation");
try {
// 1. Process the text according to the user's settings
console.log("📝 Step 1: Processing text with TTS filters...");
const { processed: processedText } = processTTSOutput(text);
if (!processedText || processedText.trim() === '') {
console.warn("⚠️ Processed text is empty after filters, skipping emotion analysis");
return;
}
console.log(`✓ Text processed: ${processedText.length} characters`);
// 2. Get character/user names (needed for voice selection in the parallel call)
console.log("👤 Step 2: Getting character/user names...");
let botName = "char";
try {
const botNameElem = document.querySelector('[class^="_nameText_"]');
if (botNameElem && botNameElem.textContent.trim()) botName = botNameElem.textContent.trim();
} catch (e) {
console.warn("⚠️ Could not get bot name:", e);
}
let userPersona = "User";
try {
const allMessageNodes = document.querySelectorAll('[data-testid="virtuoso-item-list"] > div[data-index]');
for (let i = allMessageNodes.length - 1; i >= 0; i--) {
const node = allMessageNodes[i];
if (!node.querySelector('[class^="_nameIcon_"]')) {
const nameElem = node.querySelector('[class^="_nameText_"]');
if (nameElem && nameElem.textContent.trim()) { userPersona = nameElem.textContent.trim(); break; }
}
}
} catch (e) {
console.warn("⚠️ Could not get user persona:", e);
}
console.log(`✓ Bot: "${botName}", User: "${userPersona}"`);
// 3. Get the voice configuration
console.log("🎤 Step 3: Getting voice configuration...");
let voiceId;
if (isBot) voiceId = settings[`elevenlabs_charVoice_${botName}`] || settings.elevenlabs_defaultVoice;
else voiceId = settings[`elevenlabs_userVoice_${userPersona}`] || settings.elevenlabs_defaultVoice;
if (!voiceId || voiceId === 'Default') {
console.error("❌ No ElevenLabs voice selected for this speaker.");
console.error(` Looking for: ${isBot ? `elevenlabs_charVoice_${botName}` : `elevenlabs_userVoice_${userPersona}`}`);
console.error(` Fallback: elevenlabs_defaultVoice = ${settings.elevenlabs_defaultVoice}`);
return;
}
console.log(`✓ Using voice ID: ${voiceId}`);
// 4. Prepare the request body
console.log("⚙️ Step 4: Preparing ElevenLabs request...");
const stability = typeof settings.elevenlabs_stability !== "undefined" ? parseFloat(settings.elevenlabs_stability) : 0.50;
const similarity = typeof settings.elevenlabs_similarity !== "undefined" ? parseFloat(settings.elevenlabs_similarity) : 0.75;
const style = typeof settings.elevenlabs_style !== "undefined" ? parseFloat(settings.elevenlabs_style) : 0.00;
const speakerBoost = !!settings['elevenlabs_speaker-boost'];
const requestBody = {
text: processedText,
model_id: settings.elevenlabs_modelId,
voice_settings: {
stability: stability,
similarity_boost: similarity,
style: style,
use_speaker_boost: speakerBoost
}
};
console.log(`✓ Request body prepared (model: ${settings.elevenlabs_modelId})`);
// ⚡ OPTIMIZATION: Execute Gemini emotion analysis and ElevenLabs TTS in PARALLEL
console.log("⚡ Step 5: Starting PARALLEL execution of emotion analysis and TTS generation...");
const parallelStartTime = performance.now();
const [emotionSegments, responseData] = await Promise.all([
analyzeTextWithGemini(processedText)
.then(segments => {
console.log(`✓ Emotion analysis completed (${((performance.now() - parallelStartTime) / 1000).toFixed(2)}s)`);
return segments;
})
.catch(err => {
console.warn("⚠️ Emotion analysis failed, will continue without emotions:", err);
return null;
}),
elevenLabsApiRequest({
method: 'POST',
endpoint: `/v1/text-to-speech/${voiceId}/with-timestamps`,
apiKey: apiKey,
data: requestBody,
responseType: 'json'
})
.then(data => {
console.log(`✓ ElevenLabs TTS completed (${((performance.now() - parallelStartTime) / 1000).toFixed(2)}s)`);
return data;
})
]);
const parallelTotalTime = ((performance.now() - parallelStartTime) / 1000).toFixed(2);
console.log(`⚡ ✓ PARALLEL execution completed in ${parallelTotalTime}s (vs sequential: would take ~${(parseFloat(parallelTotalTime) * 1.5).toFixed(2)}s)`);
if (!emotionSegments || emotionSegments.length === 0) {
console.error("❌ No emotion segments returned from Gemini");
return;
}
console.log(`✓ Got ${emotionSegments.length} emotion segments from Gemini`);
if (!responseData || !responseData.audio_base64 || !responseData.alignment) {
console.error("❌ Invalid response from ElevenLabs:", responseData);
return;
}
console.log("✓ Timestamps received from ElevenLabs");
// 6. Decode the base64 audio
console.log("🔄 Step 6: Decoding base64 audio...");
const audioData = base64ToArrayBuffer(responseData.audio_base64);
console.log(`✓ Audio data decoded: ${audioData.byteLength} bytes`);
// 7. Decode to an AudioBuffer
console.log("🎵 Step 7: Converting to AudioBuffer...");
const audioBuffer = await decodeTTSArrayBuffer(audioData);
console.log(`✓ Audio decoded to AudioBuffer (${audioBuffer.duration.toFixed(2)}s)`);
// 8. Calculate exact timings for each segment using the alignment data
console.log("⏱️ Step 8: Calculating segment timings...");
const segmentTimings = calculateSegmentEndTimes(responseData.alignment, emotionSegments);
if (!segmentTimings || segmentTimings.length === 0) {
console.error("❌ Failed to calculate segment timings - no valid segments returned");
return;
}
console.log(`✓ Calculated timings for ${segmentTimings.length} segments`);
// 9. Split the audio into segments by timestamps
console.log("✂️ Step 9: Splitting audio by timestamps...");
const audioBlobs = await splitAudioByTimestamps(audioBuffer, segmentTimings);
if (!audioBlobs || audioBlobs.length === 0) {
console.error("❌ Failed to split audio - no blobs created");
return;
}
console.log(`✓ Created ${audioBlobs.length} audio blobs`);
// 10. Generate blob URLs for each segment
console.log("🔗 Step 10: Generating blob URLs...");
console.log("\n🎵 === EMOTION-BASED AUDIO SEGMENTS ===");
const emotionAudioSegments = audioBlobs.map((wavBlob, index) => {
const blobUrl = URL.createObjectURL(wavBlob);
const timing = segmentTimings[index];
const textPreview = timing.text.substring(0, 50);
const actionText = timing.action ? ` [Action: ${timing.action}]` : '';
console.log(`\n📦 Segment ${index + 1} [${timing.emotion}]${actionText}:`);
console.log(` Text: "${textPreview}${timing.text.length > 50 ? '...' : ''}"`);
console.log(` Time: ${timing.startTime.toFixed(3)}s - ${timing.endTime.toFixed(3)}s`);
console.log(` Duration: ${timing.duration.toFixed(3)}s`);
console.log(` WAV Blob URL: ${blobUrl}`);
return {
emotion: timing.emotion,
action: timing.action,
text: timing.text,
blobUrl: blobUrl,
startTime: timing.startTime,
endTime: timing.endTime,
duration: timing.duration
};
});
console.log("\n✅ All emotion-based segments generated successfully");
// 11. Send the segments to the Live2D script via a CustomEvent
console.log("📤 Step 11: Dispatching segments to Live2D...");
const event = new CustomEvent('TTSEmotionSegmentsReady', {
detail: {
segments: emotionAudioSegments,
totalDuration: audioBuffer.duration,
sampleRate: audioBuffer.sampleRate
}
});
window.dispatchEvent(event);
console.log("✅ Emotion segments dispatched to Live2D script via 'TTSEmotionSegmentsReady' event");
console.log("⚠️ Playback skipped - Live2D script will handle audio playback\n");
} catch (error) {
console.error("❌ Error during emotion-based segmentation:");
console.error(" Error type:", error.name);
console.error(" Error message:", error.message);
console.error(" Error stack:", error.stack);
// Try to identify the step where the failure occurred
if (error.message.includes('Gemini') || error.message.includes('fetch')) {
console.error(" → Likely failed during the Gemini emotion analysis call (Step 5)");
} else if (error.message.includes('elevenlabs') || error.message.includes('API')) {
console.error(" → Likely failed during the ElevenLabs API call (Step 5)");
} else if (error.message.includes('decode') || error.message.includes('Audio')) {
console.error(" → Likely failed during audio decoding (Steps 6-7)");
} else if (error.message.includes('segment') || error.message.includes('timing')) {
console.error(" → Likely failed during segment processing (Steps 8-10)");
}
}
return;
}
// If Live2D is not active, play normally
stopTTS();
let botName = "char";
try {
const botNameElem = document.querySelector('[class^="_nameText_"]');
if (botNameElem && botNameElem.textContent.trim()) botName = botNameElem.textContent.trim();
} catch (e) {}
let userPersona = "User";
try {
const allMessageNodes = document.querySelectorAll('[data-testid="virtuoso-item-list"] > div[data-index]');
for (let i = allMessageNodes.length - 1; i >= 0; i--) {
const node = allMessageNodes[i];
if (!node.querySelector('[class^="_nameIcon_"]')) {
const nameElem = node.querySelector('[class^="_nameText_"]');
if (nameElem && nameElem.textContent.trim()) { userPersona = nameElem.textContent.trim(); break; }
}
}
} catch (e) {}
let voiceId;
if (isBot) voiceId = settings[`elevenlabs_charVoice_${botName}`] || settings.elevenlabs_defaultVoice;
else voiceId = settings[`elevenlabs_userVoice_${userPersona}`] || settings.elevenlabs_defaultVoice;
if (!voiceId || voiceId === 'Default') {
console.error("TTS Userscript: No ElevenLabs voice selected for this speaker.");
return;
}
const playbackSpeed = parseFloat(settings.elevenlabs_playbackSpeed) || 1.0;
const stability = typeof settings.elevenlabs_stability !== "undefined" ? parseFloat(settings.elevenlabs_stability) : 0.50;
const similarity = typeof settings.elevenlabs_similarity !== "undefined" ? parseFloat(settings.elevenlabs_similarity) : 0.75;
const style = typeof settings.elevenlabs_style !== "undefined" ? parseFloat(settings.elevenlabs_style) : 0.00;
const speakerBoost = !!settings['elevenlabs_speaker-boost'];
const requestBody = {
text: text,
model_id: settings.elevenlabs_modelId,
voice_settings: {
stability: stability,
similarity_boost: similarity,
style: style,
use_speaker_boost: speakerBoost
}
};
try {
updateAllButtonStates(true);
const responseData = await elevenLabsApiRequest({
method: 'POST',
endpoint: `/v1/text-to-speech/${voiceId}/with-timestamps`,
apiKey: apiKey,
data: requestBody,
responseType: 'json'
});
const audioBase64 = responseData.audio_base64;
const alignment = responseData.alignment;
console.log("ElevenLabs Timestamps (Alignment):", alignment);
const audioData = base64ToArrayBuffer(audioBase64);
// Decode to an AudioBuffer
const audioBuffer = await decodeTTSArrayBuffer(audioData);
logAudioBuffer(audioBuffer);
// Create a WAV from the AudioBuffer and generate a downloadable blob URL
const wavBlob = bufferToWave(audioBuffer);
const wavBlobUrl = URL.createObjectURL(wavBlob);
console.log("🎵 ElevenLabs Audio WAV Download URL:");
console.log(wavBlobUrl);
console.log("💾 To download: Right-click the link above and 'Save link as...' or run:");
console.log(`const a = document.createElement('a'); a.href = '${wavBlobUrl}'; a.download = 'tts_audio_${Date.now()}.wav'; a.click();`);
console.log("--------------------");
// Dispatch an event with the decoded AudioBuffer (for other consumers if needed)
dispatchTTSDecodedAudio(audioBuffer, playbackSpeed, alignment);
// Create an MP3 blob for playback
const blob = new Blob([audioData], { type: 'audio/mpeg' });
const audioUrl = URL.createObjectURL(blob);
// Play the audio directly (we already verified Live2D is not active)
currentElevenLabsAudio = new Audio(audioUrl);
currentElevenLabsAudio.playbackRate = playbackSpeed;
currentElevenLabsAudio.onended = () => {
updateAllButtonStates(false);
currentElevenLabsAudio = null;
};
currentElevenLabsAudio.onerror = () => {
updateAllButtonStates(false);
currentElevenLabsAudio = null;
};
currentElevenLabsAudio.play();
} catch (error) {
console.error("TTS Userscript: ElevenLabs generation failed:", error);
alert(`ElevenLabs TTS failed: ${error.message}`);
updateAllButtonStates(false);
}
}
// ==========================================================
// SECTION 7. ELEVENLABS API HELPERS
// ----------------------------------------------------------
// - API requests (GM_xmlhttpRequest)
// - API key validation
// ==========================================================
function elevenLabsApiRequest(options) {
const { method, endpoint, apiKey, params = {}, data = null, responseType = 'json' } = options;
let url = `https://api.elevenlabs.io${endpoint}`;
if (Object.keys(params).length > 0) url += `?${new URLSearchParams(params).toString()}`;
return new Promise((resolve, reject) => {
GM_xmlhttpRequest({
method: method,
url: url,
headers: { "xi-api-key": apiKey, "Content-Type": "application/json" },
data: data ? JSON.stringify(data) : null,
responseType: responseType,
onload: function(response) {
if (response.status === 200) {
resolve(responseType === 'json' ? JSON.parse(response.responseText) : response.response);
} else {
let errorMessage = `Error: ${response.status}`;
try {
const errorDetail = JSON.parse(response.responseText).detail;
if (typeof errorDetail === 'string') errorMessage = errorDetail;
else if (errorDetail[0]?.msg) errorMessage = errorDetail[0].msg;
} catch (e) { /* ignore */ }
reject({ status: response.status, message: errorMessage });
}
},
onerror: function(error) {
reject({ status: 0, message: `Network error: ${error.statusText || 'Unknown'}` });
}
});
});
}
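// Usage examples (as called elsewhere in this script):
//   const voices = await elevenLabsApiRequest({ method: "GET", endpoint: "/v1/voices", apiKey });
//   const tts = await elevenLabsApiRequest({
//     method: "POST",
//     endpoint: `/v1/text-to-speech/${voiceId}/with-timestamps`,
//     apiKey,
//     data: requestBody,
//     responseType: 'json'
//   });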
async function validateElevenLabsKey(apiKey) {
try {
await elevenLabsApiRequest({ method: "GET", endpoint: "/v1/models", apiKey });
return { isValid: true, message: "API Key Valid" };
} catch (error) {
return { isValid: false, message: `Invalid API Key` };
}
}
// ==========================================================
// SECTION 8. CONTROL BUTTONS ON EACH MESSAGE
// ----------------------------------------------------------
// - Injects a play/stop button into the message control panel
// - Respects settings (provider, narrate user, etc.)
// ==========================================================
const PLAY_SVG = `
<svg class="w-6 h-6 text-gray-800 dark:text-white" aria-hidden="true" xmlns="http://www.w3.org/2000/svg" width="24" height="24" fill="currentColor" viewBox="0 0 24 24">
<path fill-rule="evenodd" d="M12 5a7 7 0 0 0-7 7v1.17c.313-.11.65-.17 1-.17h2a1 1 0 0 1 1 1v6a1 1 0 0 1-1 1H6a3 3 0 0 1-3-3v-6a9 9 0 0 1 18 0v6a3 3 0 0 1-3 3h-2a1 1 0 0 1-1-1v-6a1 1 0 0 1 1-1h2c.35 0 .687.06 1 .17V12a7 7 0 0 0-7-7Z" clip-rule="evenodd"/>
</svg>`;
const STOP_SVG = `
<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-circle-stop-icon">
<circle cx="12" cy="12" r="10"/><rect x="9" y="9" width="6" height="6" rx="1"/>
</svg>`;
function injectTempButton(panel) {
if (!panel || panel.querySelector('.temp-btn')) return;
const settings = JSON.parse(localStorage.getItem("ttsSettings") || "{}");
const provider = settings.provider || 'builtin';
const prefix = provider === 'elevenlabs' ? 'elevenlabs_' : provider === 'gemini' ? 'gemini_' : '';
const ttsEnabled = !!settings[`${prefix}tts-enabled`];
const narrateUser = !!settings[`${prefix}tts-narrate-user`];
if (!ttsEnabled) return;
const isBot = !!(panel.closest && panel.closest('[data-index]') && panel.closest('[data-index]').querySelector(BOT_NAME_ICON_SELECTOR));
if (!narrateUser && !isBot) return;
const btn = document.createElement('button');
btn.type = 'button';
btn.className = '_controlPanelButton_prxth_8 temp-btn';
btn.style.marginLeft = '0px';
btn.innerHTML = isPlaying ? STOP_SVG : PLAY_SVG;
btn.onclick = function() {
if ((window.speechSynthesis && window.speechSynthesis.speaking) || currentElevenLabsAudio) {
stopTTS();
return;
}
const messageWrapper = this.closest(MESSAGE_WRAPPER_SELECTOR);
if (messageWrapper) {
const messageText = extractFormattedMessageText(messageWrapper);
const { processed: processedTTS } = processTTSOutput(messageText);
// Log raw and processed text on manual click
console.log("📜 Raw extracted text (Manual):");
console.log(messageText);
console.log("\n🎤 Processed TTS (Manual):");
console.log(processedTTS || "[No TTS output]");
console.log("--------------------");
if (processedTTS) playTTS(processedTTS, isBot);
}
};
panel.insertBefore(btn, panel.firstChild);
}
// Watch for control panels appearing so the play/stop button can be injected
const controlPanelObserver = new MutationObserver(mutations => {
for (const mutation of mutations) {
for (const node of mutation.addedNodes) {
if (node.nodeType === Node.ELEMENT_NODE) {
if (node.matches(CONTROL_PANEL_SELECTOR)) injectTempButton(node);
node.querySelectorAll?.(CONTROL_PANEL_SELECTOR).forEach(injectTempButton);
}
}
}
});
function startControlPanelObserver() {
const chatContainer = document.querySelector(CHAT_CONTAINER_SELECTOR);
if (chatContainer) {
document.querySelectorAll(CONTROL_PANEL_SELECTOR).forEach(injectTempButton);
controlPanelObserver.observe(chatContainer, { childList: true, subtree: true });
} else {
setTimeout(startControlPanelObserver, 1000);
}
}
startControlPanelObserver();
})();