JanitorAI - Text to Speech - Built-in/ElevenLabs/GeminiTTS

Text to Speech (TTS) integration for JanitorAI using built-in voices, ElevenLabs TTS, and Gemini TTS with emotion analysis and audio segmentation.

您需要先安装一个扩展,例如 篡改猴、Greasemonkey 或 暴力猴,之后才能安装此脚本。

You will need to install an extension such as Tampermonkey to install this script.

您需要先安装一个扩展,例如 篡改猴 或 暴力猴,之后才能安装此脚本。

您需要先安装一个扩展,例如 篡改猴 或 Userscripts,之后才能安装此脚本。

您需要先安装一款用户脚本管理器扩展,例如 Tampermonkey,才能安装此脚本。

您需要先安装用户脚本管理器扩展后才能安装此脚本。

(我已经安装了用户脚本管理器,让我安装!)

您需要先安装一款用户样式管理器扩展,比如 Stylus,才能安装此样式。

您需要先安装一款用户样式管理器扩展,比如 Stylus,才能安装此样式。

您需要先安装一款用户样式管理器扩展,比如 Stylus,才能安装此样式。

您需要先安装一款用户样式管理器扩展后才能安装此样式。

您需要先安装一款用户样式管理器扩展后才能安装此样式。

您需要先安装一款用户样式管理器扩展后才能安装此样式。

(我已经安装了用户样式管理器,让我安装!)

// ==UserScript==
// @name         JanitorAI - Text to Speech - Built-in/ElevenLabs/GeminiTTS
// @namespace    http://tampermonkey.net/
// @version      3.9.5
// @license      MIT
// @description  Text to Speech (TTS) integration for JanitorAI using built-in voices, ElevenLabs TTS, and Gemini TTS with emotion analysis and audio segmentation.
// @author       Zephyr (xzeph__ on Discord)
// @match        https://janitorai.com/chats/*
// @icon         https://www.google.com/s2/favicons?sz=64&domain=janitorai.com
// @grant        GM_xmlhttpRequest
// @grant        GM_addStyle
// @connect      api.elevenlabs.io
// @connect      generativelanguage.googleapis.com
// ==/UserScript==

// ==========================================================
// SECCIÓN A. UTILIDADES DE AUDIO (globales, antes del userscript)
// ----------------------------------------------------------
// - Inicializa/recupera AudioContext para ElevenLabs
// - Decodifica ArrayBuffer a AudioBuffer
// - Convierte AudioBuffer a WAV Blob
// - Convierte base64 a ArrayBuffer
// - Despacha evento con AudioBuffer decodificado
// - Logs para depurar AudioBuffer
// ==========================================================

// --- ElevenLabs TTS AudioContext and AudioBuffer integration ---
/** Cached AudioContext used by the ElevenLabs helpers; stays null until first requested. */
let elevenLabsAudioContext = null;

/**
 * Returns the cached AudioContext, constructing it on the first call.
 * Uses `window.AudioContext` when present and `window.webkitAudioContext` otherwise.
 *
 * @returns {AudioContext} The same context instance on every call.
 */
function getElevenLabsAudioContext() {
    if (elevenLabsAudioContext) {
        return elevenLabsAudioContext;
    }
    const ContextConstructor = window.AudioContext || window.webkitAudioContext;
    elevenLabsAudioContext = new ContextConstructor();
    return elevenLabsAudioContext;
}

/**
 * Decodes encoded audio bytes into an AudioBuffer with the shared ElevenLabs AudioContext.
 *
 * Declared `async` and awaits the decode so that every failure — an empty/null input
 * as well as a rejected `decodeAudioData` — is logged and surfaces as a rejected
 * promise carrying the same wrapped message.
 *
 * @param {ArrayBuffer} arrayBuffer - Encoded audio data; must be non-empty.
 * @returns {Promise<AudioBuffer>} Resolves with the decoded audio.
 * @throws {Error} (as a rejection) `decodeTTSArrayBuffer failed: <reason>`; the original
 *   error is attached as `cause`.
 */
async function decodeTTSArrayBuffer(arrayBuffer) {
    try {
        if (!arrayBuffer || arrayBuffer.byteLength === 0) {
            throw new Error("ArrayBuffer is empty or null");
        }
        const audioContext = getElevenLabsAudioContext();
        // Decode a copy so the caller's ArrayBuffer is not consumed by decodeAudioData.
        return await audioContext.decodeAudioData(arrayBuffer.slice(0));
    } catch (error) {
        console.error("❌ Failed to decode ArrayBuffer to AudioBuffer:", error);
        // Some engines reject with a non-Error value, so do not assume `.message` exists.
        const reason = error?.message ?? String(error);
        throw new Error(`decodeTTSArrayBuffer failed: ${reason}`, { cause: error });
    }
}

/**
 * Broadcasts decoded audio on `window` as an `ElevenLabsTTSDecodedAudio` CustomEvent.
 *
 * @param {AudioBuffer} audioBuffer - Decoded audio placed in `detail.audioBuffer`.
 * @param {number} [playbackRate=1.0] - Placed in `detail.playbackRate`.
 * @param {*} [alignment] - Placed in `detail.alignment` (undefined when omitted).
 */
function dispatchTTSDecodedAudio(audioBuffer, playbackRate = 1.0, alignment) {
    const detail = { audioBuffer, playbackRate, alignment };
    window.dispatchEvent(new CustomEvent('ElevenLabsTTSDecodedAudio', { detail }));
}

/**
 * Debug helper: prints an AudioBuffer's basic properties and each channel's sample data
 * to the console. Logs an error and returns when the argument is not an AudioBuffer.
 *
 * @param {AudioBuffer} audioBuffer - Buffer to describe.
 */
function logAudioBuffer(audioBuffer) {
    const isAudioBuffer = audioBuffer instanceof AudioBuffer;
    if (!isAudioBuffer) {
        console.error('Provided object is not an AudioBuffer');
        return;
    }

    console.log('AudioBuffer Info:');
    const summaryRows = [
        ['Sample Rate:', audioBuffer.sampleRate],
        ['Number of Channels:', audioBuffer.numberOfChannels],
        ['Length (frames):', audioBuffer.length],
        ['Duration (seconds):', audioBuffer.duration],
    ];
    for (const [label, value] of summaryRows) {
        console.log(label, value);
    }

    let channel = 0;
    while (channel < audioBuffer.numberOfChannels) {
        console.log(`Channel ${channel} Data:`, audioBuffer.getChannelData(channel));
        channel += 1;
    }
}

/**
 * Decodes a base64 string into the raw bytes it represents.
 *
 * @param {string} base64 - Base64-encoded data.
 * @returns {ArrayBuffer} Buffer holding one byte per decoded character.
 * @throws {Error} `base64ToArrayBuffer failed: <reason>` when `window.atob` rejects the input.
 */
function base64ToArrayBuffer(base64) {
    try {
        const decoded = window.atob(base64);
        // atob yields one character per byte, so each char code is the byte value.
        const bytes = Uint8Array.from(decoded, (char) => char.charCodeAt(0));
        return bytes.buffer;
    } catch (error) {
        console.error("❌ Failed to decode base64 to ArrayBuffer:", error);
        throw new Error(`base64ToArrayBuffer failed: ${error.message}`);
    }
}

/**
 * Converts an AudioBuffer into a WAV Blob (16-bit PCM, little-endian, channels
 * interleaved). Used for segment splitting/export.
 *
 * Sample conversion: each float sample is clamped to [-1, 1], scaled by 32768 when
 * negative and by 32767 otherwise, then rounded to the nearest integer. The previous
 * expression `0.5 + sample < 0 ? …` was parsed as `(0.5 + sample) < 0`, so samples in
 * [-0.5, 0) used the positive scale and nothing was rounded.
 *
 * @param {AudioBuffer} abuffer - Source audio.
 * @returns {Blob} Blob of type "audio/wav": 44-byte header followed by the samples.
 * @throws {Error} `bufferToWave failed: <reason>` when the input is not an AudioBuffer
 *   or encoding fails.
 */
function bufferToWave(abuffer) {
    try {
        if (!abuffer || !(abuffer instanceof AudioBuffer)) {
            throw new Error("Invalid AudioBuffer provided");
        }

        const HEADER_BYTES = 44;
        const BYTES_PER_SAMPLE = 2;
        const numOfChan = abuffer.numberOfChannels;
        const frameCount = abuffer.length;
        const blockAlign = numOfChan * BYTES_PER_SAMPLE;
        const totalBytes = HEADER_BYTES + frameCount * blockAlign;
        const view = new DataView(new ArrayBuffer(totalBytes));
        let pos = 0;

        // Sequential little-endian writers; `pos` is the shared write cursor.
        const setUint16 = (data) => { view.setUint16(pos, data, true); pos += 2; };
        const setUint32 = (data) => { view.setUint32(pos, data, true); pos += 4; };

        // WAVE header
        setUint32(0x46464952); // "RIFF"
        setUint32(totalBytes - 8); // file length - 8
        setUint32(0x45564157); // "WAVE"
        setUint32(0x20746d66); // "fmt "
        setUint32(16); // fmt chunk length
        setUint16(1); // PCM
        setUint16(numOfChan);
        setUint32(abuffer.sampleRate);
        setUint32(abuffer.sampleRate * blockAlign); // byte rate
        setUint16(blockAlign);
        setUint16(16); // bits per sample
        setUint32(0x61746164); // "data"
        setUint32(totalBytes - HEADER_BYTES); // data chunk length

        const channels = [];
        for (let channel = 0; channel < numOfChan; channel++) {
            channels.push(abuffer.getChannelData(channel));
        }

        // Interleaved samples: frame by frame, one sample per channel.
        for (let frame = 0; frame < frameCount; frame++) {
            for (let channel = 0; channel < numOfChan; channel++) {
                const clamped = Math.max(-1, Math.min(1, channels[channel][frame]));
                const scaled = clamped < 0 ? clamped * 32768 : clamped * 32767;
                view.setInt16(pos, Math.round(scaled), true);
                pos += 2;
            }
        }

        return new Blob([view.buffer], { type: "audio/wav" });
    } catch (error) {
        console.error("❌ Failed to convert AudioBuffer to WAV:", error);
        throw new Error(`bufferToWave failed: ${error.message}`);
    }
}

/**
 * Wraps raw PCM bytes in a 44-byte RIFF/WAVE header (helper for Gemini TTS audio).
 * Defaults describe 24 kHz, mono, 16-bit PCM.
 *
 * @param {ArrayBuffer} pcmBuffer - Raw PCM sample bytes.
 * @param {number} [rate=24000] - Sample rate written to the header.
 * @param {number} [ch=1] - Channel count written to the header.
 * @param {number} [bits=16] - Bits per sample written to the header.
 * @returns {ArrayBuffer} New buffer containing the header followed by a copy of the PCM bytes.
 */
function createWavFromPCM(pcmBuffer, rate = 24000, ch = 1, bits = 16) {
  const HEADER_SIZE = 44;
  const dataSize = pcmBuffer.byteLength;
  const bytesPerFrame = ch * bits / 8;
  const wavBuffer = new ArrayBuffer(HEADER_SIZE + dataSize);
  const header = new DataView(wavBuffer);

  // Writes a four-character chunk tag as single bytes starting at `at`.
  const putTag = (at, tag) => {
    Array.from(tag).forEach((char, k) => header.setUint8(at + k, char.charCodeAt(0)));
  };

  // RIFF container
  putTag(0, 'RIFF');
  header.setUint32(4, 36 + dataSize, true);
  putTag(8, 'WAVE');

  // "fmt " chunk: 16 bytes, format tag 1 (PCM)
  putTag(12, 'fmt ');
  header.setUint32(16, 16, true);
  header.setUint16(20, 1, true);
  header.setUint16(22, ch, true);
  header.setUint32(24, rate, true);
  header.setUint32(28, rate * bytesPerFrame, true);
  header.setUint16(32, bytesPerFrame, true);
  header.setUint16(34, bits, true);

  // "data" chunk
  putTag(36, 'data');
  header.setUint32(40, dataSize, true);

  const wavBytes = new Uint8Array(wavBuffer);
  wavBytes.set(new Uint8Array(pcmBuffer), HEADER_SIZE);
  return wavBuffer;
}

(function () {
  "use strict";

  // ==========================================================
  // SECCIÓN 0. ESTADO GLOBAL, RESET DE AJUSTES Y CONSTANTES
  // ----------------------------------------------------------
  // - Resetea formato viejo de settings si aplica
  // - Define selectores y flags comunes
  // ==========================================================

  // Legacy settings reset: the stored "ttsSettings" entry is removed when it still has
  // a `charVoice` or `userVoice` key, or when reading/parsing it throws.
  try {
    const storedSettings = JSON.parse(localStorage.getItem("ttsSettings") || "{}");
    const hasLegacyVoiceKey = ['charVoice', 'userVoice'].some((key) => storedSettings.hasOwnProperty(key));
    if (hasLegacyVoiceKey) {
      console.log('TTS Userscript: Detected old voice setting format. Resetting to defaults.');
      localStorage.removeItem("ttsSettings");
    }
  } catch (settingsError) {
    console.error("TTS Userscript: Could not parse settings, resetting to default.", settingsError);
    localStorage.removeItem("ttsSettings");
  }

  // Chat/control selectors.
  // Most use the `[class^="_prefix_"]` form, which matches on the start of the class
  // attribute instead of a full class name (presumably because the page's class names
  // carry generated suffixes — TODO confirm against the live DOM).
  const CHAT_CONTAINER_SELECTOR = '[class^="_messagesMain_"]';
  // Direct `div[data-index]` children of the element tagged data-testid="virtuoso-item-list";
  // the message scanners read each match's `dataset.index`.
  const MESSAGE_CONTAINER_SELECTOR = '[data-testid="virtuoso-item-list"] > div[data-index]';
  // Its presence inside a message container is how the scanners treat a message as a bot message.
  const BOT_NAME_ICON_SELECTOR = '[class^="_nameIcon_"]';
  // Swipe (alternative replies) container and the slider whose inline translateX(...)% is
  // read to determine which swipe is active.
  const LAST_MESSAGE_SWIPE_CONTAINER_SELECTOR = '[class^="_botChoicesContainer_"]';
  const SWIPE_SLIDER_SELECTOR = '[class^="_botChoicesSlider_"]';
  const MESSAGE_WRAPPER_SELECTOR = 'li[class^="_messageDisplayWrapper_"]';
  const MESSAGE_BODY_SELECTOR = '[class^="_messageBody_"]';
  const NAME_CONTAINER_SELECTOR = '[class^="_nameContainer_"]';
  // The scanners skip a message that contains an edit panel, and require a control panel
  // before treating it as finished.
  const EDIT_PANEL_SELECTOR = '[class^="_editPanel_"]';
  const CONTROL_PANEL_SELECTOR = '[class^="_controlPanel_"]';
  const BOT_NAME_SELECTOR = '[class^="_nameText_"]';

  // Last-processed message state, compared by the message scanners so the same
  // message/swipe is not logged and spoken again; plus the Live2D detection flag.
  let lastLoggedText = "";
  let lastLoggedStatus = "";
  let lastLoggedSwipeIndex = -1;
  let lastLoggedMessageIndex = -1;
  // Set to true by the "Live2DScriptReady" window event listener.
  let live2dScriptDetected = false;

  // ==========================================================
  // SECCIÓN 0.5. CONSTANTES Y FUNCIONES DE GEMINI
  // ----------------------------------------------------------
  // - API key y endpoint de Gemini
  // - Lista de emociones soportadas
  // - Función para analizar texto con Gemini
  // - Función para calcular tiempos de segmentos según emoción
  // ==========================================================

  // Gemini credentials and endpoint. The key is a placeholder and is embedded in the
  // endpoint URL as the `key` query parameter.
  const GEMINI_API_KEY = "YOUR_GEMINI_API_KEY"; // Replace with your real key
  const GEMINI_MODEL = "gemini-2.5-flash";
  // generateContent endpoint for the model above; used by analyzeTextWithGemini.
  const GEMINI_ENDPOINT = `https://generativelanguage.googleapis.com/v1beta/models/${GEMINI_MODEL}:generateContent?key=${GEMINI_API_KEY}`;
  // Emotion labels offered to Gemini; joined with ", " into the analysis prompt.
  const EMOTION_LIST = [
    "Admiration", "Amusement", "Anger", "Annoyance", "Approval", "Caring", "Confusion",
    "Curiosity", "Desire", "Disappointment", "Disapproval", "Disgust", "Embarrassment",
    "Excitement", "Fear", "Gratitude", "Joy", "Love", "Nervousness", "Neutral",
    "Optimism", "Pride", "Realization", "Relief", "Remorse", "Sadness", "Surprise"
  ];

  /**
   * Asks Gemini to split `text` into consecutive segments, each tagged with an emotion
   * (from EMOTION_LIST) and an optional action (from `availableActions`).
   *
   * The reply is accepted only if it is a non-empty array whose entries are objects with
   * a non-empty string `text`, and whose `text` fields concatenate back to `text` exactly.
   * Any other reply (unparseable JSON, wrong shape, null entries, altered characters)
   * falls back to a single "Neutral" segment covering the whole input.
   *
   * @param {string} text - Text to analyze.
   * @returns {Promise<Array<{emotion: string, action: (string|null), text: string}>>}
   *   The validated segments as returned by Gemini, or the single-segment fallback.
   * @throws {Error} `Gemini API request failed: <status> <statusText>` when the HTTP
   *   response is not ok. Errors raised by `fetch` itself are not caught here.
   */
  async function analyzeTextWithGemini(text) {
    console.log("🤖 Sending text to Gemini for emotion and action analysis...");

    // Every recoverable failure degrades to one neutral segment spanning the input.
    const neutralFallback = () => [{ emotion: "Neutral", action: null, text: text }];

    // Build the list of available actions for the prompt
    const actionsListText = availableActions.length > 0
      ? availableActions.join(", ")
      : "No actions available";

    const prompt = `You are a text analyzer that MUST preserve EVERY SINGLE CHARACTER from the input.

ABSOLUTE REQUIREMENTS - FAILURE TO COMPLY WILL BREAK THE SYSTEM:

1. CHARACTER PRESERVATION (CRITICAL):
   ✓ Keep EVERY letter (A-Z, a-z)
   ✓ Keep EVERY number (0-9)
   ✓ Keep EVERY punctuation mark (. , ! ? ; : - ' " etc.)
   ✓ Keep EVERY space character (leading, trailing, between words)
   ✓ Keep EVERY special symbol (@ # $ % & * + = etc.)
   ✓ Keep EVERY bracket/parenthesis ( ) [ ] { }
   ✓ Keep EVERY newline character (\\n)
   ✓ Keep EVERY asterisk (*) for actions
   ✓ Keep EVERY quotation mark (" ')
   ✓ Do NOT add, remove, or modify ANY character
   ✓ Do NOT trim whitespace
   ✓ Do NOT normalize text
   ✓ Do NOT fix typos or grammar

2. SEGMENT SPLITTING RULES:
   ✓ Split text ONLY when there is a SIGNIFICANT change in emotion or a physical action occurs
   ✓ BE REALISTIC: Most dialogue doesn't need many segments - only split when truly necessary
   ✓ MERGE CONSECUTIVE SEGMENTS: If two or more segments have the SAME emotion AND the SAME action (or both null), MERGE them into ONE segment
   ✓ Each segment's "text" must contain a COMPLETE, UNBROKEN portion of the original
   ✓ DO NOT cut words in half
   ✓ DO NOT break sentences mid-word
   ✓ Include complete phrases with their surrounding spaces
   ✓ When concatenating all segment "text" fields, the result MUST be IDENTICAL to the input
   ✓ QUALITY OVER QUANTITY: Fewer, meaningful segments are better than many unnecessary ones

3. MERGING EXAMPLES:
   ❌ BAD (too many segments):
   [
     {"emotion": "Joy", "action": null, "text": "Hi there! "},
     {"emotion": "Joy", "action": null, "text": "I'm so happy to see you! "},
     {"emotion": "Joy", "action": null, "text": "How are you?"}
   ]

   ✅ GOOD (merged):
   [
     {"emotion": "Joy", "action": null, "text": "Hi there! I'm so happy to see you! How are you?"}
   ]

   ❌ BAD (unnecessary split):
   [
     {"emotion": "Neutral", "action": null, "text": "I went to the "},
     {"emotion": "Neutral", "action": null, "text": "store yesterday."}
   ]

   ✅ GOOD (kept together):
   [
     {"emotion": "Neutral", "action": null, "text": "I went to the store yesterday."}
   ]

4. VALIDATION CHECK:
   Before responding, verify: input_text == segment[0].text + segment[1].text + ... + segment[n].text
   If they don't match EXACTLY, you have failed.

RESPONSE FORMAT (JSON only, no other text):
[
  {"emotion": "EmotionName", "action": "ActionName or null", "text": "exact text from input"},
  ...
]

EMOTION LIST (choose from): ${EMOTION_LIST.join(", ")}

ACTION LIST (choose from or use null): ${actionsListText}

EXAMPLE 1 (Simple, one segment):
Input: "Hi there! How are you today?"
Output:
[
  {"emotion": "Joy", "action": null, "text": "Hi there! How are you today?"}
]

EXAMPLE 2 (With action):
Input: "Hi there! *waves enthusiastically* How are you today?"
Output:
[
  {"emotion": "Joy", "action": null, "text": "Hi there! "},
  {"emotion": "Joy", "action": "Wave hand", "text": "*waves enthusiastically* "},
  {"emotion": "Curiosity", "action": null, "text": "How are you today?"}
]

EXAMPLE 3 (Emotion change):
Input: "I'm so happy! Wait... what's that noise? Oh no!"
Output:
[
  {"emotion": "Joy", "action": null, "text": "I'm so happy! "},
  {"emotion": "Curiosity", "action": null, "text": "Wait... what's that noise? "},
  {"emotion": "Fear", "action": null, "text": "Oh no!"}
]

IMPORTANT NOTES:
- Notice that ALL spaces, punctuation, and characters are preserved EXACTLY
- Each segment text is COMPLETE and UNBROKEN
- Segments are MERGED when they share the same emotion and action
- Only split when there's a REAL, SIGNIFICANT change
- Concatenating all segments MUST equal the original input EXACTLY

INPUT TEXT TO ANALYZE:
"${text.replace(/"/g, '\\"').replace(/\n/g, '\\n')}"`;

    const payload = {
      contents: [{ parts: [{ text: prompt }] }],
      generationConfig: { response_mime_type: "application/json" },
    };

    const response = await fetch(GEMINI_ENDPOINT, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify(payload),
    });

    if (!response.ok) {
      const errorBody = await response.text();
      console.error("❌ Gemini API Error Body:", errorBody);
      throw new Error(`Gemini API request failed: ${response.status} ${response.statusText}`);
    }

    const data = await response.json();
    console.log("✓ Received response from Gemini:", data);

    let emotionQueue;
    try {
      const responseText = data.candidates?.[0]?.content?.parts?.[0]?.text;
      if (!responseText) {
        throw new Error("No response text from Gemini");
      }
      emotionQueue = JSON.parse(responseText);
    } catch (parseError) {
      console.error("❌ Failed to parse Gemini response:", parseError);
      console.warn("⚠️ Defaulting to single neutral segment.");
      return neutralFallback();
    }

    if (!Array.isArray(emotionQueue) || emotionQueue.length === 0) {
      console.warn("⚠️ Gemini did not return a valid emotion queue. Defaulting to a single neutral segment.");
      return neutralFallback();
    }

    // Shape validation runs BEFORE reconstruction: a null/non-object entry would
    // otherwise throw while reading `.text` instead of taking the fallback path.
    const invalidSegments = emotionQueue.filter(
      seg => !seg || typeof seg.text !== 'string' || seg.text.length === 0
    );
    if (invalidSegments.length > 0) {
      console.error("❌ Found segments with invalid text fields:", invalidSegments);
      console.warn("⚠️ Falling back to single neutral segment.");
      return neutralFallback();
    }

    // Strict validation: ALL characters must be preserved
    const reconstructedText = emotionQueue.map(seg => seg.text).join('');

    if (reconstructedText !== text) {
      console.error("❌ CRITICAL VALIDATION FAILURE:");
      console.error("   Original length:", text.length);
      console.error("   Reconstructed length:", reconstructedText.length);
      console.error("   Character difference:", Math.abs(text.length - reconstructedText.length));

      // Locate the first differing index to make the mismatch debuggable
      const maxLen = Math.max(text.length, reconstructedText.length);
      let firstDiffIndex = -1;
      for (let i = 0; i < maxLen; i++) {
        if (text[i] !== reconstructedText[i]) {
          firstDiffIndex = i;
          break;
        }
      }

      if (firstDiffIndex !== -1) {
        console.error("   First difference at index:", firstDiffIndex);
        console.error("   Expected char:", JSON.stringify(text[firstDiffIndex]));
        console.error("   Got char:", JSON.stringify(reconstructedText[firstDiffIndex]));
        const contextStart = Math.max(0, firstDiffIndex - 20);
        const contextEnd = Math.min(text.length, firstDiffIndex + 20);
        console.error("   Context (original):", JSON.stringify(text.substring(contextStart, contextEnd)));
        console.error("   Context (reconstructed):", JSON.stringify(reconstructedText.substring(contextStart, contextEnd)));
      }

      console.warn("⚠️ Falling back to single neutral segment to ensure accuracy.");
      return neutralFallback();
    }

    console.log("✅ Gemini response VALIDATED - all characters preserved!");
    console.log(`📊 Emotion segments: ${emotionQueue.length}`);
    emotionQueue.forEach((seg, i) => {
      const actionText = seg.action ? ` [Action: ${seg.action}]` : '';
      const preview = seg.text.length > 40 ? seg.text.substring(0, 40) + '...' : seg.text;
      console.log(`  ${i + 1}. [${seg.emotion}]${actionText} "${preview}" (${seg.text.length} chars)`);
    });
    return emotionQueue;
  }

  /**
   * Maps each text segment onto the character-level alignment to obtain its start/end
   * time. A segment's start time is the start time of its first character and its end
   * time is the end time of its last character.
   *
   * The concatenated segment texts must equal the concatenated alignment characters;
   * otherwise the mismatch is logged and an empty array is returned. A missing or
   * malformed alignment/segments argument is handled the same way. Segments whose
   * `text` is missing, empty or not a string contribute no characters and are skipped.
   *
   * @param {{characters: string[], character_start_times_seconds: number[], character_end_times_seconds: number[]}} alignment
   *   Per-character timing arrays.
   * @param {Array<{emotion: string, action?: (string|null), text: string}>} segments
   *   Segments in reading order.
   * @returns {Array<{emotion: string, action: (string|null), text: string, startTime: number, endTime: number, duration: number}>}
   *   One entry per non-empty, in-bounds segment; `[]` on validation failure.
   */
  function calculateSegmentEndTimes(alignment, segments) {
    console.log("--- Calculating Segment End Times (Character-Accurate) ---");
    const {
      characters,
      character_start_times_seconds,
      character_end_times_seconds
    } = alignment || {};

    const alignmentIsUsable =
      Array.isArray(characters) &&
      Array.isArray(character_start_times_seconds) &&
      Array.isArray(character_end_times_seconds);
    if (!alignmentIsUsable || !Array.isArray(segments)) {
      console.error("❌ Missing or malformed alignment/segments; cannot calculate segment timings.");
      return [];
    }

    // Single definition of "a segment's text" so validation and indexing always agree.
    const textOf = (segment) => (typeof segment?.text === 'string' ? segment.text : '');

    const alignmentText = characters.join('');
    const segmentsText = segments.map(textOf).join('');

    console.log("Alignment text length:", alignmentText.length);
    console.log("Segments text length:", segmentsText.length);

    // CRITICAL: Verify exact character match
    if (segmentsText !== alignmentText) {
      console.error("❌ CRITICAL ERROR: Segments text does not match alignment text!");
      console.error("   This means character preservation failed somewhere in the pipeline.");
      console.error("   Alignment text:", JSON.stringify(alignmentText.substring(0, 100)));
      console.error("   Segments text:", JSON.stringify(segmentsText.substring(0, 100)));

      // Find first mismatch
      for (let i = 0; i < Math.max(alignmentText.length, segmentsText.length); i++) {
        if (alignmentText[i] !== segmentsText[i]) {
          console.error(`   First mismatch at index ${i}:`);
          console.error(`     Expected: ${JSON.stringify(alignmentText[i])}`);
          console.error(`     Got: ${JSON.stringify(segmentsText[i])}`);
          break;
        }
      }

      return [];
    }

    console.log("✅ Text validation passed - segments match alignment exactly!");

    let currentCharIndex = 0;
    const segmentTimings = [];

    segments.forEach((segment, i) => {
      const segmentText = textOf(segment);
      const segmentLength = segmentText.length;

      if (segmentLength === 0) {
        console.warn(`⚠️ Segment ${i + 1} has zero length, skipping`);
        return;
      }

      const startCharIndex = currentCharIndex;
      const endCharIndex = currentCharIndex + segmentLength - 1;

      // Safety check: the timing arrays may be shorter than the character array
      if (startCharIndex >= character_start_times_seconds.length ||
          endCharIndex >= character_end_times_seconds.length) {
        console.error(`❌ Segment ${i + 1} indices out of bounds!`);
        console.error(`   Start index: ${startCharIndex}, End index: ${endCharIndex}`);
        console.error(`   Available indices: 0-${character_start_times_seconds.length - 1}`);
        return;
      }

      const startTime = character_start_times_seconds[startCharIndex];
      const endTime = character_end_times_seconds[endCharIndex];

      const actionText = segment.action ? ` [Action: ${segment.action}]` : '';
      const textPreview = segmentText.length > 50 ? segmentText.substring(0, 50) + '...' : segmentText;

      console.log(`Segment ${i + 1} [${segment.emotion}]${actionText}:`);
      console.log(`  Text: "${textPreview}" (${segmentLength} chars)`);
      console.log(`  Char indices: ${startCharIndex} → ${endCharIndex}`);
      console.log(`  Time range: ${startTime.toFixed(3)}s → ${endTime.toFixed(3)}s (${(endTime - startTime).toFixed(3)}s duration)`);

      segmentTimings.push({
        emotion: segment.emotion,
        action: segment.action || null,
        text: segmentText,
        startTime: startTime,
        endTime: endTime,
        duration: endTime - startTime
      });

      currentCharIndex += segmentLength;
    });

    // Final validation
    if (currentCharIndex !== alignmentText.length) {
      console.error(`❌ Character counting error! Processed ${currentCharIndex} chars but expected ${alignmentText.length}`);
    } else {
      console.log(`✅ Segment timings calculated successfully for ${segmentTimings.length} segments`);
      console.log(`✅ Total characters processed: ${currentCharIndex}`);
    }

    return segmentTimings;
  }

  /**
   * Cuts an AudioBuffer into one WAV Blob per timing entry.
   *
   * Start times are floored and end times are ceiled to whole frames, then clamped to
   * the buffer. Segments that end up with no frames, or that start beyond the buffer,
   * are logged and skipped, so the result can be shorter than `segmentTimings`.
   *
   * Segment buffers are created from the shared ElevenLabs AudioContext rather than a
   * new AudioContext per segment, which was never closed.
   *
   * @param {AudioBuffer} audioBuffer - Full decoded audio.
   * @param {Array<{emotion: string, action: (string|null), text: string, startTime: number, endTime: number, duration: number}>} segmentTimings
   *   Timings in seconds, e.g. as produced by calculateSegmentEndTimes.
   * @returns {Promise<Blob[]>} WAV blobs for the segments that could be created, in order.
   */
  async function splitAudioByTimestamps(audioBuffer, segmentTimings) {
    console.log('--- Splitting Audio by Emotion Timestamps ---');
    console.log(`Audio buffer: ${audioBuffer.duration.toFixed(3)}s, ${audioBuffer.numberOfChannels} channels, ${audioBuffer.sampleRate}Hz`);

    const blobs = [];

    for (let i = 0; i < segmentTimings.length; i++) {
      const timing = segmentTimings[i];
      const startOffset = timing.startTime;
      const endOffset = timing.endTime;

      // Frame positions: floor the start, ceil the end; never start before frame 0
      const startFrame = Math.max(0, Math.floor(startOffset * audioBuffer.sampleRate));
      const endFrame = Math.ceil(endOffset * audioBuffer.sampleRate);
      const frameCount = endFrame - startFrame;

      const actionText = timing.action ? ` [Action: ${timing.action}]` : '';
      const textPreview = timing.text.length > 30 ? timing.text.substring(0, 30) + '...' : timing.text;

      console.log(`Segment ${i + 1}/${segmentTimings.length} [${timing.emotion}]${actionText}:`);
      console.log(`  Text: "${textPreview}"`);
      console.log(`  Time: ${startOffset.toFixed(3)}s → ${endOffset.toFixed(3)}s (${timing.duration.toFixed(3)}s)`);
      console.log(`  Frames: ${startFrame} → ${endFrame} (${frameCount} frames)`);

      if (frameCount <= 0) {
        console.error(`  ❌ Invalid frame count (${frameCount}), skipping segment`);
        continue;
      }

      if (startFrame >= audioBuffer.length) {
        console.error(`  ❌ Start frame ${startFrame} exceeds buffer length ${audioBuffer.length}, skipping`);
        continue;
      }

      // Clamp end frame to buffer length
      const actualEndFrame = Math.min(endFrame, audioBuffer.length);
      const actualFrameCount = actualEndFrame - startFrame;

      if (actualFrameCount <= 0) {
        console.error(`  ❌ Actual frame count after clamping is ${actualFrameCount}, skipping`);
        continue;
      }

      try {
        // Resolved inside the try so a missing AudioContext skips the segment
        // (as before) instead of rejecting the whole call.
        const audioContext = getElevenLabsAudioContext();
        const partBuffer = audioContext.createBuffer(
          audioBuffer.numberOfChannels,
          actualFrameCount,
          audioBuffer.sampleRate
        );

        // Bulk-copy the frame range of every channel; the range is already clamped
        // to the source length, so no per-sample bounds check is needed.
        for (let channel = 0; channel < audioBuffer.numberOfChannels; channel++) {
          const sourceData = audioBuffer.getChannelData(channel);
          const targetData = partBuffer.getChannelData(channel);
          targetData.set(sourceData.subarray(startFrame, actualEndFrame));
        }

        const wavBlob = bufferToWave(partBuffer);
        blobs.push(wavBlob);
        console.log(`  ✅ Created WAV segment: ${(wavBlob.size / 1024).toFixed(2)} KB`);

      } catch (error) {
        console.error(`  ❌ Error creating segment ${i + 1}:`, error);
        continue;
      }
    }

    console.log(`✅ Successfully created ${blobs.length}/${segmentTimings.length} emotion-based audio segments`);
    return blobs;
  }

  // ==========================================================
  // SECCIÓN 1. EVENTOS DE INTEGRACIÓN Live2D
  // ----------------------------------------------------------
  // - Live2DScriptReady: saber si Live2D está activo
  // - TTSScriptReady: informar a Live2D que TTS está listo
  // ==========================================================

  // Tell the Live2D script that the TTS script is present and ready by dispatching a
  // 'TTSScriptReady' CustomEvent on window. The event detail carries a version string
  // and a set of capability flags.
  console.log("[TTS] TTS Script initialized. Dispatching 'TTSScriptReady' event...");
  const ttsReadyEvent = new CustomEvent('TTSScriptReady', {
      detail: {
          // NOTE(review): the userscript metadata header declares @version 3.9.5 while
          // this reports '3.9.1' — confirm whether this is a stale value or a separate
          // protocol version before changing it.
          version: '3.9.1',
          capabilities: {
              emotionAnalysis: true,
              segmentedAudio: true,
              elevenLabs: true,
              builtInVoices: true
          }
      }
  });
  // Dispatched once, synchronously, while the script initializes; a listener registered
  // after this point will not receive it.
  window.dispatchEvent(ttsReadyEvent);
  console.log("[TTS] 📢 'TTSScriptReady' event dispatched to Live2D script");

  // The Live2D script announces itself with "Live2DScriptReady": record that once and
  // log it the first time only.
  window.addEventListener("Live2DScriptReady", () => {
      if (live2dScriptDetected) {
          return;
      }
      live2dScriptDetected = true;
      console.log("[TTS] Live2D script detected. TTS will NOT play audio directly when Live2D is active.");
  });

  // Actions advertised by the Live2D model. analyzeTextWithGemini joins this array into
  // the Gemini prompt, so it must always remain an array.
  let availableActions = [];

  // Receives the action list from Live2D via the "Live2DActionsReady" window event.
  // Missing `detail`, a non-array `actions` or a missing `emotions` field are tolerated,
  // so a malformed event cannot throw here or leave a non-array in availableActions.
  window.addEventListener("Live2DActionsReady", function (event) {
      const { emotions, actions, modelName } = event.detail || {};
      availableActions = Array.isArray(actions) ? actions : [];
      console.log(`[TTS] Received actions list from Live2D model "${modelName}":`, availableActions);
      console.log(`[TTS] Emotions available:`, emotions?.length ?? 0);
  });

  // ==========================================================
  // SECCIÓN 2. DETECCIÓN DE MENSAJES DEL BOT/USUARIO
  // ----------------------------------------------------------
  // - Extrae el último mensaje terminado y dispara TTS si procede
  // - Ofrece utilidades para formatear texto a leer
  // ==========================================================

  /**
   * Finds the newest finished bot message and, if it differs from the last one handled,
   * logs it and hands its processed text to playTTS as bot speech.
   *
   * A message counts as finished when its wrapper has a control panel and no edit panel.
   * For messages with a swipe container, the active swipe is derived from the slider's
   * inline translateX percentage.
   */
  function logMessageStatus() {
    const messageNodes = document.querySelectorAll(MESSAGE_CONTAINER_SELECTOR);
    if (messageNodes.length === 0) return;

    let activeMessageNode = null;
    let activeSwipeIndex = 0;
    let messageIndex = -1;

    // Scan from the newest message backwards and stop at the first finished bot message.
    for (let idx = messageNodes.length - 1; idx >= 0; idx--) {
      const container = messageNodes[idx];
      if (!container.querySelector(BOT_NAME_ICON_SELECTOR)) continue;

      let candidate;
      const swipeContainer = container.querySelector(LAST_MESSAGE_SWIPE_CONTAINER_SELECTOR);
      if (!swipeContainer) {
        candidate = container.querySelector(MESSAGE_WRAPPER_SELECTOR);
      } else {
        const slider = swipeContainer.querySelector(SWIPE_SLIDER_SELECTOR);
        if (!slider) continue;
        const transform = slider.style.transform;
        const percentMatch = transform ? transform.match(/translateX\(([-0-9.]+)%\)/) : null;
        const translateX = transform ? parseFloat(percentMatch?.[1] || "0") : 0;
        // Updated before the length check on purpose: the value is kept even when this
        // container is then rejected.
        activeSwipeIndex = Math.round(Math.abs(translateX) / 100);
        const swipes = slider.querySelectorAll(MESSAGE_WRAPPER_SELECTOR);
        if (swipes.length <= activeSwipeIndex) continue;
        candidate = swipes[activeSwipeIndex];
      }

      const isFinished = candidate
        && !candidate.querySelector(EDIT_PANEL_SELECTOR)
        && candidate.querySelector(CONTROL_PANEL_SELECTOR);
      if (!isFinished) continue;

      activeMessageNode = candidate;
      messageIndex = parseInt(container.dataset.index, 10);
      break;
    }
    if (!activeMessageNode) return;

    const messageText = extractFormattedMessageText(activeMessageNode);
    const processedTTS = processTTSOutput(messageText).processed;

    const status = "Finished";
    const nothingChanged =
      status === lastLoggedStatus &&
      activeSwipeIndex === lastLoggedSwipeIndex &&
      messageIndex === lastLoggedMessageIndex &&
      (status === "Streaming" || messageText === lastLoggedText);
    if (nothingChanged) return;

    lastLoggedStatus = status;
    lastLoggedSwipeIndex = activeSwipeIndex;
    lastLoggedMessageIndex = messageIndex;
    lastLoggedText = messageText;

    console.log("📜 Raw extracted text (Auto):");
    console.log(messageText);
    console.log("\n🎤 Processed TTS (Auto):");
    console.log(processedTTS || "[No TTS output]");
    console.log("--------------------");

    if (processedTTS) {
        playTTS(processedTTS, true); // isBot = true
    }
  }

  /**
   * Variant of the message scanner that considers both bot and user messages: finds the
   * newest finished message and, if it differs from the last one handled, logs it and
   * passes its processed text to playTTS together with whether it came from the bot.
   *
   * A message counts as finished when its wrapper has a control panel and no edit panel.
   * Bot messages with a swipe container use the swipe selected by the slider's inline
   * translateX percentage.
   */
  function logLastFinishedMessage() {
    const messageNodes = document.querySelectorAll(MESSAGE_CONTAINER_SELECTOR);
    if (messageNodes.length === 0) return;

    let lastFinishedNode = null;
    let messageIndex = -1;
    let isBot = false;

    // Scan from the newest message backwards; stop at the first finished one.
    for (let idx = messageNodes.length - 1; idx >= 0; idx--) {
      const container = messageNodes[idx];
      const fromBot = Boolean(container.querySelector(BOT_NAME_ICON_SELECTOR));

      // Only bot messages can carry a swipe container.
      const swipeContainer = fromBot
        ? container.querySelector(LAST_MESSAGE_SWIPE_CONTAINER_SELECTOR)
        : null;

      let candidate;
      if (swipeContainer) {
        const slider = swipeContainer.querySelector(SWIPE_SLIDER_SELECTOR);
        if (!slider) continue;
        const transform = slider.style.transform;
        const percentMatch = transform ? transform.match(/translateX\(([-0-9.]+)%\)/) : null;
        const translateX = transform ? parseFloat(percentMatch?.[1] || "0") : 0;
        const activeSwipeIndex = Math.round(Math.abs(translateX) / 100);
        const swipes = slider.querySelectorAll(MESSAGE_WRAPPER_SELECTOR);
        if (swipes.length <= activeSwipeIndex) continue;
        candidate = swipes[activeSwipeIndex];
      } else {
        candidate = container.querySelector(MESSAGE_WRAPPER_SELECTOR);
      }

      if (!candidate) continue;
      if (candidate.querySelector(EDIT_PANEL_SELECTOR)) continue;
      if (!candidate.querySelector(CONTROL_PANEL_SELECTOR)) continue;

      lastFinishedNode = candidate;
      messageIndex = parseInt(container.dataset.index, 10);
      isBot = fromBot;
      break;
    }
    if (!lastFinishedNode) return;

    const messageText = extractFormattedMessageText(lastFinishedNode);
    const processedTTS = processTTSOutput(messageText).processed;
    const status = "Finished";

    const nothingChanged =
      status === lastLoggedStatus &&
      messageIndex === lastLoggedMessageIndex &&
      (status === "Streaming" || messageText === lastLoggedText);
    if (nothingChanged) return;

    lastLoggedStatus = status;
    lastLoggedSwipeIndex = -1;
    lastLoggedMessageIndex = messageIndex;
    lastLoggedText = messageText;

    console.log("📜 Raw extracted text (Auto, User+Bot):");
    console.log(messageText);
    console.log("\n🎤 Processed TTS (Auto, User+Bot):");
    console.log(processedTTS || "[No TTS output]");
    console.log("--------------------");

    if (processedTTS) {
        playTTS(processedTTS, isBot);
    }
  }

  // Renders a message node as plain text, re-adding lightweight markdown markers
  // (_em_, **strong**, `code`, "• " bullets) around the content it finds.
  // Returns "[No text found]" when the expected structure is missing or empty.
  function extractFormattedMessageText(messageNode) {
    const NOT_FOUND = "[No text found]";

    // Expected layout: _messageBody_ > [_nameContainer_, textContainer]
    const body = messageNode.querySelector(MESSAGE_BODY_SELECTOR);
    if (!body) return NOT_FOUND;

    // The text container is the first DIV child that is not the name container
    // (it carries a dynamic css-XXXXX class, so it is located by exclusion).
    const holder = Array.from(body.children).find(
      (el) => el.tagName === 'DIV' && el.className.match(/_nameContainer_/) === null
    );
    if (!holder) return NOT_FOUND;

    // Marker placed on both sides of an inline element, keyed by tag name.
    const inlineMarkers = new Map([
      ['EM', '_'],
      ['STRONG', '**'],
      ['CODE', '`'],
    ]);

    // Flattens the direct children of a <p>; nodes that are neither elements
    // nor text (e.g. comments) contribute nothing.
    const renderParagraph = (paragraph) => {
      let text = '';
      for (const piece of paragraph.childNodes) {
        if (piece.nodeType === Node.TEXT_NODE) {
          text += piece.textContent;
        } else if (piece.nodeType === Node.ELEMENT_NODE) {
          const marker = inlineMarkers.get(piece.tagName) ?? '';
          text += marker + piece.textContent + marker;
        }
      }
      return text.trim();
    };

    const lines = [];
    for (const block of holder.querySelectorAll('[class^="css-"]')) {
      // Paragraph blocks win over every other kind of content.
      const paragraph = block.querySelector('p');
      if (paragraph) {
        const rendered = renderParagraph(paragraph);
        if (rendered) lines.push(rendered);
        continue;
      }

      // Unordered lists become one bullet line per item.
      const list = block.querySelector('ul');
      if (list) {
        for (const item of list.querySelectorAll('li')) {
          lines.push('• ' + item.textContent.trim());
        }
        continue;
      }

      // Stand-alone code is wrapped in backticks.
      const snippet = block.querySelector('code');
      if (snippet) {
        lines.push('`' + snippet.textContent.trim() + '`');
        continue;
      }

      // Anything else: keep its text when it is not blank.
      const plain = block.textContent.trim();
      if (plain) lines.push(plain);
    }

    return lines.length > 0 ? lines.join('\n') : NOT_FOUND;
  }

  // Cleans/filters raw message text according to the active provider's TTS settings.
  //
  // rawText: text to narrate (nullish values are treated as an empty string).
  // Returns { processed, needsDelay }:
  //   - processed:  the text left after all filters, with blank lines collapsed and trimmed
  //   - needsDelay: true when a "skip"/"ignore" filter actually removed something
  //
  // Settings are read from localStorage["ttsSettings"]; option keys carry the
  // provider prefix ("", "elevenlabs_" or "gemini_").
  function processTTSOutput(rawText) {
    // Corrupt or non-object stored settings must not abort narration, so any
    // parse problem falls back to defaults. Not logged on purpose: this runs on
    // every chat mutation and would flood the console.
    let settings = {};
    try {
      const parsed = JSON.parse(localStorage.getItem("ttsSettings") || "{}");
      if (parsed !== null && typeof parsed === "object") settings = parsed;
    } catch (err) {
      settings = {};
    }
    const provider = settings.provider || 'builtin';
    const prefix = provider === 'elevenlabs' ? 'elevenlabs_' : provider === 'gemini' ? 'gemini_' : '';
    const isOn = (name) => Boolean(settings[`${prefix}${name}`]);

    let processed = rawText == null ? "" : String(rawText);
    let needsDelay = false;

    // Removes every match of `pattern` and flags needsDelay when at least one
    // was found. A single replace avoids sharing a stateful /g regex between
    // .test() and .replace().
    const strip = (pattern) => {
      processed = processed.replace(pattern, () => {
        needsDelay = true;
        return "";
      });
    };

    // Code blocks: drop them entirely, or keep only their content
    if (isOn('tts-skip-codeblocks')) {
      strip(/```[\s\S]*?```/g);
      strip(/`[^`]*`/g);
    } else {
      processed = processed.replace(/```([\s\S]*?)```/g, (m, inner) => inner.trim());
      processed = processed.replace(/`([^`]*)`/g, (m, inner) => inner);
    }

    // Bullet points: drop whole lines starting with •, - or *
    if (isOn('tts-skip-bulletpoints')) {
      const allLines = processed.split("\n");
      const keptLines = allLines.filter((line) => !/^\s*([•\-*])\s+/.test(line));
      if (keptLines.length !== allLines.length) needsDelay = true;
      processed = keptLines.join("\n");
    }

    // Asterisks and emphasis: drop the emphasised text, or keep it without markers
    if (isOn('tts-ignore-asterisks')) {
      strip(/\*\*[^*\n]+\*\*/g);
      strip(/\*[^*\n]+\*/g);
      strip(/_[^_\n]+_/g);
    } else {
      processed = processed.replace(/\*\*([^*\n]+)\*\*/g, (m, inner) => inner);
      processed = processed.replace(/\*([^*\n]+)\*/g, (m, inner) => inner);
      processed = processed.replace(/_([^_\n]+)_/g, (m, inner) => inner);
    }

    // Only narrate text inside double quotes (nothing quoted => empty output)
    if (isOn('tts-only-quotes')) {
      processed = [...processed.matchAll(/"([^"]+)"/g)].map((m) => m[1]).join(" ");
    }

    processed = processed.replace(/\n{2,}/g, "\n").trim();
    return { processed, needsDelay };
  }

  // ==========================================================
  // SECTION 3. CHAT OBSERVER (drives automatic detection)
  // ----------------------------------------------------------
  // - Watches DOM changes and calls the appropriate detector
  // - Also narrates user messages when that option is enabled
  // ==========================================================

  // Attaches a MutationObserver to the chat container and triggers automatic
  // narration on every change, plus once right after attaching. While the
  // container is not in the DOM yet, retries once per second.
  function initializeObserver() {
    const chatRoot = document.querySelector(CHAT_CONTAINER_SELECTOR);

    // Chat not rendered yet: try again later.
    if (!chatRoot) {
      setTimeout(initializeObserver, 1000);
      return;
    }

    // Reads the current settings and, only when TTS and auto-generation are
    // both enabled for the active provider, runs the matching detector.
    const runAutoNarration = () => {
      const stored = JSON.parse(localStorage.getItem("ttsSettings") || "{}");
      const activeProvider = stored.provider || 'builtin';

      let keyPrefix = '';
      if (activeProvider === 'elevenlabs') keyPrefix = 'elevenlabs_';
      else if (activeProvider === 'gemini') keyPrefix = 'gemini_';

      const isOn = (name) => Boolean(stored[`${keyPrefix}${name}`]);
      if (!isOn('tts-enabled') || !isOn('tts-auto-gen')) return;

      if (isOn('tts-narrate-user')) {
        // Handles both bot and user messages
        logLastFinishedMessage();
      } else {
        // Handles only bot messages
        logMessageStatus();
      }
    };

    const chatObserver = new MutationObserver(() => {
      runAutoNarration();
    });
    chatObserver.observe(chatRoot, {
      childList: true,
      subtree: true,
      attributes: true,
      attributeFilter: ['style'],
    });

    // Initial check
    runAutoNarration();
  }

  // Start right away when the document has already been parsed; otherwise
  // wait for DOMContentLoaded.
  if (document.readyState !== "loading") {
    initializeObserver();
  } else {
    document.addEventListener("DOMContentLoaded", initializeObserver);
  }

  // ==========================================================
  // SECTION 4. BUILT-IN VOICES (Web Speech) AND SETTINGS MENU
  // ----------------------------------------------------------
  // - Loading of the built-in voices and the test popup
  // - CSS and construction of the TTS settings modal
  // - Saving of settings (Built-in and ElevenLabs)
  // ==========================================================

  // Built-in (Web Speech) voices, refreshed by loadBuiltinVoices
  let builtinVoices = [];

  // Refreshes `builtinVoices` and passes the list to `callback` (when it is a
  // function): once immediately, and again on every `voiceschanged` event.
  // The handler is assigned to `onvoiceschanged`, so each call replaces the
  // handler installed by the previous call. Without speech synthesis support
  // the list is reset and the callback receives an empty array.
  function loadBuiltinVoices(callback) {
    const notify = typeof callback === "function" ? callback : null;
    const synth = window.speechSynthesis;

    if (!synth) {
      builtinVoices = [];
      if (notify) notify([]);
      return;
    }

    const refreshVoices = () => {
      builtinVoices = window.speechSynthesis?.getVoices() || [];
      if (notify) notify(builtinVoices);
    };

    synth.onvoiceschanged = refreshVoices;
    refreshVoices();
  }

  // Shows a blocking alert listing the built-in voices (name, language and a
  // "[default]" tag), or a hint to retry when none are available yet.
  function showVoicesPopup() {
    // loadBuiltinVoices keeps the callback installed as the `voiceschanged`
    // handler, so without this guard every later voice-list change would pop
    // up another alert nobody asked for. Only the first invocation per
    // request is shown.
    let alreadyShown = false;

    loadBuiltinVoices((voices) => {
      if (alreadyShown) return;
      alreadyShown = true;

      if (!voices || voices.length === 0) {
        alert("No built-in voices available or still loading. Try again in a moment.");
        return;
      }

      const lines = voices.map(
        (v, i) => `${i + 1}. ${v.name} (${v.lang})${v.default ? " [default]" : ""}\n`
      );
      alert(`Available Built-in Voices:\n\n${lines.join("")}`);
    });
  }

  // Temporary keyboard shortcut: Ctrl+Alt+V shows the list of built-in voices
  window.addEventListener("keydown", (event) => {
    const wantsVoiceList = event.ctrlKey && event.altKey && event.key.toLowerCase() === "v";
    if (wantsVoiceList) showVoicesPopup();
  });

  // Stylesheet for the TTS settings modal - glassmorphism style (matching Live2D).
  // Theme variables are declared on .tts-modal-overlay and inherited by everything
  // inside it. The container's min-width is min(480px, 95vw): a fixed 480px
  // min-width would override max-width on narrow viewports and push the modal
  // off-screen.
  const TTS_MENU_CSS = `
    /* === GLASSMORPHISM BASE VARIABLES === */
    .tts-modal-overlay {
      --glass-bg: rgba(18, 18, 22, 0.78);
      --glass-bg-light: rgba(30, 30, 36, 0.7);
      --glass-border: rgba(255, 255, 255, 0.08);
      --glass-border-hover: rgba(176, 196, 222, 0.4);
      --accent-primary: rgba(176, 196, 222, 0.9);
      --accent-gradient: linear-gradient(135deg, rgba(176, 196, 222, 0.9), rgba(147, 197, 253, 0.8));
      --accent-glow: 0 0 15px rgba(176, 196, 222, 0.4);
      --accent-glow-strong: 0 0 20px rgba(176, 196, 222, 0.6), 0 0 40px rgba(176, 196, 222, 0.2);
      --text-primary: rgba(255, 255, 255, 0.95);
      --text-secondary: rgba(200, 200, 220, 0.8);
      --text-muted: rgba(160, 160, 180, 0.7);
      --blur-amount: 12px;
      --radius-sm: 8px;
      --radius-md: 15px;
      --radius-lg: 20px;
    }

    /* === MODAL OVERLAY === */
    .tts-modal-overlay {
      position: fixed; z-index: 9999; inset: 0;
      background: rgba(0, 0, 0, 0.6);
      backdrop-filter: blur(4px);
      -webkit-backdrop-filter: blur(4px);
      display: flex; align-items: center; justify-content: center;
      animation: ttsFadeIn 0.2s ease-out;
    }
    @keyframes ttsFadeIn { from { opacity: 0; } to { opacity: 1; } }
    @keyframes ttsSlideUp { from { opacity: 0; transform: translateY(20px); } to { opacity: 1; transform: translateY(0); } }

    /* === MODAL CONTAINER === */
    .tts-modal-container {
      background: var(--glass-bg);
      backdrop-filter: blur(var(--blur-amount));
      -webkit-backdrop-filter: blur(var(--blur-amount));
      border-radius: var(--radius-lg);
      border: 1px solid var(--glass-border);
      box-shadow: 0 8px 32px rgba(0, 0, 0, 0.4), inset 0 1px 0 rgba(255, 255, 255, 0.05);
      min-width: min(480px, 95vw); max-width: 95vw; min-height: 320px; max-height: 90vh; padding: 0;
      display: flex; flex-direction: column; font-family: 'Segoe UI', system-ui, sans-serif;
      animation: ttsSlideUp 0.3s ease-out;
    }

    /* === HEADER === */
    .tts-modal-header {
      display: flex; align-items: center; justify-content: space-between;
      padding: 20px 28px 16px 28px;
      border-bottom: 1px solid var(--glass-border);
    }
    .tts-modal-title {
      font-size: 1.35rem; font-weight: 600;
      background: linear-gradient(135deg, #fff 0%, rgba(176, 196, 222, 1) 100%);
      -webkit-background-clip: text; -webkit-text-fill-color: transparent;
      background-clip: text; margin: 0;
      text-shadow: 0 0 30px rgba(176, 196, 222, 0.3);
    }
    .tts-modal-close {
      background: rgba(255, 255, 255, 0.05);
      border: 1px solid var(--glass-border);
      color: var(--text-secondary);
      font-size: 1.2rem; cursor: pointer;
      padding: 8px; border-radius: var(--radius-sm);
      transition: all 0.2s ease;
      display: flex; align-items: center; justify-content: center;
    }
    .tts-modal-close:hover {
      background: rgba(255, 255, 255, 0.1);
      border-color: var(--glass-border-hover);
      color: var(--text-primary);
      box-shadow: var(--accent-glow);
    }

    /* === BODY === */
    .tts-modal-body {
      padding: 24px 28px; display: flex; flex-direction: column; gap: 18px;
      overflow-y: auto;
      scrollbar-width: thin;
      scrollbar-color: rgba(176, 196, 222, 0.3) transparent;
    }
    .tts-modal-body::-webkit-scrollbar { width: 6px; }
    .tts-modal-body::-webkit-scrollbar-track { background: transparent; }
    .tts-modal-body::-webkit-scrollbar-thumb { background: rgba(176, 196, 222, 0.3); border-radius: 3px; }
    .tts-modal-body::-webkit-scrollbar-thumb:hover { background: rgba(176, 196, 222, 0.5); }

    /* === CHECKBOXES === */
    .tts-checkbox-list { display: flex; flex-direction: column; gap: 12px; margin-bottom: 8px; }
    .tts-checkbox-row {
      display: flex; align-items: center; gap: 12px;
      padding: 10px 14px;
      background: rgba(255, 255, 255, 0.02);
      border-radius: var(--radius-sm);
      border: 1px solid transparent;
      transition: all 0.2s ease;
    }
    .tts-checkbox-row:hover {
      background: rgba(255, 255, 255, 0.05);
      border-color: var(--glass-border);
    }
    .tts-checkbox-row label { color: var(--text-secondary); font-size: 0.95rem; cursor: pointer; }
    .tts-checkbox-row input[type="checkbox"],
    .tts-checkbox {
      appearance: none; -webkit-appearance: none;
      width: 20px; height: 20px;
      background: rgba(255, 255, 255, 0.05);
      border: 2px solid rgba(176, 196, 222, 0.3);
      border-radius: 6px;
      cursor: pointer;
      transition: all 0.2s ease;
      position: relative;
      flex-shrink: 0;
    }
    .tts-checkbox-row input[type="checkbox"]:checked,
    .tts-checkbox:checked {
      background: var(--accent-gradient);
      border-color: transparent;
      box-shadow: var(--accent-glow);
    }
    .tts-checkbox-row input[type="checkbox"]:checked::after,
    .tts-checkbox:checked::after {
      content: '✓';
      position: absolute;
      top: 50%; left: 50%;
      transform: translate(-50%, -50%);
      color: #1a1a2e;
      font-size: 12px;
      font-weight: bold;
    }

    /* === SLIDERS === */
    .tts-slider-row {
      display: flex; align-items: center; gap: 12px; margin-bottom: 8px;
      padding: 10px 14px;
      background: rgba(255, 255, 255, 0.02);
      border-radius: var(--radius-sm);
      border: 1px solid transparent;
      transition: all 0.2s ease;
    }
    .tts-slider-row:hover {
      background: rgba(255, 255, 255, 0.05);
      border-color: var(--glass-border);
    }
    .tts-slider-label {
      color: var(--text-secondary); font-size: 0.95rem;
      margin-right: 8px; min-width: 120px;
    }
    .tts-slider {
      flex: 1; height: 6px;
      background: linear-gradient(90deg, rgba(176, 196, 222, 0.2), rgba(176, 196, 222, 0.1));
      border-radius: 3px; outline: none; -webkit-appearance: none;
      cursor: pointer;
    }
    .tts-slider::-webkit-slider-thumb {
      -webkit-appearance: none; appearance: none;
      width: 18px; height: 18px;
      background: var(--accent-gradient);
      cursor: pointer; border-radius: 50%;
      box-shadow: var(--accent-glow);
      transition: all 0.2s ease;
    }
    .tts-slider::-webkit-slider-thumb:hover {
      transform: scale(1.1);
      box-shadow: var(--accent-glow-strong);
    }
    .tts-slider::-moz-range-thumb {
      width: 18px; height: 18px;
      background: var(--accent-gradient);
      cursor: pointer; border-radius: 50%; border: none;
      box-shadow: var(--accent-glow);
    }
    .tts-slider-value {
      width: 60px; padding: 8px 10px;
      border-radius: var(--radius-sm);
      border: 1px solid var(--glass-border);
      background: rgba(255, 255, 255, 0.05);
      color: var(--accent-primary);
      font-size: 0.9rem; text-align: center;
      font-family: 'JetBrains Mono', monospace;
      transition: all 0.2s ease;
    }
    .tts-slider-value:focus {
      border-color: var(--glass-border-hover);
      box-shadow: var(--accent-glow);
      outline: none;
    }

    /* === DROPDOWNS === */
    .tts-dropdown-row {
      display: flex; flex-direction: column; gap: 10px; margin-bottom: 8px;
    }
    .tts-dropdown-label {
      color: var(--text-secondary); font-size: 0.9rem;
      font-weight: 500; margin-bottom: 2px;
      text-transform: uppercase;
      letter-spacing: 0.5px;
    }
    .tts-dropdown {
      padding: 10px 14px; border-radius: var(--radius-sm);
      border: 1px solid var(--glass-border);
      background: rgba(255, 255, 255, 0.05);
      color: var(--text-primary); font-size: 0.95rem;
      min-width: 120px; margin-bottom: 2px;
      cursor: pointer;
      transition: all 0.2s ease;
      backdrop-filter: blur(4px);
    }
    .tts-dropdown:hover, .tts-dropdown:focus {
      border-color: var(--glass-border-hover);
      background: rgba(255, 255, 255, 0.08);
      box-shadow: var(--accent-glow);
      outline: none;
    }
    .tts-dropdown option { background: #1e1f28; color: var(--text-primary); }
    .tts-dropdown optgroup { background: #1e1f28; color: var(--text-muted); }

    /* === FOOTER === */
    .tts-modal-footer {
      display: flex; justify-content: flex-end; gap: 12px;
      padding: 18px 28px;
      border-top: 1px solid var(--glass-border);
      background: transparent;
      border-radius: 0 0 var(--radius-lg) var(--radius-lg);
    }

    /* === BUTTONS === */
    .tts-modal-btn {
      padding: 10px 28px; border-radius: var(--radius-sm); border: none;
      font-size: 0.95rem; font-weight: 600; cursor: pointer;
      transition: all 0.2s ease;
      text-transform: uppercase;
      letter-spacing: 0.5px;
    }
    .tts-modal-btn.cancel {
      background: transparent;
      border: 1px solid var(--glass-border);
      color: var(--text-secondary);
    }
    .tts-modal-btn.save {
      background: var(--accent-gradient);
      color: #1a1a2e;
      box-shadow: var(--accent-glow);
    }
    .tts-modal-btn.cancel:hover {
      background: rgba(255, 255, 255, 0.1);
      border-color: var(--glass-border-hover);
      color: var(--text-primary);
    }
    .tts-modal-btn.save:hover {
      box-shadow: var(--accent-glow-strong);
      transform: translateY(-1px);
    }

    /* === API KEY CONTAINER === */
    .tts-api-key-container {
      display: flex; align-items: stretch; gap: 10px;
    }
    .tts-api-key-container textarea {
      flex-grow: 1; padding: 10px 14px;
      border-radius: var(--radius-sm);
      border: 1px solid var(--glass-border);
      background: rgba(255, 255, 255, 0.05);
      color: var(--text-primary);
      font-size: 0.95rem; resize: none;
      font-family: 'JetBrains Mono', monospace;
      height: 42px; line-height: 1.5;
      transition: all 0.2s ease;
    }
    .tts-api-key-container textarea:focus {
      border-color: var(--glass-border-hover);
      box-shadow: var(--accent-glow);
      outline: none;
    }
    .tts-api-key-container textarea::placeholder {
      color: var(--text-muted);
    }
    .tts-api-key-validate-btn {
      padding: 0 20px;
      border-radius: var(--radius-sm);
      border: 1px solid var(--glass-border);
      background: rgba(255, 255, 255, 0.05);
      color: var(--text-secondary);
      font-size: 0.9rem; font-weight: 500;
      cursor: pointer;
      transition: all 0.2s ease;
      text-transform: uppercase;
      letter-spacing: 0.3px;
    }
    .tts-api-key-validate-btn:hover {
      background: rgba(255, 255, 255, 0.1);
      border-color: var(--glass-border-hover);
      color: var(--text-primary);
      box-shadow: var(--accent-glow);
    }
    .tts-api-key-validate-btn:disabled {
      opacity: 0.5;
      cursor: not-allowed;
    }
    .tts-api-key-status {
      font-size: 0.85rem; margin-top: 6px; height: 18px;
      font-weight: 500;
    }
    .tts-api-key-status.success { color: #4ade80; text-shadow: 0 0 10px rgba(74, 222, 128, 0.3); }
    .tts-api-key-status.error { color: #f87171; text-shadow: 0 0 10px rgba(248, 113, 113, 0.3); }

    /* === PROVIDER SECTION DIVIDER === */
    .tts-dropdown-row[style*="border-bottom"] {
      padding-bottom: 18px !important;
      margin-bottom: 0 !important;
      border-bottom: 1px solid var(--glass-border) !important;
    }

    /* === SETTINGS PANELS === */
    #tts-settings-builtin,
    #tts-settings-elevenlabs,
    #tts-settings-gemini {
      display: flex;
      flex-direction: column;
      gap: 12px;
    }

    /* Remove extra margin from checkbox list when followed by other elements */
    .tts-checkbox-list {
      margin-bottom: 0 !important;
    }

    /* === TEXTAREA STYLES (for Gemini style prompt) === */
    .tts-dropdown-row textarea {
      padding: 10px 14px;
      border-radius: var(--radius-sm);
      border: 1px solid var(--glass-border);
      background: rgba(255, 255, 255, 0.05);
      color: var(--text-primary);
      font-size: 0.95rem;
      resize: none;
      font-family: 'Segoe UI', system-ui, sans-serif;
      transition: all 0.2s ease;
    }
    .tts-dropdown-row textarea:focus {
      border-color: var(--glass-border-hover);
      box-shadow: var(--accent-glow);
      outline: none;
    }
    .tts-dropdown-row textarea::placeholder {
      color: var(--text-muted);
    }

    /* Gemini style row - reduce internal gaps */
    #tts-settings-gemini .tts-dropdown-row {
      gap: 8px;
      margin-bottom: 0;
    }

    /* Slider rows in settings panels - no extra bottom margin */
    #tts-settings-gemini .tts-slider-row,
    #tts-settings-elevenlabs .tts-slider-row,
    #tts-settings-builtin .tts-slider-row {
      margin-bottom: 0;
    }
  `;
  // Inject the menu stylesheet once; the element id doubles as the
  // "already injected" marker.
  if (document.getElementById("tts-menu-style") === null) {
    const menuStyle = Object.assign(document.createElement("style"), {
      id: "tts-menu-style",
      textContent: TTS_MENU_CSS,
    });
    document.head.appendChild(menuStyle);
  }

  // Checkbox options shown in every provider panel, as [setting id, label]
  // pairs. All of them default to unchecked.
  const CHECKBOX_OPTIONS = [
    ["tts-enabled", "Enabled"],
    ["tts-narrate-user", "Narrate user messages"],
    ["tts-auto-gen", "Auto Generation"],
    ["tts-only-quotes", 'Only narrate "quotes"'],
    ["tts-ignore-asterisks", 'Ignore *text, even "quotes", inside asterisks*'],
    ["tts-skip-codeblocks", "Skip codeblocks"],
    ["tts-skip-bulletpoints", "Skip bulletpoints"],
  ].map(([id, label]) => ({ id, label, default: false }));

  // ElevenLabs voice and model lists; both start out empty.
  // NOTE(review): presumably filled from the ElevenLabs API once a key is
  // validated - they are not assigned in this part of the file, confirm.
  let elevenLabsVoices = [];
  let elevenLabsModels = [];

  // Gemini TTS pre-made models (single-speaker only), keyed by model id with
  // the display label as value; exposed as a list of { id, label } entries.
  const GEMINI_TTS_MODELS = Object.entries({
    'gemini-2.5-flash-preview-tts': 'Gemini 2.5 Flash (TTS)',
    'gemini-2.5-pro-preview-tts': 'Gemini 2.5 Pro (TTS)',
  }).map(([id, label]) => ({ id, label }));

  // Gemini TTS pre-made voices as [voice id, tone] pairs; each entry is
  // exposed as { id, label } with the label rendered as "<id> -- <tone>".
  const GEMINI_TTS_VOICES = [
    ['Zephyr', 'Bright'],
    ['Puck', 'Upbeat'],
    ['Charon', 'Informative'],
    ['Kore', 'Firm'],
    ['Fenrir', 'Excitable'],
    ['Leda', 'Youthful'],
    ['Orus', 'Firm'],
    ['Aoede', 'Breezy'],
    ['Callirrhoe', 'Easy-going'],
    ['Autonoe', 'Bright'],
    ['Enceladus', 'Breathy'],
    ['Iapetus', 'Clear'],
    ['Umbriel', 'Easy-going'],
    ['Algieba', 'Smooth'],
    ['Despina', 'Smooth'],
    ['Erinome', 'Clear'],
    ['Algenib', 'Gravelly'],
    ['Rasalgethi', 'Informative'],
    ['Laomedeia', 'Upbeat'],
    ['Achernar', 'Soft'],
    ['Alnilam', 'Firm'],
    ['Schedar', 'Even'],
    ['Gacrux', 'Mature'],
    ['Pulcherrima', 'Forward'],
    ['Achird', 'Friendly'],
    ['Zubenelgenubi', 'Casual'],
    ['Vindemiatrix', 'Gentle'],
    ['Sadachbia', 'Lively'],
    ['Sadaltager', 'Knowledgeable'],
    ['Sulafat', 'Warm'],
  ].map(([id, tone]) => ({ id, label: `${id} -- ${tone}` }));

  function createTTSMenu() {
    const savedSettings = JSON.parse(localStorage.getItem("ttsSettings") || "{}");
    const getSetting = (key, def) => (key in savedSettings ? savedSettings[key] : def);

    const overlay = document.createElement("div");
    overlay.className = "tts-modal-overlay";
    overlay.style.display = "none";

    const container = document.createElement("div");
    container.className = "tts-modal-container";

    const header = document.createElement("div");
    header.className = "tts-modal-header";
    const title = document.createElement("h2");
    title.className = "tts-modal-title";
    title.textContent = "Text to Speech Settings";
    const closeBtn = document.createElement("button");
    closeBtn.className = "tts-modal-close";
    closeBtn.innerHTML = `<svg width="20" height="20" viewBox="0 0 20 20" fill="none"><path d="M15 5L5 15M5 5l10 10" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"></path></svg>`;
    closeBtn.onclick = () => {
        stopPreviewAudio();
        overlay.style.display = "none";
    };
    header.appendChild(title);
    header.appendChild(closeBtn);

    const mainBody = document.createElement("div");
    mainBody.className = "tts-modal-body";

    // Selector de proveedor
    const providerDropdownRow = document.createElement("div");
    providerDropdownRow.className = "tts-dropdown-row";
    providerDropdownRow.style.paddingBottom = "18px";
    providerDropdownRow.style.marginBottom = "0";
    providerDropdownRow.style.borderBottom = "1px solid #444";

  const providerLabel = document.createElement("label");
    providerLabel.className = "tts-dropdown-label";
    providerLabel.textContent = "TTS Provider";
  const providerSelect = document.createElement("select");
  providerSelect.id = "tts-provider-select";
  providerSelect.className = "tts-dropdown";
  providerSelect.innerHTML = `<option value="builtin">Built-in</option><option value="elevenlabs">ElevenLabs</option><option value="gemini">Gemini TTS (API)</option>`;
    providerSelect.value = getSetting("provider", "builtin");
    providerDropdownRow.appendChild(providerLabel);
    providerDropdownRow.appendChild(providerSelect);
    mainBody.appendChild(providerDropdownRow);

    // Nombres actuales de bot/usuario
    let botName = "char";
    try {
        const botNameElem = document.querySelector('[class^="_nameText_"]');
        if (botNameElem && botNameElem.textContent.trim()) botName = botNameElem.textContent.trim();
    } catch (e) {}
    let userPersona = "User";
    try {
        const allMessageNodes = document.querySelectorAll('[data-testid="virtuoso-item-list"] > div[data-index]');
        for (let i = allMessageNodes.length - 1; i >= 0; i--) {
            const node = allMessageNodes[i];
            if (!node.querySelector('[class^="_nameIcon_"]')) {
                const nameElem = node.querySelector('[class^="_nameText_"]');
                if (nameElem && nameElem.textContent.trim()) { userPersona = nameElem.textContent.trim(); break; }
            }
        }
    } catch (e) {}

    // Panel Built-in
    const settingsBuiltIn = document.createElement("div");
    settingsBuiltIn.id = "tts-settings-builtin";
    settingsBuiltIn.style.display = "flex";
    settingsBuiltIn.style.flexDirection = "column";
    settingsBuiltIn.style.gap = "18px";

    const checkboxListBuiltIn = document.createElement("div");
    checkboxListBuiltIn.className = "tts-checkbox-list";
    CHECKBOX_OPTIONS.forEach(opt => {
        const row = document.createElement("div");
        row.className = "tts-checkbox-row";
        const cb = document.createElement("input");
        cb.type = "checkbox";
        cb.id = `builtin-${opt.id}`;
        cb.dataset.key = opt.id;
        cb.className = "tts-checkbox";
        cb.checked = !!getSetting(opt.id, opt.default);
        const label = document.createElement("label");
        label.htmlFor = cb.id;
        label.textContent = opt.label;
        row.appendChild(cb);
        row.appendChild(label);
        checkboxListBuiltIn.appendChild(row);
    });
    settingsBuiltIn.appendChild(checkboxListBuiltIn);

    const sliderRowBuiltIn = document.createElement("div");
    sliderRowBuiltIn.className = "tts-slider-row";
    const sliderLabelBuiltIn = document.createElement("span");
    sliderLabelBuiltIn.className = "tts-slider-label";
    sliderLabelBuiltIn.textContent = "Playback speed";
    const sliderBuiltIn = document.createElement("input");
    sliderBuiltIn.type = "range";
    sliderBuiltIn.dataset.key = "playbackSpeed";
    sliderBuiltIn.className = "tts-slider";
    sliderBuiltIn.min = "0.10";
    sliderBuiltIn.max = "2.00";
    sliderBuiltIn.step = "0.05";
    sliderBuiltIn.value = getSetting("playbackSpeed", "1.00");
    const sliderValueBuiltIn = document.createElement("input");
    sliderValueBuiltIn.type = "text";
    sliderValueBuiltIn.className = "tts-slider-value";
    sliderValueBuiltIn.value = sliderBuiltIn.value;
    sliderBuiltIn.oninput = () => { sliderValueBuiltIn.value = parseFloat(sliderBuiltIn.value).toFixed(2); };
    sliderValueBuiltIn.oninput = () => {
      let v = parseFloat(sliderValueBuiltIn.value);
      if (!isNaN(v) && v >= 0.1 && v <= 2) sliderBuiltIn.value = v.toFixed(2);
    };
    sliderRowBuiltIn.appendChild(sliderLabelBuiltIn);
    sliderRowBuiltIn.appendChild(sliderBuiltIn);
    sliderRowBuiltIn.appendChild(sliderValueBuiltIn);
    settingsBuiltIn.appendChild(sliderRowBuiltIn);

    const dropdownRowBuiltIn = document.createElement("div");
    dropdownRowBuiltIn.className = "tts-dropdown-row";
    dropdownRowBuiltIn.innerHTML = `
        <label class="tts-dropdown-label">Default voice</label>
        <select class="tts-dropdown" data-key="defaultVoice"></select>
        <label class="tts-dropdown-label">Voice for "${botName}"</label>
        <select class="tts-dropdown" data-key="charVoice_${botName}"></select>
        <label class="tts-dropdown-label">Voice for "${userPersona}" (You)</label>
        <select class="tts-dropdown" data-key="userVoice_${userPersona}"></select>
    `;
    loadBuiltinVoices(() => {
        const dropdowns = dropdownRowBuiltIn.querySelectorAll('.tts-dropdown');
        dropdowns.forEach(dd => {
            dd.innerHTML = `<option value="Default">Default</option>`;
            builtinVoices.forEach(v => {
                const opt = document.createElement("option");
                opt.value = v.name;
                opt.textContent = `${v.name} (${v.lang})${v.default ? " [default]" : ""}`;
                dd.appendChild(opt);
            });
            const key = dd.dataset.key;
            const fallbackKey = key.startsWith('charVoice') || key.startsWith('userVoice') ? 'defaultVoice' : 'Default';
            dd.value = getSetting(key, getSetting(fallbackKey, 'Default'));
        });
    });
    settingsBuiltIn.appendChild(dropdownRowBuiltIn);

  // Panel ElevenLabs
    const settingsElevenLabs = document.createElement("div");
    settingsElevenLabs.id = "tts-settings-elevenlabs";
    settingsElevenLabs.style.display = "none";
    settingsElevenLabs.style.flexDirection = "column";
    settingsElevenLabs.style.gap = "18px";

    // API Key
    const apiKeyRow = document.createElement("div");
    apiKeyRow.className = "tts-dropdown-row";
    apiKeyRow.style.paddingBottom = "18px";
    apiKeyRow.style.marginBottom = "0";
    apiKeyRow.style.borderBottom = "1px solid #444";
    const apiKeyLabel = document.createElement("label");
    apiKeyLabel.className = "tts-dropdown-label";
    apiKeyLabel.textContent = "ElevenLabs API Key";
    const apiKeyContainer = document.createElement("div");
    apiKeyContainer.className = 'tts-api-key-container';
    const apiKeyInput = document.createElement("textarea");
    apiKeyInput.dataset.key = "elevenlabs_apiKey";
    apiKeyInput.value = getSetting("elevenlabs_apiKey", "");
    apiKeyInput.placeholder = "Enter your API Key";
    const validateBtn = document.createElement("button");
    validateBtn.type = "button";
    validateBtn.className = "tts-api-key-validate-btn";
    validateBtn.textContent = "Validate";
    const apiKeyStatus = document.createElement("div");
    apiKeyStatus.className = "tts-api-key-status";

    // Ocultar/mostrar clave
    let isKeyHidden = true;
    const originalKey = apiKeyInput.value;
    /**
     * Builds the placeholder shown in place of the API key.
     * @param {string} key - The real key.
     * @returns {string} One bullet per character of `key`; '' for an empty key.
     */
    function maskKey(key) {
        if (!(key.length > 0)) return '';
        return '•'.repeat(key.length);
    }
    if (originalKey) { apiKeyInput.value = maskKey(originalKey); apiKeyInput.dataset.original = originalKey; }
    apiKeyInput.addEventListener('focus', () => {
        if (isKeyHidden && apiKeyInput.dataset.original) { apiKeyInput.value = apiKeyInput.dataset.original; isKeyHidden = false; }
    });
    apiKeyInput.addEventListener('blur', () => {
        apiKeyInput.dataset.original = apiKeyInput.value;
        apiKeyInput.value = maskKey(apiKeyInput.value);
        isKeyHidden = true;
    });
    apiKeyInput.addEventListener('input', () => { apiKeyInput.dataset.original = apiKeyInput.value; });

    apiKeyContainer.appendChild(apiKeyInput);
    apiKeyContainer.appendChild(validateBtn);
    apiKeyRow.appendChild(apiKeyLabel);
    apiKeyRow.appendChild(apiKeyContainer);
    apiKeyRow.appendChild(apiKeyStatus);
    settingsElevenLabs.appendChild(apiKeyRow);

    const checkboxListElevenLabs = document.createElement("div");
    checkboxListElevenLabs.className = "tts-checkbox-list";
    CHECKBOX_OPTIONS.forEach(opt => {
        const row = document.createElement("div");
        row.className = "tts-checkbox-row";
        const cb = document.createElement("input");
        cb.type = "checkbox";
        cb.id = `elevenlabs-${opt.id}`;
        cb.dataset.key = `elevenlabs_${opt.id}`;
        cb.className = "tts-checkbox";
        cb.checked = !!getSetting(cb.dataset.key, opt.default);
        const label = document.createElement("label");
        label.htmlFor = cb.id;
        label.textContent = opt.label;
        row.appendChild(cb);
        row.appendChild(label);
        checkboxListElevenLabs.appendChild(row);
    });
    settingsElevenLabs.appendChild(checkboxListElevenLabs);

    /**
     * Builds one slider row: a caption, a range input bound to a settings key,
     * and a text box that mirrors the slider value in display form.
     *
     * @param {string} labelText - Caption inserted as HTML into the row.
     * @param {string} key - Settings key stored on the range input's data-key.
     * @param {string} min - Range minimum.
     * @param {string} max - Range maximum.
     * @param {string} step - Range step.
     * @param {string} defaultValue - Value used when the setting is not stored.
     * @param {function} formatFn - Maps the raw slider value to display text.
     * @param {function} parseFn - Maps display text to `{ isValid, value }`.
     * @returns {HTMLDivElement} The assembled row.
     */
    function createSlider(labelText, key, min, max, step, defaultValue, formatFn, parseFn) {
        const container = document.createElement("div");
        container.className = "tts-slider-row";
        container.innerHTML = `<span class="tts-slider-label">${labelText}</span>`;

        const range = document.createElement("input");
        range.type = "range";
        range.dataset.key = key;
        range.className = "tts-slider";
        range.min = min;
        range.max = max;
        range.step = step;
        range.value = getSetting(key, defaultValue);

        const display = document.createElement("input");
        display.type = "text";
        display.className = "tts-slider-value";

        // Single place that pushes the slider's current value into the text box.
        const syncDisplay = () => { display.value = formatFn(range.value); };
        syncDisplay();

        range.oninput = () => { syncDisplay(); };
        display.onchange = () => {
            const { isValid, value } = parseFn(display.value);
            // Invalid text is discarded: the box snaps back to the slider value.
            if (isValid) range.value = value;
            syncDisplay();
        };

        container.appendChild(range);
        container.appendChild(display);
        return container;
    }

    settingsElevenLabs.appendChild(createSlider("Playback speed", "elevenlabs_playbackSpeed", "0.1", "2.0", "0.05", "1.00",
        v => parseFloat(v).toFixed(2),
        v => { const n = parseFloat(v); return { isValid: !isNaN(n) && n >= 0.1 && n <= 2, value: n.toFixed(2) }; }
    ));
    settingsElevenLabs.appendChild(createSlider("Stability", "elevenlabs_stability", "0", "1", "0.01", "0.50",
        v => `${Math.round(v * 100)}%`,
        v => { const n = parseInt(v.replace('%','')); return { isValid: !isNaN(n) && n >= 0 && n <= 100, value: (n/100).toFixed(2) }; }
    ));
    settingsElevenLabs.appendChild(createSlider("Similarity Boost", "elevenlabs_similarity", "0", "1", "0.01", "0.75",
        v => `${Math.round(v * 100)}%`,
        v => { const n = parseInt(v.replace('%','')); return { isValid: !isNaN(n) && n >= 0 && n <= 100, value: (n/100).toFixed(2) }; }
    ));
    settingsElevenLabs.appendChild(createSlider("Style", "elevenlabs_style", "0", "1", "0.01", "0.00",
        v => `${Math.round(v * 100)}%`,
        v => { const n = parseInt(v.replace('%','')); return { isValid: !isNaN(n) && n >= 0 && n <= 100, value: (n/100).toFixed(2) }; }
    ));

    const speakerBoostRow = document.createElement("div");
    speakerBoostRow.className = "tts-checkbox-row";
    const speakerBoostCb = document.createElement("input");
    speakerBoostCb.type = "checkbox";
    speakerBoostCb.id = "elevenlabs-speaker-boost";
    speakerBoostCb.dataset.key = "elevenlabs_speaker-boost";
    speakerBoostCb.className = "tts-checkbox";
    speakerBoostCb.checked = !!getSetting("elevenlabs_speaker-boost", false);
    const speakerBoostLabel = document.createElement("label");
    speakerBoostLabel.htmlFor = speakerBoostCb.id;
    speakerBoostLabel.textContent = "Use Speaker Boost";
    speakerBoostRow.appendChild(speakerBoostCb);
    speakerBoostRow.appendChild(speakerBoostLabel);
    checkboxListElevenLabs.appendChild(speakerBoostRow);

    const dropdownRowElevenLabs = document.createElement("div");
    dropdownRowElevenLabs.className = "tts-dropdown-row";
    dropdownRowElevenLabs.innerHTML = `
        <label class="tts-dropdown-label">Model</label>
        <select class="tts-dropdown" data-key="elevenlabs_modelId"></select>
        <label class="tts-dropdown-label">Default voice</label>
        <select class="tts-dropdown" data-key="elevenlabs_defaultVoice"></select>
        <label class="tts-dropdown-label">Voice for "${botName}"</label>
        <select class="tts-dropdown" data-key="elevenlabs_charVoice_${botName}"></select>
        <label class="tts-dropdown-label">Voice for "${userPersona}" (You)</label>
        <select class="tts-dropdown" data-key="elevenlabs_userVoice_${userPersona}"></select>
    `;
  settingsElevenLabs.appendChild(dropdownRowElevenLabs);

  // Panel Gemini TTS (single-speaker)
  const settingsGemini = document.createElement("div");
  settingsGemini.id = "tts-settings-gemini";
  settingsGemini.style.display = "none";
  settingsGemini.style.flexDirection = "column";
  settingsGemini.style.gap = "12px";

  // Gemini API Key (no validation call, just stored)
  const geminiApiRow = document.createElement("div");
  geminiApiRow.className = "tts-dropdown-row";
  geminiApiRow.style.paddingBottom = "18px";
  geminiApiRow.style.marginBottom = "0";
  geminiApiRow.style.borderBottom = "1px solid rgba(255, 255, 255, 0.08)";
  const geminiApiLabel = document.createElement("label");
  geminiApiLabel.className = "tts-dropdown-label";
  geminiApiLabel.textContent = "Gemini API Key";
  const geminiApiContainer = document.createElement("div");
  geminiApiContainer.className = 'tts-api-key-container';
  const geminiApiInput = document.createElement("textarea");
  geminiApiInput.dataset.key = "gemini_apiKey";
  geminiApiInput.value = getSetting("gemini_apiKey", "");
  geminiApiInput.placeholder = "Enter your Gemini API Key";

  // Ocultar/mostrar clave (mismo patrón que ElevenLabs)
  let geminiKeyHidden = true;
  const geminiOriginalKey = geminiApiInput.value;
  /**
   * Builds the placeholder shown in place of the Gemini API key.
   * @param {string} key - The real key.
   * @returns {string} One bullet per character of `key`; '' for an empty key.
   */
  function maskGeminiKey(key) {
    if (!(key.length > 0)) return '';
    return '•'.repeat(key.length);
  }
  if (geminiOriginalKey) { geminiApiInput.value = maskGeminiKey(geminiOriginalKey); geminiApiInput.dataset.original = geminiOriginalKey; }
  geminiApiInput.addEventListener('focus', () => {
    if (geminiKeyHidden && geminiApiInput.dataset.original) { geminiApiInput.value = geminiApiInput.dataset.original; geminiKeyHidden = false; }
  });
  geminiApiInput.addEventListener('blur', () => {
    geminiApiInput.dataset.original = geminiApiInput.value;
    geminiApiInput.value = maskGeminiKey(geminiApiInput.value);
    geminiKeyHidden = true;
  });
  geminiApiInput.addEventListener('input', () => { geminiApiInput.dataset.original = geminiApiInput.value; });

  geminiApiContainer.appendChild(geminiApiInput);
  geminiApiRow.appendChild(geminiApiLabel);
  geminiApiRow.appendChild(geminiApiContainer);
  settingsGemini.appendChild(geminiApiRow);

  // Gemini checkboxes (same options, prefixed)
  const checkboxListGemini = document.createElement("div");
  checkboxListGemini.className = "tts-checkbox-list";
  CHECKBOX_OPTIONS.forEach(opt => {
    const row = document.createElement("div");
    row.className = "tts-checkbox-row";
    const cb = document.createElement("input");
    cb.type = "checkbox";
    cb.id = `gemini-${opt.id}`;
    cb.dataset.key = `gemini_${opt.id}`;
    cb.className = "tts-checkbox";
    cb.checked = !!getSetting(cb.dataset.key, opt.default);
    const label = document.createElement("label");
    label.htmlFor = cb.id;
    label.textContent = opt.label;
    row.appendChild(cb);
    row.appendChild(label);
    checkboxListGemini.appendChild(row);
  });
  settingsGemini.appendChild(checkboxListGemini);

  // Gemini Static Style toggle + textarea
  const geminiStyleRow = document.createElement("div");
  geminiStyleRow.className = "tts-dropdown-row";

  const geminiStyleCheckboxRow = document.createElement("div");
  geminiStyleCheckboxRow.className = "tts-checkbox-row";
  const geminiStyleCheckbox = document.createElement("input");
  geminiStyleCheckbox.type = "checkbox";
  geminiStyleCheckbox.id = "gemini-static-style";
  geminiStyleCheckbox.dataset.key = "gemini_static_style_enabled";
  geminiStyleCheckbox.className = "tts-checkbox";
  geminiStyleCheckbox.checked = !!getSetting("gemini_static_style_enabled", false);
  const geminiStyleCheckboxLabel = document.createElement("label");
  geminiStyleCheckboxLabel.htmlFor = geminiStyleCheckbox.id;
  geminiStyleCheckboxLabel.textContent = "Static Style";
  geminiStyleCheckboxRow.appendChild(geminiStyleCheckbox);
  geminiStyleCheckboxRow.appendChild(geminiStyleCheckboxLabel);

  const geminiStyleTextareaLabel = document.createElement("label");
  geminiStyleTextareaLabel.className = "tts-dropdown-label";
  geminiStyleTextareaLabel.textContent = "Static style prompt (used for all Gemini TTS when enabled)";

  const geminiStyleTextarea = document.createElement("textarea");
  geminiStyleTextarea.dataset.key = "gemini_static_style_text";
  geminiStyleTextarea.placeholder = "Write the style instructions to prepend before every Gemini TTS generation.";
  geminiStyleTextarea.style.minHeight = "80px";
  geminiStyleTextarea.style.resize = "none";
  geminiStyleTextarea.style.padding = "10px 14px";
  geminiStyleTextarea.style.borderRadius = "8px";
  geminiStyleTextarea.style.border = "1px solid rgba(255, 255, 255, 0.08)";
  geminiStyleTextarea.style.background = "rgba(255, 255, 255, 0.05)";
  geminiStyleTextarea.style.color = "rgba(255, 255, 255, 0.95)";
  geminiStyleTextarea.style.fontFamily = "'Segoe UI', system-ui, sans-serif";
  geminiStyleTextarea.style.fontSize = "0.95rem";
  geminiStyleTextarea.value = getSetting("gemini_static_style_text", "");

  // Show/hide style textarea depending on toggle
  // Shows the static-style label and textarea only while the "Static Style"
  // checkbox is ticked.
  function updateGeminiStyleVisibility() {
    const display = geminiStyleCheckbox.checked ? "block" : "none";
    for (const el of [geminiStyleTextareaLabel, geminiStyleTextarea]) {
      el.style.display = display;
    }
  }
  geminiStyleCheckbox.addEventListener("change", updateGeminiStyleVisibility);
  updateGeminiStyleVisibility();

  geminiStyleRow.appendChild(geminiStyleCheckboxRow);
  geminiStyleRow.appendChild(geminiStyleTextareaLabel);
  geminiStyleRow.appendChild(geminiStyleTextarea);
  settingsGemini.appendChild(geminiStyleRow);

  // Gemini model + voice dropdowns
  const dropdownRowGemini = document.createElement("div");
  dropdownRowGemini.className = "tts-dropdown-row";
  dropdownRowGemini.innerHTML = `
    <label class="tts-dropdown-label">Model</label>
    <select class="tts-dropdown" data-key="gemini_modelId"></select>
    <label class="tts-dropdown-label">Default voice</label>
    <select class="tts-dropdown" data-key="gemini_defaultVoice"></select>
    <label class="tts-dropdown-label">Voice for "${botName}"</label>
    <select class="tts-dropdown" data-key="gemini_charVoice_${botName}"></select>
    <label class="tts-dropdown-label">Voice for "${userPersona}" (You)</label>
    <select class="tts-dropdown" data-key="gemini_userVoice_${userPersona}"></select>
  `;

  // Populate Gemini model + voices
  const geminiModelSelect = dropdownRowGemini.querySelector('[data-key="gemini_modelId"]');
  GEMINI_TTS_MODELS.forEach(m => {
    const opt = document.createElement('option');
    opt.value = m.id;
    opt.textContent = m.label;
    geminiModelSelect.appendChild(opt);
  });
  geminiModelSelect.value = getSetting('gemini_modelId', GEMINI_TTS_MODELS[0]?.id || 'gemini-2.5-flash-preview-tts');

  const geminiVoiceDropdowns = dropdownRowGemini.querySelectorAll('[data-key^="gemini_"]');
  geminiVoiceDropdowns.forEach(dd => {
    if (dd.dataset.key === 'gemini_modelId') return;
    dd.innerHTML = `<option value="Default">Default</option>`;
    GEMINI_TTS_VOICES.forEach(v => {
      const opt = document.createElement('option');
      opt.value = v.id;
      opt.textContent = v.label;
      dd.appendChild(opt);
    });
    const key = dd.dataset.key;
    const fallbackKey = key.includes('charVoice') || key.includes('userVoice') ? 'gemini_defaultVoice' : 'Default';
    dd.value = getSetting(key, getSetting(fallbackKey, 'Default'));
  });

  settingsGemini.appendChild(dropdownRowGemini);

  mainBody.appendChild(settingsBuiltIn);
  mainBody.appendChild(settingsElevenLabs);
  mainBody.appendChild(settingsGemini);

    // Validate button: checks the typed ElevenLabs key; on success it loads the
    // voice/model dropdowns and persists the key to localStorage right away.
    validateBtn.addEventListener('click', async () => {
        // dataset.original holds the real key while the textarea shows the mask.
        const key = apiKeyInput.dataset.original || apiKeyInput.value;
        if (!key) {
            apiKeyStatus.textContent = "API Key is empty.";
            apiKeyStatus.className = "tts-api-key-status error";
            return;
        }
        apiKeyStatus.textContent = "Validating...";
        apiKeyStatus.className = "tts-api-key-status";
        validateBtn.disabled = true;

        try {
            const validation = await validateElevenLabsKey(key);
            apiKeyStatus.textContent = validation.message;
            apiKeyStatus.className = `tts-api-key-status ${validation.isValid ? 'success' : 'error'}`;

            if (validation.isValid) {
                await fetchAndPopulateElevenLabsData(key);
                const currentSettings = JSON.parse(localStorage.getItem("ttsSettings") || "{}");
                currentSettings.elevenlabs_apiKey = key;
                localStorage.setItem("ttsSettings", JSON.stringify(currentSettings));
            }
        } catch (error) {
            // A rejected validation or unreadable stored settings must not leave
            // the status stuck on "Validating...".
            console.error("TTS Userscript: ElevenLabs key validation failed:", error);
            apiKeyStatus.textContent = `Validation failed: ${error.message}`;
            apiKeyStatus.className = "tts-api-key-status error";
        } finally {
            // Always re-enable the button, even when something above threw.
            validateBtn.disabled = false;
        }
    });

    /**
     * Fetches the ElevenLabs voices and models for `apiKey` and fills the model
     * and voice dropdowns of the ElevenLabs settings panel.
     *
     * Safe to call repeatedly for the same panel: dropdown contents are rebuilt
     * on every call, but the preview listener is attached to each dropdown once.
     * A stored voice or model id that is no longer offered falls back to
     * "Default" (voices) or the first model instead of leaving the select blank.
     * Failures are logged and reported in the API-key status line; nothing is
     * thrown to the caller.
     *
     * @param {string} apiKey - ElevenLabs API key used for both requests.
     * @returns {Promise<void>}
     */
    async function fetchAndPopulateElevenLabsData(apiKey) {
        try {
            const [voices, models] = await Promise.all([
                elevenLabsApiRequest({ method: "GET", endpoint: "/v1/voices", apiKey }),
                elevenLabsApiRequest({ method: "GET", endpoint: "/v1/models", apiKey })
            ]);

            elevenLabsVoices = voices.voices || [];
            elevenLabsModels = models.filter(m => m.can_do_text_to_speech);

            // Model dropdown
            const modelSelect = dropdownRowElevenLabs.querySelector('[data-key="elevenlabs_modelId"]');
            modelSelect.innerHTML = '';
            elevenLabsModels.forEach(model => {
                const option = document.createElement('option');
                option.value = model.model_id;
                option.textContent = model.name;
                modelSelect.appendChild(option);
            });
            const firstModelId = elevenLabsModels[0]?.model_id || '';
            const savedModelId = getSetting('elevenlabs_modelId', firstModelId);
            modelSelect.value = elevenLabsModels.some(m => m.model_id === savedModelId)
                ? savedModelId
                : firstModelId;

            // Group the voices once; every dropdown receives the same groups.
            const categorized = { 'Premade': [], 'Cloned': [] };
            elevenLabsVoices.forEach(v => {
                if (v.category === 'premade') categorized.Premade.push(v);
                else categorized.Cloned.push(v);
            });
            const selectableVoiceIds = new Set(['Default', ...elevenLabsVoices.map(v => v.voice_id)]);

            // Voice dropdowns
            const dropdownsEL = dropdownRowElevenLabs.querySelectorAll('[data-key^="elevenlabs_"], [data-key*="Voice"]');
            dropdownsEL.forEach(dd => {
                if (dd.dataset.key === 'elevenlabs_modelId') return;
                dd.innerHTML = `<option value="Default">Default</option>`;
                Object.keys(categorized).forEach(category => {
                    const voicesInCategory = categorized[category];
                    if (voicesInCategory.length === 0) return;
                    const optgroup = document.createElement('optgroup');
                    optgroup.label = `${category} (${voicesInCategory.length})`;
                    voicesInCategory.forEach(voice => {
                        const option = document.createElement('option');
                        option.value = voice.voice_id;
                        option.textContent = voice.name;
                        option.dataset.previewUrl = voice.preview_url || '';
                        optgroup.appendChild(option);
                    });
                    dd.appendChild(optgroup);
                });

                // Per-speaker dropdowns inherit the stored default voice when unset.
                const key = dd.dataset.key;
                const fallbackKey = key.includes('charVoice') || key.includes('userVoice') ? 'elevenlabs_defaultVoice' : 'Default';
                const savedVoice = getSetting(key, getSetting(fallbackKey, 'Default'));
                dd.value = selectableVoiceIds.has(savedVoice) ? savedVoice : 'Default';

                // Attach the preview listener once per dropdown; this function
                // runs again on every provider switch and key validation.
                if (dd.dataset.previewBound !== 'true') {
                    dd.dataset.previewBound = 'true';
                    dd.addEventListener('change', (event) => {
                        const selectedOption = event.target.options[event.target.selectedIndex];
                        const previewUrl = selectedOption?.dataset.previewUrl;
                        if (previewUrl) {
                            playVoicePreview(previewUrl);
                        }
                    });
                }
            });

        } catch (error) {
            console.error("TTS Userscript: Failed to fetch ElevenLabs data:", error);
            apiKeyStatus.textContent = `Failed to get voices/models: ${error.message}`;
            apiKeyStatus.className = "tts-api-key-status error";
        }
    }

    // Cambio de proveedor
    // Shows the settings panel of the selected provider and hides the other
    // two. An unrecognised provider value leaves all panels untouched.
    providerSelect.onchange = () => {
      const panels = new Map([
        ['builtin', settingsBuiltIn],
        ['elevenlabs', settingsElevenLabs],
        ['gemini', settingsGemini],
      ]);
      const selected = providerSelect.value;
      const activePanel = panels.get(selected);
      if (!activePanel) return;

      panels.forEach(panel => {
        panel.style.display = panel === activePanel ? 'flex' : 'none';
      });

      // The ElevenLabs dropdowns are filled from the API, so refresh them
      // whenever that panel becomes visible and a key is already stored.
      if (selected === 'elevenlabs') {
        const storedKey = getSetting("elevenlabs_apiKey", "");
        if (storedKey) fetchAndPopulateElevenLabsData(storedKey);
      }
    };
    // Apply the initial provider on the next tick.
    setTimeout(() => { providerSelect.onchange(); }, 0);

    // Footer guardar/cancelar
    const footer = document.createElement("div");
    footer.className = "tts-modal-footer";
    const cancelBtn = document.createElement("button");
    cancelBtn.className = "tts-modal-btn cancel";
    cancelBtn.textContent = "Cancel";
    cancelBtn.onclick = () => {
        stopPreviewAudio();
        overlay.style.display = "none";
    };

  // Save button: merges every [data-key] control of the three provider panels
  // into the stored "ttsSettings" object, closes the modal and re-injects the
  // per-message TTS buttons.
  const saveBtn = document.createElement("button");
  saveBtn.className = "tts-modal-btn save";
  saveBtn.textContent = "Save Settings";
  saveBtn.onclick = () => {
    const prevSettings = JSON.parse(localStorage.getItem("ttsSettings") || "{}");
    const newSettings = { ...prevSettings, provider: providerSelect.value };
    const apiKeyFields = ['elevenlabs_apiKey', 'gemini_apiKey'];

    // Copies one control's current value into newSettings under its data-key.
    const storeControl = (el) => {
      const key = el.dataset.key;
      if (!key) return;

      if (apiKeyFields.includes(key)) {
        // The textarea shows a mask; the real key lives in dataset.original.
        // An empty value is stored too, so a key can be removed.
        newSettings[key] = el.dataset.original || el.value;
      } else if (el.type === 'checkbox') {
        newSettings[key] = el.checked;
      } else if (el.type === 'range' || el.classList.contains('tts-slider-value')) {
        newSettings[key] = parseFloat(el.value);
      } else if (el.tagName === 'SELECT' && el.selectedIndex < 0) {
        // Dropdown not populated (voices/models still loading, or provider
        // never opened) or nothing selected: its value is '' and would wipe
        // the stored choice, so keep whatever prevSettings already holds.
      } else {
        newSettings[key] = el.value;
      }
    };

    document.querySelectorAll(
      '#tts-settings-builtin [data-key], #tts-settings-elevenlabs [data-key], #tts-settings-gemini [data-key]'
    ).forEach(storeControl);

    localStorage.setItem("ttsSettings", JSON.stringify(newSettings));
    stopPreviewAudio();
    overlay.style.display = "none";
    // Rebuild the per-message buttons so they reflect the new settings.
    document.querySelectorAll('.temp-btn').forEach(btn => btn.remove());
    document.querySelectorAll(CONTROL_PANEL_SELECTOR).forEach(injectTempButton);
  };
    footer.appendChild(cancelBtn);
    footer.appendChild(saveBtn);

    container.appendChild(header);
    container.appendChild(mainBody);
    container.appendChild(footer);
    overlay.appendChild(container);

    document.body.appendChild(overlay);
    return overlay;
  }

  let ttsMenuOverlay = null;

  // ==========================================================
  // SECCIÓN 5. INYECCIÓN EN EL MENÚ DE LA APP
  // ----------------------------------------------------------
  // - Añade entrada "Text to Speech" en el menú de JanitorAI
  // - Abre modal de configuración al hacer click
  // ==========================================================

  const MENU_LIST_SELECTOR = '[class^="_menuList_"]';
  const MENU_ITEM_CLASS = '[class^="_menuItem_"]';
  const TTS_BUTTON_ID = 'tts-menu-item';

  const bodyObserver = new MutationObserver(() => { injectTTSMenuItem(); });
  bodyObserver.observe(document.body, { childList: true, subtree: true });

  /**
   * Adds a "Text to Speech" entry to the JanitorAI menu if the menu is open
   * and the entry is not there yet. It is placed right after "Generation
   * Settings" when that item exists, otherwise at the end of the list.
   * Clicking it rebuilds and shows the settings modal.
   *
   * Styling is copied from a live menu item so the entry keeps matching the
   * site when its CSS-module hash suffixes change; the previously hard-coded
   * class names remain as a fallback.
   */
  function injectTTSMenuItem() {
    const menuList = document.querySelector(MENU_LIST_SELECTOR);
    if (!menuList) return;
    if (menuList.querySelector(`#${TTS_BUTTON_ID}`)) return;

    const firstMenuItem = menuList.querySelector(MENU_ITEM_CLASS);
    // getAttribute('class') returns a plain string for any element type.
    const iconClass = firstMenuItem?.querySelector('[class*="_menuItemIcon_"]')?.getAttribute('class')
      || '_menuItemIcon_1fzcr_81';
    const contentClass = firstMenuItem?.querySelector('[class*="_menuItemContent_"]')?.getAttribute('class')
      || '_menuItemContent_1fzcr_96';

    const btn = document.createElement('button');
    btn.type = 'button';
    btn.className = firstMenuItem ? firstMenuItem.className : '';
    btn.id = TTS_BUTTON_ID;
    btn.innerHTML = `
      <span>
        <svg xmlns="http://www.w3.org/2000/svg" width="24" height="24"
             viewBox="0 0 24 24" fill="none" stroke="currentColor"
             stroke-width="2" stroke-linecap="round" stroke-linejoin="round"
             class="lucide lucide-audio-lines-icon">
          <path d="M2 10v3"/><path d="M6 6v11"/><path d="M10 3v18"/>
          <path d="M14 8v7"/><path d="M18 5v13"/><path d="M22 10v3"/>
        </svg>
      </span>
      <span>Text to Speech</span>
    `;
    // Classes are assigned as properties rather than spliced into the markup.
    const [iconSpan, contentSpan] = btn.children;
    iconSpan.className = iconClass;
    contentSpan.className = contentClass;

    btn.addEventListener('click', function() {
      if (ttsMenuOverlay) ttsMenuOverlay.remove();
      ttsMenuOverlay = createTTSMenu();
      ttsMenuOverlay.style.display = "flex";
    });

    const anchor = Array.from(menuList.querySelectorAll(MENU_ITEM_CLASS)).find(item => {
      const content = item.querySelector('span[class*="_menuItemContent_"]');
      return content && content.textContent.trim() === "Generation Settings";
    });
    if (anchor) {
      // after() inserts relative to the anchor's own parent, so it also works
      // when the matched item is not a direct child of menuList.
      anchor.after(btn);
    } else {
      menuList.appendChild(btn);
    }
  }

  // ==========================================================
  // SECTION 6. TTS ENGINE (Built-in, ElevenLabs and Gemini)
  // ----------------------------------------------------------
  // - Plays TTS through the provider chosen in the settings
  // - Emits events for Live2D (stop, blobURL, audioBuffer)
  // - Segments audio by EmotionQueue + alignment (when applicable)
  // ==========================================================

  let currentUtterance = null;
  let currentElevenLabsAudio = null;
  let currentPreviewAudio = null; // For voice preview playback
  let isPlaying = false; // Track global playing state

  /**
   * Records the global playing flag and redraws every injected TTS button.
   * @param {boolean} playing - true shows the stop icon, false the play icon.
   */
  function updateAllButtonStates(playing) {
      isPlaying = playing;
      const icon = playing ? STOP_SVG : PLAY_SVG;
      for (const button of document.querySelectorAll('.temp-btn')) {
          button.innerHTML = icon;
      }
  }

  /**
   * Speaks `text` through the provider stored in the "ttsSettings" entry of
   * localStorage; a missing or unrecognised provider uses the built-in voices.
   * @param {string} text - Message text to speak.
   * @param {boolean} isBot - Passed through to the provider function.
   */
  function playTTS(text, isBot) {
    const settings = JSON.parse(localStorage.getItem("ttsSettings") || "{}");

    switch (settings.provider || 'builtin') {
      case 'elevenlabs':
        playElevenLabsTTS(text, isBot, settings);
        break;
      case 'gemini':
        playGeminiTTSTTS(text, isBot, settings);
        break;
      default:
        playBuiltinTTS(text, isBot, settings);
    }
  }

  /**
   * Stops whatever is playing: cancels browser speech synthesis if it is
   * speaking, tears down the current audio element if one is held, and
   * switches every TTS button back to the "play" state.
   */
  function stopTTS() {
      // Built-in speech synthesis
      const synth = window.speechSynthesis;
      if (synth && synth.speaking) {
          synth.cancel();
      }

      // Audio element held in currentElevenLabsAudio
      const audio = currentElevenLabsAudio;
      if (audio) {
          audio.pause();
          audio.src = '';
          currentElevenLabsAudio = null;
      }

      // Refresh all buttons
      updateAllButtonStates(false);
  }

  // Voice preview functions
  // Stops and discards the voice-preview audio element, if one is held.
  function stopPreviewAudio() {
      const preview = currentPreviewAudio;
      if (!preview) return;
      preview.pause();
      preview.src = '';
      currentPreviewAudio = null;
  }

  /**
   * Plays a voice sample, replacing any preview that is already playing.
   * @param {string} previewUrl - URL of the sample; a falsy value is ignored.
   */
  function playVoicePreview(previewUrl) {
      if (previewUrl) {
          stopPreviewAudio();
          const player = new Audio(previewUrl);
          currentPreviewAudio = player;
          player.play().catch(e => console.error("Error playing voice preview:", e));
      }
  }

  // Gemini TTS (single-speaker, PCM -> WAV in browser)
  async function playGeminiTTSTTS(text, isBot, settings) {
    const apiKey = settings.gemini_apiKey;
    if (!settings['gemini_tts-enabled'] || !apiKey) { stopTTS(); return; }

    // If Live2D is active, use emotion analysis (single segment) and send queue to Live2D

    if (live2dScriptDetected) {
      console.log("🎭 [Gemini] Live2D detected - starting whole-utterance emotion analysis and queue dispatch");

      try {
        // 1. Process text with the same filters used elsewhere
        console.log("📝 [Gemini] Step 1: Processing text with TTS filters...");
        const { processed: processedText } = processTTSOutput(text);

        if (!processedText || processedText.trim() === '') {
          console.warn("⚠️ [Gemini] Processed text is empty after filters, skipping Gemini TTS + Live2D");
          return;
        }
        console.log(`✓ [Gemini] Text processed: ${processedText.length} characters`);

        // 2. Resolve speaker names (needed for voice selection in parallel call)
        console.log("👤 [Gemini] Step 2: Resolving speaker names...");
        try {
          const botNameElem = document.querySelector('[class^="_nameText_"]');
          if (botNameElem && botNameElem.textContent.trim()) botName = botNameElem.textContent.trim();
        } catch (e) {
          console.warn("⚠️ [Gemini] Could not get bot name:", e);
        }
        let userPersona = "User";
        try {
          const allMessageNodes = document.querySelectorAll('[data-testid="virtuoso-item-list"] > div[data-index]');
          for (let i = allMessageNodes.length - 1; i >= 0; i--) {
            const node = allMessageNodes[i];
            if (!node.querySelector('[class^="_nameIcon_"]')) {
              const nameElem = node.querySelector('[class^="_nameText_"]');
              if (nameElem && nameElem.textContent.trim()) { userPersona = nameElem.textContent.trim(); break; }
            }
          }
        } catch (e) {
          console.warn("⚠️ [Gemini] Could not get user persona:", e);
        }
        console.log(`✓ [Gemini] Bot: "${botName}", User: "${userPersona}"`);

        // 3. Resolve voice
        console.log("🎤 [Gemini] Step 3: Resolving voice configuration...");
        let voiceName;
        if (isBot) voiceName = settings[`gemini_charVoice_${botName}`] || settings.gemini_defaultVoice;
        else voiceName = settings[`gemini_userVoice_${userPersona}`] || settings.gemini_defaultVoice;

        if (!voiceName || voiceName === 'Default') {
          console.error("❌ [Gemini] No Gemini voice selected for this speaker.");
          return;
        }
        console.log(`✓ [Gemini] Using voice: ${voiceName}`);

        const modelId = settings.gemini_modelId || 'gemini-2.5-flash-preview-tts';
        const playbackSpeed = parseFloat(settings.gemini_playbackSpeed) || 1.0;

        // 4. Build final text depending on static style setting
        console.log("🎨 [Gemini] Step 4: Building style prompt...");
        const staticStyleEnabled = !!settings.gemini_static_style_enabled;
        let finalText;
        if (staticStyleEnabled && settings.gemini_static_style_text) {
          finalText = `${settings.gemini_static_style_text}\n${processedText}`;
        } else {
          const defaultStyle = `### ROLE AND PERSONA
You are a professional voice actor specializing in realistic, naturalistic roleplay narration.

### VOICE PROFILE
1.  **Base Tone:** Low, quiet, and measured. A steady, grounded hum.
2.  **Performance Style:** Understated and conversational. Use a "less is more" approach.
3.  **Negative Constraints (Crucial):**
    -   **No Theatrics:** Do not sound exaggerated, melodramatic, or "acty."
    -   **No Sultry Tone:** Do not use a breathy or seductive voice.
    -   **No Cartoons:** Avoid high-pitch spikes or anime-style exclamations. Keep it human.

### INSTRUCTIONS
1.  **Narrate Everything:** Read every word provided, including the descriptive text (narration) and the dialogue.
2.  **Context-Driven Emotion:** You must analyze the narration to understand the character's mental state, but express it subtly.
    -   **Subtlety is Key:** If the text describes anger, do not shout. Instead, drop your pitch and speak with cold intensity. If the text describes excitement, do not get loud; just quicken your pace slightly.
    -   **Continuity:** Ensure the narration and the dialogue flow together as one cohesive story, not two separate voices.

### EXAMPLE

**Input:**
*The guard slams his hand against the table, causing the mugs to rattle.*
"I told you already," *he hisses through gritted teeth, leaning in close so no one else hears,* "I don't have the money."

**Required Performance Logic:**
1.  **Narration (*The guard slams...*):** Read this calmly but firmly to set the weight of the action.
2.  **Dialogue ("I told you already")::** The text says "hisses" and "leaning in close." Do NOT shout. Whisper-talk this line with a tight, sharp intensity.
3.  **Narration (*he hisses...*):** Maintain the low, measured storytelling tone.
4.  **Dialogue ("I don't have the money")::** Deliver this with finality and a flat, cold tone.

**HERE'S THE TEXT YOU MUST NARRATE FOLLOWING ALL PREVIOUS INSTRUCTIONS, DO NOT NARRATE ANY OTHER TEXT:**
`;
          finalText = `${defaultStyle}\n${processedText}`;
        }

        const body = {
          contents: [ { parts: [ { text: finalText } ] } ],
          generationConfig: {
            responseModalities: ['AUDIO'],
            speechConfig: {
              voiceConfig: {
                prebuiltVoiceConfig: { voiceName }
              }
            }
          }
        };

        // ⚡ OPTIMIZATION: Execute Gemini emotion analysis and TTS generation in PARALLEL
        console.log("⚡ [Gemini] Step 5: Starting PARALLEL execution of emotion analysis and TTS generation...");
        updateAllButtonStates(true);

        const parallelStartTime = performance.now();

        // Create promises for both operations
        const emotionAnalysisPromise = analyzeTextWithGemini(processedText)
          .then(segments => {
            console.log(`✓ [Gemini] Emotion analysis completed (${((performance.now() - parallelStartTime) / 1000).toFixed(2)}s)`);
            return segments;
          })
          .catch(err => {
            console.warn("⚠️ [Gemini] Emotion analysis failed, will use Neutral:", err);
            return null;
          });

        const ttsGenerationPromise = new Promise((resolve, reject) => {
          GM_xmlhttpRequest({
            method: "POST",
            url: `https://generativelanguage.googleapis.com/v1beta/models/${modelId}:generateContent`,
            headers: {
              "Content-Type": "application/json",
              "x-goog-api-key": apiKey
            },
            data: JSON.stringify(body),
            onload: async (res) => {
              if (res.status < 200 || res.status >= 300) {
                reject(new Error(`HTTP ${res.status}: ${res.responseText}`));
                return;
              }
              try {
                const json = JSON.parse(res.responseText);
                const base64 = json?.candidates?.[0]?.content?.parts?.[0]?.inlineData?.data;
                if (!base64) {
                  reject(new Error("No audio in response"));
                  return;
                }
                console.log(`✓ [Gemini] TTS generation completed (${((performance.now() - parallelStartTime) / 1000).toFixed(2)}s)`);
                resolve({ base64, json });
              } catch (e) {
                reject(e);
              }
            },
            onerror: (err) => reject(new Error("TTS request failed: " + err))
          });
        });

        // Wait for BOTH operations to complete in parallel
        console.log("⏳ [Gemini] Waiting for parallel operations to complete...");
        const [emotionSegments, ttsResult] = await Promise.all([emotionAnalysisPromise, ttsGenerationPromise]);
        
        const parallelTotalTime = ((performance.now() - parallelStartTime) / 1000).toFixed(2);
        console.log(`⚡ [Gemini] ✓ PARALLEL execution completed in ${parallelTotalTime}s (vs sequential: would take ~${(parseFloat(parallelTotalTime) * 1.5).toFixed(2)}s)`);

        // Process emotion analysis results
        const mainSeg = (emotionSegments && emotionSegments[0]) || { emotion: "Neutral", action: null, text: processedText };
        const mainEmotion = mainSeg.emotion || "Neutral";
        const mainAction = mainSeg.action || null;
        console.log(`✓ [Gemini] Main emotion: ${mainEmotion}${mainAction ? `, action: ${mainAction}` : ''}`);

        // Process TTS results
        try {
          const { base64 } = ttsResult;
          console.log("🔄 [Gemini] Step 6: Decoding PCM16 audio and wrapping as WAV blob...");
              const pcmBuffer = base64ToArrayBuffer(base64);
              const wavBuffer = createWavFromPCM(pcmBuffer, 24000, 1, 16);
              const blob = new Blob([wavBuffer], { type: 'audio/wav' });
              const url = URL.createObjectURL(blob);

              // Build a single segment queue item
              console.log("📦 [Gemini] Building single emotion segment for Live2D...");
              const singleSegment = {
                emotion: mainEmotion,
                action: mainAction,
                text: mainSeg.text || processedText,
                blobUrl: url,
                startTime: 0,
                endTime: null,
                duration: null
              };

          const audioElement = new Audio(url);
          audioElement.onloadedmetadata = () => {
            singleSegment.duration = audioElement.duration;
            singleSegment.endTime = audioElement.duration;

            console.log("⏱️ [Gemini] Step 7: Audio duration:", audioElement.duration, "seconds");
            console.log("📤 [Gemini] Step 8: Dispatching 'TTSEmotionSegmentsReady' to Live2D with a single segment...");

                const event = new CustomEvent('TTSEmotionSegmentsReady', {
                  detail: {
                    segments: [singleSegment],
                    totalDuration: audioElement.duration,
                    sampleRate: 24000
                  }
                });
                window.dispatchEvent(event);

                console.log("✅ [Gemini] Single-segment emotion queue dispatched to Live2D");
                updateAllButtonStates(false);
              };

              // Don't play directly: Live2D script will control playback using the blobUrl
              audioElement.onerror = () => {
                console.error("❌ [Gemini] Error loading audio metadata for duration");
                updateAllButtonStates(false);
              };

              // Force metadata load
              audioElement.load();

          audioElement.onerror = (e) => {
            console.error("❌ [Gemini] Failed to load audio for duration check:", e);
            updateAllButtonStates(false);
          };

        } catch (error) {
          console.error("❌ [Gemini] Error processing audio:", error);
          updateAllButtonStates(false);
        }

      } catch (e) {
        console.error("❌ [Gemini] Unexpected error in Live2D flow", e);
        updateAllButtonStates(false);
      }

      // Live2D will handle playback using the blobUrl queue
      return;
    }

    // If Live2D is NOT active, fall back to local playback (original behavior)
    stopTTS();

    // Resolve speaker names
    let botName = "char";
    try {
        const botNameElem = document.querySelector('[class^="_nameText_"]');
        if (botNameElem && botNameElem.textContent.trim()) botName = botNameElem.textContent.trim();
    } catch (e) {}
    let userPersona = "User";
    try {
        const allMessageNodes = document.querySelectorAll('[data-testid="virtuoso-item-list"] > div[data-index]');
        for (let i = allMessageNodes.length - 1; i >= 0; i--) {
            const node = allMessageNodes[i];
            if (!node.querySelector('[class^="_nameIcon_"]')) {
                const nameElem = node.querySelector('[class^="_nameText_"]');
                if (nameElem && nameElem.textContent.trim()) { userPersona = nameElem.textContent.trim(); break; }
            }
        }
    } catch (e) {}

    // Resolve voice
    let voiceName;
    if (isBot) voiceName = settings[`gemini_charVoice_${botName}`] || settings.gemini_defaultVoice;
    else voiceName = settings[`gemini_userVoice_${userPersona}`] || settings.gemini_defaultVoice;
    if (!voiceName || voiceName === 'Default') {
        console.error("TTS Userscript: No Gemini voice selected for this speaker.");
        return;
    }

    const modelId = settings.gemini_modelId || 'gemini-2.5-flash-preview-tts';
    const playbackSpeed = parseFloat(settings.gemini_playbackSpeed) || 1.0;

    // Build final text depending on static style setting
    const staticStyleEnabled = !!settings.gemini_static_style_enabled;
    let finalText;
    if (staticStyleEnabled && settings.gemini_static_style_text) {
      finalText = `${settings.gemini_static_style_text}\n${text}`;
    } else {
      // Default style prompt when Static Style is disabled or empty
      const defaultStyle = `### ROLE AND PERSONA
You are a professional voice actor specializing in realistic, naturalistic roleplay narration.

### VOICE PROFILE
1.  **Base Tone:** Low, quiet, and measured. A steady, grounded hum.
2.  **Performance Style:** Understated and conversational. Use a "less is more" approach.
3.  **Negative Constraints (Crucial):**
    -   **No Theatrics:** Do not sound exaggerated, melodramatic, or "acty."
    -   **No Sultry Tone:** Do not use a breathy or seductive voice.
    -   **No Cartoons:** Avoid high-pitch spikes or anime-style exclamations. Keep it human.

### INSTRUCTIONS
1.  **Narrate Everything:** Read every word provided, including the descriptive text (narration) and the dialogue.
2.  **Context-Driven Emotion:** You must analyze the narration to understand the character's mental state, but express it subtly.
    -   **Subtlety is Key:** If the text describes anger, do not shout. Instead, drop your pitch and speak with cold intensity. If the text describes excitement, do not get loud; just quicken your pace slightly.
    -   **Continuity:** Ensure the narration and the dialogue flow together as one cohesive story, not two separate voices.

### EXAMPLE

**Input:**
*The guard slams his hand against the table, causing the mugs to rattle.*
"I told you already," *he hisses through gritted teeth, leaning in close so no one else hears,* "I don't have the money."

**Required Performance Logic:**
1.  **Narration (*The guard slams...*):** Read this calmly but firmly to set the weight of the action.
2.  **Dialogue ("I told you already")::** The text says "hisses" and "leaning in close." Do NOT shout. Whisper-talk this line with a tight, sharp intensity.
3.  **Narration (*he hisses...*):** Maintain the low, measured storytelling tone.
4.  **Dialogue ("I don't have the money")::** Deliver this with finality and a flat, cold tone.

**HERE'S THE TEXT YOU MUST NARRATE FOLLOWING ALL PREVIOUS INSTRUCTIONS, DO NOT NARRATE ANY OTHER TEXT:**
`;
      finalText = `${defaultStyle}\n${text}`;
    }

    const body = {
      contents: [ { parts: [ { text: finalText } ] } ],
      generationConfig: {
        responseModalities: ['AUDIO'],
        speechConfig: {
          voiceConfig: {
            prebuiltVoiceConfig: { voiceName }
          }
        }
      }
    };

    updateAllButtonStates(true);

    try {
      GM_xmlhttpRequest({
        method: "POST",
        url: `https://generativelanguage.googleapis.com/v1beta/models/${modelId}:generateContent`,
        headers: {
          "Content-Type": "application/json",
          "x-goog-api-key": apiKey
        },
        data: JSON.stringify(body),
        onload: async (res) => {
          if (res.status < 200 || res.status >= 300) {
            console.error("Gemini TTS HTTP error", res.status, res.responseText);
            updateAllButtonStates(false);
            return;
          }
          try {
            const json = JSON.parse(res.responseText);
            const base64 = json?.candidates?.[0]?.content?.parts?.[0]?.inlineData?.data;
            if (!base64) {
              console.error("Gemini TTS: no audio in response", json);
              updateAllButtonStates(false);
              return;
            }

            // PCM16 mono @24kHz by spec
            const pcmBuffer = base64ToArrayBuffer(base64);
            const wavBuffer = createWavFromPCM(pcmBuffer, 24000, 1, 16);
            const blob = new Blob([wavBuffer], { type: 'audio/wav' });
            const url = URL.createObjectURL(blob);

            currentElevenLabsAudio = new Audio(url); // reuse same audio holder
            currentElevenLabsAudio.playbackRate = playbackSpeed;
            currentElevenLabsAudio.onended = () => { updateAllButtonStates(false); currentElevenLabsAudio = null; };
            currentElevenLabsAudio.onerror = () => { updateAllButtonStates(false); currentElevenLabsAudio = null; };
            currentElevenLabsAudio.play();
          } catch (e) {
            console.error("Gemini TTS parse/play error", e);
            updateAllButtonStates(false);
          }
        },
        onerror: (e) => {
          console.error("Gemini TTS network error", e);
          updateAllButtonStates(false);
        }
      });
    } catch (e) {
      console.error("Gemini TTS unexpected error", e);
      updateAllButtonStates(false);
    }
  }

  // Built-in (Web Speech)
  /**
   * Speaks `text` with the browser's Web Speech API.
   *
   * Any current playback is stopped first (via `stopTTS`). The utterance is
   * stored in `currentUtterance`, and the play/stop buttons are kept in sync
   * through `updateAllButtonStates`.
   *
   * @param {string} text - Text to speak; ignored unless it is a non-empty string.
   * @param {boolean} isBot - Chooses the `charVoice_<name>` setting when true,
   *   the `userVoice_<name>` setting otherwise.
   * @param {Object} settings - TTS settings; reads `tts-enabled`,
   *   `playbackSpeed`, `defaultVoice` and the per-speaker voice keys.
   * @returns {void}
   */
  function playBuiltinTTS(text, isBot, settings) {
    if (!settings['tts-enabled']) { stopTTS(); return; }
    if (!window.speechSynthesis || !text || typeof text !== 'string') return;
    stopTTS();

    const utter = new SpeechSynthesisUtterance(text);
    currentUtterance = utter;

    utter.rate = parseFloat(settings.playbackSpeed) || 1.0;
    utter.pitch = 1;

    // Attach the lifecycle handlers before any speak() call. They used to be
    // attached only on the "voices available" path, so when the voice list was
    // empty the buttons were switched on and never switched off again.
    utter.onstart = () => { updateAllButtonStates(true); };
    utter.onend = () => { updateAllButtonStates(false); };
    utter.onerror = () => { updateAllButtonStates(false); };

    const allVoices = window.speechSynthesis.getVoices();
    if (allVoices.length === 0) {
      // No voice list to choose from: speak without assigning utter.voice.
      window.speechSynthesis.speak(utter);
      updateAllButtonStates(true);
      return;
    }

    // Fallback voice preference: default en-US, any en-US, any English, first listed.
    const defaultVoice = allVoices.find(v => v.lang === 'en-US' && v.default)
      || allVoices.find(v => v.lang === 'en-US')
      || allVoices.find(v => v.lang.startsWith('en'))
      || allVoices[0];

    // Speaker names are read from the chat DOM; the placeholders are kept when
    // the lookup finds nothing or fails.
    let botName = "char";
    try {
      const botNameElem = document.querySelector('[class^="_nameText_"]');
      if (botNameElem && botNameElem.textContent.trim()) botName = botNameElem.textContent.trim();
    } catch (e) {
      console.warn("TTS Userscript: could not read bot name:", e);
    }

    let userPersona = "User";
    try {
      const allMessageNodes = document.querySelectorAll('[data-testid="virtuoso-item-list"] > div[data-index]');
      // Walk from the newest message backwards; the first node without a
      // `_nameIcon_` descendant supplies the user persona name.
      for (let i = allMessageNodes.length - 1; i >= 0; i--) {
        const node = allMessageNodes[i];
        if (node.querySelector('[class^="_nameIcon_"]')) continue;
        const nameElem = node.querySelector('[class^="_nameText_"]');
        if (nameElem && nameElem.textContent.trim()) { userPersona = nameElem.textContent.trim(); break; }
      }
    } catch (e) {
      console.warn("TTS Userscript: could not read user persona:", e);
    }

    const perSpeakerKey = isBot ? `charVoice_${botName}` : `userVoice_${userPersona}`;
    const targetVoiceName = settings[perSpeakerKey] || settings.defaultVoice || 'Default';
    const selectedVoice = (targetVoiceName !== 'Default') ? allVoices.find(v => v.name === targetVoiceName) : null;
    utter.voice = selectedVoice || defaultVoice;

    window.speechSynthesis.speak(utter);
  }

  // ElevenLabs (con timestamps, WAV generation y AudioBuffer)
  /**
   * Generates speech for `text` through the ElevenLabs "with-timestamps"
   * endpoint.
   *
   * - When `live2dScriptDetected` is truthy, the filtered text is analysed for
   *   emotions in parallel with the TTS request, the audio is split per
   *   emotion segment, and the segments are dispatched on `window` as a
   *   'TTSEmotionSegmentsReady' event. Nothing is played here in that mode.
   * - Otherwise the returned audio is played locally through
   *   `currentElevenLabsAudio`, and the decoded AudioBuffer is also handed to
   *   `dispatchTTSDecodedAudio`.
   *
   * Failures in the request, decode or segment steps are logged (and alerted
   * in the local-playback path) rather than rethrown.
   *
   * @param {string} text - Message text to narrate.
   * @param {boolean} isBot - Chooses `elevenlabs_charVoice_<name>` when true,
   *   `elevenlabs_userVoice_<name>` otherwise.
   * @param {Object} settings - TTS settings; reads the `elevenlabs_*` keys.
   * @returns {Promise<void>}
   */
  async function playElevenLabsTTS(text, isBot, settings) {
    const apiKey = settings.elevenlabs_apiKey;
    if (!settings['elevenlabs_tts-enabled'] || !apiKey) { stopTTS(); return; }

    // Parses a numeric setting; falls back when the value is missing or not a
    // number, because JSON.stringify would otherwise send NaN as null.
    const readFloat = (value, fallback) => {
      const parsed = parseFloat(value);
      return Number.isNaN(parsed) ? fallback : parsed;
    };

    // Reads the bot name and the user persona name from the chat DOM, keeping
    // the placeholders when nothing is found.
    const resolveSpeakerNames = () => {
      let botName = "char";
      try {
        const botNameElem = document.querySelector('[class^="_nameText_"]');
        if (botNameElem && botNameElem.textContent.trim()) botName = botNameElem.textContent.trim();
      } catch (e) {
        console.warn("⚠️ Could not get bot name:", e);
      }

      let userPersona = "User";
      try {
        const allMessageNodes = document.querySelectorAll('[data-testid="virtuoso-item-list"] > div[data-index]');
        // Newest message first; the first node without a `_nameIcon_`
        // descendant supplies the user persona name.
        for (let i = allMessageNodes.length - 1; i >= 0; i--) {
          const node = allMessageNodes[i];
          if (node.querySelector('[class^="_nameIcon_"]')) continue;
          const nameElem = node.querySelector('[class^="_nameText_"]');
          if (nameElem && nameElem.textContent.trim()) { userPersona = nameElem.textContent.trim(); break; }
        }
      } catch (e) {
        console.warn("⚠️ Could not get user persona:", e);
      }
      return { botName, userPersona };
    };

    // Per-speaker voice setting first, then the provider-wide default.
    const resolveVoiceId = ({ botName, userPersona }) => {
      const perSpeakerKey = isBot ? `elevenlabs_charVoice_${botName}` : `elevenlabs_userVoice_${userPersona}`;
      return settings[perSpeakerKey] || settings.elevenlabs_defaultVoice;
    };

    // Builds the JSON body for the text-to-speech request.
    const buildRequestBody = (inputText) => ({
      text: inputText,
      model_id: settings.elevenlabs_modelId,
      voice_settings: {
        stability: readFloat(settings.elevenlabs_stability, 0.50),
        similarity_boost: readFloat(settings.elevenlabs_similarity, 0.75),
        style: readFloat(settings.elevenlabs_style, 0.00),
        use_speaker_boost: !!settings['elevenlabs_speaker-boost']
      }
    });

    // Live2D active: analyse emotions and hand segmented audio to the Live2D script.
    if (live2dScriptDetected) {
      console.log("🎭 Live2D script detected - Starting emotion-based audio segmentation");

      try {
        // 1. Apply the user's TTS text filters.
        console.log("📝 Step 1: Processing text with TTS filters...");
        const { processed: processedText } = processTTSOutput(text);

        if (!processedText || processedText.trim() === '') {
          console.warn("⚠️ Processed text is empty after filters, skipping emotion analysis");
          return;
        }
        console.log(`✓ Text processed: ${processedText.length} characters`);

        // 2. Resolve character/user names (needed for voice selection).
        const speakers = resolveSpeakerNames();
        const { botName, userPersona } = speakers;
        console.log(`✓ Bot: "${botName}", User: "${userPersona}"`);

        // 3. Resolve the voice.
        console.log("🎤 Step 3: Getting voice configuration...");
        const voiceId = resolveVoiceId(speakers);

        if (!voiceId || voiceId === 'Default') {
          console.error("❌ No ElevenLabs voice selected for this speaker.");
          console.error(`   Looking for: ${isBot ? `elevenlabs_charVoice_${botName}` : `elevenlabs_userVoice_${userPersona}`}`);
          console.error(`   Fallback: elevenlabs_defaultVoice = ${settings.elevenlabs_defaultVoice}`);
          return;
        }
        console.log(`✓ Using voice ID: ${voiceId}`);

        // 4. Prepare the request body.
        console.log("⚙️ Step 4: Preparing ElevenLabs request...");
        const requestBody = buildRequestBody(processedText);
        console.log(`✓ Request body prepared (model: ${settings.elevenlabs_modelId})`);

        // 5. Run emotion analysis and TTS generation in parallel. An analysis
        //    failure is converted to null so it cannot reject the whole batch.
        console.log("⚡ Step 5: Starting PARALLEL execution of emotion analysis and TTS generation...");
        const parallelStartTime = performance.now();

        const [analyzedSegments, responseData] = await Promise.all([
          analyzeTextWithGemini(processedText)
            .then(segments => {
              console.log(`✓ Emotion analysis completed (${((performance.now() - parallelStartTime) / 1000).toFixed(2)}s)`);
              return segments;
            })
            .catch(err => {
              console.warn("⚠️ Emotion analysis failed, will continue without emotions:", err);
              return null;
            }),
          elevenLabsApiRequest({
            method: 'POST',
            endpoint: `/v1/text-to-speech/${voiceId}/with-timestamps`,
            apiKey: apiKey,
            data: requestBody,
            responseType: 'json'
          })
            .then(data => {
              console.log(`✓ ElevenLabs TTS completed (${((performance.now() - parallelStartTime) / 1000).toFixed(2)}s)`);
              return data;
            })
        ]);

        const parallelTotalTime = ((performance.now() - parallelStartTime) / 1000).toFixed(2);
        console.log(`⚡ ✓ PARALLEL execution completed in ${parallelTotalTime}s (vs sequential: would take ~${(parseFloat(parallelTotalTime) * 1.5).toFixed(2)}s)`);

        // Without usable analysis, keep the audio that was already generated
        // and treat the whole text as one Neutral segment (the same fallback
        // shape the Gemini TTS flow uses) instead of discarding it.
        let emotionSegments = analyzedSegments;
        if (!emotionSegments || emotionSegments.length === 0) {
          console.warn("⚠️ No emotion segments returned from Gemini, falling back to a single Neutral segment");
          emotionSegments = [{ emotion: "Neutral", action: null, text: processedText }];
        } else {
          console.log(`✓ Got ${emotionSegments.length} emotion segments from Gemini`);
        }

        if (!responseData || !responseData.audio_base64 || !responseData.alignment) {
          console.error("❌ Invalid response from ElevenLabs:", responseData);
          return;
        }
        console.log("✓ Timestamps received from ElevenLabs");

        // 6. Decode the base64 audio payload.
        console.log("🔄 Step 6: Decoding base64 audio...");
        const audioData = base64ToArrayBuffer(responseData.audio_base64);
        console.log(`✓ Audio data decoded: ${audioData.byteLength} bytes`);

        // 7. Decode to an AudioBuffer.
        console.log("🎵 Step 7: Converting to AudioBuffer...");
        const audioBuffer = await decodeTTSArrayBuffer(audioData);
        console.log(`✓ Audio decoded to AudioBuffer (${audioBuffer.duration.toFixed(2)}s)`);

        // 8. Compute each segment's timing from the alignment data.
        console.log("⏱️ Step 8: Calculating segment timings...");
        const segmentTimings = calculateSegmentEndTimes(responseData.alignment, emotionSegments);

        if (!segmentTimings || segmentTimings.length === 0) {
          console.error("❌ Failed to calculate segment timings - no valid segments returned");
          return;
        }
        console.log(`✓ Calculated timings for ${segmentTimings.length} segments`);

        // 9. Split the audio according to those timings.
        console.log("✂️ Step 9: Splitting audio by timestamps...");
        const audioBlobs = await splitAudioByTimestamps(audioBuffer, segmentTimings);

        if (!audioBlobs || audioBlobs.length === 0) {
          console.error("❌ Failed to split audio - no blobs created");
          return;
        }
        console.log(`✓ Created ${audioBlobs.length} audio blobs`);

        // 10. Create a blob URL per segment. These URLs are handed to the
        //     event listener, so they are not revoked here.
        console.log("🔗 Step 10: Generating blob URLs...");
        console.log("\n🎵 === EMOTION-BASED AUDIO SEGMENTS ===");
        const emotionAudioSegments = audioBlobs.map((wavBlob, index) => {
          const blobUrl = URL.createObjectURL(wavBlob);
          const timing = segmentTimings[index];
          const textPreview = timing.text.substring(0, 50);
          const actionText = timing.action ? ` [Action: ${timing.action}]` : '';

          console.log(`\n📦 Segment ${index + 1} [${timing.emotion}]${actionText}:`);
          console.log(`   Text: "${textPreview}${timing.text.length > 50 ? '...' : ''}"`);
          console.log(`   Time: ${timing.startTime.toFixed(3)}s - ${timing.endTime.toFixed(3)}s`);
          console.log(`   Duration: ${timing.duration.toFixed(3)}s`);
          console.log(`   WAV Blob URL: ${blobUrl}`);

          return {
            emotion: timing.emotion,
            action: timing.action,
            text: timing.text,
            blobUrl: blobUrl,
            startTime: timing.startTime,
            endTime: timing.endTime,
            duration: timing.duration
          };
        });

        console.log("\n✅ All emotion-based segments generated successfully");

        // 11. Hand the segments over via CustomEvent.
        console.log("📤 Step 11: Dispatching segments to Live2D...");
        const event = new CustomEvent('TTSEmotionSegmentsReady', {
          detail: {
            segments: emotionAudioSegments,
            totalDuration: audioBuffer.duration,
            sampleRate: audioBuffer.sampleRate
          }
        });
        window.dispatchEvent(event);

        console.log("✅ Emotion segments dispatched to Live2D script via 'TTSEmotionSegmentsReady' event");
        console.log("⚠️ Playback skipped - Live2D script will handle audio playback\n");

      } catch (error) {
        // Rejection values are not guaranteed to be Error objects (they may be
        // plain objects, or lack a message), so normalise before inspecting.
        const errorMessage = String(error?.message ?? error);
        console.error("❌ Error during emotion-based segmentation:");
        console.error("   Error type:", error?.name);
        console.error("   Error message:", errorMessage);
        console.error("   Error stack:", error?.stack);

        // Best-effort hint at the failing step, numbered as in the logs above.
        if (errorMessage.includes('Gemini') || errorMessage.includes('fetch')) {
          console.error("   → Likely failed during Gemini API call (Step 5)");
        } else if (errorMessage.includes('elevenlabs') || errorMessage.includes('API')) {
          console.error("   → Likely failed during ElevenLabs API call (Step 5)");
        } else if (errorMessage.includes('decode') || errorMessage.includes('Audio')) {
          console.error("   → Likely failed during audio decoding (Step 6-7)");
        } else if (errorMessage.includes('segment') || errorMessage.includes('timing')) {
          console.error("   → Likely failed during segment processing (Step 8-9)");
        }
      }

      return;
    }

    // No Live2D: play locally.
    stopTTS();

    const voiceId = resolveVoiceId(resolveSpeakerNames());
    if (!voiceId || voiceId === 'Default') {
      console.error("TTS Userscript: No ElevenLabs voice selected for this speaker.");
      return;
    }

    const playbackSpeed = parseFloat(settings.elevenlabs_playbackSpeed) || 1.0;
    const requestBody = buildRequestBody(text);

    try {
      updateAllButtonStates(true);

      const responseData = await elevenLabsApiRequest({
        method: 'POST',
        endpoint: `/v1/text-to-speech/${voiceId}/with-timestamps`,
        apiKey: apiKey,
        data: requestBody,
        responseType: 'json'
      });

      if (!responseData || !responseData.audio_base64) {
        throw new Error("ElevenLabs response contained no audio");
      }

      const audioBase64 = responseData.audio_base64;
      const alignment = responseData.alignment;
      console.log("ElevenLabs Timestamps (Alignment):", alignment);

      const audioData = base64ToArrayBuffer(audioBase64);

      // Decode to an AudioBuffer.
      const audioBuffer = await decodeTTSArrayBuffer(audioData);
      logAudioBuffer(audioBuffer);

      // Build a WAV from the AudioBuffer and log a downloadable blob URL.
      // This URL is left alive on purpose so the logged link keeps working.
      const wavBlob = bufferToWave(audioBuffer);
      const wavBlobUrl = URL.createObjectURL(wavBlob);
      console.log("🎵 ElevenLabs Audio WAV Download URL:");
      console.log(wavBlobUrl);
      console.log("💾 To download: Right-click the link above and 'Save link as...' or run:");
      console.log(`const a = document.createElement('a'); a.href = '${wavBlobUrl}'; a.download = 'tts_audio_${Date.now()}.wav'; a.click();`);
      console.log("--------------------");

      // Publish the decoded AudioBuffer for any other consumer.
      dispatchTTSDecodedAudio(audioBuffer, playbackSpeed, alignment);

      // Wrap the received bytes as an MP3 blob for playback.
      const blob = new Blob([audioData], { type: 'audio/mpeg' });
      const audioUrl = URL.createObjectURL(blob);

      const audio = new Audio(audioUrl);
      currentElevenLabsAudio = audio;
      audio.playbackRate = playbackSpeed;

      // Shared cleanup: release the playback blob URL, reset the buttons, and
      // clear the shared holder only if it still points at this audio element.
      const finishPlayback = () => {
        URL.revokeObjectURL(audioUrl);
        updateAllButtonStates(false);
        if (currentElevenLabsAudio === audio) currentElevenLabsAudio = null;
      };

      audio.onended = finishPlayback;
      audio.onerror = finishPlayback;

      // play() returns a promise that rejects when playback cannot start;
      // without a handler the buttons would stay in the "playing" state.
      const playPromise = audio.play();
      if (playPromise && typeof playPromise.catch === 'function') {
        playPromise.catch((playError) => {
          console.error("TTS Userscript: ElevenLabs audio playback failed:", playError);
          if (currentElevenLabsAudio === audio) {
            finishPlayback();
          } else {
            // Superseded or already stopped: only release the blob URL.
            URL.revokeObjectURL(audioUrl);
          }
        });
      }

    } catch (error) {
      console.error("TTS Userscript: ElevenLabs generation failed:", error);
      alert(`ElevenLabs TTS failed: ${error?.message ?? error}`);
      updateAllButtonStates(false);
    }
  }

  // ==========================================================
  // SECCIÓN 7. HELPERS DE API ELEVENLABS
  // ----------------------------------------------------------
  // - Peticiones a la API (GM_xmlhttpRequest)
  // - Validación de API key
  // ==========================================================

  /**
   * Sends a request to the ElevenLabs API through GM_xmlhttpRequest.
   *
   * @param {Object} options
   * @param {string} options.method - HTTP method.
   * @param {string} options.endpoint - Path appended to https://api.elevenlabs.io.
   * @param {string} options.apiKey - Sent as the `xi-api-key` header.
   * @param {Object} [options.params={}] - Query-string parameters.
   * @param {Object|null} [options.data=null] - JSON-serialised request body.
   * @param {string} [options.responseType='json'] - 'json' parses
   *   `responseText`; any other value resolves with `response.response`.
   * @returns {Promise<*>} Resolves with the response payload on any 2xx status.
   *   Rejects with an Error carrying `status` (HTTP status, or 0 for
   *   network/timeout/abort failures) and a `message`.
   */
  function elevenLabsApiRequest(options) {
      const { method, endpoint, apiKey, params = {}, data = null, responseType = 'json' } = options;
      let url = `https://api.elevenlabs.io${endpoint}`;
      if (Object.keys(params).length > 0) url += `?${new URLSearchParams(params).toString()}`;

      // Rejections keep the { status, message } shape callers read, but are
      // real Error instances so `name` and `stack` are available when logged.
      const makeError = (status, message) => Object.assign(new Error(message), { status });

      // Extracts a readable message from an error response body. Handles
      // `detail` as a string, as an array of `{ msg }`, or as an object with a
      // `message` field; otherwise falls back to the status code.
      const describeFailure = (response) => {
          let errorMessage = `Error: ${response.status}`;
          try {
              const errorDetail = JSON.parse(response.responseText).detail;
              if (typeof errorDetail === 'string') errorMessage = errorDetail;
              else if (Array.isArray(errorDetail) && errorDetail[0]?.msg) errorMessage = errorDetail[0].msg;
              else if (typeof errorDetail?.message === 'string') errorMessage = errorDetail.message;
          } catch (e) { /* body is not JSON; keep the status-based message */ }
          return errorMessage;
      };

      return new Promise((resolve, reject) => {
          GM_xmlhttpRequest({
              method: method,
              url: url,
              headers: { "xi-api-key": apiKey, "Content-Type": "application/json" },
              data: data ? JSON.stringify(data) : null,
              responseType: responseType,
              onload: function(response) {
                  if (response.status < 200 || response.status >= 300) {
                      reject(makeError(response.status, describeFailure(response)));
                      return;
                  }
                  if (responseType !== 'json') {
                      resolve(response.response);
                      return;
                  }
                  // A throw inside this callback would never settle the
                  // promise, so a malformed body is turned into a rejection.
                  try {
                      resolve(JSON.parse(response.responseText));
                  } catch (parseError) {
                      reject(makeError(response.status, `Invalid JSON in response: ${parseError.message}`));
                  }
              },
              onerror: function(error) {
                  reject(makeError(0, `Network error: ${error?.statusText || 'Unknown'}`));
              },
              // Without these two handlers a timed-out or aborted request
              // would leave the promise pending forever.
              ontimeout: function() {
                  reject(makeError(0, 'Network error: request timed out'));
              },
              onabort: function() {
                  reject(makeError(0, 'Network error: request aborted'));
              }
          });
      });
  }

  /**
   * Checks an ElevenLabs API key by issuing `GET /v1/models` with it.
   *
   * @param {string} apiKey - Key to validate.
   * @returns {Promise<{isValid: boolean, message: string}>} Never rejects.
   *   `isValid` is true when the request succeeds. When the request fails with
   *   status 0 (network-level failure) or a 5xx status, the message says the
   *   key could not be verified; any other failure is reported as an invalid
   *   key.
   */
  async function validateElevenLabsKey(apiKey) {
      try {
          await elevenLabsApiRequest({ method: "GET", endpoint: "/v1/models", apiKey });
          return { isValid: true, message: "API Key Valid" };
      } catch (error) {
          const status = error?.status;
          // Transport and server-side failures say nothing about the key, so
          // they are not reported as "Invalid API Key".
          const isUnrelatedToKey = status === 0 || (typeof status === 'number' && status >= 500);
          if (isUnrelatedToKey) {
              return { isValid: false, message: `Could not verify API Key: ${error?.message || 'request failed'}` };
          }
          return { isValid: false, message: `Invalid API Key` };
      }
  }

  // ==========================================================
  // SECCIÓN 8. BOTONES DE CONTROL EN CADA MENSAJE
  // ----------------------------------------------------------
  // - Inyecta botón play/stop en panel del mensaje
  // - Respeta ajustes (proveedor, narrar usuario, etc.)
  // ==========================================================

  // Inline SVG markup used as the per-message button content when nothing is
  // playing (injectTempButton assigns it to innerHTML when isPlaying is falsy).
  const PLAY_SVG = `
    <svg class="w-6 h-6 text-gray-800 dark:text-white" aria-hidden="true" xmlns="http://www.w3.org/2000/svg" width="24" height="24" fill="currentColor" viewBox="0 0 24 24">
      <path fill-rule="evenodd" d="M12 5a7 7 0 0 0-7 7v1.17c.313-.11.65-.17 1-.17h2a1 1 0 0 1 1 1v6a1 1 0 0 1-1 1H6a3 3 0 0 1-3-3v-6a9 9 0 0 1 18 0v6a3 3 0 0 1-3 3h-2a1 1 0 0 1-1-1v-6a1 1 0 0 1 1-1h2c.35 0 .687.06 1 .17V12a7 7 0 0 0-7-7Z" clip-rule="evenodd"/>
    </svg>`;

  // Inline SVG markup (a circle enclosing a rounded square) used as the button
  // content while isPlaying is truthy.
  const STOP_SVG = `
    <svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-circle-stop-icon">
      <circle cx="12" cy="12" r="10"/><rect x="9" y="9" width="6" height="6" rx="1"/>
    </svg>`;

  /**
   * Inserts a TTS play/stop button as the first child of a message control panel.
   *
   * Nothing is inserted when the panel is missing, already contains a `.temp-btn`,
   * TTS is disabled for the active provider, or the message is not a bot message
   * and user narration is turned off.
   *
   * @param {Element} panel - Control panel element of a single chat message.
   * @returns {void}
   */
  function injectTempButton(panel) {
    if (!panel || panel.querySelector('.temp-btn')) return;

    // This runs from a MutationObserver callback, so corrupt or non-object stored
    // settings (e.g. the string "null") must not throw; they are treated as
    // "no settings", which leaves TTS disabled and injects nothing.
    let settings = {};
    try {
      const stored = JSON.parse(localStorage.getItem("ttsSettings") || "{}");
      if (stored && typeof stored === 'object') settings = stored;
    } catch (e) {
      settings = {};
    }

    // Setting keys are namespaced per provider; the built-in provider has no prefix.
    const provider = settings.provider || 'builtin';
    const prefix = provider === 'elevenlabs' ? 'elevenlabs_' : provider === 'gemini' ? 'gemini_' : '';

    const ttsEnabled = !!settings[`${prefix}tts-enabled`];
    const narrateUser = !!settings[`${prefix}tts-narrate-user`];
    if (!ttsEnabled) return;

    // A message counts as a bot message when its [data-index] ancestor contains the bot name icon.
    const messageRoot = panel.closest?.('[data-index]');
    const isBot = !!messageRoot?.querySelector(BOT_NAME_ICON_SELECTOR);
    if (!narrateUser && !isBot) return;

    const btn = document.createElement('button');
    btn.type = 'button';
    btn.className = '_controlPanelButton_prxth_8 temp-btn';
    btn.style.marginLeft = '0px';
    btn.innerHTML = isPlaying ? STOP_SVG : PLAY_SVG;

    btn.onclick = function() {
      // While something is being spoken, a click stops playback instead of starting a new one.
      if ((window.speechSynthesis && window.speechSynthesis.speaking) || currentElevenLabsAudio) {
          stopTTS();
          return;
      }
      const messageWrapper = this.closest(MESSAGE_WRAPPER_SELECTOR);
      if (messageWrapper) {
          const messageText = extractFormattedMessageText(messageWrapper);
          const { processed: processedTTS } = processTTSOutput(messageText);

          // Log the raw and processed text for manual clicks
          console.log("📜 Raw extracted text (Manual):");
          console.log(messageText);
          console.log("\n🎤 Processed TTS (Manual):");
          console.log(processedTTS || "[No TTS output]");
          console.log("--------------------");

          if (processedTTS) playTTS(processedTTS, isBot);
      }
    };

    panel.insertBefore(btn, panel.firstChild);
  }

  // Watches for control panels being added to the DOM and injects the TTS button:
  // both when the added element is itself a control panel and when it contains some.
  const controlPanelObserver = new MutationObserver((records) => {
    records.forEach((record) => {
      record.addedNodes.forEach((added) => {
        if (added.nodeType !== Node.ELEMENT_NODE) return;

        if (added.matches(CONTROL_PANEL_SELECTOR)) {
          injectTempButton(added);
        }

        const nestedPanels = added.querySelectorAll?.(CONTROL_PANEL_SELECTOR);
        nestedPanels?.forEach((nested) => injectTempButton(nested));
      });
    });
  });

  /**
   * Injects buttons into the control panels already on the page and starts
   * observing the chat container for new ones. If the chat container is not
   * in the DOM yet, tries again after one second.
   */
  function startControlPanelObserver() {
    const container = document.querySelector(CHAT_CONTAINER_SELECTOR);

    if (!container) {
      // Chat not rendered yet: retry later.
      setTimeout(startControlPanelObserver, 1000);
      return;
    }

    const existingPanels = document.querySelectorAll(CONTROL_PANEL_SELECTOR);
    existingPanels.forEach((existing) => injectTempButton(existing));

    const observeOptions = { childList: true, subtree: true };
    controlPanelObserver.observe(container, observeOptions);
  }

  // Kick off button injection now; startControlPanelObserver re-schedules itself
  // every second until the chat container is present.
  startControlPanelObserver();

})();