import { loadTextToSpeech, loadVoiceStyle, writeWavFile } from './helper.js'; // Configuration const DEFAULT_VOICE_STYLE_PATH = 'assets/voice_styles/M1.json'; // Helper function to extract filename from path function getFilenameFromPath(path) { return path.split('/').pop(); } // Global state let textToSpeech = null; let cfgs = null; // Pre-computed style let currentStyle = null; let currentStylePath = DEFAULT_VOICE_STYLE_PATH; // UI Elements const textInput = document.getElementById('text'); const voiceStyleSelect = document.getElementById('voiceStyleSelect'); const voiceStyleInfo = document.getElementById('voiceStyleInfo'); const langSelect = document.getElementById('langSelect'); const totalStepInput = document.getElementById('totalStep'); const speedInput = document.getElementById('speed'); const generateBtn = document.getElementById('generateBtn'); const statusBox = document.getElementById('statusBox'); const statusText = document.getElementById('statusText'); const backendBadge = document.getElementById('backendBadge'); const resultsContainer = document.getElementById('results'); const errorBox = document.getElementById('error'); function showStatus(message, type = 'info') { statusText.innerHTML = message; statusBox.className = 'status-box'; if (type === 'success') { statusBox.classList.add('success'); } else if (type === 'error') { statusBox.classList.add('error'); } } function showError(message) { errorBox.textContent = message; errorBox.classList.add('active'); } function hideError() { errorBox.classList.remove('active'); } function showBackendBadge() { backendBadge.classList.add('visible'); } // Load voice style from JSON async function loadStyleFromJSON(stylePath) { try { const style = await loadVoiceStyle([stylePath], true); return style; } catch (error) { console.error('Error loading voice style:', error); throw error; } } // Load models on page load async function initializeModels() { try { showStatus('ℹ️ Loading configuration...'); const basePath = 'assets/onnx'; // Try WebGPU first, fallback to WASM let executionProvider = 'wasm'; try { const result = await loadTextToSpeech(basePath, { executionProviders: ['webgpu'], graphOptimizationLevel: 'all' }, (modelName, current, total) => { showStatus(`ℹ️ Loading ONNX models (${current}/${total}): ${modelName}...`); }); textToSpeech = result.textToSpeech; cfgs = result.cfgs; executionProvider = 'webgpu'; backendBadge.textContent = 'WebGPU'; backendBadge.style.background = '#4caf50'; } catch (webgpuError) { console.log('WebGPU not available, falling back to WebAssembly'); const result = await loadTextToSpeech(basePath, { executionProviders: ['wasm'], graphOptimizationLevel: 'all' }, (modelName, current, total) => { showStatus(`ℹ️ Loading ONNX models (${current}/${total}): ${modelName}...`); }); textToSpeech = result.textToSpeech; cfgs = result.cfgs; } showStatus('ℹ️ Loading default voice style...'); // Load default voice style currentStyle = await loadStyleFromJSON(currentStylePath); voiceStyleInfo.textContent = `${getFilenameFromPath(currentStylePath)} (default)`; showStatus(`✅ Models loaded! Using ${executionProvider.toUpperCase()}. You can now generate speech.`, 'success'); showBackendBadge(); generateBtn.disabled = false; } catch (error) { console.error('Error loading models:', error); showStatus(`❌ Error loading models: ${error.message}`, 'error'); } } // Handle voice style selection voiceStyleSelect.addEventListener('change', async (e) => { const selectedValue = e.target.value; if (!selectedValue) return; try { generateBtn.disabled = true; showStatus(`ℹ️ Loading voice style...`, 'info'); currentStylePath = selectedValue; currentStyle = await loadStyleFromJSON(currentStylePath); voiceStyleInfo.textContent = getFilenameFromPath(currentStylePath); showStatus(`✅ Voice style loaded: ${getFilenameFromPath(currentStylePath)}`, 'success'); generateBtn.disabled = false; } catch (error) { showError(`Error loading voice style: ${error.message}`); // Restore default style currentStylePath = DEFAULT_VOICE_STYLE_PATH; voiceStyleSelect.value = currentStylePath; try { currentStyle = await loadStyleFromJSON(currentStylePath); voiceStyleInfo.textContent = `${getFilenameFromPath(currentStylePath)} (default)`; } catch (styleError) { console.error('Error restoring default style:', styleError); } generateBtn.disabled = false; } }); // Main synthesis function async function generateSpeech() { const text = textInput.value.trim(); if (!text) { showError('Please enter some text to synthesize.'); return; } if (!textToSpeech || !cfgs) { showError('Models are still loading. Please wait.'); return; } if (!currentStyle) { showError('Voice style is not ready. Please wait.'); return; } const startTime = Date.now(); try { generateBtn.disabled = true; hideError(); // Clear results and show placeholder resultsContainer.innerHTML = `
`; const totalStep = parseInt(totalStepInput.value); const speed = parseFloat(speedInput.value); const lang = langSelect.value; showStatus('ℹ️ Generating speech from text...'); const tic = Date.now(); const { wav, duration } = await textToSpeech.call( text, lang, currentStyle, totalStep, speed, 0.3, (step, total) => { showStatus(`ℹ️ Denoising (${step}/${total})...`); } ); const toc = Date.now(); console.log(`Text-to-speech synthesis: ${((toc - tic) / 1000).toFixed(2)}s`); showStatus('ℹ️ Creating audio file...'); const wavLen = Math.floor(textToSpeech.sampleRate * duration[0]); const wavOut = wav.slice(0, wavLen); // Create WAV file const wavBuffer = writeWavFile(wavOut, textToSpeech.sampleRate); const blob = new Blob([wavBuffer], { type: 'audio/wav' }); const url = URL.createObjectURL(blob); // Calculate total time and audio duration const endTime = Date.now(); const totalTimeSec = ((endTime - startTime) / 1000).toFixed(2); const audioDurationSec = duration[0].toFixed(2); // Display result with full text resultsContainer.innerHTML = `Generated speech will appear here