Files
Supertonic/web/main.js
2026-01-25 18:58:40 +09:00

292 lines
9.8 KiB
JavaScript
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import {
loadTextToSpeech,
loadVoiceStyle,
writeWavFile
} from './helper.js';
// Configuration
// Voice style JSON that is loaded at startup and restored whenever a
// user-selected style fails to load.
const DEFAULT_VOICE_STYLE_PATH = 'assets/voice_styles/M1.json';
/**
 * Returns the last '/'-separated segment of a path.
 *
 * @param {string} path - Path using forward slashes as separators.
 * @returns {string} Everything after the final '/'; the whole input when it
 *   contains no '/', and an empty string when it ends with '/'.
 */
function getFilenameFromPath(path) {
  const segments = path.split('/');
  return segments[segments.length - 1];
}
// ---- Mutable module state ----
// Synthesis engine and its configuration; assigned by initializeModels()
// once the models have loaded.
let textToSpeech = null;
let cfgs = null;
// Voice style currently in use and the path it was requested from.
let currentStyle = null;
let currentStylePath = DEFAULT_VOICE_STYLE_PATH;

// ---- DOM references ----
// Small lookup shorthand; every element below is resolved once at module load.
const byId = (id) => document.getElementById(id);
// Inputs
const textInput = byId('text');
const voiceStyleSelect = byId('voiceStyleSelect');
const voiceStyleInfo = byId('voiceStyleInfo');
const langSelect = byId('langSelect');
const totalStepInput = byId('totalStep');
const speedInput = byId('speed');
const generateBtn = byId('generateBtn');
// Status / feedback
const statusBox = byId('statusBox');
const statusText = byId('statusText');
const backendBadge = byId('backendBadge');
// Output
const resultsContainer = byId('results');
const errorBox = byId('error');
/**
 * Writes a message into the status box and selects its visual variant.
 *
 * @param {string} message - Markup assigned to the status text via innerHTML
 *   (callers in this file pass <strong> tags).
 * @param {string} [type='info'] - 'success' or 'error' adds the class of the
 *   same name; any other value leaves only the base 'status-box' class.
 */
function showStatus(message, type = 'info') {
  statusText.innerHTML = message;
  // Reset to the base class first so a previous variant never lingers.
  statusBox.className = 'status-box';
  const variantClasses = ['success', 'error'];
  if (variantClasses.includes(type)) {
    statusBox.classList.add(type);
  }
}
/**
 * Reveals the error box with the given message.
 *
 * @param {string} message - Plain text; assigned through textContent, so it
 *   is not interpreted as markup.
 */
function showError(message) {
  const { classList } = errorBox;
  classList.add('active');
  errorBox.textContent = message;
}
/**
 * Hides the error box by dropping its 'active' class. The previous message
 * text is left in place.
 */
function hideError() {
  const { classList } = errorBox;
  classList.remove('active');
}
/**
 * Makes the backend badge visible by adding its 'visible' class.
 */
function showBackendBadge() {
  const { classList } = backendBadge;
  classList.add('visible');
}
/**
 * Loads one voice style through loadVoiceStyle() from helper.js.
 *
 * @param {string} stylePath - Path of the style JSON; passed to the helper as
 *   a single-element array together with `true` as the second argument
 *   (the flag's meaning is defined by helper.js).
 * @returns {Promise<*>} Whatever loadVoiceStyle() resolves with.
 * @throws Re-throws the helper's error unchanged after logging it.
 */
async function loadStyleFromJSON(stylePath) {
  try {
    return await loadVoiceStyle([stylePath], true);
  } catch (loadError) {
    console.error('Error loading voice style:', loadError);
    throw loadError;
  }
}
// Load models on page load
/**
 * Loads the text-to-speech models from 'assets/onnx' and the current voice
 * style, then enables the generate button.
 *
 * The WebGPU execution provider is tried first; if that load fails for any
 * reason the failure is logged and the models are loaded with the WASM
 * provider instead. Load failures are caught here: they are logged, shown in
 * the status box, and the generate button is left in its current state.
 *
 * @returns {Promise<void>}
 */
async function initializeModels() {
  const basePath = 'assets/onnx';
  // One progress reporter shared by both backend attempts.
  const reportProgress = (modelName, current, total) => {
    showStatus(` <strong>Loading ONNX models (${current}/${total}):</strong> ${modelName}...`);
  };
  const loadWithProvider = (provider) => loadTextToSpeech(
    basePath,
    { executionProviders: [provider], graphOptimizationLevel: 'all' },
    reportProgress
  );

  try {
    showStatus(' <strong>Loading configuration...</strong>');
    // Try WebGPU first, fallback to WASM
    let executionProvider = 'wasm';
    let loaded;
    try {
      loaded = await loadWithProvider('webgpu');
      executionProvider = 'webgpu';
    } catch (webgpuError) {
      // Keep the reason: without it a broken asset path is indistinguishable
      // from a browser that simply lacks WebGPU.
      console.warn('WebGPU not available, falling back to WebAssembly', webgpuError);
      loaded = await loadWithProvider('wasm');
    }
    textToSpeech = loaded.textToSpeech;
    cfgs = loaded.cfgs;
    if (executionProvider === 'webgpu') {
      // The badge is only rewritten for WebGPU; the WASM case keeps whatever
      // the page markup provides.
      backendBadge.textContent = 'WebGPU';
      backendBadge.style.background = '#4caf50';
    }
    showStatus(' <strong>Loading default voice style...</strong>');
    // Load default voice style
    currentStyle = await loadStyleFromJSON(currentStylePath);
    voiceStyleInfo.textContent = `${getFilenameFromPath(currentStylePath)} (default)`;
    showStatus(`✅ <strong>Models loaded!</strong> Using ${executionProvider.toUpperCase()}. You can now generate speech.`, 'success');
    showBackendBadge();
    generateBtn.disabled = false;
  } catch (error) {
    console.error('Error loading models:', error);
    showStatus('❌ <strong>Error loading models:</strong> ', 'error');
    // Append the message as a text node: showStatus() assigns innerHTML, and
    // messages such as "Unexpected token < ..." must not be parsed as markup.
    statusText.append(error.message);
  }
}
// Handle voice style selection.
// Loads the chosen style and makes it current. If loading fails, the error is
// shown, the selector is reset to DEFAULT_VOICE_STYLE_PATH and the default
// style is reloaded on a best-effort basis.
voiceStyleSelect.addEventListener('change', async (e) => {
  const selectedValue = e.target.value;
  if (!selectedValue) return;
  generateBtn.disabled = true;
  // Clear any error left over from an earlier failed selection so a
  // successful load does not keep showing it.
  hideError();
  try {
    showStatus(` <strong>Loading voice style...</strong>`, 'info');
    currentStylePath = selectedValue;
    currentStyle = await loadStyleFromJSON(currentStylePath);
    voiceStyleInfo.textContent = getFilenameFromPath(currentStylePath);
    showStatus(`✅ <strong>Voice style loaded:</strong> ${getFilenameFromPath(currentStylePath)}`, 'success');
  } catch (error) {
    showError(`Error loading voice style: ${error.message}`);
    // Replace the "Loading voice style..." status, which would otherwise stay
    // on screen indefinitely. The detailed message is in the error box.
    showStatus('❌ <strong>Error loading voice style.</strong> Falling back to the default style.', 'error');
    // Restore default style
    currentStylePath = DEFAULT_VOICE_STYLE_PATH;
    voiceStyleSelect.value = currentStylePath;
    try {
      currentStyle = await loadStyleFromJSON(currentStylePath);
      voiceStyleInfo.textContent = `${getFilenameFromPath(currentStylePath)} (default)`;
    } catch (styleError) {
      console.error('Error restoring default style:', styleError);
    }
  } finally {
    // Re-enable only once the models exist; while they are still loading (or
    // failed to load) the button must stay disabled.
    // NOTE(review): a style change made during a running synthesis still
    // re-enables the button early; fixing that needs a shared "busy" flag.
    generateBtn.disabled = !textToSpeech;
  }
});
// Main synthesis function

// Object URL of the most recently generated audio; kept so it can be revoked
// when the result it belongs to is replaced.
let lastAudioUrl = null;

/**
 * Synthesizes the text in the input box and renders the result (input text,
 * timings, audio player, download button) into the results container.
 *
 * Validation failures (empty text, models or style not ready, non-numeric or
 * non-positive step/speed values) are reported through showError() and the
 * function returns without synthesizing. Errors thrown during synthesis are
 * caught, logged, reported in the status and error boxes, and the results
 * placeholder is restored. The generate button is disabled for the duration
 * of the synthesis and re-enabled afterwards.
 *
 * @returns {Promise<void>}
 */
async function generateSpeech() {
  const text = textInput.value.trim();
  if (!text) {
    showError('Please enter some text to synthesize.');
    return;
  }
  if (!textToSpeech || !cfgs) {
    showError('Models are still loading. Please wait.');
    return;
  }
  if (!currentStyle) {
    showError('Voice style is not ready. Please wait.');
    return;
  }
  // Parse numeric inputs up front so NaN never reaches the synthesizer.
  const totalStep = Number.parseInt(totalStepInput.value, 10);
  const speed = Number.parseFloat(speedInput.value);
  if (!Number.isInteger(totalStep) || totalStep <= 0) {
    showError('Total steps must be a positive whole number.');
    return;
  }
  if (!Number.isFinite(speed) || speed <= 0) {
    showError('Speed must be a positive number.');
    return;
  }
  const startTime = Date.now();
  try {
    generateBtn.disabled = true;
    hideError();
    // The previous result is about to be removed from the page, so its blob
    // URL is no longer needed; revoke it to release the audio data.
    if (lastAudioUrl) {
      URL.revokeObjectURL(lastAudioUrl);
      lastAudioUrl = null;
    }
    // Clear results and show placeholder
    resultsContainer.innerHTML = `
      <div class="results-placeholder generating">
        <div class="results-placeholder-icon">⏳</div>
        <p>Generating speech...</p>
      </div>
    `;
    const lang = langSelect.value;
    showStatus(' <strong>Generating speech from text...</strong>');
    const tic = Date.now();
    const { wav, duration } = await textToSpeech.call(
      text,
      lang,
      currentStyle,
      totalStep,
      speed,
      0.3, // fixed sixth argument; its meaning is defined by helper.js — TODO confirm
      (step, total) => {
        showStatus(` <strong>Denoising (${step}/${total})...</strong>`);
      }
    );
    const toc = Date.now();
    console.log(`Text-to-speech synthesis: ${((toc - tic) / 1000).toFixed(2)}s`);
    showStatus(' <strong>Creating audio file...</strong>');
    // Keep only the samples covered by the reported duration of the first
    // (and here only) entry.
    const wavLen = Math.floor(textToSpeech.sampleRate * duration[0]);
    const wavOut = wav.slice(0, wavLen);
    // Create WAV file
    const wavBuffer = writeWavFile(wavOut, textToSpeech.sampleRate);
    const blob = new Blob([wavBuffer], { type: 'audio/wav' });
    const url = URL.createObjectURL(blob);
    lastAudioUrl = url;
    // Calculate total time and audio duration
    const endTime = Date.now();
    const totalTimeSec = ((endTime - startTime) / 1000).toFixed(2);
    const audioDurationSec = duration[0].toFixed(2);
    // Display result. Only values generated here (numbers, blob URL) are
    // interpolated into the markup; the user's text is filled in afterwards.
    resultsContainer.innerHTML = `
      <div class="result-item">
        <div class="result-text-container">
          <div class="result-text-label">Input Text</div>
          <div class="result-text"></div>
        </div>
        <div class="result-info">
          <div class="info-item">
            <span>📊 Audio Length</span>
            <strong>${audioDurationSec}s</strong>
          </div>
          <div class="info-item">
            <span>⏱️ Generation Time</span>
            <strong>${totalTimeSec}s</strong>
          </div>
        </div>
        <div class="result-player">
          <audio controls>
            <source src="${url}" type="audio/wav">
          </audio>
        </div>
        <div class="result-actions">
          <button onclick="downloadAudio('${url}', 'synthesized_speech.wav')">
            <span>⬇️</span>
            <span>Download WAV</span>
          </button>
        </div>
      </div>
    `;
    // textContent shows the text verbatim; characters such as '<' or '&' are
    // not parsed as HTML.
    resultsContainer.querySelector('.result-text').textContent = text;
    showStatus('✅ <strong>Speech synthesis completed successfully!</strong>', 'success');
  } catch (error) {
    console.error('Error during synthesis:', error);
    showStatus('❌ <strong>Error during synthesis:</strong> ', 'error');
    // Appended as a text node so the message is never parsed as markup.
    statusText.append(error.message);
    showError(`Error during synthesis: ${error.message}`);
    // Restore placeholder
    resultsContainer.innerHTML = `
      <div class="results-placeholder">
        <div class="results-placeholder-icon">🎤</div>
        <p>Generated speech will appear here</p>
      </div>
    `;
  } finally {
    generateBtn.disabled = false;
  }
}
// Download handler. Exposed on window because the generated result markup
// invokes it from an inline onclick attribute.
/**
 * Starts a download by clicking a temporary, detached anchor element.
 *
 * @param {string} url - Address of the resource to download.
 * @param {string} filename - Value for the anchor's download attribute.
 */
window.downloadAudio = function(url, filename) {
  const link = Object.assign(document.createElement('a'), {
    href: url,
    download: filename,
  });
  link.click();
};
// Run a synthesis whenever the generate button is clicked.
generateBtn.addEventListener('click', generateSpeech);
// After the window has loaded, lock the generate button and start loading
// the models; initializeModels() unlocks the button on success.
window.addEventListener('load', () => {
  generateBtn.disabled = true;
  return initializeModels();
});