120 lines
4.3 KiB
JavaScript
120 lines
4.3 KiB
JavaScript
import fs from 'fs';
|
|
import path from 'path';
|
|
import { fileURLToPath } from 'url';
|
|
|
|
import { loadTextToSpeech, loadVoiceStyle, timer, writeWavFile, sanitizeFilename } from './helper.js';
|
|
|
|
// ES modules do not define the CommonJS `__filename` / `__dirname` globals,
// so both are derived from this module's own URL.
const __filename = fileURLToPath(import.meta.url);
const { dir: __dirname } = path.parse(__filename);
|
|
|
|
/**
 * Parse command line arguments.
 *
 * Recognized options:
 *   --use-gpu                 boolean flag
 *   --batch                   boolean flag
 *   --onnx-dir <dir>
 *   --total-step <int>
 *   --speed <float>
 *   --n-test <int>
 *   --voice-style <a,b,...>   comma-separated list
 *   --text <a|b|...>          pipe-separated list
 *   --lang <a,b,...>          comma-separated list
 *   --save-dir <dir>
 *
 * Unknown arguments are ignored. A value-taking option that is the very last
 * argument (so it has no value after it) is ignored as well.
 *
 * @param {string[]} [argv=process.argv.slice(2)] - Arguments to parse, without
 *   the node executable and script path.
 * @returns {{useGpu: boolean, onnxDir: string, totalStep: number, speed: number,
 *   nTest: number, voiceStyle: string[], text: string[], lang: string[],
 *   saveDir: string, batch: boolean}} Parsed options with defaults applied.
 * @throws {Error} If the value given to --total-step, --speed or --n-test does
 *   not parse as a number.
 */
function parseArgs(argv = process.argv.slice(2)) {
    const args = {
        useGpu: false,
        onnxDir: 'assets/onnx',
        totalStep: 5,
        speed: 1.05,
        nTest: 4,
        voiceStyle: ['assets/voice_styles/M1.json'],
        text: ['This morning, I took a walk in the park, and the sound of the birds and the breeze was so pleasant that I stopped for a long time just to listen.'],
        lang: ['en'],
        saveDir: 'results',
        batch: false
    };

    // Reject NaN up front: a NaN step count or repeat count would otherwise
    // flow into inference or silently skip the synthesis loop.
    const toNumber = (flag, raw, parse) => {
        const value = parse(raw);
        if (Number.isNaN(value)) {
            throw new Error(`Invalid numeric value for ${flag}: ${raw}`);
        }
        return value;
    };
    const toInt = (flag, raw) => toNumber(flag, raw, (s) => Number.parseInt(s, 10));
    const toFloat = (flag, raw) => toNumber(flag, raw, (s) => Number.parseFloat(s));

    // Flags that take no value.
    const switches = new Map([
        ['--use-gpu', 'useGpu'],
        ['--batch', 'batch']
    ]);

    // Options that consume the following argument as their value.
    const valueOptions = new Map([
        ['--onnx-dir', (v) => { args.onnxDir = v; }],
        ['--total-step', (v) => { args.totalStep = toInt('--total-step', v); }],
        ['--speed', (v) => { args.speed = toFloat('--speed', v); }],
        ['--n-test', (v) => { args.nTest = toInt('--n-test', v); }],
        ['--voice-style', (v) => { args.voiceStyle = v.split(','); }],
        ['--text', (v) => { args.text = v.split('|'); }],
        ['--lang', (v) => { args.lang = v.split(','); }],
        ['--save-dir', (v) => { args.saveDir = v; }]
    ]);

    for (let i = 0; i < argv.length; i++) {
        const arg = argv[i];

        const switchKey = switches.get(arg);
        if (switchKey !== undefined) {
            args[switchKey] = true;
            continue;
        }

        const applyValue = valueOptions.get(arg);
        if (applyValue !== undefined && i + 1 < argv.length) {
            // Consume the next argument as this option's value.
            i += 1;
            applyValue(argv[i]);
        }
    }

    return args;
}
|
|
|
|
/**
 * Main inference function.
 *
 * Parses the command line, loads the text-to-speech model and the voice
 * style(s), then runs synthesis `nTest` times and writes one WAV file per
 * batch entry into the save directory on every run.
 *
 * The ONNX directory and voice style paths are resolved relative to this
 * script's directory; the save directory is used as given (so a relative
 * value is relative to the current working directory).
 *
 * @returns {Promise<void>} Resolves once all runs have been written.
 * @throws {Error} If the number of voice styles differs from the number of
 *   texts, or if (in batch mode) the number of languages is neither 1 nor
 *   equal to the number of texts.
 */
async function main() {
    console.log('=== TTS Inference with ONNX Runtime (Node.js) ===\n');

    // --- 1. Parse arguments --- //
    const args = parseArgs();
    const { totalStep, speed, nTest, saveDir, batch } = args;
    const textList = args.text;
    const voiceStylePaths = args.voiceStyle.map((p) => path.resolve(__dirname, p));

    if (voiceStylePaths.length !== textList.length) {
        throw new Error(`Number of voice styles (${voiceStylePaths.length}) must match number of texts (${textList.length})`);
    }
    const bsz = voiceStylePaths.length;

    // Batch mode hands the whole language list to the model, so it has to line
    // up with the texts. A single language is applied to every text.
    let langList = args.lang;
    if (batch && langList.length !== bsz) {
        if (langList.length !== 1) {
            throw new Error(`Number of languages (${langList.length}) must be 1 or match number of texts (${bsz})`);
        }
        langList = new Array(bsz).fill(langList[0]);
    }

    // --- 2. Load Text to Speech --- //
    const onnxDir = path.resolve(__dirname, args.onnxDir);
    const textToSpeech = await loadTextToSpeech(onnxDir, args.useGpu);

    // --- 3. Load Voice Style --- //
    const style = loadVoiceStyle(voiceStylePaths, true);

    // Create the output directory once up front. With `recursive: true`,
    // mkdirSync does not fail when the directory already exists.
    fs.mkdirSync(saveDir, { recursive: true });

    // --- 4. Synthesize speech --- //
    for (let n = 0; n < nTest; n++) {
        console.log(`\n[${n + 1}/${nTest}] Starting synthesis...`);

        // NOTE(review): non-batch mode synthesizes only textList[0], yet the
        // save loop below still iterates over all `bsz` entries — confirm the
        // helper rejects multiple voice styles in that mode.
        const { wav, duration } = await timer('Generating speech from text', async () => {
            if (batch) {
                return await textToSpeech.batch(textList, langList, style, totalStep, speed);
            }
            return await textToSpeech.call(textList[0], langList[0], style, totalStep, speed);
        });

        // `wav` is treated as one flat buffer holding `bsz` equal-length
        // segments; each segment is trimmed to its own duration before saving.
        const samplesPerItem = wav.length / bsz;
        for (let b = 0; b < bsz; b++) {
            const fname = `${sanitizeFilename(textList[b], 20)}_${n + 1}.wav`;
            // presumably duration[b] is in seconds — TODO confirm against helper
            const wavLen = Math.floor(textToSpeech.sampleRate * duration[b]);
            const start = b * samplesPerItem;
            const wavOut = wav.slice(start, start + wavLen);

            const outputPath = path.join(saveDir, fname);
            writeWavFile(outputPath, wavOut, textToSpeech.sampleRate);
            console.log(`Saved: ${outputPath}`);
        }
    }

    console.log('\n=== Synthesis completed successfully! ===');
}
|
|
|
|
// Entry point: run the inference, and on any failure log it and exit with a
// non-zero status code.
(async () => {
    try {
        await main();
    } catch (err) {
        console.error('Error during inference:', err);
        process.exit(1);
    }
})();
|