using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Media;

namespace Supertonic
{
    /// <summary>
    /// Command-line entry point: parses options, loads the text-to-speech engine and
    /// voice styles through <c>Helper</c>, synthesizes speech, writes WAV files and
    /// plays each one back.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Command-line options together with their default values.
        /// </summary>
        class Args
        {
            /// <summary>Forwarded to <c>Helper.LoadTextToSpeech</c> (<c>--use-gpu</c>).</summary>
            public bool UseGpu { get; set; } = false;

            /// <summary>Directory handed to <c>Helper.LoadTextToSpeech</c> (<c>--onnx-dir</c>).</summary>
            public string OnnxDir { get; set; } = "./assets/onnx";

            /// <summary>Step count forwarded to the synthesis call (<c>--total-step</c>).</summary>
            public int TotalStep { get; set; } = 5;

            /// <summary>Speed value forwarded to the synthesis call (<c>--speed</c>).</summary>
            public float Speed { get; set; } = 1.05f;

            /// <summary>Number of synthesis iterations to run (<c>--n-test</c>).</summary>
            public int NTest { get; set; } = 4;

            /// <summary>Voice style file paths; comma-separated on the command line (<c>--voice-style</c>).</summary>
            public List<string> VoiceStyle { get; set; } = new List<string> { "assets/voice_styles/F2.json" };

            /// <summary>Texts to synthesize; separated by <c>|</c> on the command line (<c>--text</c>).</summary>
            public List<string> Text { get; set; } = new List<string> { "동해물과 백두산이 마르고 닳도록 하느님이 보우하사. 우리 나라 만세~~" };

            /// <summary>Language codes; comma-separated on the command line (<c>--lang</c>).</summary>
            public List<string> Lang { get; set; } = new List<string> { "ko" };

            /// <summary>Directory the WAV files are written to (<c>--save-dir</c>).</summary>
            public string SaveDir { get; set; } = "results";

            /// <summary>When true, all texts are synthesized with one batch call; otherwise only the first text is used (<c>--batch</c>).</summary>
            public bool Batch { get; set; } = false;

            /// <summary>Fixed seed for every iteration; when null a fresh random seed is drawn per iteration (<c>--seed</c>).</summary>
            public int? Seed { get; set; } = null;

            /// <summary>
            /// Amount of leading silence, multiplied by the engine's sample rate to get a sample count
            /// (<c>--pre-silence</c>). Presumably seconds, assuming the sample rate is in Hz.
            /// </summary>
            public float PreSilence { get; set; } = 0.2f;
        }

        /// <summary>
        /// Converts the raw command-line arguments into an <see cref="Args"/> instance.
        /// </summary>
        /// <param name="args">Raw arguments as received by <c>Main</c>.</param>
        /// <returns>Options with defaults applied for everything not specified.</returns>
        /// <exception cref="FormatException">A numeric option value is not a valid number.</exception>
        /// <exception cref="OverflowException">A numeric option value is out of range.</exception>
        static Args ParseArgs(string[] args)
        {
            // Numbers on the command line are machine-readable: always parse them with the
            // invariant culture so "1.05" means the same thing on every locale.
            var invariant = CultureInfo.InvariantCulture;
            var result = new Args();

            for (int i = 0; i < args.Length; i++)
            {
                switch (args[i])
                {
                    case "--use-gpu":
                        result.UseGpu = true;
                        break;
                    case "--batch":
                        result.Batch = true;
                        break;
                    case "--onnx-dir" when i + 1 < args.Length:
                        result.OnnxDir = args[++i];
                        break;
                    case "--total-step" when i + 1 < args.Length:
                        result.TotalStep = int.Parse(args[++i], invariant);
                        break;
                    case "--speed" when i + 1 < args.Length:
                        result.Speed = float.Parse(args[++i], invariant);
                        break;
                    case "--n-test" when i + 1 < args.Length:
                        result.NTest = int.Parse(args[++i], invariant);
                        break;
                    case "--voice-style" when i + 1 < args.Length:
                        result.VoiceStyle = args[++i].Split(',').ToList();
                        break;
                    case "--text" when i + 1 < args.Length:
                        result.Text = args[++i].Split('|').ToList();
                        break;
                    case "--lang" when i + 1 < args.Length:
                        result.Lang = args[++i].Split(',').ToList();
                        break;
                    case "--save-dir" when i + 1 < args.Length:
                        result.SaveDir = args[++i];
                        break;
                    case "--seed" when i + 1 < args.Length:
                        result.Seed = int.Parse(args[++i], invariant);
                        break;
                    case "--pre-silence" when i + 1 < args.Length:
                        result.PreSilence = float.Parse(args[++i], invariant);
                        break;
                    default:
                        // Unknown flags and flags missing their value are still skipped,
                        // but no longer silently.
                        Console.Error.WriteLine($"Warning: ignoring unrecognized or incomplete argument '{args[i]}'.");
                        break;
                }
            }

            return result;
        }

        /// <summary>
        /// Runs the synthesis loop: for each of the requested iterations, generates audio,
        /// prepends the configured silence, writes one WAV file per batch item and plays it.
        /// </summary>
        /// <param name="args">Raw command-line arguments.</param>
        /// <exception cref="ArgumentException">The number of voice styles differs from the number of texts.</exception>
        static void Main(string[] args)
        {
            Console.WriteLine("=== TTS Inference with ONNX Runtime (C#) ===\n");
            Console.WriteLine("sample seed : 371279630");

            // --- 1. Parse arguments --- //
            var parsedArgs = ParseArgs(args);
            int totalStep = parsedArgs.TotalStep;
            float speed = parsedArgs.Speed;
            int nTest = parsedArgs.NTest;
            string saveDir = parsedArgs.SaveDir;
            var voiceStylePaths = parsedArgs.VoiceStyle;
            var textList = parsedArgs.Text;
            var langList = parsedArgs.Lang;
            bool batch = parsedArgs.Batch;

            if (voiceStylePaths.Count != textList.Count)
            {
                throw new ArgumentException(
                    $"Number of voice styles ({voiceStylePaths.Count}) must match number of texts ({textList.Count})");
            }

            int bsz = voiceStylePaths.Count;

            // --- 2. Load Text to Speech --- //
            var textToSpeech = Helper.LoadTextToSpeech(parsedArgs.OnnxDir, parsedArgs.UseGpu);
            Console.WriteLine();

            // --- 3. Load Voice Style --- //
            var style = Helper.LoadVoiceStyle(voiceStylePaths, verbose: true);

            // --- 4. Synthesize speech --- //
            Random seedGenerator = new Random();

            for (int n = 0; n < nTest; n++)
            {
                // A user-supplied seed is reused for every iteration; otherwise draw a new one.
                int currentSeed = parsedArgs.Seed ?? seedGenerator.Next();
                Console.WriteLine($"\n[{n + 1}/{nTest}] Starting synthesis (Seed: {currentSeed})...");

                var (wav, duration) = Helper.Timer("Generating speech from text", () =>
                {
                    if (batch)
                    {
                        return textToSpeech.Batch(textList, langList, style, totalStep, speed, currentSeed);
                    }
                    else
                    {
                        return textToSpeech.Call(textList[0], langList[0], style, totalStep, speed, seed: currentSeed);
                    }
                });

                // CreateDirectory is a no-op when the directory already exists.
                Directory.CreateDirectory(saveDir);

                // NOTE(review): in non-batch mode only textList[0] is synthesized, yet this loop
                // still runs bsz times; confirm Call() returns data for every style when bsz > 1.
                for (int b = 0; b < bsz; b++)
                {
                    string fname = $"{Helper.SanitizeFilename(textList[b], 20)}_{n + 1}_s{currentSeed}.wav";
                    int wavLen = (int)(textToSpeech.SampleRate * duration[b]);

                    // --- Add Pre-Silence (Delay) --- //
                    // Clamp so a negative --pre-silence cannot produce a negative length/offset.
                    int silenceSamples = Math.Max(0, (int)(textToSpeech.SampleRate * parsedArgs.PreSilence));
                    var wavOut = new float[wavLen + silenceSamples];

                    // The array is zero-initialized, so the leading silence is already in place;
                    // copy this item's slice of the batch buffer right after it.
                    Array.Copy(wav, b * wav.Length / bsz, wavOut, silenceSamples, Math.Min(wavLen, wav.Length / bsz));

                    string outputPath = Path.Combine(saveDir, fname);
                    Helper.WriteWavFile(outputPath, wavOut, textToSpeech.SampleRate);
                    Console.WriteLine($"Saved: {outputPath}");

                    // --- Play the generated audio --- //
                    // Playback is best-effort: a failure is reported but does not abort the run.
                    try
                    {
                        using (var player = new SoundPlayer(outputPath))
                        {
                            Console.WriteLine("Playing audio...");
                            player.PlaySync();
                        }
                    }
                    catch (Exception ex)
                    {
                        Console.WriteLine($"Warning: Could not play audio. {ex.Message}");
                    }
                }
            }

            Console.WriteLine("\n=== Synthesis completed successfully! ===");
        }
    }
}