import Foundation import OnnxRuntimeBindings struct Args { var useGpu: Bool = false var onnxDir: String = "assets/onnx" var totalStep: Int = 5 var speed: Float = 1.05 var nTest: Int = 4 var voiceStyle: [String] = ["assets/voice_styles/M1.json"] var text: [String] = ["This morning, I took a walk in the park, and the sound of the birds and the breeze was so pleasant that I stopped for a long time just to listen."] var lang: [String] = ["en"] var saveDir: String = "results" var batch: Bool = false } func parseArgs() -> Args { var args = Args() let arguments = CommandLine.arguments var i = 1 while i < arguments.count { let arg = arguments[i] switch arg { case "--use-gpu": args.useGpu = true case "--onnx-dir": if i + 1 < arguments.count { args.onnxDir = arguments[i + 1] i += 1 } case "--total-step": if i + 1 < arguments.count { args.totalStep = Int(arguments[i + 1]) ?? 5 i += 1 } case "--speed": if i + 1 < arguments.count { args.speed = Float(arguments[i + 1]) ?? 1.05 i += 1 } case "--n-test": if i + 1 < arguments.count { args.nTest = Int(arguments[i + 1]) ?? 4 i += 1 } case "--voice-style": if i + 1 < arguments.count { args.voiceStyle = arguments[i + 1].components(separatedBy: ",") i += 1 } case "--text": if i + 1 < arguments.count { args.text = arguments[i + 1].components(separatedBy: "|") i += 1 } case "--lang": if i + 1 < arguments.count { args.lang = arguments[i + 1].components(separatedBy: ",") i += 1 } case "--save-dir": if i + 1 < arguments.count { args.saveDir = arguments[i + 1] i += 1 } case "--batch": args.batch = true default: break } i += 1 } return args } @main struct ExampleONNX { static func main() async { print("=== TTS Inference with ONNX Runtime (Swift) ===\n") // --- 1. Parse arguments --- // let args = parseArgs() if args.batch { guard args.voiceStyle.count == args.text.count else { print("Error: Number of voice styles (\(args.voiceStyle.count)) must match number of texts (\(args.text.count))") return } guard args.lang.count == args.text.count else { print("Error: Number of languages (\(args.lang.count)) must match number of texts (\(args.text.count))") return } } let bsz = args.voiceStyle.count do { let env = try ORTEnv(loggingLevel: .warning) // --- 2. Load TTS components --- // let textToSpeech = try loadTextToSpeech(args.onnxDir, args.useGpu, env) // --- 3. Load voice styles --- // let style = try loadVoiceStyle(args.voiceStyle, verbose: true) // --- 4. Synthesize speech --- // try? FileManager.default.createDirectory(atPath: args.saveDir, withIntermediateDirectories: true) for n in 0..