diff --git a/HMI/SubProject/tts/App.config b/HMI/SubProject/tts/App.config
new file mode 100644
index 0000000..e9aa520
--- /dev/null
+++ b/HMI/SubProject/tts/App.config
@@ -0,0 +1,6 @@
+
+
+
+
+
+
diff --git a/HMI/SubProject/tts/Helper.cs b/HMI/SubProject/tts/Helper.cs
new file mode 100644
index 0000000..d13293b
--- /dev/null
+++ b/HMI/SubProject/tts/Helper.cs
@@ -0,0 +1,889 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Text;
+using System.Text.Json;
+using System.Text.RegularExpressions;
+using Microsoft.ML.OnnxRuntime;
+using Microsoft.ML.OnnxRuntime.Tensors;
+
+namespace Supertonic.WinForms
+{
+    /// <summary>
+    /// Language codes accepted by the multilingual TTS text pre-processor.
+    /// </summary>
+    public static class Languages
+    {
+        /// <summary>Supported language codes, in declaration order.</summary>
+        public static readonly string[] Available = new[]
+        {
+            "en",
+            "ko",
+            "es",
+            "pt",
+            "fr",
+        };
+    }
+
+ // ============================================================================
+ // Configuration classes
+ // ============================================================================
+
+    /// <summary>
+    /// Model settings used by the TTS pipeline, split into an "AE" and a "TTL" section.
+    /// </summary>
+    public class Config
+    {
+        /// <summary>Sample-rate and chunk-size settings.</summary>
+        public class AEConfig
+        {
+            /// <summary>Audio sample rate.</summary>
+            public int SampleRate { get; set; }
+
+            /// <summary>Base chunk size; multiplied by the compress factor to size latent frames.</summary>
+            public int BaseChunkSize { get; set; }
+        }
+
+        /// <summary>Latent-space settings.</summary>
+        public class TTLConfig
+        {
+            /// <summary>Factor applied to both the base chunk size and the latent dimension.</summary>
+            public int ChunkCompressFactor { get; set; }
+
+            /// <summary>Latent dimension before the compress factor is applied.</summary>
+            public int LatentDim { get; set; }
+        }
+
+        /// <summary>AE section; null until assigned.</summary>
+        public AEConfig AE { get; set; }
+
+        /// <summary>TTL section; null until assigned.</summary>
+        public TTLConfig TTL { get; set; }
+    }
+
+ // ============================================================================
+ // Style class
+ // ============================================================================
+
+    /// <summary>
+    /// Holds two flattened style tensors ("ttl" and "dp") together with their shapes.
+    /// </summary>
+    public class Style
+    {
+        /// <summary>Stores the given arrays as-is; no copy is made.</summary>
+        /// <param name="ttl">Flattened "ttl" style values.</param>
+        /// <param name="ttlShape">Shape describing <paramref name="ttl"/>.</param>
+        /// <param name="dp">Flattened "dp" style values.</param>
+        /// <param name="dpShape">Shape describing <paramref name="dp"/>.</param>
+        public Style(float[] ttl, long[] ttlShape, float[] dp, long[] dpShape)
+        {
+            (Ttl, TtlShape) = (ttl, ttlShape);
+            (Dp, DpShape) = (dp, dpShape);
+        }
+
+        /// <summary>Flattened "ttl" style values.</summary>
+        public float[] Ttl { get; set; }
+
+        /// <summary>Shape of <see cref="Ttl"/>.</summary>
+        public long[] TtlShape { get; set; }
+
+        /// <summary>Flattened "dp" style values.</summary>
+        public float[] Dp { get; set; }
+
+        /// <summary>Shape of <see cref="Dp"/>.</summary>
+        public long[] DpShape { get; set; }
+    }
+
+ // ============================================================================
+ // Unicode text processor
+ // ============================================================================
+
+ public class UnicodeProcessor
+ {
+ // Lookup table filled by the constructor: position in the indexer array -> value stored at that position.
+ // NOTE(review): the generic type arguments are not visible on this declaration; presumably int keys and
+ // long values, judging by the TryGetValue(int, out long) call in Call — confirm against the original file.
+ private readonly Dictionary _indexer;
+
+ /// <summary>
+ /// Loads the indexer JSON file and copies every element into the lookup table, keyed by its position.
+ /// </summary>
+ /// <param name="unicodeIndexerPath">Path of the JSON file to read.</param>
+ /// <exception cref="Exception">Thrown with "Failed to load indexer" when deserialization yields null.</exception>
+ public UnicodeProcessor(string unicodeIndexerPath)
+ {
+ var json = File.ReadAllText(unicodeIndexerPath);
+ // NOTE(review): the target type of Deserialize is not visible here; the result is used as an indexable
+ // collection with a Length, so presumably an array — confirm.
+ var indexerArray = JsonSerializer.Deserialize(json) ?? throw new Exception("Failed to load indexer");
+ _indexer = new Dictionary();
+ // Key = array position, value = the entry found at that position.
+ for (int i = 0; i < indexerArray.Length; i++)
+ {
+ _indexer[i] = indexerArray[i];
+ }
+ }
+
+        // Inclusive code point ranges that RemoveEmojis drops from the text.
+        private static readonly int[][] EmojiRanges =
+        {
+            new[] { 0x1F600, 0x1F64F },
+            new[] { 0x1F300, 0x1F5FF },
+            new[] { 0x1F680, 0x1F6FF },
+            new[] { 0x1F700, 0x1F77F },
+            new[] { 0x1F780, 0x1F7FF },
+            new[] { 0x1F800, 0x1F8FF },
+            new[] { 0x1F900, 0x1F9FF },
+            new[] { 0x1FA00, 0x1FA6F },
+            new[] { 0x1FA70, 0x1FAFF },
+            new[] { 0x2600, 0x26FF },
+            new[] { 0x2700, 0x27BF },
+            new[] { 0x1F1E6, 0x1F1FF },
+        };
+
+        // True when the code point lies inside one of the EmojiRanges entries.
+        private static bool IsEmojiCodePoint(int codePoint)
+        {
+            foreach (var range in EmojiRanges)
+            {
+                if (codePoint >= range[0] && codePoint <= range[1])
+                {
+                    return true;
+                }
+            }
+            return false;
+        }
+
+        /// <summary>
+        /// Returns a copy of <paramref name="text"/> without the code points that fall in the emoji ranges.
+        /// </summary>
+        /// <remarks>
+        /// The string is walked by code point: a valid surrogate pair is decoded and treated as one unit,
+        /// any other UTF-16 unit (including an unpaired surrogate) is treated on its own and copied through
+        /// unchanged unless it is in a removed range.
+        /// </remarks>
+        private static string RemoveEmojis(string text)
+        {
+            var kept = new StringBuilder();
+            int pos = 0;
+            while (pos < text.Length)
+            {
+                char unit = text[pos];
+                bool isPair = char.IsHighSurrogate(unit)
+                    && pos + 1 < text.Length
+                    && char.IsLowSurrogate(text[pos + 1]);
+
+                int codePoint = isPair ? char.ConvertToUtf32(unit, text[pos + 1]) : unit;
+
+                // A decoded pair consumes two UTF-16 units, anything else consumes one.
+                pos += isPair ? 2 : 1;
+
+                if (IsEmojiCodePoint(codePoint))
+                {
+                    continue;
+                }
+
+                if (isPair)
+                {
+                    // Re-encode as a surrogate pair.
+                    kept.Append(char.ConvertFromUtf32(codePoint));
+                }
+                else
+                {
+                    kept.Append(unit);
+                }
+            }
+            return kept.ToString();
+        }
+
+        /// <summary>
+        /// Normalizes raw input text for the TTS model and wraps it in language tags.
+        /// </summary>
+        /// <remarks>
+        /// Steps, in order: validate <paramref name="lang"/>, apply NFKD normalization, drop emoji code
+        /// points, map typographic dashes/quotes/symbols to plain characters, remove a few special symbols,
+        /// expand known expressions, tidy spacing and duplicate quotes, append a period when the text does
+        /// not already end with punctuation, and finally wrap the result as
+        /// <c>&lt;lang&gt;text&lt;/lang&gt;</c>.
+        /// </remarks>
+        /// <param name="text">Raw text to normalize.</param>
+        /// <param name="lang">Language code; must be listed in <c>Languages.Available</c>.</param>
+        /// <returns>The normalized text enclosed in opening and closing language tags.</returns>
+        /// <exception cref="ArgumentException">Thrown when <paramref name="lang"/> is not supported.</exception>
+        private string PreprocessText(string text, string lang)
+        {
+            // Validate first so an unsupported language fails before any processing is done.
+            if (!Languages.Available.Contains(lang))
+            {
+                throw new ArgumentException($"Invalid language: {lang}. Available: {string.Join(", ", Languages.Available)}");
+            }
+
+            // TODO: Need advanced normalizer for better performance
+            text = text.Normalize(NormalizationForm.FormKD);
+
+            // Remove emojis (wide Unicode range) by code point filtering.
+            text = RemoveEmojis(text);
+
+            // Replace various dashes and symbols. An array keeps the application order explicit.
+            var replacements = new[]
+            {
+                ("\u2013", "-"),  // en dash
+                ("\u2011", "-"),  // non-breaking hyphen
+                ("\u2014", "-"),  // em dash
+                ("_", " "),       // underscore
+                ("\u201C", "\""), // left double quote
+                ("\u201D", "\""), // right double quote
+                ("\u2018", "'"),  // left single quote
+                ("\u2019", "'"),  // right single quote
+                ("\u00B4", "'"),  // acute accent
+                ("`", "'"),       // grave accent
+                ("[", " "),       // left bracket
+                ("]", " "),       // right bracket
+                ("|", " "),       // vertical bar
+                ("/", " "),       // slash
+                ("#", " "),       // hash
+                ("\u2192", " "),  // right arrow
+                ("\u2190", " "),  // left arrow
+            };
+
+            foreach (var (oldValue, newValue) in replacements)
+            {
+                text = text.Replace(oldValue, newValue);
+            }
+
+            // Remove special symbols (including the backslash).
+            text = Regex.Replace(text, @"[♥☆♡©\\]", "");
+
+            // Replace known expressions. This runs after symbol removal, as before.
+            var exprReplacements = new[]
+            {
+                ("@", " at "),
+                ("e.g.,", "for example, "),
+                ("i.e.,", "that is, "),
+            };
+
+            foreach (var (oldValue, newValue) in exprReplacements)
+            {
+                text = text.Replace(oldValue, newValue);
+            }
+
+            // Fix spacing around punctuation: drop a single space that directly precedes each mark.
+            foreach (var mark in new[] { ",", ".", "!", "?", ";", ":", "'" })
+            {
+                text = text.Replace(" " + mark, mark);
+            }
+
+            // Remove duplicate quotes. Backticks were already mapped to apostrophes above, so only the
+            // double-quote and apostrophe cases can occur.
+            while (text.Contains("\"\""))
+            {
+                text = text.Replace("\"\"", "\"");
+            }
+            while (text.Contains("''"))
+            {
+                text = text.Replace("''", "'");
+            }
+
+            // Remove extra spaces
+            text = Regex.Replace(text, @"\s+", " ").Trim();
+
+            // If text doesn't end with punctuation, quotes, or closing brackets, add a period
+            if (!Regex.IsMatch(text, @"[.!?;:,'\u0022\u201C\u201D\u2018\u2019)\]}…。」』】〉》›»]$"))
+            {
+                text += ".";
+            }
+
+            // Wrap text with matching opening and closing language tags.
+            return $"<{lang}>{text}</{lang}>";
+        }
+
+        /// <summary>
+        /// Converts each UTF-16 code unit of <paramref name="text"/> to its integer value.
+        /// </summary>
+        /// <returns>An array with one entry per <c>char</c> of the input, in order.</returns>
+        private int[] TextToUnicodeValues(string text)
+        {
+            var values = new int[text.Length];
+            for (int i = 0; i < values.Length; i++)
+            {
+                values[i] = text[i];
+            }
+            return values;
+        }
+
+        /// <summary>
+        /// Builds the text mask for the given per-item lengths by delegating to <c>Helper.LengthToMask</c>.
+        /// </summary>
+        private float[][][] GetTextMask(long[] textIdsLengths) => Helper.LengthToMask(textIdsLengths);
+
+ /// <summary>
+ /// Pre-processes every text with its language and converts the batch into padded id rows plus a mask.
+ /// </summary>
+ /// <param name="textList">Texts to convert; element i is paired with element i of langList.</param>
+ /// <param name="langList">Language code for each text, passed to PreprocessText.</param>
+ /// <returns>
+ /// textIds: one row per text, each as long as the longest processed text; textMask: the result of
+ /// GetTextMask for the processed text lengths.
+ /// </returns>
+ // NOTE(review): the element types of the two List parameters are not visible here; both are used as
+ // strings — confirm against the original file.
+ public (long[][] textIds, float[][][] textMask) Call(List textList, List langList)
+ {
+ var processedTexts = textList.Select((t, i) => PreprocessText(t, langList[i])).ToList();
+ // Lengths are counted in UTF-16 code units of the processed text.
+ var textIdsLengths = processedTexts.Select(t => (long)t.Length).ToArray();
+ // Max() throws InvalidOperationException when the batch is empty.
+ long maxLen = textIdsLengths.Max();
+
+ var textIds = new long[textList.Count][];
+ for (int i = 0; i < processedTexts.Count; i++)
+ {
+ // Rows start zero-filled, so positions past the end of a shorter text stay 0.
+ textIds[i] = new long[maxLen];
+ var unicodeVals = TextToUnicodeValues(processedTexts[i]);
+ for (int j = 0; j < unicodeVals.Length; j++)
+ {
+ // Values without an indexer entry are left at 0.
+ if (_indexer.TryGetValue(unicodeVals[j], out long val))
+ {
+ textIds[i][j] = val;
+ }
+ }
+ }
+
+ var textMask = GetTextMask(textIdsLengths);
+ return (textIds, textMask);
+ }
+ }
+
+ // ============================================================================
+ // TextToSpeech class
+ // ============================================================================
+
+ public class TextToSpeech
+ {
+        // Collaborators handed in by the caller.
+        private readonly Config _cfgs;
+        private readonly UnicodeProcessor _textProcessor;
+
+        // One inference session per model stage.
+        private readonly InferenceSession _dpOrt;
+        private readonly InferenceSession _textEncOrt;
+        private readonly InferenceSession _vectorEstOrt;
+        private readonly InferenceSession _vocoderOrt;
+
+        /// <summary>Sample rate copied from the AE section of the configuration.</summary>
+        public readonly int SampleRate;
+
+        // Values copied from the configuration so they need not be looked up on every call.
+        private readonly int _baseChunkSize;
+        private readonly int _chunkCompressFactor;
+        private readonly int _ldim;
+
+        /// <summary>
+        /// Stores the configuration, the text processor and the four inference sessions, and caches the
+        /// numeric settings read from <paramref name="cfgs"/>.
+        /// </summary>
+        /// <param name="cfgs">Configuration; both its AE and TTL sections are dereferenced here.</param>
+        /// <param name="textProcessor">Converts text batches into ids and masks.</param>
+        /// <param name="dpOrt">Session stored for the duration-predictor stage.</param>
+        /// <param name="textEncOrt">Session stored for the text-encoder stage.</param>
+        /// <param name="vectorEstOrt">Session stored for the vector-estimator stage.</param>
+        /// <param name="vocoderOrt">Session stored for the vocoder stage.</param>
+        public TextToSpeech(
+            Config cfgs,
+            UnicodeProcessor textProcessor,
+            InferenceSession dpOrt,
+            InferenceSession textEncOrt,
+            InferenceSession vectorEstOrt,
+            InferenceSession vocoderOrt)
+        {
+            _cfgs = cfgs;
+            _textProcessor = textProcessor;
+
+            _dpOrt = dpOrt;
+            _textEncOrt = textEncOrt;
+            _vectorEstOrt = vectorEstOrt;
+            _vocoderOrt = vocoderOrt;
+
+            Config.AEConfig ae = cfgs.AE;
+            SampleRate = ae.SampleRate;
+            _baseChunkSize = ae.BaseChunkSize;
+
+            Config.TTLConfig ttl = cfgs.TTL;
+            _chunkCompressFactor = ttl.ChunkCompressFactor;
+            _ldim = ttl.LatentDim;
+        }
+
+        // Draws one standard-normal sample with the Box-Muller transform (two uniform draws per sample).
+        private static float NextStandardNormal(Random rng)
+        {
+            double u1 = 1.0 - rng.NextDouble();
+            double u2 = 1.0 - rng.NextDouble();
+            return (float)(Math.Sqrt(-2.0 * Math.Log(u1)) * Math.Cos(2.0 * Math.PI * u2));
+        }
+
+        /// <summary>
+        /// Creates a random latent sized from the given durations and zeroes it wherever the latent mask is 0.
+        /// </summary>
+        /// <param name="duration">One duration per batch item; multiplied by <c>SampleRate</c> to get sample counts.</param>
+        /// <returns>
+        /// noisyLatent indexed as [batch][latent dimension][latent frame], and the mask returned by
+        /// <c>Helper.GetLatentMask</c> for the per-item sample counts.
+        /// </returns>
+        private (float[][][] noisyLatent, float[][][] latentMask) SampleNoisyLatent(float[] duration)
+        {
+            int batch = duration.Length;
+            float longestWav = duration.Max() * SampleRate;
+
+            var sampleCounts = new long[batch];
+            for (int i = 0; i < batch; i++)
+            {
+                sampleCounts[i] = (long)(duration[i] * SampleRate);
+            }
+
+            // Number of latent frames needed for the longest item (ceiling division, done in float).
+            int samplesPerFrame = _baseChunkSize * _chunkCompressFactor;
+            int frames = (int)((longestWav + samplesPerFrame - 1) / samplesPerFrame);
+            int channels = _ldim * _chunkCompressFactor;
+
+            // Generate random noise
+            var rng = new Random();
+            var latent = new float[batch][][];
+            for (int b = 0; b < batch; b++)
+            {
+                var item = new float[channels][];
+                latent[b] = item;
+                for (int c = 0; c < channels; c++)
+                {
+                    var row = new float[frames];
+                    item[c] = row;
+                    for (int t = 0; t < frames; t++)
+                    {
+                        row[t] = NextStandardNormal(rng);
+                    }
+                }
+            }
+
+            var mask = Helper.GetLatentMask(sampleCounts, _baseChunkSize, _chunkCompressFactor);
+
+            // Apply mask: the single mask row of each item is shared by all of its channels.
+            for (int b = 0; b < batch; b++)
+            {
+                foreach (float[] row in latent[b])
+                {
+                    for (int t = 0; t < frames; t++)
+                    {
+                        row[t] *= mask[b][0][t];
+                    }
+                }
+            }
+
+            return (latent, mask);
+        }
+
+ /// <summary>
+ /// Runs the full pipeline for one batch: duration predictor, text encoder, iterative vector
+ /// estimation starting from a random latent, and finally the vocoder.
+ /// </summary>
+ /// <param name="textList">Texts in the batch.</param>
+ /// <param name="langList">Language code for each text.</param>
+ /// <param name="style">Style tensors; the first dimension of TtlShape must equal the batch size.</param>
+ /// <param name="totalStep">Number of vector-estimator iterations.</param>
+ /// <param name="speed">Predicted durations are divided by this factor.</param>
+ /// <returns>The flattened "wav_tts" output and the speed-adjusted durations.</returns>
+ /// <exception cref="ArgumentException">Thrown when the batch size differs from style.TtlShape[0].</exception>
+ // NOTE(review): generic type arguments (List, DenseTensor, AsTensor) are not visible in this method;
+ // confirm them against the original file.
+ private (float[] wav, float[] duration) _Infer(List textList, List langList, Style style, int totalStep, float speed = 1.05f)
+ {
+ int bsz = textList.Count;
+ if (bsz != style.TtlShape[0])
+ {
+ throw new ArgumentException("Number of texts must match number of style vectors");
+ }
+
+ // Process text
+ var (textIds, textMask) = _textProcessor.Call(textList, langList);
+ var textIdsShape = new long[] { bsz, textIds[0].Length };
+ var textMaskShape = new long[] { bsz, 1, textMask[0][0].Length };
+
+ var textIdsTensor = Helper.IntArrayToTensor(textIds, textIdsShape);
+ var textMaskTensor = Helper.ArrayToTensor(textMask, textMaskShape);
+
+ // Style tensors are built directly over the flattened style arrays.
+ var styleTtlTensor = new DenseTensor(style.Ttl, style.TtlShape.Select(x => (int)x).ToArray());
+ var styleDpTensor = new DenseTensor(style.Dp, style.DpShape.Select(x => (int)x).ToArray());
+
+ // Run duration predictor
+ var dpInputs = new List
+ {
+ NamedOnnxValue.CreateFromTensor("text_ids", textIdsTensor),
+ NamedOnnxValue.CreateFromTensor("style_dp", styleDpTensor),
+ NamedOnnxValue.CreateFromTensor("text_mask", textMaskTensor)
+ };
+ using (var dpOutputs = _dpOrt.Run(dpInputs))
+ {
+ // ToArray copies the values, so durOnnx can be modified and returned after the outputs are disposed.
+ var durOnnx = dpOutputs.First(o => o.Name == "duration").AsTensor().ToArray();
+
+ // Apply speed factor to duration
+ for (int i = 0; i < durOnnx.Length; i++)
+ {
+ durOnnx[i] /= speed;
+ }
+
+ // Run text encoder
+ var textEncInputs = new List
+ {
+ NamedOnnxValue.CreateFromTensor("text_ids", textIdsTensor),
+ NamedOnnxValue.CreateFromTensor("style_ttl", styleTtlTensor),
+ NamedOnnxValue.CreateFromTensor("text_mask", textMaskTensor)
+ };
+ using (var textEncOutputs = _textEncOrt.Run(textEncInputs))
+ {
+ // textEmbTensor is reused on every denoising step, so it is only used inside this using block.
+ var textEmbTensor = textEncOutputs.First(o => o.Name == "text_emb").AsTensor();
+ // Sample noisy latent
+ var (xt, latentMask) = SampleNoisyLatent(durOnnx);
+ var latentShape = new long[] { bsz, xt[0].Length, xt[0][0].Length };
+ var latentMaskShape = new long[] { bsz, 1, latentMask[0][0].Length };
+
+ var totalStepArray = Enumerable.Repeat((float)totalStep, bsz).ToArray();
+
+ // Iterative denoising: each step feeds the current xt and replaces it with the model output.
+ for (int step = 0; step < totalStep; step++)
+ {
+ var currentStepArray = Enumerable.Repeat((float)step, bsz).ToArray();
+
+ var vectorEstInputs = new List
+ {
+ NamedOnnxValue.CreateFromTensor("noisy_latent", Helper.ArrayToTensor(xt, latentShape)),
+ NamedOnnxValue.CreateFromTensor("text_emb", textEmbTensor),
+ NamedOnnxValue.CreateFromTensor("style_ttl", styleTtlTensor),
+ NamedOnnxValue.CreateFromTensor("text_mask", textMaskTensor),
+ NamedOnnxValue.CreateFromTensor("latent_mask", Helper.ArrayToTensor(latentMask, latentMaskShape)),
+ NamedOnnxValue.CreateFromTensor("total_step", new DenseTensor(totalStepArray, new int[] { bsz })),
+ NamedOnnxValue.CreateFromTensor("current_step", new DenseTensor(currentStepArray, new int[] { bsz }))
+ };
+
+ using (var vectorEstOutputs = _vectorEstOrt.Run(vectorEstInputs))
+ {
+ var denoisedLatent = vectorEstOutputs.First(o => o.Name == "denoised_latent").AsTensor();
+
+ // Update xt: the output is read by flat index in [b][d][t] order.
+ // NOTE(review): assumes the output has the same element count and layout as xt — confirm.
+ int idx = 0;
+ for (int b = 0; b < bsz; b++)
+ {
+ for (int d = 0; d < xt[b].Length; d++)
+ {
+ for (int t = 0; t < xt[b][d].Length; t++)
+ {
+ xt[b][d][t] = denoisedLatent.GetValue(idx++);
+ }
+ }
+ }
+ }
+
+ }
+
+ // Run vocoder
+ var vocoderInputs = new List
+ {
+ NamedOnnxValue.CreateFromTensor("latent", Helper.ArrayToTensor(xt, latentShape))
+ };
+ using (var vocoderOutputs = _vocoderOrt.Run(vocoderInputs))
+ {
+ var wavTensor = vocoderOutputs.First(o => o.Name == "wav_tts").AsTensor();
+
+ // ToArray copies the samples out before the outputs are disposed.
+ return (wavTensor.ToArray(), durOnnx);
+ }
+
+
+ }
+ }
+
+ }
+
+ /// <summary>
+ /// Synthesizes a single text with a single style by splitting it into chunks, inferring each chunk
+ /// separately and joining the audio with silence in between.
+ /// </summary>
+ /// <param name="text">Text to synthesize.</param>
+ /// <param name="lang">Language code; "ko" uses a chunk limit of 120, every other value 300.</param>
+ /// <param name="style">Style whose TtlShape[0] must be 1.</param>
+ /// <param name="totalStep">Number of denoising steps passed to _Infer.</param>
+ /// <param name="speed">Speed factor passed to _Infer.</param>
+ /// <param name="silenceDuration">Silence inserted between consecutive chunks, multiplied by SampleRate to get samples.</param>
+ /// <returns>The concatenated audio and a one-element array with the summed duration (including inserted silence).</returns>
+ /// <exception cref="ArgumentException">Thrown when the style holds more or fewer than one entry.</exception>
+ public (float[] wav, float[] duration) Call(string text, string lang, Style style, int totalStep, float speed = 1.05f, float silenceDuration = 0.3f)
+ {
+ if (style.TtlShape[0] != 1)
+ {
+ throw new ArgumentException("Single speaker text to speech only supports single style");
+ }
+
+ int maxLen = lang == "ko" ? 120 : 300;
+ var textList = Helper.ChunkText(text, maxLen);
+ var wavCat = new List();
+ float durCat = 0.0f;
+
+ foreach (var chunk in textList)
+ {
+ // Each chunk is inferred as a batch of one.
+ var (wav, duration) = _Infer(new List { chunk }, new List { lang }, style, totalStep, speed);
+
+ // An empty accumulator is treated as "first chunk": no leading silence is added.
+ if (wavCat.Count == 0)
+ {
+ wavCat.AddRange(wav);
+ durCat = duration[0];
+ }
+ else
+ {
+ // A new float array is zero-filled, which gives the silent gap.
+ int silenceLen = (int)(silenceDuration * SampleRate);
+ var silence = new float[silenceLen];
+ wavCat.AddRange(silence);
+ wavCat.AddRange(wav);
+ durCat += duration[0] + silenceDuration;
+ }
+ }
+
+ return (wavCat.ToArray(), new float[] { durCat });
+ }
+
+ /// <summary>
+ /// Synthesizes a batch of texts in one pass by forwarding all arguments to _Infer unchanged.
+ /// </summary>
+ /// <returns>The tuple returned by _Infer.</returns>
+ public (float[] wav, float[] duration) Batch(List textList, List langList, Style style, int totalStep, float speed = 1.05f)
+ {
+ return _Infer(textList, langList, style, totalStep, speed);
+ }
+ }
+
+ // ============================================================================
+ // Helper class with utility functions
+ // ============================================================================
+
+ public static class Helper
+ {
+ // ============================================================================
+ // Utility functions
+ // ============================================================================
+
+        /// <summary>
+        /// Builds one 0/1 mask row per entry of <paramref name="lengths"/>.
+        /// </summary>
+        /// <param name="lengths">Number of leading positions set to 1 in each row.</param>
+        /// <param name="maxLen">Row width; -1 (the default) means "use the largest entry of lengths".</param>
+        /// <returns>An array shaped [lengths.Length][1][row width] holding 1.0f for valid positions and 0.0f elsewhere.</returns>
+        public static float[][][] LengthToMask(long[] lengths, long maxLen = -1)
+        {
+            long width = maxLen == -1 ? lengths.Max() : maxLen;
+
+            var mask = new float[lengths.Length][][];
+            for (int row = 0; row < mask.Length; row++)
+            {
+                // New arrays are zero-filled, so only the valid prefix has to be written.
+                var flags = new float[width];
+                long valid = lengths[row];
+                for (int col = 0; col < flags.Length && col < valid; col++)
+                {
+                    flags[col] = 1.0f;
+                }
+                mask[row] = new[] { flags };
+            }
+            return mask;
+        }
+
+        /// <summary>
+        /// Converts waveform lengths to latent lengths (ceiling division by
+        /// <paramref name="baseChunkSize"/> * <paramref name="chunkCompressFactor"/>) and returns their mask.
+        /// </summary>
+        /// <returns>The result of <c>LengthToMask</c> for the computed latent lengths.</returns>
+        public static float[][][] GetLatentMask(long[] wavLengths, int baseChunkSize, int chunkCompressFactor)
+        {
+            int samplesPerFrame = baseChunkSize * chunkCompressFactor;
+
+            var frameCounts = new long[wavLengths.Length];
+            for (int i = 0; i < frameCounts.Length; i++)
+            {
+                frameCounts[i] = (wavLengths[i] + samplesPerFrame - 1) / samplesPerFrame;
+            }
+
+            return LengthToMask(frameCounts);
+        }
+
+ // ============================================================================
+ // ONNX model loading
+ // ============================================================================
+
+        /// <summary>
+        /// Creates an inference session for the model file at <paramref name="onnxPath"/> using <paramref name="opts"/>.
+        /// </summary>
+        public static InferenceSession LoadOnnx(string onnxPath, SessionOptions opts)
+            => new InferenceSession(onnxPath, opts);
+
+        /// <summary>
+        /// Loads the four model files (duration_predictor, text_encoder, vector_estimator, vocoder) found
+        /// in <paramref name="onnxDir"/>.
+        /// </summary>
+        /// <param name="onnxDir">Directory containing the .onnx files.</param>
+        /// <param name="opts">Session options shared by all four sessions.</param>
+        /// <returns>The four sessions, in the order dp, textEnc, vectorEst, vocoder.</returns>
+        /// <remarks>
+        /// If one of the models fails to load, the sessions created before it are disposed and the
+        /// original exception is rethrown, so a partial failure does not leak native sessions.
+        /// </remarks>
+        public static (InferenceSession dp, InferenceSession textEnc, InferenceSession vectorEst, InferenceSession vocoder)
+            LoadOnnxAll(string onnxDir, SessionOptions opts)
+        {
+            var dpPath = Path.Combine(onnxDir, "duration_predictor.onnx");
+            var textEncPath = Path.Combine(onnxDir, "text_encoder.onnx");
+            var vectorEstPath = Path.Combine(onnxDir, "vector_estimator.onnx");
+            var vocoderPath = Path.Combine(onnxDir, "vocoder.onnx");
+
+            InferenceSession dp = null;
+            InferenceSession textEnc = null;
+            InferenceSession vectorEst = null;
+            try
+            {
+                dp = LoadOnnx(dpPath, opts);
+                textEnc = LoadOnnx(textEncPath, opts);
+                vectorEst = LoadOnnx(vectorEstPath, opts);
+                var vocoder = LoadOnnx(vocoderPath, opts);
+
+                return (dp, textEnc, vectorEst, vocoder);
+            }
+            catch
+            {
+                // Release whatever was created before the failure, newest first.
+                vectorEst?.Dispose();
+                textEnc?.Dispose();
+                dp?.Dispose();
+                throw;
+            }
+        }
+
+ // ============================================================================
+ // Configuration loading
+ // ============================================================================
+
+        /// <summary>
+        /// Reads <c>tts.json</c> from <paramref name="onnxDir"/> and maps four integer settings onto a <c>Config</c>.
+        /// </summary>
+        /// <remarks>
+        /// Properties read: ae.sample_rate, ae.base_chunk_size, ttl.chunk_compress_factor, ttl.latent_dim.
+        /// </remarks>
+        public static Config LoadCfgs(string onnxDir)
+        {
+            string json = File.ReadAllText(Path.Combine(onnxDir, "tts.json"));
+
+            using (JsonDocument doc = JsonDocument.Parse(json))
+            {
+                JsonElement root = doc.RootElement;
+                var cfg = new Config();
+
+                // The "ae" section is fully read before "ttl" is touched.
+                JsonElement ae = root.GetProperty("ae");
+                var aeCfg = new Config.AEConfig();
+                aeCfg.SampleRate = ae.GetProperty("sample_rate").GetInt32();
+                aeCfg.BaseChunkSize = ae.GetProperty("base_chunk_size").GetInt32();
+                cfg.AE = aeCfg;
+
+                JsonElement ttl = root.GetProperty("ttl");
+                var ttlCfg = new Config.TTLConfig();
+                ttlCfg.ChunkCompressFactor = ttl.GetProperty("chunk_compress_factor").GetInt32();
+                ttlCfg.LatentDim = ttl.GetProperty("latent_dim").GetInt32();
+                cfg.TTL = ttlCfg;
+
+                return cfg;
+            }
+        }
+
+        /// <summary>
+        /// Creates a <c>UnicodeProcessor</c> from <c>unicode_indexer.json</c> located in <paramref name="onnxDir"/>.
+        /// </summary>
+        public static UnicodeProcessor LoadTextProcessor(string onnxDir)
+            => new UnicodeProcessor(Path.Combine(onnxDir, "unicode_indexer.json"));
+
+ // ============================================================================
+ // Voice style loading
+ // ============================================================================
+
+ /// <summary>
+ /// Loads one voice-style JSON file per path and stacks them into a single batched Style.
+ /// </summary>
+ /// <param name="voiceStylePaths">Paths of the style files; the first file determines the per-item dimensions.</param>
+ /// <param name="verbose">When true, writes the number of loaded styles to the console.</param>
+ /// <returns>A Style whose shapes are { batch, dim1, dim2 } for both the "style_ttl" and "style_dp" data.</returns>
+ // NOTE(review): the element type of the List parameter and of the local lists is not visible here —
+ // confirm against the original file.
+ public static Style LoadVoiceStyle(List voiceStylePaths, bool verbose = false)
+ {
+ int bsz = voiceStylePaths.Count;
+
+ // Read first file to get dimensions (indexing [0] fails when the list is empty)
+ var firstJson = File.ReadAllText(voiceStylePaths[0]);
+ using (var firstDoc = JsonDocument.Parse(firstJson))
+ {
+ var firstRoot = firstDoc.RootElement;
+
+ var ttlDims = ParseInt64Array(firstRoot.GetProperty("style_ttl").GetProperty("dims"));
+ var dpDims = ParseInt64Array(firstRoot.GetProperty("style_dp").GetProperty("dims"));
+
+ // Only elements 1 and 2 of "dims" are used; element 0 is replaced by the batch size below.
+ long ttlDim1 = ttlDims[1];
+ long ttlDim2 = ttlDims[2];
+ long dpDim1 = dpDims[1];
+ long dpDim2 = dpDims[2];
+
+ // Pre-allocate arrays with full batch size
+ int ttlSize = (int)(bsz * ttlDim1 * ttlDim2);
+ int dpSize = (int)(bsz * dpDim1 * dpDim2);
+ var ttlFlat = new float[ttlSize];
+ var dpFlat = new float[dpSize];
+
+ // Fill in the data (the first file is read a second time here, as item 0)
+ for (int i = 0; i < bsz; i++)
+ {
+ var json = File.ReadAllText(voiceStylePaths[i]);
+ using (var doc = JsonDocument.Parse(json))
+ {
+ var root = doc.RootElement;
+
+ // Flatten data
+ var ttlData3D = ParseFloat3DArray(root.GetProperty("style_ttl").GetProperty("data"));
+ var ttlDataFlat = new List();
+ foreach (var batch in ttlData3D)
+ {
+ foreach (var row in batch)
+ {
+ ttlDataFlat.AddRange(row);
+ }
+ }
+
+ var dpData3D = ParseFloat3DArray(root.GetProperty("style_dp").GetProperty("data"));
+ var dpDataFlat = new List();
+ foreach (var batch in dpData3D)
+ {
+ foreach (var row in batch)
+ {
+ dpDataFlat.AddRange(row);
+ }
+ }
+
+ // Copy to pre-allocated array at this item's slot.
+ // NOTE(review): the "dims" of files after the first are never read, so nothing here checks that
+ // their data matches the slot size taken from the first file.
+ int ttlOffset = (int)(i * ttlDim1 * ttlDim2);
+ ttlDataFlat.CopyTo(ttlFlat, ttlOffset);
+
+ int dpOffset = (int)(i * dpDim1 * dpDim2);
+ dpDataFlat.CopyTo(dpFlat, dpOffset);
+ }
+
+ }
+
+ var ttlShape = new long[] { bsz, ttlDim1, ttlDim2 };
+ var dpShape = new long[] { bsz, dpDim1, dpDim2 };
+
+ if (verbose)
+ {
+ Console.WriteLine($"Loaded {bsz} voice styles");
+ }
+
+ return new Style(ttlFlat, ttlShape, dpFlat, dpShape);
+ }
+
+ }
+
+        /// <summary>
+        /// Reads a JSON array of arrays of arrays of numbers into a jagged <c>float[][][]</c>.
+        /// </summary>
+        /// <param name="element">JSON element that must be a three-level nested array of numbers.</param>
+        /// <returns>A jagged array mirroring the nesting of the JSON input; inner lengths may differ.</returns>
+        private static float[][][] ParseFloat3DArray(JsonElement element)
+        {
+            return element.EnumerateArray()
+                .Select(plane => plane.EnumerateArray()
+                    .Select(row => row.EnumerateArray()
+                        .Select(value => value.GetSingle())
+                        .ToArray())
+                    .ToArray())
+                .ToArray();
+        }
+
+        /// <summary>
+        /// Reads a JSON array of integers into a <c>long[]</c>, preserving order.
+        /// </summary>
+        private static long[] ParseInt64Array(JsonElement element)
+        {
+            return element.EnumerateArray()
+                .Select(value => value.GetInt64())
+                .ToArray();
+        }
+
+ // ============================================================================
+ // TextToSpeech loading
+ // ============================================================================
+
+        /// <summary>
+        /// Builds a ready-to-use <c>TextToSpeech</c> from the configuration, indexer and model files in
+        /// <paramref name="onnxDir"/>.
+        /// </summary>
+        /// <param name="onnxDir">Directory holding tts.json, unicode_indexer.json and the .onnx models.</param>
+        /// <param name="useGpu">Must be false; GPU inference is not implemented.</param>
+        /// <returns>A <c>TextToSpeech</c> instance that owns the four loaded sessions.</returns>
+        /// <exception cref="NotImplementedException">Thrown when <paramref name="useGpu"/> is true.</exception>
+        public static TextToSpeech LoadTextToSpeech(string onnxDir, bool useGpu = false)
+        {
+            // Reject unsupported modes before allocating any native resources.
+            if (useGpu)
+            {
+                throw new NotImplementedException("GPU mode is not supported yet");
+            }
+
+            Console.WriteLine("Using CPU for inference");
+
+            // Load the JSON-backed pieces first: if either file is missing or malformed, no
+            // inference session has been created yet and nothing needs to be cleaned up.
+            var cfgs = LoadCfgs(onnxDir);
+            var textProcessor = LoadTextProcessor(onnxDir);
+
+            // The options object is only needed while the sessions are being created, so it is
+            // released as soon as they exist instead of being left for the finalizer.
+            using (var opts = new SessionOptions())
+            {
+                var (dpOrt, textEncOrt, vectorEstOrt, vocoderOrt) = LoadOnnxAll(onnxDir, opts);
+                return new TextToSpeech(cfgs, textProcessor, dpOrt, textEncOrt, vectorEstOrt, vocoderOrt);
+            }
+        }
+
+ // ============================================================================
+ // WAV file writing
+ // ============================================================================
+
+        /// <summary>
+        /// Writes <paramref name="audioData"/> as a mono, 16-bit PCM WAV file.
+        /// </summary>
+        /// <param name="filename">Destination path; an existing file is overwritten.</param>
+        /// <param name="audioData">Samples; values are clamped to [-1, 1] and NaN is written as silence.</param>
+        /// <param name="sampleRate">Sample rate stored in the header.</param>
+        /// <exception cref="ArgumentNullException">Thrown when <paramref name="audioData"/> is null.</exception>
+        /// <exception cref="OverflowException">
+        /// Thrown when the audio is too long for the 32-bit size fields written to the header.
+        /// </exception>
+        public static void WriteWavFile(string filename, float[] audioData, int sampleRate)
+        {
+            if (audioData == null)
+            {
+                throw new ArgumentNullException(nameof(audioData));
+            }
+
+            const short numChannels = 1;
+            const short bitsPerSample = 16;
+            const short blockAlign = (short)(numChannels * bitsPerSample / 8);
+
+            // Sizes are computed before the file is opened, so an overflow leaves no truncated file.
+            // Multiplying by bytes-per-frame (instead of bits, then dividing) keeps long recordings
+            // from wrapping around silently; "checked" turns any remaining overflow into an exception.
+            int byteRate = checked(sampleRate * blockAlign);
+            int dataSize = checked(audioData.Length * blockAlign);
+            int riffSize = checked(36 + dataSize);
+
+            using (var writer = new BinaryWriter(File.Open(filename, FileMode.Create)))
+            {
+                // RIFF header
+                writer.Write(Encoding.ASCII.GetBytes("RIFF"));
+                writer.Write(riffSize);
+                writer.Write(Encoding.ASCII.GetBytes("WAVE"));
+
+                // fmt chunk
+                writer.Write(Encoding.ASCII.GetBytes("fmt "));
+                writer.Write(16); // fmt chunk size
+                writer.Write((short)1); // audio format (PCM)
+                writer.Write(numChannels);
+                writer.Write(sampleRate);
+                writer.Write(byteRate);
+                writer.Write(blockAlign);
+                writer.Write(bitsPerSample);
+
+                // data chunk
+                writer.Write(Encoding.ASCII.GetBytes("data"));
+                writer.Write(dataSize);
+
+                // Write audio data
+                foreach (var sample in audioData)
+                {
+                    // NaN cannot be clamped, and converting it to an integer is unspecified; write silence.
+                    float clamped = float.IsNaN(sample) ? 0.0f : Math.Max(-1.0f, Math.Min(1.0f, sample));
+                    writer.Write((short)(clamped * 32767));
+                }
+            }
+        }
+
+ // ============================================================================
+ // Tensor conversion utilities
+ // ============================================================================
+
+ /// <summary>
+ /// Flattens a three-level jagged float array, in nesting order, into a dense tensor with the given dimensions.
+ /// </summary>
+ /// <param name="array">Source values; rows are appended one after another.</param>
+ /// <param name="dims">Tensor dimensions; each value is cast to int.</param>
+ // NOTE(review): the element type of DenseTensor and of the local list is not visible here (presumably
+ // float, matching the input) — confirm. Nothing in this method checks that dims matches the element count.
+ public static DenseTensor ArrayToTensor(float[][][] array, long[] dims)
+ {
+ var flat = new List();
+ foreach (var batch in array)
+ {
+ foreach (var row in batch)
+ {
+ flat.AddRange(row);
+ }
+ }
+ return new DenseTensor(flat.ToArray(), dims.Select(x => (int)x).ToArray());
+ }
+
+ /// <summary>
+ /// Flattens a two-level jagged long array, row by row, into a dense tensor with the given dimensions.
+ /// </summary>
+ /// <param name="array">Source rows; appended one after another.</param>
+ /// <param name="dims">Tensor dimensions; each value is cast to int.</param>
+ // NOTE(review): the element type of DenseTensor and of the local list is not visible here (presumably
+ // long, matching the input) — confirm.
+ public static DenseTensor IntArrayToTensor(long[][] array, long[] dims)
+ {
+ var flat = new List();
+ foreach (var row in array)
+ {
+ flat.AddRange(row);
+ }
+ return new DenseTensor(flat.ToArray(), dims.Select(x => (int)x).ToArray());
+ }
+
+ // ============================================================================
+ // Timer utility
+ // ============================================================================
+
+        /// <summary>
+        /// Runs <paramref name="func"/>, printing its name before the call and the elapsed time after it.
+        /// </summary>
+        /// <typeparam name="T">Type of the value produced by <paramref name="func"/>.</typeparam>
+        /// <param name="name">Label used in both console messages.</param>
+        /// <param name="func">Work to execute and time.</param>
+        /// <returns>Whatever <paramref name="func"/> returns.</returns>
+        public static T Timer<T>(string name, Func<T> func)
+        {
+            Console.WriteLine($"{name}...");
+
+            // Stopwatch measures elapsed time and, unlike DateTime.Now, is not affected by
+            // system clock adjustments while the work is running.
+            var stopwatch = System.Diagnostics.Stopwatch.StartNew();
+            var result = func();
+            stopwatch.Stop();
+
+            var elapsed = stopwatch.Elapsed.TotalSeconds;
+            Console.WriteLine($" -> {name} completed in {elapsed:F2} sec");
+            return result;
+        }
+
+        /// <summary>
+        /// Builds a file-name-safe string from at most the first <paramref name="maxLen"/> characters of
+        /// <paramref name="text"/>: letters and digits are kept, every other character becomes '_'.
+        /// </summary>
+        /// <returns>The sanitized prefix; empty when <paramref name="maxLen"/> is zero or negative.</returns>
+        public static string SanitizeFilename(string text, int maxLen)
+        {
+            int limit = Math.Min(text.Length, maxLen);
+
+            var safe = new StringBuilder();
+            for (int i = 0; i < limit; i++)
+            {
+                char c = text[i];
+                safe.Append(char.IsLetterOrDigit(c) ? c : '_');
+            }
+            return safe.ToString();
+        }
+
+ // ============================================================================
+ // Chunk text
+ // ============================================================================
+
+ public static List ChunkText(string text, int maxLen = 300)
+ {
+ var chunks = new List();
+
+ // Split by paragraph (two or more newlines)
+ var paragraphRegex = new Regex(@"\n\s*\n+");
+ var paragraphs = paragraphRegex.Split(text.Trim())
+ .Select(p => p.Trim())
+ .Where(p => !string.IsNullOrEmpty(p))
+ .ToList();
+
+ // Split by sentence boundaries, excluding abbreviations
+ var sentenceRegex = new Regex(@"(?
+ /// The main entry point for the application.
+ ///
+ [STAThread]
+ static void Main()
+ {
+ Application.EnableVisualStyles();
+ Application.SetCompatibleTextRenderingDefault(false);
+ Application.Run(new fMain());
+ }
+ }
+}
diff --git a/HMI/SubProject/tts/Properties/AssemblyInfo.cs b/HMI/SubProject/tts/Properties/AssemblyInfo.cs
new file mode 100644
index 0000000..10e3f4f
--- /dev/null
+++ b/HMI/SubProject/tts/Properties/AssemblyInfo.cs
@@ -0,0 +1,17 @@
+using System.Reflection;
+using System.Runtime.InteropServices;
+
+// Descriptive metadata embedded in the compiled assembly.
+[assembly: AssemblyTitle("Supertonic.WinForms")]
+[assembly: AssemblyDescription("")]
+[assembly: AssemblyConfiguration("")]
+[assembly: AssemblyCompany("")]
+[assembly: AssemblyProduct("Supertonic.WinForms")]
+[assembly: AssemblyCopyright("Copyright © 2026")]
+[assembly: AssemblyTrademark("")]
+[assembly: AssemblyCulture("")]
+
+// Types in this assembly are not visible to COM; the Guid below matches the project GUID.
+[assembly: ComVisible(false)]
+[assembly: Guid("bd3e8373-c40c-4f7f-aa18-6990f1cfd21a")]
+
+// Assembly version and file version.
+[assembly: AssemblyVersion("1.0.0.0")]
+[assembly: AssemblyFileVersion("1.0.0.0")]
diff --git a/HMI/SubProject/tts/Properties/Resources.Designer.cs b/HMI/SubProject/tts/Properties/Resources.Designer.cs
new file mode 100644
index 0000000..87457d8
--- /dev/null
+++ b/HMI/SubProject/tts/Properties/Resources.Designer.cs
@@ -0,0 +1,63 @@
+//------------------------------------------------------------------------------
+// <auto-generated>
+// This code was generated by a tool.
+// Runtime Version:4.0.30319.42000
+//
+// Changes to this file may cause incorrect behavior and will be lost if
+// the code is regenerated.
+// </auto-generated>
+//------------------------------------------------------------------------------
+
+namespace Supertonic.WinForms.Properties {
+ using System;
+
+
+ /// <summary>
+ /// A strongly-typed resource class, for looking up localized strings, etc.
+ /// </summary>
+ // This class was auto-generated by the StronglyTypedResourceBuilder
+ // class via a tool like ResGen or Visual Studio.
+ // To add or remove a member, edit your .ResX file then rerun ResGen
+ // with the /str option, or rebuild your VS project.
+ [global::System.CodeDom.Compiler.GeneratedCodeAttribute("System.Resources.Tools.StronglyTypedResourceBuilder", "4.0.0.0")]
+ [global::System.Diagnostics.DebuggerNonUserCodeAttribute()]
+ [global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()]
+ internal class Resources {
+
+ private static global::System.Resources.ResourceManager resourceMan;
+
+ private static global::System.Globalization.CultureInfo resourceCulture;
+
+ [global::System.Diagnostics.CodeAnalysis.SuppressMessageAttribute("Microsoft.Performance", "CA1811:AvoidUncalledPrivateCode")]
+ internal Resources() {
+ }
+
+ /// <summary>
+ /// Returns the cached ResourceManager instance used by this class.
+ /// </summary>
+ [global::System.ComponentModel.EditorBrowsableAttribute(global::System.ComponentModel.EditorBrowsableState.Advanced)]
+ internal static global::System.Resources.ResourceManager ResourceManager {
+ get {
+ // Created on first access and then reused.
+ if (object.ReferenceEquals(resourceMan, null)) {
+ global::System.Resources.ResourceManager temp = new global::System.Resources.ResourceManager("Supertonic.WinForms.Properties.Resources", typeof(Resources).Assembly);
+ resourceMan = temp;
+ }
+ return resourceMan;
+ }
+ }
+
+ /// <summary>
+ /// Overrides the current thread's CurrentUICulture property for all
+ /// resource lookups using this strongly typed resource class.
+ /// </summary>
+ [global::System.ComponentModel.EditorBrowsableAttribute(global::System.ComponentModel.EditorBrowsableState.Advanced)]
+ internal static global::System.Globalization.CultureInfo Culture {
+ get {
+ return resourceCulture;
+ }
+ set {
+ resourceCulture = value;
+ }
+ }
+ }
+}
diff --git a/HMI/SubProject/tts/Properties/Resources.resx b/HMI/SubProject/tts/Properties/Resources.resx
new file mode 100644
index 0000000..4fdb1b6
--- /dev/null
+++ b/HMI/SubProject/tts/Properties/Resources.resx
@@ -0,0 +1,101 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ text/microsoft-resx
+
+
+ 1.3
+
+
+ System.Resources.ResXResourceReader, System.Windows.Forms, Version=2.0.3500.0, Culture=neutral, PublicKeyToken=b77a5c561934e089
+
+
+ System.Resources.ResXResourceWriter, System.Windows.Forms, Version=2.0.3500.0, Culture=neutral, PublicKeyToken=b77a5c561934e089
+
+
\ No newline at end of file
diff --git a/HMI/SubProject/tts/Properties/Settings.Designer.cs b/HMI/SubProject/tts/Properties/Settings.Designer.cs
new file mode 100644
index 0000000..405de7e
--- /dev/null
+++ b/HMI/SubProject/tts/Properties/Settings.Designer.cs
@@ -0,0 +1,26 @@
+//------------------------------------------------------------------------------
+// <auto-generated>
+// This code was generated by a tool.
+// Runtime Version:4.0.30319.42000
+//
+// Changes to this file may cause incorrect behavior and will be lost if
+// the code is regenerated.
+// </auto-generated>
+//------------------------------------------------------------------------------
+
+namespace Supertonic.WinForms.Properties {
+
+
+ // Application settings class produced by the settings designer; it declares no settings of its own here.
+ [global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()]
+ [global::System.CodeDom.Compiler.GeneratedCodeAttribute("Microsoft.VisualStudio.Editors.SettingsDesigner.SettingsSingleFileGenerator", "15.9.0.0")]
+ internal sealed partial class Settings : global::System.Configuration.ApplicationSettingsBase {
+
+ // Single shared instance, wrapped by ApplicationSettingsBase.Synchronized.
+ private static Settings defaultInstance = ((Settings)(global::System.Configuration.ApplicationSettingsBase.Synchronized(new Settings())));
+
+ // Returns the shared instance created above.
+ public static Settings Default {
+ get {
+ return defaultInstance;
+ }
+ }
+ }
+}
diff --git a/HMI/SubProject/tts/Properties/Settings.settings b/HMI/SubProject/tts/Properties/Settings.settings
new file mode 100644
index 0000000..049245f
--- /dev/null
+++ b/HMI/SubProject/tts/Properties/Settings.settings
@@ -0,0 +1,6 @@
+
+
+
+
+
+
diff --git a/HMI/SubProject/tts/Supertonic.WinForms.csproj b/HMI/SubProject/tts/Supertonic.WinForms.csproj
new file mode 100644
index 0000000..7329d16
--- /dev/null
+++ b/HMI/SubProject/tts/Supertonic.WinForms.csproj
@@ -0,0 +1,91 @@
+
+
+
+
+ Debug
+ AnyCPU
+ {BD3E8373-C40C-4F7F-AA18-6990F1CFD21A}
+ WinExe
+ Supertonic.WinForms
+ Supertonic.WinForms
+ v4.8
+ 512
+ true
+ true
+
+
+ x64
+ true
+ full
+ false
+ bin\Debug\
+ DEBUG;TRACE
+ prompt
+ 4
+
+
+ AnyCPU
+ pdbonly
+ true
+ bin\Release\
+ TRACE
+ prompt
+ 4
+
+
+ bin\Debug\
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Form
+
+
+ fMain.cs
+
+
+
+
+
+ True
+ True
+ Resources.resx
+
+
+ True
+ True
+ Settings.settings
+
+
+ fMain.cs
+
+
+ ResXFileCodeGenerator
+ Resources.Designer.cs
+
+
+
+ SettingsSingleFileGenerator
+ Settings.Designer.cs
+
+
+ 1.24.1
+
+
+ 10.0.2
+
+
+
+
\ No newline at end of file
diff --git a/HMI/SubProject/tts/Supertonic.WinForms.sln b/HMI/SubProject/tts/Supertonic.WinForms.sln
new file mode 100644
index 0000000..8b7fc0d
--- /dev/null
+++ b/HMI/SubProject/tts/Supertonic.WinForms.sln
@@ -0,0 +1,25 @@
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio Express 15 for Windows Desktop
+VisualStudioVersion = 15.0.36324.19
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Supertonic.WinForms", "Supertonic.WinForms.csproj", "{BD3E8373-C40C-4F7F-AA18-6990F1CFD21A}"
+EndProject
+Global
+ GlobalSection(SolutionConfigurationPlatforms) = preSolution
+ Debug|Any CPU = Debug|Any CPU
+ Release|Any CPU = Release|Any CPU
+ EndGlobalSection
+ GlobalSection(ProjectConfigurationPlatforms) = postSolution
+ {BD3E8373-C40C-4F7F-AA18-6990F1CFD21A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {BD3E8373-C40C-4F7F-AA18-6990F1CFD21A}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {BD3E8373-C40C-4F7F-AA18-6990F1CFD21A}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {BD3E8373-C40C-4F7F-AA18-6990F1CFD21A}.Release|Any CPU.Build.0 = Release|Any CPU
+ EndGlobalSection
+ GlobalSection(SolutionProperties) = preSolution
+ HideSolutionNode = FALSE
+ EndGlobalSection
+ GlobalSection(ExtensibilityGlobals) = postSolution
+ SolutionGuid = {1DFF2850-1C17-454F-BB29-E08B604073C4}
+ EndGlobalSection
+EndGlobal
diff --git a/HMI/SubProject/tts/fMain.Designer.cs b/HMI/SubProject/tts/fMain.Designer.cs
new file mode 100644
index 0000000..dcb6541
--- /dev/null
+++ b/HMI/SubProject/tts/fMain.Designer.cs
@@ -0,0 +1,187 @@
+namespace Supertonic.WinForms
+{
+ partial class fMain // Designer half of fMain: control fields, layout and Dispose.
+ {
+ private System.ComponentModel.IContainer components = null; // Stays null unless a component container is created; Dispose checks for that.
+
+ protected override void Dispose(bool disposing) // Disposes the component container (if any), then defers to the base Form.
+ {
+ if (disposing && (components != null)) // Only release managed components on an explicit Dispose.
+ {
+ components.Dispose();
+ }
+ base.Dispose(disposing);
+ }
+
+ #region Windows Form Designer generated code
+
+ private void InitializeComponent() // Creates every control, applies its properties and wires the Click/Load handlers.
+ {
+ this.txtInput = new System.Windows.Forms.TextBox();
+ this.btnGenerate = new System.Windows.Forms.Button();
+ this.cmbLang = new System.Windows.Forms.ComboBox();
+ this.txtStylePath = new System.Windows.Forms.TextBox();
+ this.numSteps = new System.Windows.Forms.NumericUpDown();
+ this.numSpeed = new System.Windows.Forms.NumericUpDown();
+ this.lblText = new System.Windows.Forms.Label();
+ this.lblLang = new System.Windows.Forms.Label();
+ this.lblStyle = new System.Windows.Forms.Label();
+ this.lblSteps = new System.Windows.Forms.Label();
+ this.lblSpeed = new System.Windows.Forms.Label();
+ this.txtLog = new System.Windows.Forms.TextBox();
+ ((System.ComponentModel.ISupportInitialize)(this.numSteps)).BeginInit();
+ ((System.ComponentModel.ISupportInitialize)(this.numSpeed)).BeginInit();
+ this.SuspendLayout();
+ //
+ // txtInput: multiline text box holding the text to synthesize (pre-filled with a sample sentence)
+ //
+ this.txtInput.Location = new System.Drawing.Point(12, 29);
+ this.txtInput.Multiline = true;
+ this.txtInput.Name = "txtInput";
+ this.txtInput.Size = new System.Drawing.Size(460, 60);
+ this.txtInput.TabIndex = 0;
+ this.txtInput.Text = "This morning, I took a walk in the park.";
+ //
+ // btnGenerate: button whose Click event is routed to btnGenerate_Click
+ //
+ this.btnGenerate.Location = new System.Drawing.Point(372, 169);
+ this.btnGenerate.Name = "btnGenerate";
+ this.btnGenerate.Size = new System.Drawing.Size(100, 30);
+ this.btnGenerate.TabIndex = 1;
+ this.btnGenerate.Text = "Generate TTS";
+ this.btnGenerate.UseVisualStyleBackColor = true;
+ this.btnGenerate.Click += new System.EventHandler(this.btnGenerate_Click);
+ //
+ // cmbLang: language selector; DropDownList style, and no items are added in this method
+ //
+ this.cmbLang.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList;
+ this.cmbLang.FormattingEnabled = true;
+ this.cmbLang.Location = new System.Drawing.Point(12, 114);
+ this.cmbLang.Name = "cmbLang";
+ this.cmbLang.Size = new System.Drawing.Size(80, 21);
+ this.cmbLang.TabIndex = 2;
+ //
+ // txtStylePath: text box for the voice style JSON path (pre-filled with a relative default)
+ //
+ this.txtStylePath.Location = new System.Drawing.Point(110, 114);
+ this.txtStylePath.Name = "txtStylePath";
+ this.txtStylePath.Size = new System.Drawing.Size(362, 20);
+ this.txtStylePath.TabIndex = 3;
+ this.txtStylePath.Text = "assets/voice_styles/M1.json";
+ //
+ // numSteps: "Total Steps" spinner; Minimum/Maximum are not assigned here, so the control's defaults apply
+ //
+ this.numSteps.Location = new System.Drawing.Point(12, 169);
+ this.numSteps.Name = "numSteps";
+ this.numSteps.Size = new System.Drawing.Size(80, 20);
+ this.numSteps.TabIndex = 4;
+ this.numSteps.Value = new decimal(new int[] { 5, 0, 0, 0 }); // decimal 5
+ //
+ // numSpeed: "Speed" spinner with two decimal places; Minimum/Maximum are not assigned here either
+ //
+ this.numSpeed.DecimalPlaces = 2;
+ this.numSpeed.Increment = new decimal(new int[] { 5, 0, 0, 131072 }); // 131072 = 2 << 16, i.e. scale 2, so this is 0.05
+ this.numSpeed.Location = new System.Drawing.Point(110, 169);
+ this.numSpeed.Name = "numSpeed";
+ this.numSpeed.Size = new System.Drawing.Size(80, 20);
+ this.numSpeed.TabIndex = 5;
+ this.numSpeed.Value = new decimal(new int[] { 105, 0, 0, 131072 }); // 105 at scale 2 = 1.05
+ //
+ // lblText: caption placed above txtInput
+ //
+ this.lblText.AutoSize = true;
+ this.lblText.Location = new System.Drawing.Point(12, 13);
+ this.lblText.Name = "lblText";
+ this.lblText.Size = new System.Drawing.Size(28, 13);
+ this.lblText.TabIndex = 6;
+ this.lblText.Text = "Text";
+ //
+ // lblLang: caption placed above cmbLang
+ //
+ this.lblLang.AutoSize = true;
+ this.lblLang.Location = new System.Drawing.Point(12, 98);
+ this.lblLang.Name = "lblLang";
+ this.lblLang.Size = new System.Drawing.Size(55, 13);
+ this.lblLang.TabIndex = 7;
+ this.lblLang.Text = "Language";
+ //
+ // lblStyle: caption placed above txtStylePath
+ //
+ this.lblStyle.AutoSize = true;
+ this.lblStyle.Location = new System.Drawing.Point(110, 98);
+ this.lblStyle.Name = "lblStyle";
+ this.lblStyle.Size = new System.Drawing.Size(87, 13);
+ this.lblStyle.TabIndex = 8;
+ this.lblStyle.Text = "Voice Style Path";
+ //
+ // lblSteps: caption placed above numSteps
+ //
+ this.lblSteps.AutoSize = true;
+ this.lblSteps.Location = new System.Drawing.Point(12, 153);
+ this.lblSteps.Name = "lblSteps";
+ this.lblSteps.Size = new System.Drawing.Size(61, 13);
+ this.lblSteps.TabIndex = 9;
+ this.lblSteps.Text = "Total Steps";
+ //
+ // lblSpeed: caption placed above numSpeed
+ //
+ this.lblSpeed.AutoSize = true;
+ this.lblSpeed.Location = new System.Drawing.Point(110, 153);
+ this.lblSpeed.Name = "lblSpeed";
+ this.lblSpeed.Size = new System.Drawing.Size(38, 13);
+ this.lblSpeed.TabIndex = 10;
+ this.lblSpeed.Text = "Speed";
+ //
+ // txtLog: read-only multiline box with a vertical scroll bar, used as the on-screen log
+ //
+ this.txtLog.Location = new System.Drawing.Point(12, 214);
+ this.txtLog.Multiline = true;
+ this.txtLog.Name = "txtLog";
+ this.txtLog.ReadOnly = true;
+ this.txtLog.ScrollBars = System.Windows.Forms.ScrollBars.Vertical;
+ this.txtLog.Size = new System.Drawing.Size(460, 150);
+ this.txtLog.TabIndex = 11;
+ //
+ // fMain: form-level settings; adds all controls and routes the Load event to fMain_Load
+ //
+ this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F);
+ this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Font;
+ this.ClientSize = new System.Drawing.Size(484, 376);
+ this.Controls.Add(this.txtLog);
+ this.Controls.Add(this.lblSpeed);
+ this.Controls.Add(this.lblSteps);
+ this.Controls.Add(this.lblStyle);
+ this.Controls.Add(this.lblLang);
+ this.Controls.Add(this.lblText);
+ this.Controls.Add(this.numSpeed);
+ this.Controls.Add(this.numSteps);
+ this.Controls.Add(this.txtStylePath);
+ this.Controls.Add(this.cmbLang);
+ this.Controls.Add(this.btnGenerate);
+ this.Controls.Add(this.txtInput);
+ this.Name = "fMain";
+ this.Text = "Supertonic TTS (WinForms 4.8)";
+ this.Load += new System.EventHandler(this.fMain_Load);
+ ((System.ComponentModel.ISupportInitialize)(this.numSteps)).EndInit();
+ ((System.ComponentModel.ISupportInitialize)(this.numSpeed)).EndInit();
+ this.ResumeLayout(false);
+ this.PerformLayout();
+
+ }
+
+ #endregion
+
+ private System.Windows.Forms.TextBox txtInput; // Text to synthesize.
+ private System.Windows.Forms.Button btnGenerate; // Triggers btnGenerate_Click.
+ private System.Windows.Forms.ComboBox cmbLang; // Language selector.
+ private System.Windows.Forms.TextBox txtStylePath; // Path to the voice style JSON.
+ private System.Windows.Forms.NumericUpDown numSteps; // "Total Steps" value.
+ private System.Windows.Forms.NumericUpDown numSpeed; // "Speed" value.
+ private System.Windows.Forms.Label lblText;
+ private System.Windows.Forms.Label lblLang;
+ private System.Windows.Forms.Label lblStyle;
+ private System.Windows.Forms.Label lblSteps;
+ private System.Windows.Forms.Label lblSpeed;
+ private System.Windows.Forms.TextBox txtLog; // Read-only on-screen log.
+ }
+}
diff --git a/HMI/SubProject/tts/fMain.cs b/HMI/SubProject/tts/fMain.cs
new file mode 100644
index 0000000..b14c7d2
--- /dev/null
+++ b/HMI/SubProject/tts/fMain.cs
@@ -0,0 +1,89 @@
+using System;
+using System.Collections.Generic;
+using System.Drawing;
+using System.IO;
+using System.Linq;
+using System.Windows.Forms;
+
+namespace Supertonic.WinForms
+{
+ /// <summary>
+ /// Main window of the Supertonic TTS demo: collects the text, language, voice style,
+ /// step count and speed from the UI, runs synthesis on a worker thread and writes the
+ /// resulting audio to a WAV file under <c>results</c>.
+ /// </summary>
+ public partial class fMain : Form
+ {
+     // Relative paths; they are resolved against the process working directory.
+     private const string OnnxDir = "assets/onnx"; // This should be updated if assets are moved
+     private const string DefaultStylePath = "assets/voice_styles/M1.json";
+     private const string SaveDir = "results";
+
+     // Lazily created on the first "Generate" click and reused afterwards.
+     private TextToSpeech _tts;
+
+     /// <summary>Creates the form and builds its designer-defined controls.</summary>
+     public fMain()
+     {
+         InitializeComponent();
+     }
+
+     /// <summary>
+     /// Handles the "Generate TTS" click: validates the inputs, loads the model on first
+     /// use, synthesizes the text and saves the WAV file. Any failure is logged and shown
+     /// in a message box instead of escaping the handler.
+     /// </summary>
+     /// <param name="sender">The button that raised the event.</param>
+     /// <param name="e">Unused event data.</param>
+     private async void btnGenerate_Click(object sender, EventArgs e)
+     {
+         string text = txtInput.Text;
+         string lang = cmbLang.SelectedItem?.ToString() ?? "en";
+         string stylePath = txtStylePath.Text;
+         int totalStep = (int)numSteps.Value;
+         float speed = (float)numSpeed.Value;
+
+         if (string.IsNullOrWhiteSpace(text))
+         {
+             MessageBox.Show("Please enter text.");
+             return;
+         }
+
+         // The spinners do not define a Minimum, so zero can be entered; reject it here
+         // rather than passing a degenerate step count or speed to the engine.
+         if (totalStep < 1)
+         {
+             MessageBox.Show("Total steps must be at least 1.");
+             return;
+         }
+
+         if (speed <= 0f)
+         {
+             MessageBox.Show("Speed must be greater than 0.");
+             return;
+         }
+
+         // The handler yields at each await; keep the button disabled until it finishes so
+         // a second click cannot load the model twice or start a concurrent synthesis.
+         btnGenerate.Enabled = false;
+         try
+         {
+             if (_tts == null)
+             {
+                 Log("Loading TTS model...");
+                 _tts = await System.Threading.Tasks.Task.Run(() => Helper.LoadTextToSpeech(OnnxDir, false));
+                 Log("TTS model loaded.");
+             }
+
+             Log($"Generating speech: \"{text}\" ({lang})");
+
+             var style = Helper.LoadVoiceStyle(new List<string> { stylePath }, true);
+
+             var result = await System.Threading.Tasks.Task.Run(() => _tts.Call(text, lang, style, totalStep, speed));
+
+             // CreateDirectory does nothing when the directory already exists.
+             Directory.CreateDirectory(SaveDir);
+
+             string fname = $"{Helper.SanitizeFilename(text, 20)}_{DateTime.Now:HHmmss}.wav";
+             string outputPath = Path.Combine(SaveDir, fname);
+
+             Helper.WriteWavFile(outputPath, result.wav, _tts.SampleRate);
+             Log($"Saved: {outputPath}");
+
+             MessageBox.Show($"Synthesis completed successfully!\nSaved to: {outputPath}");
+         }
+         catch (Exception ex)
+         {
+             Log($"Error: {ex.Message}");
+             MessageBox.Show($"Error: {ex.Message}");
+         }
+         finally
+         {
+             btnGenerate.Enabled = true;
+         }
+     }
+
+     /// <summary>
+     /// Appends a timestamped line to the log box and scrolls to it. Safe to call from a
+     /// non-UI thread; does nothing once the form or the log box has been disposed.
+     /// </summary>
+     /// <param name="msg">Message to append.</param>
+     private void Log(string msg)
+     {
+         // The form may be closed while synthesis is still running in the background.
+         if (IsDisposed || txtLog.IsDisposed)
+         {
+             return;
+         }
+
+         if (txtLog.InvokeRequired)
+         {
+             txtLog.Invoke(new Action(() => Log(msg)));
+             return;
+         }
+
+         txtLog.AppendText($"[{DateTime.Now:HH:mm:ss}] {msg}\r\n");
+         // TextLength avoids materialising the whole log text just to measure it.
+         txtLog.SelectionStart = txtLog.TextLength;
+         txtLog.ScrollToCaret();
+     }
+
+     /// <summary>
+     /// Fills the language list from <c>Languages.Available</c>, selects its first entry and
+     /// applies the default voice style path when that file exists.
+     /// </summary>
+     /// <param name="sender">The form.</param>
+     /// <param name="e">Unused event data.</param>
+     private void fMain_Load(object sender, EventArgs e)
+     {
+         cmbLang.Items.AddRange(Languages.Available);
+         if (cmbLang.Items.Count > 0)
+         {
+             cmbLang.SelectedIndex = 0;
+         }
+
+         // Set default style path if exists
+         if (File.Exists(DefaultStylePath))
+         {
+             txtStylePath.Text = DefaultStylePath;
+         }
+     }
+ }
+}
diff --git a/HMI/SubProject/tts/fMain.resx b/HMI/SubProject/tts/fMain.resx
new file mode 100644
index 0000000..2a981b1
--- /dev/null
+++ b/HMI/SubProject/tts/fMain.resx
@@ -0,0 +1,61 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ text/microsoft-resx
+
+
+ 2.0
+
+
+ System.Resources.ResXResourceReader, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089
+
+
+ System.Resources.ResXResourceWriter, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089
+
+