This commit is contained in:
backuppc
2026-02-09 13:06:47 +09:00
parent 839486db87
commit bd06f59bf1
18 changed files with 24 additions and 1005 deletions

View File

@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="utf-8"?>
<configuration>
<startup>
<supportedRuntime version="v4.0" sku=".NETFramework,Version=v4.8" />
</startup>
</configuration>

View File

@@ -0,0 +1,889 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.Json;
using System.Text.RegularExpressions;
using Microsoft.ML.OnnxRuntime;
using Microsoft.ML.OnnxRuntime.Tensors;
namespace Supertonic.WinForms
{
// Available languages for multilingual TTS
/// <summary>
/// Language codes accepted by the text preprocessor when tagging input for synthesis.
/// </summary>
public static class Languages
{
    /// <summary>Supported language codes; preprocessing rejects any code not listed here.</summary>
    public static readonly string[] Available = new string[] { "en", "ko", "es", "pt", "fr" };
}
// ============================================================================
// Configuration classes
// ============================================================================
/// <summary>
/// Model settings consumed by the synthesizer. Both sections start out as <c>null</c>
/// and are expected to be assigned by whoever builds the instance.
/// </summary>
public class Config
{
    /// <summary>Values read from the <c>ae</c> section of the model configuration.</summary>
    public class AEConfig
    {
        /// <summary>Output sample rate, in samples per second.</summary>
        public int SampleRate { get; set; }

        /// <summary>Base chunk size; multiplied by the compress factor to get samples per latent frame.</summary>
        public int BaseChunkSize { get; set; }
    }

    /// <summary>Values read from the <c>ttl</c> section of the model configuration.</summary>
    public class TTLConfig
    {
        /// <summary>Factor applied to both the base chunk size and the latent dimension.</summary>
        public int ChunkCompressFactor { get; set; }

        /// <summary>Latent dimension before the compress factor is applied.</summary>
        public int LatentDim { get; set; }
    }

    /// <summary>The <c>ae</c> section; <c>null</c> until assigned.</summary>
    public AEConfig AE { get; set; }

    /// <summary>The <c>ttl</c> section; <c>null</c> until assigned.</summary>
    public TTLConfig TTL { get; set; }
}
// ============================================================================
// Style class
// ============================================================================
/// <summary>
/// Voice style vectors held as flat buffers together with the tensor shapes they represent.
/// </summary>
public class Style
{
    /// <summary>
    /// Stores the given buffers and shapes as-is (no copying, no validation).
    /// </summary>
    /// <param name="ttl">Flat data for the <c>style_ttl</c> model input.</param>
    /// <param name="ttlShape">Shape of <paramref name="ttl"/>; element 0 is the batch size.</param>
    /// <param name="dp">Flat data for the <c>style_dp</c> model input.</param>
    /// <param name="dpShape">Shape of <paramref name="dp"/>.</param>
    public Style(float[] ttl, long[] ttlShape, float[] dp, long[] dpShape)
    {
        (Ttl, TtlShape) = (ttl, ttlShape);
        (Dp, DpShape) = (dp, dpShape);
    }

    /// <summary>Flat data for the <c>style_ttl</c> model input.</summary>
    public float[] Ttl { get; set; }

    /// <summary>Shape of <see cref="Ttl"/>.</summary>
    public long[] TtlShape { get; set; }

    /// <summary>Flat data for the <c>style_dp</c> model input.</summary>
    public float[] Dp { get; set; }

    /// <summary>Shape of <see cref="Dp"/>.</summary>
    public long[] DpShape { get; set; }
}
// ============================================================================
// Unicode text processor
// ============================================================================
public class UnicodeProcessor
{
private readonly Dictionary<int, long> _indexer;
/// <summary>
/// Builds the character-value-to-id lookup from a JSON file containing a flat integer array.
/// </summary>
/// <param name="unicodeIndexerPath">
/// Path of the JSON array; the element at position <c>i</c> becomes the id for character value <c>i</c>.
/// </param>
/// <exception cref="Exception">The file content deserializes to <c>null</c>.</exception>
public UnicodeProcessor(string unicodeIndexerPath)
{
    long[] table = JsonSerializer.Deserialize<long[]>(File.ReadAllText(unicodeIndexerPath));
    if (table == null)
    {
        throw new Exception("Failed to load indexer");
    }

    _indexer = table
        .Select((id, position) => new KeyValuePair<int, long>(position, id))
        .ToDictionary(entry => entry.Key, entry => entry.Value);
}
// Inclusive code point ranges that RemoveEmojis strips from its input.
private static readonly int[][] EmojiRanges =
{
    new[] { 0x1F600, 0x1F64F },
    new[] { 0x1F300, 0x1F5FF },
    new[] { 0x1F680, 0x1F6FF },
    new[] { 0x1F700, 0x1F77F },
    new[] { 0x1F780, 0x1F7FF },
    new[] { 0x1F800, 0x1F8FF },
    new[] { 0x1F900, 0x1F9FF },
    new[] { 0x1FA00, 0x1FA6F },
    new[] { 0x1FA70, 0x1FAFF },
    new[] { 0x2600, 0x26FF },
    new[] { 0x2700, 0x27BF },
    new[] { 0x1F1E6, 0x1F1FF },
};

/// <summary>
/// Returns <paramref name="text"/> with every code point that falls in one of the
/// <see cref="EmojiRanges"/> removed. Surrogate pairs are evaluated as a single code point;
/// unpaired surrogates are kept unchanged.
/// </summary>
/// <param name="text">Text to filter.</param>
/// <returns>The filtered text.</returns>
private static string RemoveEmojis(string text)
{
    var kept = new StringBuilder();
    int pos = 0;
    while (pos < text.Length)
    {
        // A valid high/low surrogate pair occupies two UTF-16 code units.
        int width = char.IsSurrogatePair(text, pos) ? 2 : 1;
        int codePoint = width == 2 ? char.ConvertToUtf32(text, pos) : text[pos];

        bool isEmoji = EmojiRanges.Any(range => codePoint >= range[0] && codePoint <= range[1]);
        if (!isEmoji)
        {
            // Copy the original code units so pairs and lone surrogates survive untouched.
            kept.Append(text, pos, width);
        }

        pos += width;
    }

    return kept.ToString();
}
/// <summary>
/// Cleans up raw input text and wraps it in language tags for the model.
/// Steps: NFKD normalization, emoji removal, symbol and expression replacement,
/// punctuation spacing fixes, duplicate-quote and whitespace collapsing, and appending a
/// final period when the text does not already end with punctuation.
/// </summary>
/// <param name="text">Raw text to clean up.</param>
/// <param name="lang">Language code; must be one of <see cref="Languages.Available"/>.</param>
/// <returns>The cleaned text enclosed in <c>&lt;lang&gt;</c> and <c>&lt;/lang&gt;</c> tags.</returns>
/// <exception cref="ArgumentException"><paramref name="lang"/> is not a supported language code.</exception>
private string PreprocessText(string text, string lang)
{
    // Validate the language first so an unsupported code fails before any processing.
    if (!Languages.Available.Contains(lang))
    {
        throw new ArgumentException($"Invalid language: {lang}. Available: {string.Join(", ", Languages.Available)}");
    }

    // TODO: Need advanced normalizer for better performance
    text = text.Normalize(NormalizationForm.FormKD);

    // Remove emojis (wide Unicode range).
    // C# regex has no \u{...} syntax, so character filtering is used instead.
    text = RemoveEmojis(text);

    // Replace various dashes and symbols. Non-ASCII keys are written as escapes so they
    // cannot be lost to an encoding problem: an empty key makes String.Replace throw.
    // An array keeps the replacement order explicit.
    var replacements = new[]
    {
        ("\u2013", "-"),   // en dash
        ("\u2011", "-"),   // non-breaking hyphen
        ("\u2014", "-"),   // em dash
        ("_", " "),        // underscore
        ("\u201C", "\""),  // left double quote
        ("\u201D", "\""),  // right double quote
        ("\u2018", "'"),   // left single quote
        ("\u2019", "'"),   // right single quote
        ("\u00B4", "'"),   // acute accent
        ("`", "'"),        // grave accent
        ("[", " "),        // left bracket
        ("]", " "),        // right bracket
        ("|", " "),        // vertical bar
        ("/", " "),        // slash
        ("#", " "),        // hash
        ("\u2192", " "),   // right arrow
        ("\u2190", " "),   // left arrow
    };
    foreach (var (search, replacement) in replacements)
    {
        text = text.Replace(search, replacement);
    }

    // Remove special symbols
    text = Regex.Replace(text, @"[♥☆♡©\\]", "");

    // Replace known expressions
    var exprReplacements = new[]
    {
        ("@", " at "),
        ("e.g.,", "for example, "),
        ("i.e.,", "that is, "),
    };
    foreach (var (search, replacement) in exprReplacements)
    {
        text = text.Replace(search, replacement);
    }

    // Fix spacing around punctuation: drop a single space directly before , . ! ? ; : '
    text = Regex.Replace(text, @" ([,.!?;:'])", "$1");

    // Remove duplicate quotes. Backticks were already turned into apostrophes above,
    // so only double quotes and apostrophes can still repeat.
    while (text.Contains("\"\""))
    {
        text = text.Replace("\"\"", "\"");
    }
    while (text.Contains("''"))
    {
        text = text.Replace("''", "'");
    }

    // Remove extra spaces
    text = Regex.Replace(text, @"\s+", " ").Trim();

    // If text doesn't end with punctuation, quotes, or closing brackets, add a period
    if (!Regex.IsMatch(text, @"[.!?;:,'\u0022\u201C\u201D\u2018\u2019)\]}…。」』】〉》›»]$"))
    {
        text += ".";
    }

    // Wrap text with language tags
    return $"<{lang}>{text}</{lang}>";
}
/// <summary>
/// Converts each UTF-16 code unit of <paramref name="text"/> to its integer value.
/// </summary>
/// <param name="text">Text to convert.</param>
/// <returns>One integer per <c>char</c>, in order.</returns>
private int[] TextToUnicodeValues(string text)
{
    var codeUnits = from ch in text select (int)ch;
    return codeUnits.ToArray();
}
/// <summary>
/// Builds the padding mask for a batch of texts by delegating to <c>Helper.LengthToMask</c>.
/// </summary>
/// <param name="textIdsLengths">Length of each text in the batch.</param>
/// <returns>The mask produced by <c>Helper.LengthToMask</c> for those lengths.</returns>
private float[][][] GetTextMask(long[] textIdsLengths) => Helper.LengthToMask(textIdsLengths);
/// <summary>
/// Preprocesses a batch of texts and converts them to padded id rows plus a padding mask.
/// </summary>
/// <param name="textList">Texts to convert.</param>
/// <param name="langList">Language code for the text at the same position in <paramref name="textList"/>.</param>
/// <returns>
/// <c>textIds</c>: one row per text, padded with zeros to the longest preprocessed text;
/// characters without an entry in the indexer also stay zero.
/// <c>textMask</c>: mask built from the preprocessed text lengths.
/// </returns>
public (long[][] textIds, float[][][] textMask) Call(List<string> textList, List<string> langList)
{
    List<string> prepared = textList
        .Select((raw, position) => PreprocessText(raw, langList[position]))
        .ToList();
    long[] lengths = prepared.Select(p => (long)p.Length).ToArray();
    long paddedLength = lengths.Max();

    var ids = new long[textList.Count][];
    int rowIndex = 0;
    foreach (string processed in prepared)
    {
        var row = new long[paddedLength];
        for (int k = 0; k < processed.Length; k++)
        {
            // The UTF-16 code unit value is the lookup key.
            if (_indexer.TryGetValue(processed[k], out long id))
            {
                row[k] = id;
            }
        }
        ids[rowIndex++] = row;
    }

    return (ids, GetTextMask(lengths));
}
}
// ============================================================================
// TextToSpeech class
// ============================================================================
public class TextToSpeech
{
private readonly Config _cfgs;
private readonly UnicodeProcessor _textProcessor;
private readonly InferenceSession _dpOrt;
private readonly InferenceSession _textEncOrt;
private readonly InferenceSession _vectorEstOrt;
private readonly InferenceSession _vocoderOrt;
public readonly int SampleRate;
private readonly int _baseChunkSize;
private readonly int _chunkCompressFactor;
private readonly int _ldim;
/// <summary>
/// Wires up the synthesizer from an already-loaded configuration, text processor and
/// the four ONNX sessions, and caches the numeric settings it needs from the configuration.
/// </summary>
/// <param name="cfgs">Model configuration; both <c>AE</c> and <c>TTL</c> sections are read.</param>
/// <param name="textProcessor">Converts text to model input ids and masks.</param>
/// <param name="dpOrt">Session used for duration prediction.</param>
/// <param name="textEncOrt">Session used for text encoding.</param>
/// <param name="vectorEstOrt">Session used in the iterative denoising loop.</param>
/// <param name="vocoderOrt">Session that turns the final latent into a waveform.</param>
public TextToSpeech(
    Config cfgs,
    UnicodeProcessor textProcessor,
    InferenceSession dpOrt,
    InferenceSession textEncOrt,
    InferenceSession vectorEstOrt,
    InferenceSession vocoderOrt)
{
    // Collaborators
    _cfgs = cfgs;
    _textProcessor = textProcessor;

    // Model sessions
    _dpOrt = dpOrt;
    _textEncOrt = textEncOrt;
    _vectorEstOrt = vectorEstOrt;
    _vocoderOrt = vocoderOrt;

    // Cached configuration values
    Config.AEConfig ae = cfgs.AE;
    SampleRate = ae.SampleRate;
    _baseChunkSize = ae.BaseChunkSize;

    Config.TTLConfig ttl = cfgs.TTL;
    _chunkCompressFactor = ttl.ChunkCompressFactor;
    _ldim = ttl.LatentDim;
}
/// <summary>
/// Draws the initial Gaussian noise latent for a batch and the mask marking which latent
/// frames belong to each item.
/// </summary>
/// <param name="duration">Predicted duration of each batch item, in seconds.</param>
/// <returns>
/// <c>noisyLatent</c>: standard-normal noise indexed as [batch][latent channel][frame], already
/// multiplied by the mask so padded frames are zero.
/// <c>latentMask</c>: the mask returned by <c>Helper.GetLatentMask</c>, indexed as [batch][0][frame].
/// </returns>
private (float[][][] noisyLatent, float[][][] latentMask) SampleNoisyLatent(float[] duration)
{
    int bsz = duration.Length;
    var wavLengths = duration.Select(d => (long)(d * SampleRate)).ToArray();
    int latentDim = _ldim * _chunkCompressFactor;

    // Build the mask first and take the frame count from it. Computing the frame count
    // separately in floating point can round differently from the mask's integer
    // arithmetic, which would make the masking below index past the end of the mask.
    var latentMask = Helper.GetLatentMask(wavLengths, _baseChunkSize, _chunkCompressFactor);
    int latentLen = latentMask[0][0].Length;

    var random = new Random();
    var noisyLatent = new float[bsz][][];
    for (int b = 0; b < bsz; b++)
    {
        float[] maskRow = latentMask[b][0];
        noisyLatent[b] = new float[latentDim][];
        for (int d = 0; d < latentDim; d++)
        {
            var frames = new float[latentLen];
            for (int t = 0; t < latentLen; t++)
            {
                // Box-Muller transform for normal distribution.
                // 1 - NextDouble() lies in (0, 1], which keeps Math.Log away from zero.
                double u1 = 1.0 - random.NextDouble();
                double u2 = 1.0 - random.NextDouble();
                float noise = (float)(Math.Sqrt(-2.0 * Math.Log(u1)) * Math.Cos(2.0 * Math.PI * u2));

                // Apply the mask while filling so padded frames end up as zero.
                frames[t] = noise * maskRow[t];
            }
            noisyLatent[b][d] = frames;
        }
    }

    return (noisyLatent, latentMask);
}
/// <summary>
/// Runs the full synthesis pipeline for a batch: duration prediction, text encoding,
/// iterative latent denoising and vocoding.
/// </summary>
/// <param name="textList">Texts to synthesize; one per batch item.</param>
/// <param name="langList">Language code for each text.</param>
/// <param name="style">Voice style; its batch dimension must equal the number of texts.</param>
/// <param name="totalStep">Number of denoising iterations.</param>
/// <param name="speed">Speed factor; predicted durations are divided by it.</param>
/// <returns>
/// <c>wav</c>: the flattened <c>wav_tts</c> output of the vocoder.
/// <c>duration</c>: predicted duration per batch item after applying <paramref name="speed"/>.
/// </returns>
/// <exception cref="ArgumentException">The number of texts differs from the style batch size.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="speed"/> is not greater than zero.</exception>
private (float[] wav, float[] duration) _Infer(List<string> textList, List<string> langList, Style style, int totalStep, float speed = 1.05f)
{
    int bsz = textList.Count;
    if (bsz != style.TtlShape[0])
    {
        throw new ArgumentException("Number of texts must match number of style vectors");
    }
    // Durations are divided by speed below; zero, negative or NaN would produce
    // infinite or negative durations. Written as !(x > 0) so NaN is rejected too.
    if (!(speed > 0f))
    {
        throw new ArgumentOutOfRangeException(nameof(speed), speed, "Speed must be greater than zero");
    }

    // Process text
    var (textIds, textMask) = _textProcessor.Call(textList, langList);
    var textIdsShape = new long[] { bsz, textIds[0].Length };
    var textMaskShape = new long[] { bsz, 1, textMask[0][0].Length };
    var textIdsTensor = Helper.IntArrayToTensor(textIds, textIdsShape);
    var textMaskTensor = Helper.ArrayToTensor(textMask, textMaskShape);
    var styleTtlTensor = new DenseTensor<float>(style.Ttl, style.TtlShape.Select(x => (int)x).ToArray());
    var styleDpTensor = new DenseTensor<float>(style.Dp, style.DpShape.Select(x => (int)x).ToArray());

    // Run duration predictor
    var dpInputs = new List<NamedOnnxValue>
    {
        NamedOnnxValue.CreateFromTensor("text_ids", textIdsTensor),
        NamedOnnxValue.CreateFromTensor("style_dp", styleDpTensor),
        NamedOnnxValue.CreateFromTensor("text_mask", textMaskTensor)
    };
    using (var dpOutputs = _dpOrt.Run(dpInputs))
    {
        var durOnnx = dpOutputs.First(o => o.Name == "duration").AsTensor<float>().ToArray();

        // Apply speed factor to duration
        for (int i = 0; i < durOnnx.Length; i++)
        {
            durOnnx[i] /= speed;
        }

        // Run text encoder
        var textEncInputs = new List<NamedOnnxValue>
        {
            NamedOnnxValue.CreateFromTensor("text_ids", textIdsTensor),
            NamedOnnxValue.CreateFromTensor("style_ttl", styleTtlTensor),
            NamedOnnxValue.CreateFromTensor("text_mask", textMaskTensor)
        };
        using (var textEncOutputs = _textEncOrt.Run(textEncInputs))
        {
            // The embedding tensor belongs to textEncOutputs, so it is only used inside this scope.
            var textEmbTensor = textEncOutputs.First(o => o.Name == "text_emb").AsTensor<float>();

            // Sample noisy latent
            var (xt, latentMask) = SampleNoisyLatent(durOnnx);
            var latentShape = new long[] { bsz, xt[0].Length, xt[0][0].Length };
            var latentMaskShape = new long[] { bsz, 1, latentMask[0][0].Length };

            // These inputs do not change between denoising steps, so build them once.
            var latentMaskTensor = Helper.ArrayToTensor(latentMask, latentMaskShape);
            var totalStepArray = Enumerable.Repeat((float)totalStep, bsz).ToArray();
            var totalStepTensor = new DenseTensor<float>(totalStepArray, new int[] { bsz });

            // Iterative denoising
            for (int step = 0; step < totalStep; step++)
            {
                var currentStepArray = Enumerable.Repeat((float)step, bsz).ToArray();
                var vectorEstInputs = new List<NamedOnnxValue>
                {
                    NamedOnnxValue.CreateFromTensor("noisy_latent", Helper.ArrayToTensor(xt, latentShape)),
                    NamedOnnxValue.CreateFromTensor("text_emb", textEmbTensor),
                    NamedOnnxValue.CreateFromTensor("style_ttl", styleTtlTensor),
                    NamedOnnxValue.CreateFromTensor("text_mask", textMaskTensor),
                    NamedOnnxValue.CreateFromTensor("latent_mask", latentMaskTensor),
                    NamedOnnxValue.CreateFromTensor("total_step", totalStepTensor),
                    NamedOnnxValue.CreateFromTensor("current_step", new DenseTensor<float>(currentStepArray, new int[] { bsz }))
                };
                using (var vectorEstOutputs = _vectorEstOrt.Run(vectorEstInputs))
                {
                    var denoisedLatent = vectorEstOutputs.First(o => o.Name == "denoised_latent").AsTensor<float>();

                    // Update xt: copy the output back in [batch][channel][frame] order
                    // using the tensor's linear index.
                    int idx = 0;
                    for (int b = 0; b < bsz; b++)
                    {
                        for (int d = 0; d < xt[b].Length; d++)
                        {
                            for (int t = 0; t < xt[b][d].Length; t++)
                            {
                                xt[b][d][t] = denoisedLatent.GetValue(idx++);
                            }
                        }
                    }
                }
            }

            // Run vocoder
            var vocoderInputs = new List<NamedOnnxValue>
            {
                NamedOnnxValue.CreateFromTensor("latent", Helper.ArrayToTensor(xt, latentShape))
            };
            using (var vocoderOutputs = _vocoderOrt.Run(vocoderInputs))
            {
                var wavTensor = vocoderOutputs.First(o => o.Name == "wav_tts").AsTensor<float>();
                return (wavTensor.ToArray(), durOnnx);
            }
        }
    }
}
/// <summary>
/// Synthesizes a single (possibly long) text with one voice style. The text is split
/// into chunks, each chunk is synthesized separately, and the results are concatenated
/// with a stretch of silence between consecutive chunks.
/// </summary>
/// <param name="text">Text to synthesize.</param>
/// <param name="lang">Language code; <c>"ko"</c> uses a chunk limit of 120 characters, any other code 300.</param>
/// <param name="style">Voice style with a batch dimension of exactly 1.</param>
/// <param name="totalStep">Number of denoising iterations per chunk.</param>
/// <param name="speed">Speed factor passed through to inference.</param>
/// <param name="silenceDuration">Seconds of silence inserted between chunks.</param>
/// <returns>
/// <c>wav</c>: the concatenated samples.
/// <c>duration</c>: a one-element array with the summed chunk durations plus inserted silences.
/// </returns>
/// <exception cref="ArgumentException">The style holds more or fewer than one style vector.</exception>
public (float[] wav, float[] duration) Call(string text, string lang, Style style, int totalStep, float speed = 1.05f, float silenceDuration = 0.3f)
{
    if (style.TtlShape[0] != 1)
    {
        throw new ArgumentException("Single speaker text to speech only supports single style");
    }

    var pieces = Helper.ChunkText(text, lang == "ko" ? 120 : 300);
    var samples = new List<float>();
    float totalDuration = 0.0f;

    foreach (var piece in pieces)
    {
        var (wav, duration) = _Infer(new List<string> { piece }, new List<string> { lang }, style, totalStep, speed);

        if (samples.Count != 0)
        {
            // Audio has already been collected: separate this chunk with silence.
            samples.AddRange(new float[(int)(silenceDuration * SampleRate)]);
            samples.AddRange(wav);
            totalDuration += duration[0] + silenceDuration;
            continue;
        }

        // Nothing collected yet: this chunk starts the output.
        samples.AddRange(wav);
        totalDuration = duration[0];
    }

    return (samples.ToArray(), new float[] { totalDuration });
}
/// <summary>
/// Synthesizes several texts in one batch; no chunking or concatenation is applied.
/// </summary>
/// <param name="textList">Texts to synthesize.</param>
/// <param name="langList">Language code for each text.</param>
/// <param name="style">Voice style whose batch dimension equals the number of texts.</param>
/// <param name="totalStep">Number of denoising iterations.</param>
/// <param name="speed">Speed factor passed through to inference.</param>
/// <returns>The waveform and per-item durations exactly as produced by the inference pipeline.</returns>
public (float[] wav, float[] duration) Batch(List<string> textList, List<string> langList, Style style, int totalStep, float speed = 1.05f)
    => _Infer(textList, langList, style, totalStep, speed);
}
// ============================================================================
// Helper class with utility functions
// ============================================================================
public static class Helper
{
// ============================================================================
// Utility functions
// ============================================================================
/// <summary>
/// Builds a 0/1 padding mask for a batch of sequence lengths.
/// </summary>
/// <param name="lengths">Valid length of each batch item.</param>
/// <param name="maxLen">Mask width; <c>-1</c> means "use the largest value in <paramref name="lengths"/>".</param>
/// <returns>
/// An array indexed as [item][0][position] holding <c>1.0f</c> for positions below the
/// item's length and <c>0.0f</c> elsewhere.
/// </returns>
public static float[][][] LengthToMask(long[] lengths, long maxLen = -1)
{
    long width = maxLen == -1 ? lengths.Max() : maxLen;

    var mask = new float[lengths.Length][][];
    for (int item = 0; item < lengths.Length; item++)
    {
        // New arrays are zero-filled, so only the leading valid positions need writing.
        var row = new float[width];
        long ones = Math.Min(Math.Max(lengths[item], 0L), width);
        for (long pos = 0; pos < ones; pos++)
        {
            row[pos] = 1.0f;
        }
        mask[item] = new[] { row };
    }

    return mask;
}
/// <summary>
/// Converts waveform lengths (in samples) to latent frame counts, rounding up, and
/// returns the corresponding padding mask.
/// </summary>
/// <param name="wavLengths">Length of each waveform, in samples.</param>
/// <param name="baseChunkSize">Base chunk size from the configuration.</param>
/// <param name="chunkCompressFactor">Compress factor; samples per latent frame is the product of both values.</param>
/// <returns>The mask built by <see cref="LengthToMask"/> for the latent frame counts.</returns>
public static float[][][] GetLatentMask(long[] wavLengths, int baseChunkSize, int chunkCompressFactor)
{
    int samplesPerFrame = baseChunkSize * chunkCompressFactor;
    // Ceiling division: a partially filled final frame still counts as a frame.
    long[] frameCounts = Array.ConvertAll(
        wavLengths,
        samples => (samples + samplesPerFrame - 1) / samplesPerFrame);
    return LengthToMask(frameCounts);
}
// ============================================================================
// ONNX model loading
// ============================================================================
/// <summary>
/// Creates an inference session for a single ONNX model file.
/// </summary>
/// <param name="onnxPath">Path of the model file.</param>
/// <param name="opts">Session options to create the session with.</param>
/// <returns>The new session; the caller owns it.</returns>
public static InferenceSession LoadOnnx(string onnxPath, SessionOptions opts)
    => new InferenceSession(onnxPath, opts);
/// <summary>
/// Loads the four model sessions (<c>duration_predictor.onnx</c>, <c>text_encoder.onnx</c>,
/// <c>vector_estimator.onnx</c>, <c>vocoder.onnx</c>) from a directory.
/// </summary>
/// <param name="onnxDir">Directory containing the model files.</param>
/// <param name="opts">Session options shared by all four sessions.</param>
/// <returns>The four sessions; the caller owns them.</returns>
/// <remarks>
/// If any model fails to load, the sessions created so far are disposed before the
/// original exception is rethrown, so a partial failure does not leak native sessions.
/// </remarks>
public static (InferenceSession dp, InferenceSession textEnc, InferenceSession vectorEst, InferenceSession vocoder)
LoadOnnxAll(string onnxDir, SessionOptions opts)
{
    string[] modelFiles =
    {
        "duration_predictor.onnx",
        "text_encoder.onnx",
        "vector_estimator.onnx",
        "vocoder.onnx",
    };

    var sessions = new InferenceSession[modelFiles.Length];
    try
    {
        for (int i = 0; i < modelFiles.Length; i++)
        {
            sessions[i] = LoadOnnx(Path.Combine(onnxDir, modelFiles[i]), opts);
        }
    }
    catch
    {
        // Release whatever was created before the failure, then surface the original error.
        foreach (var session in sessions)
        {
            session?.Dispose();
        }
        throw;
    }

    return (sessions[0], sessions[1], sessions[2], sessions[3]);
}
// ============================================================================
// Configuration loading
// ============================================================================
/// <summary>
/// Reads <c>tts.json</c> from the model directory and extracts the settings the synthesizer needs.
/// </summary>
/// <param name="onnxDir">Directory containing <c>tts.json</c>.</param>
/// <returns>
/// A <see cref="Config"/> filled from <c>ae.sample_rate</c>, <c>ae.base_chunk_size</c>,
/// <c>ttl.chunk_compress_factor</c> and <c>ttl.latent_dim</c>.
/// </returns>
public static Config LoadCfgs(string onnxDir)
{
    string json = File.ReadAllText(Path.Combine(onnxDir, "tts.json"));
    using (var doc = JsonDocument.Parse(json))
    {
        JsonElement root = doc.RootElement;

        // Values are read in the same order as they are listed in the summary.
        var aeSection = new Config.AEConfig();
        aeSection.SampleRate = root.GetProperty("ae").GetProperty("sample_rate").GetInt32();
        aeSection.BaseChunkSize = root.GetProperty("ae").GetProperty("base_chunk_size").GetInt32();

        var ttlSection = new Config.TTLConfig();
        ttlSection.ChunkCompressFactor = root.GetProperty("ttl").GetProperty("chunk_compress_factor").GetInt32();
        ttlSection.LatentDim = root.GetProperty("ttl").GetProperty("latent_dim").GetInt32();

        var config = new Config();
        config.AE = aeSection;
        config.TTL = ttlSection;
        return config;
    }
}
/// <summary>
/// Creates the text processor from <c>unicode_indexer.json</c> in the model directory.
/// </summary>
/// <param name="onnxDir">Directory containing <c>unicode_indexer.json</c>.</param>
/// <returns>A text processor initialized from that file.</returns>
public static UnicodeProcessor LoadTextProcessor(string onnxDir)
    => new UnicodeProcessor(Path.Combine(onnxDir, "unicode_indexer.json"));
// ============================================================================
// Voice style loading
// ============================================================================
/// <summary>
/// Loads one voice style JSON file per batch item and stacks them into a single <see cref="Style"/>.
/// </summary>
/// <param name="voiceStylePaths">Paths of the style files; the batch size is the number of paths.</param>
/// <param name="verbose">When <c>true</c>, writes the number of loaded styles to the console.</param>
/// <returns>
/// A style whose shapes are <c>{ batch, dims[1], dims[2] }</c>, with the dims taken from the
/// <c>style_ttl</c> and <c>style_dp</c> entries of the first file.
/// </returns>
/// <exception cref="InvalidDataException">
/// A file's <c>data</c> array does not contain exactly <c>dims[1] * dims[2]</c> values.
/// </exception>
public static Style LoadVoiceStyle(List<string> voiceStylePaths, bool verbose = false)
{
    int bsz = voiceStylePaths.Count;

    // Read first file to get dimensions
    long ttlDim1, ttlDim2, dpDim1, dpDim2;
    using (var firstDoc = JsonDocument.Parse(File.ReadAllText(voiceStylePaths[0])))
    {
        var firstRoot = firstDoc.RootElement;
        var ttlDims = ParseInt64Array(firstRoot.GetProperty("style_ttl").GetProperty("dims"));
        var dpDims = ParseInt64Array(firstRoot.GetProperty("style_dp").GetProperty("dims"));
        ttlDim1 = ttlDims[1];
        ttlDim2 = ttlDims[2];
        dpDim1 = dpDims[1];
        dpDim2 = dpDims[2];
    }

    // checked: fail loudly instead of wrapping around when the dims are absurdly large.
    int ttlPerStyle = checked((int)(ttlDim1 * ttlDim2));
    int dpPerStyle = checked((int)(dpDim1 * dpDim2));

    // Pre-allocate arrays with full batch size
    var ttlFlat = new float[checked(bsz * ttlPerStyle)];
    var dpFlat = new float[checked(bsz * dpPerStyle)];

    // Fill in the data
    for (int i = 0; i < bsz; i++)
    {
        string path = voiceStylePaths[i];
        using (var doc = JsonDocument.Parse(File.ReadAllText(path)))
        {
            var root = doc.RootElement;
            CopyStyleData(root.GetProperty("style_ttl").GetProperty("data"), ttlFlat, i * ttlPerStyle, ttlPerStyle, "style_ttl", path);
            CopyStyleData(root.GetProperty("style_dp").GetProperty("data"), dpFlat, i * dpPerStyle, dpPerStyle, "style_dp", path);
        }
    }

    var ttlShape = new long[] { bsz, ttlDim1, ttlDim2 };
    var dpShape = new long[] { bsz, dpDim1, dpDim2 };
    if (verbose)
    {
        Console.WriteLine($"Loaded {bsz} voice styles");
    }
    return new Style(ttlFlat, ttlShape, dpFlat, dpShape);
}

// Flattens one 3D "data" array and copies it into its slot of the batch buffer, rejecting
// files whose element count differs from the expected slot size. Without this check a short
// file would silently leave zeros in the style vector.
private static void CopyStyleData(JsonElement data, float[] destination, int offset, int expectedCount, string propertyName, string path)
{
    float[] flat = ParseFloat3DArray(data)
        .SelectMany(plane => plane)
        .SelectMany(row => row)
        .ToArray();
    if (flat.Length != expectedCount)
    {
        throw new InvalidDataException(
            $"Voice style '{path}': {propertyName} contains {flat.Length} values but {expectedCount} were expected");
    }
    Array.Copy(flat, 0, destination, offset, expectedCount);
}
/// <summary>
/// Reads a JSON array of arrays of arrays of numbers into a jagged <c>float</c> array.
/// </summary>
/// <param name="element">JSON element holding the three-level nested array.</param>
/// <returns>The values indexed in the same nesting order as the JSON.</returns>
private static float[][][] ParseFloat3DArray(JsonElement element)
{
    return element.EnumerateArray()
        .Select(plane => plane.EnumerateArray()
            .Select(row => row.EnumerateArray()
                .Select(value => value.GetSingle())
                .ToArray())
            .ToArray())
        .ToArray();
}
/// <summary>
/// Reads a flat JSON array of integers into a <c>long</c> array.
/// </summary>
/// <param name="element">JSON element holding the array.</param>
/// <returns>The values in document order.</returns>
private static long[] ParseInt64Array(JsonElement element)
{
    return element.EnumerateArray()
        .Select(value => value.GetInt64())
        .ToArray();
}
// ============================================================================
// TextToSpeech loading
// ============================================================================
/// <summary>
/// Loads the configuration, text processor and all model sessions from a directory and
/// assembles a ready-to-use <see cref="TextToSpeech"/>.
/// </summary>
/// <param name="onnxDir">Directory containing the model files and JSON resources.</param>
/// <param name="useGpu">Must be <c>false</c>; GPU execution is not implemented.</param>
/// <returns>The assembled synthesizer.</returns>
/// <exception cref="NotImplementedException"><paramref name="useGpu"/> is <c>true</c>.</exception>
public static TextToSpeech LoadTextToSpeech(string onnxDir, bool useGpu = false)
{
    // Reject unsupported modes before allocating any native resources.
    if (useGpu)
    {
        throw new NotImplementedException("GPU mode is not supported yet");
    }
    Console.WriteLine("Using CPU for inference");

    // Load the cheap, purely managed pieces first: if either file is missing or invalid,
    // no native sessions have been created yet and nothing needs to be cleaned up.
    var cfgs = LoadCfgs(onnxDir);
    var textProcessor = LoadTextProcessor(onnxDir);

    // The options are only needed while the sessions are being created.
    using (var opts = new SessionOptions())
    {
        var (dpOrt, textEncOrt, vectorEstOrt, vocoderOrt) = LoadOnnxAll(onnxDir, opts);
        return new TextToSpeech(cfgs, textProcessor, dpOrt, textEncOrt, vectorEstOrt, vocoderOrt);
    }
}
// ============================================================================
// WAV file writing
// ============================================================================
/// <summary>
/// Writes samples to a mono, 16-bit PCM WAV file, creating or overwriting the file.
/// </summary>
/// <param name="filename">Path of the file to write.</param>
/// <param name="audioData">Samples; values are clamped to [-1, 1] before conversion to 16-bit integers.</param>
/// <param name="sampleRate">Sample rate stored in the header, in samples per second.</param>
public static void WriteWavFile(string filename, float[] audioData, int sampleRate)
{
    const int numChannels = 1;
    const int bitsPerSample = 16;

    using (var stream = File.Open(filename, FileMode.Create))
    using (var writer = new BinaryWriter(stream))
    {
        // Chunk identifiers are written as raw four-byte ASCII tags.
        void WriteTag(string tag) => writer.Write(Encoding.ASCII.GetBytes(tag));

        int byteRate = sampleRate * numChannels * bitsPerSample / 8;
        short blockAlign = (short)(numChannels * bitsPerSample / 8);
        int dataSize = audioData.Length * bitsPerSample / 8;

        // RIFF header; 36 is the number of header bytes that follow the size field.
        WriteTag("RIFF");
        writer.Write(36 + dataSize);
        WriteTag("WAVE");

        // fmt chunk
        WriteTag("fmt ");
        writer.Write(16);          // fmt chunk size
        writer.Write((short)1);    // audio format (PCM)
        writer.Write((short)numChannels);
        writer.Write(sampleRate);
        writer.Write(byteRate);
        writer.Write(blockAlign);
        writer.Write((short)bitsPerSample);

        // data chunk
        WriteTag("data");
        writer.Write(dataSize);

        // Audio data: clamp, scale to the 16-bit range, truncate toward zero.
        for (int i = 0; i < audioData.Length; i++)
        {
            float bounded = Math.Min(1.0f, Math.Max(-1.0f, audioData[i]));
            writer.Write((short)(bounded * 32767));
        }
    }
}
// ============================================================================
// Tensor conversion utilities
// ============================================================================
/// <summary>
/// Flattens a three-level jagged array in nesting order and wraps it in a dense tensor.
/// </summary>
/// <param name="array">Values indexed as [outer][middle][inner].</param>
/// <param name="dims">Tensor dimensions; each value is cast to <c>int</c>.</param>
/// <returns>A tensor over the flattened values with the given dimensions.</returns>
public static DenseTensor<float> ArrayToTensor(float[][][] array, long[] dims)
{
    var buffer = new List<float>();
    foreach (float[][] plane in array)
    {
        foreach (float[] row in plane)
        {
            buffer.AddRange(row);
        }
    }

    float[] values = buffer.ToArray();
    int[] shape = Array.ConvertAll(dims, d => (int)d);
    return new DenseTensor<float>(values, shape);
}
/// <summary>
/// Flattens a two-level jagged array row by row and wraps it in a dense tensor.
/// </summary>
/// <param name="array">Values indexed as [row][column].</param>
/// <param name="dims">Tensor dimensions; each value is cast to <c>int</c>.</param>
/// <returns>A tensor over the flattened values with the given dimensions.</returns>
public static DenseTensor<long> IntArrayToTensor(long[][] array, long[] dims)
{
    var buffer = new List<long>();
    foreach (long[] row in array)
    {
        buffer.AddRange(row);
    }

    long[] values = buffer.ToArray();
    int[] shape = Array.ConvertAll(dims, d => (int)d);
    return new DenseTensor<long>(values, shape);
}
// ============================================================================
// Timer utility
// ============================================================================
/// <summary>
/// Runs <paramref name="func"/>, printing a start line before and an elapsed-time line after it.
/// </summary>
/// <typeparam name="T">Result type of the measured function.</typeparam>
/// <param name="name">Label used in both console lines.</param>
/// <param name="func">Work to run and measure.</param>
/// <returns>Whatever <paramref name="func"/> returns.</returns>
public static T Timer<T>(string name, Func<T> func)
{
    Console.WriteLine($"{name}...");

    // Stopwatch measures elapsed time independently of the wall clock, so the result is
    // not distorted by clock adjustments or daylight-saving changes during the run.
    var stopwatch = System.Diagnostics.Stopwatch.StartNew();
    var result = func();
    stopwatch.Stop();

    Console.WriteLine($" -> {name} completed in {stopwatch.Elapsed.TotalSeconds:F2} sec");
    return result;
}
/// <summary>
/// Produces a file-name-safe version of the first <paramref name="maxLen"/> characters of
/// <paramref name="text"/>: letters and digits are kept, every other character becomes <c>_</c>.
/// </summary>
/// <param name="text">Text to sanitize.</param>
/// <param name="maxLen">Maximum number of characters in the result; zero or less yields an empty string.</param>
/// <returns>The sanitized, truncated text.</returns>
public static string SanitizeFilename(string text, int maxLen)
{
    int limit = Math.Max(0, Math.Min(text.Length, maxLen));
    var safe = new char[limit];
    for (int i = 0; i < limit; i++)
    {
        char current = text[i];
        safe[i] = char.IsLetterOrDigit(current) ? current : '_';
    }
    return new string(safe);
}
// ============================================================================
// Chunk text
// ============================================================================
// Paragraph separator: a newline, optional whitespace, then at least one more newline.
// Built once instead of on every ChunkText call.
private static readonly Regex ParagraphBreak = new Regex(@"\n\s*\n+");

// Sentence boundary: whitespace that follows '.', '!' or '?', unless the preceding text is
// one of the listed abbreviations or a single capital letter followed by a period.
private static readonly Regex SentenceBreak = new Regex(@"(?<!Mr\.|Mrs\.|Ms\.|Dr\.|Prof\.|Sr\.|Jr\.|Ph\.D\.|etc\.|e\.g\.|i\.e\.|vs\.|Inc\.|Ltd\.|Co\.|Corp\.|St\.|Ave\.|Blvd\.)(?<!\b[A-Z]\.)(?<=[.!?])\s+");

/// <summary>
/// Splits text into chunks for synthesis. Paragraphs are never merged; within a paragraph,
/// consecutive sentences are joined with a single space for as long as the chunk stays within
/// <paramref name="maxLen"/> characters.
/// </summary>
/// <param name="text">Text to split.</param>
/// <param name="maxLen">Target maximum chunk length, in characters.</param>
/// <returns>
/// The chunks in reading order. A single sentence longer than <paramref name="maxLen"/> is
/// returned as one oversized chunk; input without any content yields one chunk holding the
/// trimmed input.
/// </returns>
public static List<string> ChunkText(string text, int maxLen = 300)
{
    var chunks = new List<string>();

    // Split by paragraph (two or more newlines)
    var paragraphs = ParagraphBreak.Split(text.Trim())
        .Select(p => p.Trim())
        .Where(p => !string.IsNullOrEmpty(p))
        .ToList();

    foreach (var paragraph in paragraphs)
    {
        string currentChunk = "";
        foreach (var sentence in SentenceBreak.Split(paragraph))
        {
            if (string.IsNullOrEmpty(sentence))
            {
                continue;
            }

            // The +1 accounts for the joining space.
            if (currentChunk.Length + sentence.Length + 1 <= maxLen)
            {
                currentChunk = currentChunk.Length == 0 ? sentence : currentChunk + " " + sentence;
                continue;
            }

            // The sentence does not fit: flush what has been collected and start a new chunk.
            if (currentChunk.Length != 0)
            {
                chunks.Add(currentChunk.Trim());
            }
            currentChunk = sentence;
        }

        if (currentChunk.Length != 0)
        {
            chunks.Add(currentChunk.Trim());
        }
    }

    // If no chunks were created, return the original text
    if (chunks.Count == 0)
    {
        chunks.Add(text.Trim());
    }
    return chunks;
}
}
}

View File

@@ -0,0 +1,19 @@
using System;
using System.Windows.Forms;
namespace Supertonic.WinForms
{
/// <summary>
/// Hosts the process entry point of the WinForms application.
/// </summary>
static class Program
{
    /// <summary>
    /// Application entry point: enables visual styles, selects the default text rendering
    /// mode, and runs the message loop with <c>fMain</c> as the main form.
    /// </summary>
    [STAThread]
    static void Main()
    {
        Application.EnableVisualStyles();
        Application.SetCompatibleTextRenderingDefault(false);

        var mainForm = new fMain();
        Application.Run(mainForm);
    }
}
}

View File

@@ -0,0 +1,17 @@
using System.Reflection;
using System.Runtime.InteropServices;
// General information about the assembly. Description, configuration, company,
// trademark and culture are intentionally left empty.
[assembly: AssemblyTitle("Supertonic.WinForms")]
[assembly: AssemblyDescription("")]
[assembly: AssemblyConfiguration("")]
[assembly: AssemblyCompany("")]
[assembly: AssemblyProduct("Supertonic.WinForms")]
[assembly: AssemblyCopyright("Copyright © 2026")]
[assembly: AssemblyTrademark("")]
[assembly: AssemblyCulture("")]
// Types in this assembly are not exposed to COM components.
[assembly: ComVisible(false)]
// Assembly-level GUID.
[assembly: Guid("bd3e8373-c40c-4f7f-aa18-6990f1cfd21a")]
// Assembly version and file version are kept identical.
[assembly: AssemblyVersion("1.0.0.0")]
[assembly: AssemblyFileVersion("1.0.0.0")]

View File

@@ -0,0 +1,63 @@
//------------------------------------------------------------------------------
// <auto-generated>
// This code was generated by a tool.
// Runtime Version:4.0.30319.42000
//
// Changes to this file may cause incorrect behavior and will be lost if
// the code is regenerated.
// </auto-generated>
//------------------------------------------------------------------------------
namespace Supertonic.WinForms.Properties {
using System;
/// <summary>
/// A strongly-typed resource class, for looking up localized strings, etc.
/// </summary>
// This class was auto-generated by the StronglyTypedResourceBuilder
// class via a tool like ResGen or Visual Studio.
// To add or remove a member, edit your .ResX file then rerun ResGen
// with the /str option, or rebuild your VS project.
[global::System.CodeDom.Compiler.GeneratedCodeAttribute("System.Resources.Tools.StronglyTypedResourceBuilder", "4.0.0.0")]
[global::System.Diagnostics.DebuggerNonUserCodeAttribute()]
[global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()]
internal class Resources {
private static global::System.Resources.ResourceManager resourceMan;
private static global::System.Globalization.CultureInfo resourceCulture;
[global::System.Diagnostics.CodeAnalysis.SuppressMessageAttribute("Microsoft.Performance", "CA1811:AvoidUncalledPrivateCode")]
internal Resources() {
}
/// <summary>
/// Returns the cached ResourceManager instance used by this class.
/// </summary>
[global::System.ComponentModel.EditorBrowsableAttribute(global::System.ComponentModel.EditorBrowsableState.Advanced)]
internal static global::System.Resources.ResourceManager ResourceManager {
get {
if (object.ReferenceEquals(resourceMan, null)) {
global::System.Resources.ResourceManager temp = new global::System.Resources.ResourceManager("Supertonic.WinForms.Properties.Resources", typeof(Resources).Assembly);
resourceMan = temp;
}
return resourceMan;
}
}
/// <summary>
/// Overrides the current thread's CurrentUICulture property for all
/// resource lookups using this strongly typed resource class.
/// </summary>
[global::System.ComponentModel.EditorBrowsableAttribute(global::System.ComponentModel.EditorBrowsableState.Advanced)]
internal static global::System.Globalization.CultureInfo Culture {
get {
return resourceCulture;
}
set {
resourceCulture = value;
}
}
}
}

View File

@@ -0,0 +1,101 @@
<?xml version="1.0" encoding="utf-8"?>
<root>
<!--
Microsoft ResX Schema
Version 1.3
The primary goal of this format is to allow a simple XML format
that is mostly human readable. The generation and parsing of the
various data types are done through the TypeConverter classes
associated with the data types.
Example:
... ado.net/XML headers & schema ...
<resheader name="resmimetype">text/microsoft-resx</resheader>
<resheader name="version">1.3</resheader>
<resheader name="reader">System.Resources.ResXResourceReader, System.Windows.Forms, ...</resheader>
<resheader name="writer">System.Resources.ResXResourceWriter, System.Windows.Forms, ...</resheader>
<data name="Name1">this is my long string</data>
<data name="Color1" type="System.Drawing.Color, System.Drawing">Blue</data>
<data name="Bitmap1" mimetype="application/x-microsoft.net.object.binary.base64">
[base64 mime encoded serialized .NET Framework object]
</data>
<data name="Icon1" type="System.Drawing.Icon, System.Drawing" mimetype="application/x-microsoft.net.object.bytearray.base64">
[base64 mime encoded string representing a byte array form of the .NET Framework object]
</data>
There are any number of "resheader" rows that contain simple
name/value pairs.
Each data row contains a name, and value. The row also contains a
type or mimetype. Type corresponds to a .NET class that support
text/value conversion through the TypeConverter architecture.
Classes that don't support this are serialized and stored with the
mimetype set.
The mimetype is used for serialized objects, and tells the
ResXResourceReader how to depersist the object. This is currently not
extensible. For a given mimetype the value must be set accordingly:
Note - application/x-microsoft.net.object.binary.base64 is the format
that the ResXResourceWriter will generate, however the reader can
read any of the formats listed below.
mimetype: application/x-microsoft.net.object.binary.base64
value : The object must be serialized with
: System.Serialization.Formatters.Binary.BinaryFormatter
: and then encoded with base64 encoding.
mimetype: application/x-microsoft.net.object.soap.base64
value : The object must be serialized with
: System.Runtime.Serialization.Formatters.Soap.SoapFormatter
: and then encoded with base64 encoding.
mimetype: application/x-microsoft.net.object.bytearray.base64
value : The object must be serialized into a byte array
: using a System.ComponentModel.TypeConverter
: and then encoded with base64 encoding.
-->
<xsd:schema id="root" xmlns="" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:msdata="urn:schemas-microsoft-com:xml-msdata">
<xsd:element name="root" msdata:IsDataSet="true">
<xsd:complexType>
<xsd:choice maxOccurs="unbounded">
<xsd:element name="data">
<xsd:complexType>
<xsd:sequence>
<xsd:element name="value" type="xsd:string" minOccurs="0" msdata:Ordinal="1" />
<xsd:element name="comment" type="xsd:string" minOccurs="0" msdata:Ordinal="2" />
</xsd:sequence>
<xsd:attribute name="name" type="xsd:string" msdata:Ordinal="1" />
<xsd:attribute name="type" type="xsd:string" msdata:Ordinal="3" />
<xsd:attribute name="mimetype" type="xsd:string" msdata:Ordinal="4" />
</xsd:complexType>
</xsd:element>
<xsd:element name="resheader">
<xsd:complexType>
<xsd:sequence>
<xsd:element name="value" type="xsd:string" minOccurs="0" msdata:Ordinal="1" />
</xsd:sequence>
<xsd:attribute name="name" type="xsd:string" use="required" />
</xsd:complexType>
</xsd:element>
</xsd:choice>
</xsd:complexType>
</xsd:element>
</xsd:schema>
<resheader name="resmimetype">
<value>text/microsoft-resx</value>
</resheader>
<resheader name="version">
<value>1.3</value>
</resheader>
<resheader name="reader">
<value>System.Resources.ResXResourceReader, System.Windows.Forms, Version=2.0.3500.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
</resheader>
<resheader name="writer">
<value>System.Resources.ResXResourceWriter, System.Windows.Forms, Version=2.0.3500.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
</resheader>
</root>

View File

@@ -0,0 +1,26 @@
//------------------------------------------------------------------------------
// <auto-generated>
// This code was generated by a tool.
// Runtime Version:4.0.30319.42000
//
// Changes to this file may cause incorrect behavior and will be lost if
// the code is regenerated.
// </auto-generated>
//------------------------------------------------------------------------------
namespace Supertonic.WinForms.Properties {
// Application settings class; no individual settings are declared here.
[global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()]
[global::System.CodeDom.Compiler.GeneratedCodeAttribute("Microsoft.VisualStudio.Editors.SettingsDesigner.SettingsSingleFileGenerator", "15.9.0.0")]
internal sealed partial class Settings : global::System.Configuration.ApplicationSettingsBase {
// Single shared instance, wrapped by ApplicationSettingsBase.Synchronized.
private static Settings defaultInstance = ((Settings)(global::System.Configuration.ApplicationSettingsBase.Synchronized(new Settings())));
// Returns the shared settings instance.
public static Settings Default {
get {
return defaultInstance;
}
}
}
}

View File

@@ -0,0 +1,6 @@
<?xml version='1.0' encoding='utf-8'?>
<SettingsFile xmlns="http://schemas.microsoft.com/VisualStudio/2004/01/settings" CurrentProfile="(Default)">
<Profiles>
<Profile Name="(Default)" />
</Profiles>
</SettingsFile>

View File

@@ -0,0 +1,91 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
<PropertyGroup>
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
<ProjectGuid>{BD3E8373-C40C-4F7F-AA18-6990F1CFD21A}</ProjectGuid>
<OutputType>WinExe</OutputType>
<RootNamespace>Supertonic.WinForms</RootNamespace>
<AssemblyName>Supertonic.WinForms</AssemblyName>
<TargetFrameworkVersion>v4.8</TargetFrameworkVersion>
<FileAlignment>512</FileAlignment>
<AutoGenerateBindingRedirects>true</AutoGenerateBindingRedirects>
<Deterministic>true</Deterministic>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
<PlatformTarget>x64</PlatformTarget>
<DebugSymbols>true</DebugSymbols>
<DebugType>full</DebugType>
<Optimize>false</Optimize>
<OutputPath>bin\Debug\</OutputPath>
<DefineConstants>DEBUG;TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
</PropertyGroup>
<!--
  Release build settings.
  PlatformTarget is pinned to x64 to match the Debug configuration above. The project
  references Microsoft.ML.OnnxRuntime, which ships platform-specific native binaries;
  leaving Release on AnyCPU lets the process bitness differ from the Debug build that
  was actually tested. NOTE(review): if a 32-bit Release build is really required,
  add a dedicated x86 configuration instead of relying on AnyCPU.
-->
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
  <PlatformTarget>x64</PlatformTarget>
  <DebugType>pdbonly</DebugType>
  <Optimize>true</Optimize>
  <OutputPath>bin\Release\</OutputPath>
  <DefineConstants>TRACE</DefineConstants>
  <ErrorReport>prompt</ErrorReport>
  <WarningLevel>4</WarningLevel>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'Debug|Win32'">
<OutputPath>bin\Debug\</OutputPath>
</PropertyGroup>
<ItemGroup>
<Reference Include="System" />
<Reference Include="System.Core" />
<Reference Include="System.Xml.Linq" />
<Reference Include="System.Data.DataSetExtensions" />
<Reference Include="Microsoft.CSharp" />
<Reference Include="System.Data" />
<Reference Include="System.Deployment" />
<Reference Include="System.Drawing" />
<Reference Include="System.Net.Http" />
<Reference Include="System.Windows.Forms" />
<Reference Include="System.Xml" />
</ItemGroup>
<ItemGroup>
<Compile Include="fMain.cs">
<SubType>Form</SubType>
</Compile>
<Compile Include="fMain.Designer.cs">
<DependentUpon>fMain.cs</DependentUpon>
</Compile>
<Compile Include="Helper.cs" />
<Compile Include="Program.cs" />
<Compile Include="Properties\AssemblyInfo.cs" />
<Compile Include="Properties\Resources.Designer.cs">
<AutoGen>True</AutoGen>
<DesignTime>True</DesignTime>
<DependentUpon>Resources.resx</DependentUpon>
</Compile>
<Compile Include="Properties\Settings.Designer.cs">
<AutoGen>True</AutoGen>
<DesignTimeSharedInput>True</DesignTimeSharedInput>
<DependentUpon>Settings.settings</DependentUpon>
</Compile>
<EmbeddedResource Include="fMain.resx">
<DependentUpon>fMain.cs</DependentUpon>
</EmbeddedResource>
<EmbeddedResource Include="Properties\Resources.resx">
<Generator>ResXFileCodeGenerator</Generator>
<LastGenOutput>Resources.Designer.cs</LastGenOutput>
</EmbeddedResource>
<None Include="App.config" />
<None Include="Properties\Settings.settings">
<Generator>SettingsSingleFileGenerator</Generator>
<LastGenOutput>Settings.Designer.cs</LastGenOutput>
</None>
<PackageReference Include="Microsoft.ML.OnnxRuntime">
<Version>1.24.1</Version>
</PackageReference>
<PackageReference Include="System.Text.Json">
<Version>10.0.2</Version>
</PackageReference>
</ItemGroup>
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
</Project>

View File

@@ -0,0 +1,25 @@
Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Express 15 for Windows Desktop
VisualStudioVersion = 15.0.36324.19
MinimumVisualStudioVersion = 10.0.40219.1
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Supertonic.WinForms", "Supertonic.WinForms.csproj", "{BD3E8373-C40C-4F7F-AA18-6990F1CFD21A}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Release|Any CPU = Release|Any CPU
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{BD3E8373-C40C-4F7F-AA18-6990F1CFD21A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{BD3E8373-C40C-4F7F-AA18-6990F1CFD21A}.Debug|Any CPU.Build.0 = Debug|Any CPU
{BD3E8373-C40C-4F7F-AA18-6990F1CFD21A}.Release|Any CPU.ActiveCfg = Release|Any CPU
{BD3E8373-C40C-4F7F-AA18-6990F1CFD21A}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {1DFF2850-1C17-454F-BB29-E08B604073C4}
EndGlobalSection
EndGlobal

187
HMI/TestProject/tts/fMain.Designer.cs generated Normal file
View File

@@ -0,0 +1,187 @@
namespace Supertonic.WinForms
{
// Designer-generated half of the fMain form: declares the form's controls and
// builds the layout in InitializeComponent(). The event handlers referenced here
// (btnGenerate_Click, fMain_Load) are expected in the other part of this partial class.
partial class fMain
{
/// <summary>
/// Component container checked in Dispose(bool). It is initialized to null and
/// is not assigned anywhere in this file.
/// </summary>
private System.ComponentModel.IContainer components = null;
/// <summary>
/// Disposes the components container (when one exists) and then lets the base
/// class release its own resources.
/// </summary>
/// <param name="disposing">true when managed resources should be disposed; the
/// components container is only disposed in that case.</param>
protected override void Dispose(bool disposing)
{
if (disposing && (components != null))
{
components.Dispose();
}
base.Dispose(disposing);
}
#region Windows Form Designer generated code
/// <summary>
/// Creates every control, assigns its layout and default values, wires the
/// Click and Load events, and adds the controls to the form. This method is
/// owned by the Windows Forms designer; manual edits may be overwritten.
/// </summary>
private void InitializeComponent()
{
this.txtInput = new System.Windows.Forms.TextBox();
this.btnGenerate = new System.Windows.Forms.Button();
this.cmbLang = new System.Windows.Forms.ComboBox();
this.txtStylePath = new System.Windows.Forms.TextBox();
this.numSteps = new System.Windows.Forms.NumericUpDown();
this.numSpeed = new System.Windows.Forms.NumericUpDown();
this.lblText = new System.Windows.Forms.Label();
this.lblLang = new System.Windows.Forms.Label();
this.lblStyle = new System.Windows.Forms.Label();
this.lblSteps = new System.Windows.Forms.Label();
this.lblSpeed = new System.Windows.Forms.Label();
this.txtLog = new System.Windows.Forms.TextBox();
// BeginInit/EndInit bracket the NumericUpDown property assignments below.
((System.ComponentModel.ISupportInitialize)(this.numSteps)).BeginInit();
((System.ComponentModel.ISupportInitialize)(this.numSpeed)).BeginInit();
this.SuspendLayout();
//
// txtInput
//
// Multi-line box for the text to synthesize, pre-filled with a sample sentence.
this.txtInput.Location = new System.Drawing.Point(12, 29);
this.txtInput.Multiline = true;
this.txtInput.Name = "txtInput";
this.txtInput.Size = new System.Drawing.Size(460, 60);
this.txtInput.TabIndex = 0;
this.txtInput.Text = "This morning, I took a walk in the park.";
//
// btnGenerate
//
// Button whose Click event is routed to btnGenerate_Click.
this.btnGenerate.Location = new System.Drawing.Point(372, 169);
this.btnGenerate.Name = "btnGenerate";
this.btnGenerate.Size = new System.Drawing.Size(100, 30);
this.btnGenerate.TabIndex = 1;
this.btnGenerate.Text = "Generate TTS";
this.btnGenerate.UseVisualStyleBackColor = true;
this.btnGenerate.Click += new System.EventHandler(this.btnGenerate_Click);
//
// cmbLang
//
// DropDownList style: the user can only pick an existing item. No items are
// added in this method.
this.cmbLang.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList;
this.cmbLang.FormattingEnabled = true;
this.cmbLang.Location = new System.Drawing.Point(12, 114);
this.cmbLang.Name = "cmbLang";
this.cmbLang.Size = new System.Drawing.Size(80, 21);
this.cmbLang.TabIndex = 2;
//
// txtStylePath
//
// Voice style file path, defaulting to a relative path.
this.txtStylePath.Location = new System.Drawing.Point(110, 114);
this.txtStylePath.Name = "txtStylePath";
this.txtStylePath.Size = new System.Drawing.Size(362, 20);
this.txtStylePath.TabIndex = 3;
this.txtStylePath.Text = "assets/voice_styles/M1.json";
//
// numSteps
//
// Default value 5. Minimum/Maximum are not set here, so the control's
// framework defaults apply (documented as 0 and 100), i.e. 0 is selectable.
this.numSteps.Location = new System.Drawing.Point(12, 169);
this.numSteps.Name = "numSteps";
this.numSteps.Size = new System.Drawing.Size(80, 20);
this.numSteps.TabIndex = 4;
this.numSteps.Value = new decimal(new int[] { 5, 0, 0, 0 });
//
// numSpeed
//
// The decimal(int[]) constructor takes { lo, mid, hi, flags }; flags 131072
// (0x20000) encodes a scale of 2, so Increment is 0.05 and Value is 1.05.
// Minimum/Maximum are not set here either, so 0 is selectable.
this.numSpeed.DecimalPlaces = 2;
this.numSpeed.Increment = new decimal(new int[] { 5, 0, 0, 131072 });
this.numSpeed.Location = new System.Drawing.Point(110, 169);
this.numSpeed.Name = "numSpeed";
this.numSpeed.Size = new System.Drawing.Size(80, 20);
this.numSpeed.TabIndex = 5;
this.numSpeed.Value = new decimal(new int[] { 105, 0, 0, 131072 });
//
// lblText
//
// Caption placed above txtInput.
this.lblText.AutoSize = true;
this.lblText.Location = new System.Drawing.Point(12, 13);
this.lblText.Name = "lblText";
this.lblText.Size = new System.Drawing.Size(28, 13);
this.lblText.TabIndex = 6;
this.lblText.Text = "Text";
//
// lblLang
//
// Caption placed above cmbLang.
this.lblLang.AutoSize = true;
this.lblLang.Location = new System.Drawing.Point(12, 98);
this.lblLang.Name = "lblLang";
this.lblLang.Size = new System.Drawing.Size(55, 13);
this.lblLang.TabIndex = 7;
this.lblLang.Text = "Language";
//
// lblStyle
//
// Caption placed above txtStylePath.
this.lblStyle.AutoSize = true;
this.lblStyle.Location = new System.Drawing.Point(110, 98);
this.lblStyle.Name = "lblStyle";
this.lblStyle.Size = new System.Drawing.Size(87, 13);
this.lblStyle.TabIndex = 8;
this.lblStyle.Text = "Voice Style Path";
//
// lblSteps
//
// Caption placed above numSteps.
this.lblSteps.AutoSize = true;
this.lblSteps.Location = new System.Drawing.Point(12, 153);
this.lblSteps.Name = "lblSteps";
this.lblSteps.Size = new System.Drawing.Size(61, 13);
this.lblSteps.TabIndex = 9;
this.lblSteps.Text = "Total Steps";
//
// lblSpeed
//
// Caption placed above numSpeed.
this.lblSpeed.AutoSize = true;
this.lblSpeed.Location = new System.Drawing.Point(110, 153);
this.lblSpeed.Name = "lblSpeed";
this.lblSpeed.Size = new System.Drawing.Size(38, 13);
this.lblSpeed.TabIndex = 10;
this.lblSpeed.Text = "Speed";
//
// txtLog
//
// Read-only, multi-line box with a vertical scroll bar.
this.txtLog.Location = new System.Drawing.Point(12, 214);
this.txtLog.Multiline = true;
this.txtLog.Name = "txtLog";
this.txtLog.ReadOnly = true;
this.txtLog.ScrollBars = System.Windows.Forms.ScrollBars.Vertical;
this.txtLog.Size = new System.Drawing.Size(460, 150);
this.txtLog.TabIndex = 11;
//
// fMain
//
// Form-level settings: scaling, client size, child controls, and the Load
// event, which is routed to fMain_Load.
this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F);
this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Font;
this.ClientSize = new System.Drawing.Size(484, 376);
this.Controls.Add(this.txtLog);
this.Controls.Add(this.lblSpeed);
this.Controls.Add(this.lblSteps);
this.Controls.Add(this.lblStyle);
this.Controls.Add(this.lblLang);
this.Controls.Add(this.lblText);
this.Controls.Add(this.numSpeed);
this.Controls.Add(this.numSteps);
this.Controls.Add(this.txtStylePath);
this.Controls.Add(this.cmbLang);
this.Controls.Add(this.btnGenerate);
this.Controls.Add(this.txtInput);
this.Name = "fMain";
this.Text = "Supertonic TTS (WinForms 4.8)";
this.Load += new System.EventHandler(this.fMain_Load);
((System.ComponentModel.ISupportInitialize)(this.numSteps)).EndInit();
((System.ComponentModel.ISupportInitialize)(this.numSpeed)).EndInit();
this.ResumeLayout(false);
this.PerformLayout();
}
#endregion
// Control fields created and configured in InitializeComponent().
private System.Windows.Forms.TextBox txtInput;
private System.Windows.Forms.Button btnGenerate;
private System.Windows.Forms.ComboBox cmbLang;
private System.Windows.Forms.TextBox txtStylePath;
private System.Windows.Forms.NumericUpDown numSteps;
private System.Windows.Forms.NumericUpDown numSpeed;
private System.Windows.Forms.Label lblText;
private System.Windows.Forms.Label lblLang;
private System.Windows.Forms.Label lblStyle;
private System.Windows.Forms.Label lblSteps;
private System.Windows.Forms.Label lblSpeed;
private System.Windows.Forms.TextBox txtLog;
}
}

View File

@@ -0,0 +1,89 @@
using System;
using System.Collections.Generic;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Windows.Forms;
namespace Supertonic.WinForms
{
    /// <summary>
    /// Main window of the TTS demo: collects the text, language, voice style and
    /// synthesis parameters, runs the synthesis on a worker thread, and writes the
    /// result to a WAV file under the "results" directory.
    /// </summary>
    public partial class fMain : Form
    {
        /// <summary>
        /// Engine instance, created on the first successful Generate click and
        /// reused by later clicks.
        /// </summary>
        private TextToSpeech _tts;

        /// <summary>Creates the form and builds its controls.</summary>
        public fMain()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Handles the Generate button: validates the inputs, loads the model on
        /// first use, synthesizes the text, and saves the audio as a WAV file.
        /// Failures are reported in the log and in a message box.
        /// </summary>
        /// <param name="sender">The control that raised the event.</param>
        /// <param name="e">Event data (unused).</param>
        private async void btnGenerate_Click(object sender, EventArgs e)
        {
            string text = txtInput.Text;
            string lang = cmbLang.SelectedItem?.ToString() ?? "en";
            string stylePath = txtStylePath.Text;
            int totalStep = (int)numSteps.Value;
            float speed = (float)numSpeed.Value;

            if (string.IsNullOrWhiteSpace(text))
            {
                MessageBox.Show("Please enter text.");
                return;
            }

            // The numeric controls do not set a Minimum, so 0 can be entered.
            // Reject it here rather than handing a zero step count or a zero
            // speed factor to the engine.
            if (totalStep < 1)
            {
                MessageBox.Show("Total steps must be at least 1.");
                return;
            }

            if (speed <= 0f)
            {
                MessageBox.Show("Speed must be greater than 0.");
                return;
            }

            // Block re-entrancy: the handler awaits long-running work, and a second
            // click in the meantime would load the model twice or run two syntheses
            // against the same engine instance concurrently.
            btnGenerate.Enabled = false;
            try
            {
                if (_tts == null)
                {
                    Log("Loading TTS model...");
                    string onnxDir = "assets/onnx"; // This should be updated if assets are moved
                    _tts = await System.Threading.Tasks.Task.Run(() => Helper.LoadTextToSpeech(onnxDir, false));
                    Log("TTS model loaded.");
                }

                Log($"Generating speech: \"{text}\" ({lang})");
                var style = Helper.LoadVoiceStyle(new List<string> { stylePath }, true);
                var result = await System.Threading.Tasks.Task.Run(() => _tts.Call(text, lang, style, totalStep, speed));

                string saveDir = "results";
                // CreateDirectory is a no-op when the directory already exists.
                Directory.CreateDirectory(saveDir);

                string fname = $"{Helper.SanitizeFilename(text, 20)}_{DateTime.Now:HHmmss}.wav";
                string outputPath = Path.Combine(saveDir, fname);
                Helper.WriteWavFile(outputPath, result.wav, _tts.SampleRate);

                Log($"Saved: {outputPath}");
                MessageBox.Show($"Synthesis completed successfully!\nSaved to: {outputPath}");
            }
            catch (Exception ex)
            {
                Log($"Error: {ex.Message}");
                MessageBox.Show($"Error: {ex.Message}");
            }
            finally
            {
                // The form may have been closed while the work was in flight.
                if (!IsDisposed)
                {
                    btnGenerate.Enabled = true;
                }
            }
        }

        /// <summary>
        /// Appends a time-stamped line to the log box and scrolls to the end.
        /// Marshals to the UI thread when called from another thread.
        /// </summary>
        /// <param name="msg">Message to append.</param>
        private void Log(string msg)
        {
            // Touching a disposed control throws; since this is also called from
            // the catch block of an async void handler, that exception would be
            // unhandled. Drop the message instead.
            if (IsDisposed || txtLog.IsDisposed)
            {
                return;
            }

            if (txtLog.InvokeRequired)
            {
                txtLog.Invoke(new Action(() => Log(msg)));
                return;
            }

            txtLog.AppendText($"[{DateTime.Now:HH:mm:ss}] {msg}\r\n");
            // TextLength avoids materializing the whole log text just to measure it.
            txtLog.SelectionStart = txtLog.TextLength;
            txtLog.ScrollToCaret();
        }

        /// <summary>
        /// Populates the language list, selects its first entry, and applies the
        /// default voice style path when that file exists.
        /// </summary>
        /// <param name="sender">The form that raised the event.</param>
        /// <param name="e">Event data (unused).</param>
        private void fMain_Load(object sender, EventArgs e)
        {
            cmbLang.Items.AddRange(Languages.Available);
            cmbLang.SelectedIndex = 0;

            // Set default style path if exists
            string defaultStyle = "assets/voice_styles/M1.json";
            if (File.Exists(defaultStyle))
            {
                txtStylePath.Text = defaultStyle;
            }
        }
    }
}

View File

@@ -0,0 +1,61 @@
<?xml version="1.0" encoding="utf-8"?>
<root>
<xsd:schema id="root" xmlns="" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:msdata="urn:schemas-microsoft-com:xml-msdata">
<xsd:import namespace="http://www.w3.org/XML/1998/namespace" />
<xsd:element name="root" msdata:IsDataSet="true">
<xsd:complexType>
<xsd:choice maxOccurs="unbounded">
<xsd:element name="metadata">
<xsd:complexType>
<xsd:sequence>
<xsd:element name="value" type="xsd:string" minOccurs="0" />
</xsd:sequence>
<xsd:attribute name="name" use="required" type="xsd:string" />
<xsd:attribute name="type" type="xsd:string" />
<xsd:attribute name="mimetype" type="xsd:string" />
<xsd:attribute ref="xml:space" />
</xsd:complexType>
</xsd:element>
<xsd:element name="assembly">
<xsd:complexType>
<xsd:attribute name="name" type="xsd:string" />
<xsd:attribute name="alias" type="xsd:string" />
</xsd:complexType>
</xsd:element>
<xsd:element name="data">
<xsd:complexType>
<xsd:sequence>
<xsd:element name="value" type="xsd:string" minOccurs="0" msdata:Ordinal="1" />
<xsd:element name="comment" type="xsd:string" minOccurs="0" msdata:Ordinal="2" />
</xsd:sequence>
<xsd:attribute name="name" use="required" type="xsd:string" msdata:Ordinal="1" />
<xsd:attribute name="type" type="xsd:string" msdata:Ordinal="3" />
<xsd:attribute name="mimetype" type="xsd:string" msdata:Ordinal="4" />
<xsd:attribute ref="xml:space" />
</xsd:complexType>
</xsd:element>
<xsd:element name="resheader">
<xsd:complexType>
<xsd:sequence>
<xsd:element name="value" type="xsd:string" minOccurs="0" msdata:Ordinal="1" />
</xsd:sequence>
<xsd:attribute name="name" type="xsd:string" use="required" />
</xsd:complexType>
</xsd:element>
</xsd:choice>
</xsd:complexType>
</xsd:element>
</xsd:schema>
<resheader name="resmimetype">
<value>text/microsoft-resx</value>
</resheader>
<resheader name="version">
<value>2.0</value>
</resheader>
<resheader name="reader">
<value>System.Resources.ResXResourceReader, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
</resheader>
<resheader name="writer">
<value>System.Resources.ResXResourceWriter, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
</resheader>
</root>