add ttsproject
This commit is contained in:
6
HMI/SubProject/tts/App.config
Normal file
6
HMI/SubProject/tts/App.config
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<configuration>
|
||||||
|
<startup>
|
||||||
|
<supportedRuntime version="v4.0" sku=".NETFramework,Version=v4.8" />
|
||||||
|
</startup>
|
||||||
|
</configuration>
|
||||||
889
HMI/SubProject/tts/Helper.cs
Normal file
889
HMI/SubProject/tts/Helper.cs
Normal file
@@ -0,0 +1,889 @@
|
|||||||
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.IO;
|
||||||
|
using System.Linq;
|
||||||
|
using System.Text;
|
||||||
|
using System.Text.Json;
|
||||||
|
using System.Text.RegularExpressions;
|
||||||
|
using Microsoft.ML.OnnxRuntime;
|
||||||
|
using Microsoft.ML.OnnxRuntime.Tensors;
|
||||||
|
|
||||||
|
namespace Supertonic.WinForms
|
||||||
|
{
|
||||||
|
// Available languages for multilingual TTS
|
||||||
|
/// <summary>
/// Language tags accepted by the text front end.
/// </summary>
public static class Languages
{
    /// <summary>Tags that may be passed wherever a <c>lang</c> argument is expected.</summary>
    public static readonly string[] Available = new string[]
    {
        "en",
        "ko",
        "es",
        "pt",
        "fr",
    };
}
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// Configuration classes
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
/// <summary>
/// Model settings consumed by <c>TextToSpeech</c>. Both sections default to
/// <c>null</c> until assigned.
/// </summary>
public class Config
{
    /// <summary>Values of the "AE" section: sample rate and base chunk size.</summary>
    public class AEConfig
    {
        public int SampleRate { get; set; }

        public int BaseChunkSize { get; set; }
    }

    /// <summary>Values of the "TTL" section: chunk compression factor and latent dimension.</summary>
    public class TTLConfig
    {
        public int ChunkCompressFactor { get; set; }

        public int LatentDim { get; set; }
    }

    /// <summary>The AE section, or <c>null</c> when not set.</summary>
    public AEConfig AE { get; set; }

    /// <summary>The TTL section, or <c>null</c> when not set.</summary>
    public TTLConfig TTL { get; set; }
}
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// Style class
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
/// <summary>
/// Holds two flattened float buffers ("ttl" and "dp") together with the
/// shape that describes each of them.
/// </summary>
public class Style
{
    /// <summary>
    /// Stores the given arrays as-is; nothing is copied or validated.
    /// </summary>
    /// <param name="ttl">Flattened "ttl" values.</param>
    /// <param name="ttlShape">Shape describing <paramref name="ttl"/>.</param>
    /// <param name="dp">Flattened "dp" values.</param>
    /// <param name="dpShape">Shape describing <paramref name="dp"/>.</param>
    public Style(float[] ttl, long[] ttlShape, float[] dp, long[] dpShape)
    {
        this.Ttl = ttl;
        this.TtlShape = ttlShape;
        this.Dp = dp;
        this.DpShape = dpShape;
    }

    /// <summary>Flattened "ttl" values.</summary>
    public float[] Ttl { get; set; }

    /// <summary>Shape of <see cref="Ttl"/>.</summary>
    public long[] TtlShape { get; set; }

    /// <summary>Flattened "dp" values.</summary>
    public float[] Dp { get; set; }

    /// <summary>Shape of <see cref="Dp"/>.</summary>
    public long[] DpShape { get; set; }
}
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// Unicode text processor
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
/// <summary>
/// Turns raw text into the padded id matrix and mask passed to the ONNX sessions.
/// Text is normalized (NFKD), cleaned, wrapped in language tags, and every UTF-16
/// code unit is then looked up in the table loaded from the indexer JSON file.
/// </summary>
public class UnicodeProcessor
{
    // Inclusive [first, last] code point ranges dropped by RemoveEmojis.
    private static readonly int[][] EmojiRanges =
    {
        new[] { 0x1F600, 0x1F64F },
        new[] { 0x1F300, 0x1F5FF },
        new[] { 0x1F680, 0x1F6FF },
        new[] { 0x1F700, 0x1F77F },
        new[] { 0x1F780, 0x1F7FF },
        new[] { 0x1F800, 0x1F8FF },
        new[] { 0x1F900, 0x1F9FF },
        new[] { 0x1FA00, 0x1FA6F },
        new[] { 0x1FA70, 0x1FAFF },
        new[] { 0x2600, 0x26FF },
        new[] { 0x2700, 0x27BF },
        new[] { 0x1F1E6, 0x1F1FF },
    };

    // Literal { from, to } substitutions. Kept in an array so the application
    // order is explicit instead of relying on Dictionary enumeration order.
    private static readonly string[][] SymbolReplacements =
    {
        new[] { "–", "-" },        // en dash
        new[] { "‑", "-" },        // non-breaking hyphen
        new[] { "—", "-" },        // em dash
        new[] { "_", " " },        // underscore
        new[] { "\u201C", "\"" },  // left double quote
        new[] { "\u201D", "\"" },  // right double quote
        new[] { "\u2018", "'" },   // left single quote
        new[] { "\u2019", "'" },   // right single quote
        new[] { "´", "'" },        // acute accent
        new[] { "`", "'" },        // grave accent
        new[] { "[", " " },        // left bracket
        new[] { "]", " " },        // right bracket
        new[] { "|", " " },        // vertical bar
        new[] { "/", " " },        // slash
        new[] { "#", " " },        // hash
        new[] { "→", " " },        // right arrow
        new[] { "←", " " },        // left arrow
    };

    // Known expressions expanded after the symbol pass.
    private static readonly string[][] ExpressionReplacements =
    {
        new[] { "@", " at " },
        new[] { "e.g.,", "for example, " },
        new[] { "i.e.,", "that is, " },
    };

    // Punctuation marks that lose a single directly preceding space.
    private static readonly string[] TightPunctuation = { ",", ".", "!", "?", ";", ":", "'" };

    private static readonly Regex SpecialSymbolRegex = new Regex(@"[♥☆♡©\\]");
    private static readonly Regex WhitespaceRunRegex = new Regex(@"\s+");
    private static readonly Regex TerminalPunctuationRegex =
        new Regex(@"[.!?;:,'\u0022\u201C\u201D\u2018\u2019)\]}…。」』】〉》›»]$");

    // Maps a UTF-16 code unit value (the array position in the JSON file) to its id.
    private readonly Dictionary<int, long> _indexer;

    /// <summary>
    /// Loads the lookup table from a JSON file containing a flat array of integers.
    /// </summary>
    /// <param name="unicodeIndexerPath">Path of the indexer JSON file.</param>
    /// <exception cref="InvalidDataException">The file deserializes to <c>null</c>.</exception>
    public UnicodeProcessor(string unicodeIndexerPath)
    {
        var json = File.ReadAllText(unicodeIndexerPath);
        var table = JsonSerializer.Deserialize<long[]>(json)
            ?? throw new InvalidDataException("Failed to load indexer");

        _indexer = new Dictionary<int, long>(table.Length);
        for (int code = 0; code < table.Length; code++)
        {
            _indexer[code] = table[code];
        }
    }

    // True when the code point lies inside one of the EmojiRanges entries.
    private static bool IsEmoji(int codePoint)
    {
        foreach (var range in EmojiRanges)
        {
            if (codePoint >= range[0] && codePoint <= range[1])
            {
                return true;
            }
        }
        return false;
    }

    // Copies the text while skipping every code point matched by IsEmoji.
    // Well-formed surrogate pairs are treated as one code point; a lone
    // surrogate is passed through unchanged.
    private static string RemoveEmojis(string text)
    {
        var kept = new StringBuilder(text.Length);
        int i = 0;
        while (i < text.Length)
        {
            bool isPair = char.IsHighSurrogate(text[i])
                && i + 1 < text.Length
                && char.IsLowSurrogate(text[i + 1]);
            int codePoint = isPair ? char.ConvertToUtf32(text[i], text[i + 1]) : text[i];
            int width = isPair ? 2 : 1;

            if (!IsEmoji(codePoint))
            {
                kept.Append(text, i, width);
            }
            i += width;
        }
        return kept.ToString();
    }

    // Normalizes and cleans one text, then wraps it as <lang>text</lang>.
    // Throws ArgumentException for a language not listed in Languages.Available.
    private string PreprocessText(string text, string lang)
    {
        if (text == null)
        {
            throw new ArgumentNullException(nameof(text));
        }

        // Validate the language first so an invalid request fails before any text work.
        if (!Languages.Available.Contains(lang))
        {
            throw new ArgumentException($"Invalid language: {lang}. Available: {string.Join(", ", Languages.Available)}");
        }

        // TODO: Need advanced normalizer for better performance
        text = text.Normalize(NormalizationForm.FormKD);

        // Emojis are filtered by code point range rather than by regex.
        text = RemoveEmojis(text);

        foreach (var pair in SymbolReplacements)
        {
            text = text.Replace(pair[0], pair[1]);
        }

        text = SpecialSymbolRegex.Replace(text, "");

        foreach (var pair in ExpressionReplacements)
        {
            text = text.Replace(pair[0], pair[1]);
        }

        // Drop one space in front of each punctuation mark, one mark at a time.
        foreach (var mark in TightPunctuation)
        {
            text = text.Replace(" " + mark, mark);
        }

        // Collapse runs of identical quotes. Backticks were already turned into
        // apostrophes above, so only these two quote characters can repeat here.
        while (text.Contains("\"\""))
        {
            text = text.Replace("\"\"", "\"");
        }
        while (text.Contains("''"))
        {
            text = text.Replace("''", "'");
        }

        text = WhitespaceRunRegex.Replace(text, " ").Trim();

        // Append a period unless the text already ends with punctuation,
        // a quote, or a closing bracket.
        if (!TerminalPunctuationRegex.IsMatch(text))
        {
            text += ".";
        }

        return $"<{lang}>" + text + $"</{lang}>";
    }

    /// <summary>
    /// Preprocesses every text and converts it to a row of ids, zero-padded to the
    /// longest processed text in the batch.
    /// </summary>
    /// <param name="textList">Texts to convert; must contain at least one entry.</param>
    /// <param name="langList">Language tag for each text, matched by position.</param>
    /// <returns>
    /// <c>textIds</c>: one row per text; positions whose code unit is missing from the
    /// lookup table keep the value 0. <c>textMask</c>: the result of
    /// <c>Helper.LengthToMask</c> for the processed text lengths.
    /// </returns>
    /// <exception cref="ArgumentNullException">A list or one of the texts is <c>null</c>.</exception>
    /// <exception cref="ArgumentException">
    /// The batch is empty, a language entry is missing, or a language is not supported.
    /// </exception>
    public (long[][] textIds, float[][][] textMask) Call(List<string> textList, List<string> langList)
    {
        if (textList == null)
        {
            throw new ArgumentNullException(nameof(textList));
        }
        if (langList == null)
        {
            throw new ArgumentNullException(nameof(langList));
        }
        if (textList.Count == 0)
        {
            throw new ArgumentException("At least one text is required", nameof(textList));
        }
        if (langList.Count < textList.Count)
        {
            throw new ArgumentException("Each text needs a matching language entry", nameof(langList));
        }

        int count = textList.Count;
        var processed = new string[count];
        var lengths = new long[count];
        long maxLen = 0;
        for (int i = 0; i < count; i++)
        {
            processed[i] = PreprocessText(textList[i], langList[i]);
            lengths[i] = processed[i].Length;
            maxLen = Math.Max(maxLen, lengths[i]);
        }

        var textIds = new long[count][];
        for (int i = 0; i < count; i++)
        {
            var row = new long[maxLen];
            string current = processed[i];
            for (int j = 0; j < current.Length; j++)
            {
                // One id per UTF-16 code unit; unknown code units leave the default 0.
                if (_indexer.TryGetValue(current[j], out long id))
                {
                    row[j] = id;
                }
            }
            textIds[i] = row;
        }

        var textMask = Helper.LengthToMask(lengths);
        return (textIds, textMask);
    }
}
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// TextToSpeech class
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
/// <summary>
/// Runs four ONNX sessions (duration predictor, text encoder, vector estimator,
/// vocoder) in sequence to turn text plus a <see cref="Style"/> into waveform samples.
/// </summary>
public class TextToSpeech
{
    // A single shared generator: on .NET Framework a fresh `new Random()` is seeded
    // from the clock, so instances created close together can emit identical noise.
    // Random is not thread-safe, hence the gate.
    private static readonly Random NoiseRandom = new Random();
    private static readonly object NoiseRandomGate = new object();

    private readonly Config _cfgs;
    private readonly UnicodeProcessor _textProcessor;
    private readonly InferenceSession _dpOrt;
    private readonly InferenceSession _textEncOrt;
    private readonly InferenceSession _vectorEstOrt;
    private readonly InferenceSession _vocoderOrt;

    /// <summary>Sample rate taken from <c>cfgs.AE.SampleRate</c>.</summary>
    public readonly int SampleRate;

    private readonly int _baseChunkSize;
    private readonly int _chunkCompressFactor;
    private readonly int _ldim;

    /// <summary>
    /// Stores the collaborators and caches the numeric settings from <paramref name="cfgs"/>.
    /// </summary>
    /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
    public TextToSpeech(
        Config cfgs,
        UnicodeProcessor textProcessor,
        InferenceSession dpOrt,
        InferenceSession textEncOrt,
        InferenceSession vectorEstOrt,
        InferenceSession vocoderOrt)
    {
        _cfgs = cfgs ?? throw new ArgumentNullException(nameof(cfgs));
        _textProcessor = textProcessor ?? throw new ArgumentNullException(nameof(textProcessor));
        _dpOrt = dpOrt ?? throw new ArgumentNullException(nameof(dpOrt));
        _textEncOrt = textEncOrt ?? throw new ArgumentNullException(nameof(textEncOrt));
        _vectorEstOrt = vectorEstOrt ?? throw new ArgumentNullException(nameof(vectorEstOrt));
        _vocoderOrt = vocoderOrt ?? throw new ArgumentNullException(nameof(vocoderOrt));

        SampleRate = cfgs.AE.SampleRate;
        _baseChunkSize = cfgs.AE.BaseChunkSize;
        _chunkCompressFactor = cfgs.TTL.ChunkCompressFactor;
        _ldim = cfgs.TTL.LatentDim;
    }

    // Builds masked standard-normal noise of shape
    // [duration.Length][_ldim * _chunkCompressFactor][latentLen] plus its mask.
    private (float[][][] noisyLatent, float[][][] latentMask) SampleNoisyLatent(float[] duration)
    {
        int bsz = duration.Length;
        var wavLengths = duration.Select(d => (long)(d * SampleRate)).ToArray();
        var latentMask = Helper.GetLatentMask(wavLengths, _baseChunkSize, _chunkCompressFactor);

        // Take the length from the mask instead of recomputing it in floating point,
        // so the noise and the mask can never disagree because of rounding.
        int latentLen = latentMask[0][0].Length;
        int latentDim = _ldim * _chunkCompressFactor;

        var noisyLatent = new float[bsz][][];
        lock (NoiseRandomGate)
        {
            for (int b = 0; b < bsz; b++)
            {
                float[] maskRow = latentMask[b][0];
                noisyLatent[b] = new float[latentDim][];
                for (int d = 0; d < latentDim; d++)
                {
                    var row = new float[latentLen];
                    for (int t = 0; t < latentLen; t++)
                    {
                        // Box-Muller transform; 1 - NextDouble() keeps u1 away from 0.
                        double u1 = 1.0 - NoiseRandom.NextDouble();
                        double u2 = 1.0 - NoiseRandom.NextDouble();
                        float sample = (float)(Math.Sqrt(-2.0 * Math.Log(u1)) * Math.Cos(2.0 * Math.PI * u2));
                        row[t] = sample * maskRow[t];
                    }
                    noisyLatent[b][d] = row;
                }
            }
        }

        return (noisyLatent, latentMask);
    }

    // Full pipeline for one batch: duration prediction, text encoding,
    // totalStep rounds of the vector estimator, then the vocoder.
    // Returns the flattened "wav_tts" output and the predicted durations divided by speed.
    private (float[] wav, float[] duration) _Infer(List<string> textList, List<string> langList, Style style, int totalStep, float speed = 1.05f)
    {
        if (textList == null)
        {
            throw new ArgumentNullException(nameof(textList));
        }
        if (style == null)
        {
            throw new ArgumentNullException(nameof(style));
        }
        // Durations are divided by speed; zero, negative, or NaN would yield unusable lengths.
        if (!(speed > 0f))
        {
            throw new ArgumentOutOfRangeException(nameof(speed), "Speed must be greater than zero");
        }

        int bsz = textList.Count;
        if (bsz == 0)
        {
            throw new ArgumentException("At least one text is required", nameof(textList));
        }
        if (bsz != style.TtlShape[0])
        {
            throw new ArgumentException("Number of texts must match number of style vectors");
        }

        // Process text
        var (textIds, textMask) = _textProcessor.Call(textList, langList);
        var textIdsShape = new long[] { bsz, textIds[0].Length };
        var textMaskShape = new long[] { bsz, 1, textMask[0][0].Length };

        var textIdsTensor = Helper.IntArrayToTensor(textIds, textIdsShape);
        var textMaskTensor = Helper.ArrayToTensor(textMask, textMaskShape);
        var styleTtlTensor = new DenseTensor<float>(style.Ttl, style.TtlShape.Select(x => (int)x).ToArray());
        var styleDpTensor = new DenseTensor<float>(style.Dp, style.DpShape.Select(x => (int)x).ToArray());

        // Run duration predictor. The result is copied into a managed array, so the
        // native outputs can be released right away.
        var dpInputs = new List<NamedOnnxValue>
        {
            NamedOnnxValue.CreateFromTensor("text_ids", textIdsTensor),
            NamedOnnxValue.CreateFromTensor("style_dp", styleDpTensor),
            NamedOnnxValue.CreateFromTensor("text_mask", textMaskTensor)
        };
        float[] durOnnx;
        using (var dpOutputs = _dpOrt.Run(dpInputs))
        {
            durOnnx = dpOutputs.First(o => o.Name == "duration").AsTensor<float>().ToArray();
        }

        // Apply speed factor to duration
        for (int i = 0; i < durOnnx.Length; i++)
        {
            durOnnx[i] /= speed;
        }

        float[][][] xt;
        long[] latentShape;

        // Run text encoder. Its outputs must stay alive for the whole denoising loop
        // because text_emb is fed back in on every step.
        var textEncInputs = new List<NamedOnnxValue>
        {
            NamedOnnxValue.CreateFromTensor("text_ids", textIdsTensor),
            NamedOnnxValue.CreateFromTensor("style_ttl", styleTtlTensor),
            NamedOnnxValue.CreateFromTensor("text_mask", textMaskTensor)
        };
        using (var textEncOutputs = _textEncOrt.Run(textEncInputs))
        {
            var textEmbTensor = textEncOutputs.First(o => o.Name == "text_emb").AsTensor<float>();

            // Sample noisy latent
            float[][][] latentMask;
            (xt, latentMask) = SampleNoisyLatent(durOnnx);
            latentShape = new long[] { bsz, xt[0].Length, xt[0][0].Length };
            var latentMaskShape = new long[] { bsz, 1, latentMask[0][0].Length };

            // These inputs do not change between steps, so build them once.
            var latentMaskTensor = Helper.ArrayToTensor(latentMask, latentMaskShape);
            var totalStepArray = Enumerable.Repeat((float)totalStep, bsz).ToArray();
            var totalStepTensor = new DenseTensor<float>(totalStepArray, new int[] { bsz });

            // Iterative denoising
            for (int step = 0; step < totalStep; step++)
            {
                var currentStepArray = Enumerable.Repeat((float)step, bsz).ToArray();

                var vectorEstInputs = new List<NamedOnnxValue>
                {
                    NamedOnnxValue.CreateFromTensor("noisy_latent", Helper.ArrayToTensor(xt, latentShape)),
                    NamedOnnxValue.CreateFromTensor("text_emb", textEmbTensor),
                    NamedOnnxValue.CreateFromTensor("style_ttl", styleTtlTensor),
                    NamedOnnxValue.CreateFromTensor("text_mask", textMaskTensor),
                    NamedOnnxValue.CreateFromTensor("latent_mask", latentMaskTensor),
                    NamedOnnxValue.CreateFromTensor("total_step", totalStepTensor),
                    NamedOnnxValue.CreateFromTensor("current_step", new DenseTensor<float>(currentStepArray, new int[] { bsz }))
                };

                using (var vectorEstOutputs = _vectorEstOrt.Run(vectorEstInputs))
                {
                    var denoisedLatent = vectorEstOutputs.First(o => o.Name == "denoised_latent").AsTensor<float>();

                    // Update xt from the flat output, in [batch][dim][time] order.
                    int idx = 0;
                    for (int b = 0; b < bsz; b++)
                    {
                        for (int d = 0; d < xt[b].Length; d++)
                        {
                            for (int t = 0; t < xt[b][d].Length; t++)
                            {
                                xt[b][d][t] = denoisedLatent.GetValue(idx++);
                            }
                        }
                    }
                }
            }
        }

        // Run vocoder
        var vocoderInputs = new List<NamedOnnxValue>
        {
            NamedOnnxValue.CreateFromTensor("latent", Helper.ArrayToTensor(xt, latentShape))
        };
        using (var vocoderOutputs = _vocoderOrt.Run(vocoderInputs))
        {
            var wavTensor = vocoderOutputs.First(o => o.Name == "wav_tts").AsTensor<float>();
            return (wavTensor.ToArray(), durOnnx);
        }
    }

    /// <summary>
    /// Synthesizes one text with one style. The text is split with
    /// <c>Helper.ChunkText</c> (limit 120 for "ko", 300 otherwise), each chunk is
    /// inferred separately, and the results are joined with silence in between.
    /// </summary>
    /// <param name="text">Text to synthesize.</param>
    /// <param name="lang">Language tag of the text.</param>
    /// <param name="style">Style whose first "ttl" dimension is 1.</param>
    /// <param name="totalStep">Number of vector-estimator iterations per chunk.</param>
    /// <param name="speed">Divisor applied to the predicted durations; must be positive.</param>
    /// <param name="silenceDuration">Seconds of silence inserted between chunks.</param>
    /// <returns>The joined samples and a one-element array holding the summed duration.</returns>
    /// <exception cref="ArgumentException">The style holds more than one entry.</exception>
    public (float[] wav, float[] duration) Call(string text, string lang, Style style, int totalStep, float speed = 1.05f, float silenceDuration = 0.3f)
    {
        if (style == null)
        {
            throw new ArgumentNullException(nameof(style));
        }
        if (style.TtlShape[0] != 1)
        {
            throw new ArgumentException("Single speaker text to speech only supports single style");
        }

        int maxLen = lang == "ko" ? 120 : 300;
        var textList = Helper.ChunkText(text, maxLen);
        var wavCat = new List<float>();
        float durCat = 0.0f;

        // Track the first chunk explicitly; inferring it from an empty buffer would
        // discard the accumulated duration if a chunk ever produced no samples.
        bool isFirstChunk = true;

        foreach (var chunk in textList)
        {
            var (wav, duration) = _Infer(new List<string> { chunk }, new List<string> { lang }, style, totalStep, speed);

            if (isFirstChunk)
            {
                wavCat.AddRange(wav);
                durCat = duration[0];
                isFirstChunk = false;
            }
            else
            {
                int silenceLen = (int)(silenceDuration * SampleRate);
                wavCat.AddRange(new float[silenceLen]);
                wavCat.AddRange(wav);
                durCat += duration[0] + silenceDuration;
            }
        }

        return (wavCat.ToArray(), new float[] { durCat });
    }

    /// <summary>
    /// Synthesizes several texts in a single batch, without chunking.
    /// </summary>
    /// <param name="textList">Texts to synthesize; the count must equal the style's first "ttl" dimension.</param>
    /// <param name="langList">Language tag for each text, matched by position.</param>
    /// <param name="style">Styles for the batch.</param>
    /// <param name="totalStep">Number of vector-estimator iterations.</param>
    /// <param name="speed">Divisor applied to the predicted durations; must be positive.</param>
    /// <returns>The flattened vocoder output and one duration per text.</returns>
    public (float[] wav, float[] duration) Batch(List<string> textList, List<string> langList, Style style, int totalStep, float speed = 1.05f)
    {
        return _Infer(textList, langList, style, totalStep, speed);
    }
}
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// Helper class with utility functions
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
public static class Helper
|
||||||
|
{
|
||||||
|
// ============================================================================
|
||||||
|
// Utility functions
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
/// <summary>
/// Builds a [lengths.Length][1][width] mask whose row i holds 1.0 in the first
/// lengths[i] positions and 0.0 everywhere else.
/// </summary>
/// <param name="lengths">Number of leading ones for each row.</param>
/// <param name="maxLen">Row width; -1 (the default) means the largest entry of <paramref name="lengths"/>.</param>
/// <returns>The mask as a jagged array.</returns>
public static float[][][] LengthToMask(long[] lengths, long maxLen = -1)
{
    long width = maxLen == -1 ? lengths.Max() : maxLen;

    var mask = new float[lengths.Length][][];
    for (int row = 0; row < mask.Length; row++)
    {
        // A new array is already zero-filled, so only the leading ones are written.
        var flags = new float[width];
        long ones = Math.Min(lengths[row], width);
        for (long col = 0; col < ones; col++)
        {
            flags[col] = 1.0f;
        }
        mask[row] = new[] { flags };
    }
    return mask;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Converts waveform lengths into a mask by ceil-dividing each length by
/// <c>baseChunkSize * chunkCompressFactor</c> and passing the results to
/// <see cref="LengthToMask"/>.
/// </summary>
/// <param name="wavLengths">Waveform length of each batch item.</param>
/// <param name="baseChunkSize">First factor of the divisor.</param>
/// <param name="chunkCompressFactor">Second factor of the divisor.</param>
/// <returns>The mask produced by <see cref="LengthToMask"/> for the divided lengths.</returns>
public static float[][][] GetLatentMask(long[] wavLengths, int baseChunkSize, int chunkCompressFactor)
{
    int stride = baseChunkSize * chunkCompressFactor;

    // Adding (stride - 1) before the integer division rounds up.
    long[] latentLengths = Array.ConvertAll(wavLengths, wavLen => (wavLen + stride - 1) / stride);

    return LengthToMask(latentLengths);
}
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// ONNX model loading
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
/// <summary>
/// Creates an inference session for the model file at <paramref name="onnxPath"/>.
/// </summary>
/// <param name="onnxPath">Path of the .onnx file.</param>
/// <param name="opts">Session options passed to the session constructor.</param>
/// <returns>The new session; the caller is responsible for disposing it.</returns>
public static InferenceSession LoadOnnx(string onnxPath, SessionOptions opts)
    => new InferenceSession(onnxPath, opts);
|
||||||
|
|
||||||
|
/// <summary>
/// Loads the four model files (duration_predictor, text_encoder, vector_estimator,
/// vocoder) found in <paramref name="onnxDir"/>.
/// </summary>
/// <param name="onnxDir">Directory containing the .onnx files.</param>
/// <param name="opts">Session options shared by all four sessions.</param>
/// <returns>The four sessions; the caller is responsible for disposing them.</returns>
/// <remarks>
/// If one model fails to load, the sessions created before it are disposed and
/// the original exception is rethrown, so no native session is left behind.
/// </remarks>
public static (InferenceSession dp, InferenceSession textEnc, InferenceSession vectorEst, InferenceSession vocoder)
    LoadOnnxAll(string onnxDir, SessionOptions opts)
{
    string[] modelPaths =
    {
        Path.Combine(onnxDir, "duration_predictor.onnx"),
        Path.Combine(onnxDir, "text_encoder.onnx"),
        Path.Combine(onnxDir, "vector_estimator.onnx"),
        Path.Combine(onnxDir, "vocoder.onnx"),
    };

    var sessions = new InferenceSession[modelPaths.Length];
    try
    {
        for (int i = 0; i < modelPaths.Length; i++)
        {
            sessions[i] = LoadOnnx(modelPaths[i], opts);
        }
    }
    catch
    {
        // Release whatever was already loaded before propagating the failure.
        foreach (var session in sessions)
        {
            session?.Dispose();
        }
        throw;
    }

    return (sessions[0], sessions[1], sessions[2], sessions[3]);
}
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// Configuration loading
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
/// <summary>
/// Reads <c>tts.json</c> from <paramref name="onnxDir"/> and copies the four
/// integer settings it needs into a <see cref="Config"/>.
/// </summary>
/// <param name="onnxDir">Directory containing <c>tts.json</c>.</param>
/// <returns>
/// A config filled from <c>ae.sample_rate</c>, <c>ae.base_chunk_size</c>,
/// <c>ttl.chunk_compress_factor</c> and <c>ttl.latent_dim</c>.
/// </returns>
public static Config LoadCfgs(string onnxDir)
{
    string configText = File.ReadAllText(Path.Combine(onnxDir, "tts.json"));

    using (JsonDocument document = JsonDocument.Parse(configText))
    {
        JsonElement root = document.RootElement;

        JsonElement aeSection = root.GetProperty("ae");
        var ae = new Config.AEConfig();
        ae.SampleRate = aeSection.GetProperty("sample_rate").GetInt32();
        ae.BaseChunkSize = aeSection.GetProperty("base_chunk_size").GetInt32();

        JsonElement ttlSection = root.GetProperty("ttl");
        var ttl = new Config.TTLConfig();
        ttl.ChunkCompressFactor = ttlSection.GetProperty("chunk_compress_factor").GetInt32();
        ttl.LatentDim = ttlSection.GetProperty("latent_dim").GetInt32();

        var config = new Config();
        config.AE = ae;
        config.TTL = ttl;
        return config;
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Creates a <see cref="UnicodeProcessor"/> from <c>unicode_indexer.json</c>
/// located in <paramref name="onnxDir"/>.
/// </summary>
/// <param name="onnxDir">Directory containing <c>unicode_indexer.json</c>.</param>
/// <returns>The processor built from that file.</returns>
public static UnicodeProcessor LoadTextProcessor(string onnxDir)
    => new UnicodeProcessor(Path.Combine(onnxDir, "unicode_indexer.json"));
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// Voice style loading
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
/// <summary>
/// Loads one voice-style JSON file per batch item and packs them into a single
/// <see cref="Style"/> with shapes <c>[count, dim1, dim2]</c>.
/// </summary>
/// <param name="voiceStylePaths">Paths of the style files; must contain at least one entry.</param>
/// <param name="verbose">When true, prints how many styles were loaded.</param>
/// <returns>The combined style. The second and third dimensions come from the first file's <c>dims</c>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="voiceStylePaths"/> is <c>null</c>.</exception>
/// <exception cref="ArgumentException"><paramref name="voiceStylePaths"/> is empty.</exception>
/// <exception cref="InvalidDataException">
/// A <c>dims</c> array has fewer than three entries, or a file's <c>data</c> does not
/// contain exactly <c>dim1 * dim2</c> values.
/// </exception>
public static Style LoadVoiceStyle(List<string> voiceStylePaths, bool verbose = false)
{
    if (voiceStylePaths == null)
    {
        throw new ArgumentNullException(nameof(voiceStylePaths));
    }
    if (voiceStylePaths.Count == 0)
    {
        throw new ArgumentException("At least one voice style path is required", nameof(voiceStylePaths));
    }

    int bsz = voiceStylePaths.Count;
    long ttlDim1 = 0, ttlDim2 = 0, dpDim1 = 0, dpDim2 = 0;
    int ttlStride = 0, dpStride = 0;
    float[] ttlFlat = null;
    float[] dpFlat = null;

    for (int i = 0; i < bsz; i++)
    {
        var json = File.ReadAllText(voiceStylePaths[i]);
        using (var doc = JsonDocument.Parse(json))
        {
            var root = doc.RootElement;
            var ttlElement = root.GetProperty("style_ttl");
            var dpElement = root.GetProperty("style_dp");

            if (i == 0)
            {
                // The first file fixes the per-item dimensions for the whole batch.
                var ttlDims = ParseInt64Array(ttlElement.GetProperty("dims"));
                var dpDims = ParseInt64Array(dpElement.GetProperty("dims"));
                if (ttlDims.Length < 3 || dpDims.Length < 3)
                {
                    throw new InvalidDataException($"Voice style '{voiceStylePaths[i]}' must declare three dims");
                }

                ttlDim1 = ttlDims[1];
                ttlDim2 = ttlDims[2];
                dpDim1 = dpDims[1];
                dpDim2 = dpDims[2];

                // checked: fail loudly instead of allocating a wrapped-around size.
                ttlStride = checked((int)(ttlDim1 * ttlDim2));
                dpStride = checked((int)(dpDim1 * dpDim2));
                ttlFlat = new float[checked(bsz * ttlStride)];
                dpFlat = new float[checked(bsz * dpStride)];
            }

            CopyStyleData(ttlElement.GetProperty("data"), ttlFlat, i * ttlStride, ttlStride, "style_ttl", voiceStylePaths[i]);
            CopyStyleData(dpElement.GetProperty("data"), dpFlat, i * dpStride, dpStride, "style_dp", voiceStylePaths[i]);
        }
    }

    var ttlShape = new long[] { bsz, ttlDim1, ttlDim2 };
    var dpShape = new long[] { bsz, dpDim1, dpDim2 };

    if (verbose)
    {
        Console.WriteLine($"Loaded {bsz} voice styles");
    }

    return new Style(ttlFlat, ttlShape, dpFlat, dpShape);
}

// Flattens one nested "data" array into destination[offset .. offset + expectedCount),
// rejecting files whose value count differs from the expected slot size.
private static void CopyStyleData(JsonElement data, float[] destination, int offset, int expectedCount, string property, string path)
{
    float[][][] planes = ParseFloat3DArray(data);

    int total = planes.Sum(plane => plane.Sum(row => row.Length));
    if (total != expectedCount)
    {
        throw new InvalidDataException($"'{property}' in '{path}' holds {total} values but {expectedCount} were expected");
    }

    int written = 0;
    foreach (var plane in planes)
    {
        foreach (var row in plane)
        {
            Array.Copy(row, 0, destination, offset + written, row.Length);
            written += row.Length;
        }
    }
}
|
||||||
|
|
||||||
|
// Reads a JSON array of arrays of arrays of numbers into a jagged float array,
// keeping the nesting and element order of the JSON.
private static float[][][] ParseFloat3DArray(JsonElement element)
{
    return element.EnumerateArray()
        .Select(plane => plane.EnumerateArray()
            .Select(row => row.EnumerateArray()
                .Select(value => value.GetSingle())
                .ToArray())
            .ToArray())
        .ToArray();
}
|
||||||
|
|
||||||
|
// Reads a flat JSON array of integers into a long[], preserving order.
private static long[] ParseInt64Array(JsonElement element)
{
    return element.EnumerateArray()
        .Select(value => value.GetInt64())
        .ToArray();
}
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// TextToSpeech loading
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
/// <summary>
/// Builds a ready-to-use <see cref="TextToSpeech"/> from the config, indexer and
/// model files found in <paramref name="onnxDir"/>.
/// </summary>
/// <param name="onnxDir">Directory containing tts.json, unicode_indexer.json and the .onnx files.</param>
/// <param name="useGpu">Must be false; GPU mode is not implemented.</param>
/// <returns>The assembled engine.</returns>
/// <exception cref="NotImplementedException"><paramref name="useGpu"/> is true.</exception>
public static TextToSpeech LoadTextToSpeech(string onnxDir, bool useGpu = false)
{
    // Reject the unsupported mode before allocating any native resources.
    if (useGpu)
    {
        throw new NotImplementedException("GPU mode is not supported yet");
    }
    Console.WriteLine("Using CPU for inference");

    // Load the plain managed pieces first: if either file is missing or invalid,
    // no native session has been created yet and nothing needs cleaning up.
    var cfgs = LoadCfgs(onnxDir);
    var textProcessor = LoadTextProcessor(onnxDir);

    // The options are only needed while the sessions are being created.
    using (var opts = new SessionOptions())
    {
        var (dpOrt, textEncOrt, vectorEstOrt, vocoderOrt) = LoadOnnxAll(onnxDir, opts);
        return new TextToSpeech(cfgs, textProcessor, dpOrt, textEncOrt, vectorEstOrt, vocoderOrt);
    }
}
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// WAV file writing
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
/// <summary>
/// Writes <paramref name="audioData"/> as a mono, 16-bit PCM WAV file,
/// creating or overwriting <paramref name="filename"/>.
/// </summary>
/// <param name="filename">Destination path.</param>
/// <param name="audioData">Samples; each one is clamped to [-1, 1] before conversion.</param>
/// <param name="sampleRate">Sample rate written into the header.</param>
public static void WriteWavFile(string filename, float[] audioData, int sampleRate)
{
    const int numChannels = 1;
    const int bitsPerSample = 16;

    using (var output = new BinaryWriter(File.Open(filename, FileMode.Create)))
    {
        // Chunk identifiers are written as raw 4-byte ASCII tags.
        Action<string> writeTag = tag => output.Write(Encoding.ASCII.GetBytes(tag));

        int byteRate = sampleRate * numChannels * bitsPerSample / 8;
        short blockAlign = (short)(numChannels * bitsPerSample / 8);
        int dataSize = audioData.Length * bitsPerSample / 8;

        // RIFF header: the size field covers the 36 header bytes after it plus the data.
        writeTag("RIFF");
        output.Write(36 + dataSize);
        writeTag("WAVE");

        // fmt chunk
        writeTag("fmt ");
        output.Write(16);               // fmt chunk size
        output.Write((short)1);         // audio format (PCM)
        output.Write((short)numChannels);
        output.Write(sampleRate);
        output.Write(byteRate);
        output.Write(blockAlign);
        output.Write((short)bitsPerSample);

        // data chunk
        writeTag("data");
        output.Write(dataSize);

        for (int i = 0; i < audioData.Length; i++)
        {
            // Clamp, then scale to the signed 16-bit range.
            float clamped = Math.Max(-1.0f, Math.Min(1.0f, audioData[i]));
            short pcm = (short)(clamped * 32767);
            output.Write(pcm);
        }
    }
}
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// Tensor conversion utilities
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
/// <summary>
/// Flattens a three-level jagged array in [outer][middle][inner] order and wraps
/// the result in a dense tensor with the given dimensions.
/// </summary>
/// <param name="array">Values to flatten.</param>
/// <param name="dims">Tensor dimensions; each entry is cast to <c>int</c>.</param>
/// <returns>A tensor over a newly allocated copy of the values.</returns>
public static DenseTensor<float> ArrayToTensor(float[][][] array, long[] dims)
{
    var buffer = new List<float>();
    for (int outer = 0; outer < array.Length; outer++)
    {
        float[][] plane = array[outer];
        for (int middle = 0; middle < plane.Length; middle++)
        {
            buffer.AddRange(plane[middle]);
        }
    }

    int[] shape = dims.Select(dim => (int)dim).ToArray();
    float[] values = buffer.ToArray();
    return new DenseTensor<float>(values, shape);
}
|
||||||
|
|
||||||
|
/// <summary>
/// Flattens a two-level jagged array row by row and wraps the result in a dense
/// tensor with the given dimensions.
/// </summary>
/// <param name="array">Values to flatten.</param>
/// <param name="dims">Tensor dimensions; each entry is cast to <c>int</c>.</param>
/// <returns>A tensor over a newly allocated copy of the values.</returns>
public static DenseTensor<long> IntArrayToTensor(long[][] array, long[] dims)
{
    var buffer = new List<long>();
    for (int rowIndex = 0; rowIndex < array.Length; rowIndex++)
    {
        buffer.AddRange(array[rowIndex]);
    }

    int[] shape = dims.Select(dim => (int)dim).ToArray();
    long[] values = buffer.ToArray();
    return new DenseTensor<long>(values, shape);
}
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// Timer utility
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
/// <summary>
/// Runs <paramref name="func"/>, printing a start line and a completion line
/// with the elapsed time in seconds.
/// </summary>
/// <typeparam name="T">Result type of <paramref name="func"/>.</typeparam>
/// <param name="name">Label used in both console messages.</param>
/// <param name="func">Work to execute and time.</param>
/// <returns>Whatever <paramref name="func"/> returns.</returns>
public static T Timer<T>(string name, Func<T> func)
{
    Console.WriteLine($"{name}...");

    // Stopwatch is monotonic; DateTime.Now can jump with clock or DST adjustments.
    var stopwatch = System.Diagnostics.Stopwatch.StartNew();
    var result = func();
    stopwatch.Stop();

    var elapsed = stopwatch.Elapsed.TotalSeconds;
    Console.WriteLine($" -> {name} completed in {elapsed:F2} sec");
    return result;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Returns at most <paramref name="maxLen"/> leading characters of
/// <paramref name="text"/>, with every character that is not a letter or digit
/// replaced by an underscore.
/// </summary>
/// <param name="text">Text to convert.</param>
/// <param name="maxLen">Maximum number of characters to keep.</param>
/// <returns>The sanitized prefix; empty when <paramref name="maxLen"/> is zero or negative.</returns>
public static string SanitizeFilename(string text, int maxLen)
{
    var safe = new StringBuilder();
    for (int i = 0; i < text.Length && i < maxLen; i++)
    {
        char current = text[i];
        safe.Append(char.IsLetterOrDigit(current) ? current : '_');
    }
    return safe.ToString();
}
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// Chunk text
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
// Paragraph separator: a newline, optional whitespace, then one or more newlines.
// Built once and shared; Regex instances are safe for concurrent matching.
private static readonly Regex ChunkParagraphSeparator = new Regex(@"\n\s*\n+");

// Sentence separator: whitespace that follows '.', '!' or '?', unless the text just
// before it is a listed abbreviation or a single capital-letter initial.
private static readonly Regex ChunkSentenceSeparator = new Regex(@"(?<!Mr\.|Mrs\.|Ms\.|Dr\.|Prof\.|Sr\.|Jr\.|Ph\.D\.|etc\.|e\.g\.|i\.e\.|vs\.|Inc\.|Ltd\.|Co\.|Corp\.|St\.|Ave\.|Blvd\.)(?<!\b[A-Z]\.)(?<=[.!?])\s+");

/// <summary>
/// Splits text into chunks of at most <paramref name="maxLen"/> characters where
/// possible. Text is first split into paragraphs, then into sentences; consecutive
/// sentences of one paragraph are joined with a single space while they still fit.
/// </summary>
/// <param name="text">Text to split.</param>
/// <param name="maxLen">Target maximum chunk length in characters.</param>
/// <returns>
/// The chunks in reading order. Chunks never span paragraphs. A single sentence longer
/// than <paramref name="maxLen"/> is not split further and is returned as one oversized
/// chunk. If nothing is produced (for example blank input), the list contains the
/// trimmed input as its only element.
/// </returns>
public static List<string> ChunkText(string text, int maxLen = 300)
{
    var chunks = new List<string>();
    string trimmedText = text.Trim();

    foreach (string rawParagraph in ChunkParagraphSeparator.Split(trimmedText))
    {
        string paragraph = rawParagraph.Trim();
        if (paragraph.Length == 0)
        {
            continue;
        }

        var current = new StringBuilder();
        foreach (string sentence in ChunkSentenceSeparator.Split(paragraph))
        {
            if (string.IsNullOrEmpty(sentence))
            {
                continue;
            }

            // The +1 reserves room for the joining space.
            if (current.Length + sentence.Length + 1 <= maxLen)
            {
                if (current.Length > 0)
                {
                    current.Append(' ');
                }
                current.Append(sentence);
            }
            else
            {
                // Sentence does not fit: flush what we have and start a new chunk with it.
                if (current.Length > 0)
                {
                    chunks.Add(current.ToString().Trim());
                }
                current.Clear();
                current.Append(sentence);
            }
        }

        if (current.Length > 0)
        {
            chunks.Add(current.ToString().Trim());
        }
    }

    // If no chunks were created, return the (trimmed) original text.
    if (chunks.Count == 0)
    {
        chunks.Add(trimmedText);
    }

    return chunks;
}
|
||||||
|
}
|
||||||
|
}
|
||||||
19
HMI/SubProject/tts/Program.cs
Normal file
19
HMI/SubProject/tts/Program.cs
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
using System;
|
||||||
|
using System.Windows.Forms;
|
||||||
|
|
||||||
|
namespace Supertonic.WinForms
|
||||||
|
{
|
||||||
|
internal static class Program
{
    /// <summary>
    /// Application entry point: applies the WinForms rendering defaults and then runs
    /// the message loop with <see cref="fMain"/> as the main window.
    /// </summary>
    [STAThread]
    private static void Main()
    {
        // Rendering defaults are configured before the first form is constructed.
        Application.EnableVisualStyles();
        Application.SetCompatibleTextRenderingDefault(false);

        var mainForm = new fMain();
        Application.Run(mainForm);
    }
}
|
||||||
|
}
|
||||||
17
HMI/SubProject/tts/Properties/AssemblyInfo.cs
Normal file
17
HMI/SubProject/tts/Properties/AssemblyInfo.cs
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
using System.Reflection;
|
||||||
|
using System.Runtime.InteropServices;
|
||||||
|
|
||||||
|
[assembly: AssemblyTitle("Supertonic.WinForms")]
|
||||||
|
[assembly: AssemblyDescription("")]
|
||||||
|
[assembly: AssemblyConfiguration("")]
|
||||||
|
[assembly: AssemblyCompany("")]
|
||||||
|
[assembly: AssemblyProduct("Supertonic.WinForms")]
|
||||||
|
[assembly: AssemblyCopyright("Copyright © 2026")]
|
||||||
|
[assembly: AssemblyTrademark("")]
|
||||||
|
[assembly: AssemblyCulture("")]
|
||||||
|
|
||||||
|
[assembly: ComVisible(false)]
|
||||||
|
[assembly: Guid("bd3e8373-c40c-4f7f-aa18-6990f1cfd21a")]
|
||||||
|
|
||||||
|
[assembly: AssemblyVersion("1.0.0.0")]
|
||||||
|
[assembly: AssemblyFileVersion("1.0.0.0")]
|
||||||
63
HMI/SubProject/tts/Properties/Resources.Designer.cs
generated
Normal file
63
HMI/SubProject/tts/Properties/Resources.Designer.cs
generated
Normal file
@@ -0,0 +1,63 @@
|
|||||||
|
//------------------------------------------------------------------------------
|
||||||
|
// <auto-generated>
|
||||||
|
// 이 코드는 도구를 사용하여 생성되었습니다.
|
||||||
|
// 런타임 버전:4.0.30319.42000
|
||||||
|
//
|
||||||
|
// 파일 내용을 변경하면 잘못된 동작이 발생할 수 있으며, 코드를 다시 생성하면
|
||||||
|
// 이러한 변경 내용이 손실됩니다.
|
||||||
|
// </auto-generated>
|
||||||
|
//------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
namespace Supertonic.WinForms.Properties {
|
||||||
|
using System;
|
||||||
|
|
||||||
|
|
||||||
|
/// <summary>
///   A strongly-typed resource class, for looking up localized strings, etc.
/// </summary>
// This class was auto-generated by the StronglyTypedResourceBuilder
// class via a tool like ResGen or Visual Studio.
// To add or remove a member, edit the .ResX file and then rerun ResGen
// with the /str option, or rebuild the VS project.
[global::System.CodeDom.Compiler.GeneratedCodeAttribute("System.Resources.Tools.StronglyTypedResourceBuilder", "4.0.0.0")]
[global::System.Diagnostics.DebuggerNonUserCodeAttribute()]
[global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()]
internal class Resources {

    // Lazily created by the ResourceManager property.
    private static global::System.Resources.ResourceManager resourceMan;

    // Culture override exposed through the Culture property; null until set.
    private static global::System.Globalization.CultureInfo resourceCulture;

    [global::System.Diagnostics.CodeAnalysis.SuppressMessageAttribute("Microsoft.Performance", "CA1811:AvoidUncalledPrivateCode")]
    internal Resources() {
    }

    /// <summary>
    ///   Returns the cached ResourceManager instance used by this class.
    /// </summary>
    [global::System.ComponentModel.EditorBrowsableAttribute(global::System.ComponentModel.EditorBrowsableState.Advanced)]
    internal static global::System.Resources.ResourceManager ResourceManager {
        get {
            if (resourceMan == null) {
                resourceMan = new global::System.Resources.ResourceManager("Supertonic.WinForms.Properties.Resources", typeof(Resources).Assembly);
            }
            return resourceMan;
        }
    }

    /// <summary>
    ///   Overrides the current thread's CurrentUICulture property for all
    ///   resource lookups using this strongly-typed resource class.
    /// </summary>
    [global::System.ComponentModel.EditorBrowsableAttribute(global::System.ComponentModel.EditorBrowsableState.Advanced)]
    internal static global::System.Globalization.CultureInfo Culture {
        get {
            return resourceCulture;
        }
        set {
            resourceCulture = value;
        }
    }
}
|
||||||
|
}
|
||||||
101
HMI/SubProject/tts/Properties/Resources.resx
Normal file
101
HMI/SubProject/tts/Properties/Resources.resx
Normal file
@@ -0,0 +1,101 @@
|
|||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<root>
|
||||||
|
<!--
|
||||||
|
Microsoft ResX Schema
|
||||||
|
|
||||||
|
Version 1.3
|
||||||
|
|
||||||
|
The primary goals of this format is to allow a simple XML format
|
||||||
|
that is mostly human readable. The generation and parsing of the
|
||||||
|
various data types are done through the TypeConverter classes
|
||||||
|
associated with the data types.
|
||||||
|
|
||||||
|
Example:
|
||||||
|
|
||||||
|
... ado.net/XML headers & schema ...
|
||||||
|
<resheader name="resmimetype">text/microsoft-resx</resheader>
|
||||||
|
<resheader name="version">1.3</resheader>
|
||||||
|
<resheader name="reader">System.Resources.ResXResourceReader, System.Windows.Forms, ...</resheader>
|
||||||
|
<resheader name="writer">System.Resources.ResXResourceWriter, System.Windows.Forms, ...</resheader>
|
||||||
|
<data name="Name1">this is my long string</data>
|
||||||
|
<data name="Color1" type="System.Drawing.Color, System.Drawing">Blue</data>
|
||||||
|
<data name="Bitmap1" mimetype="application/x-microsoft.net.object.binary.base64">
|
||||||
|
[base64 mime encoded serialized .NET Framework object]
|
||||||
|
</data>
|
||||||
|
<data name="Icon1" type="System.Drawing.Icon, System.Drawing" mimetype="application/x-microsoft.net.object.bytearray.base64">
|
||||||
|
[base64 mime encoded string representing a byte array form of the .NET Framework object]
|
||||||
|
</data>
|
||||||
|
|
||||||
|
There are any number of "resheader" rows that contain simple
|
||||||
|
name/value pairs.
|
||||||
|
|
||||||
|
Each data row contains a name, and value. The row also contains a
|
||||||
|
type or mimetype. Type corresponds to a .NET class that support
|
||||||
|
text/value conversion through the TypeConverter architecture.
|
||||||
|
Classes that don't support this are serialized and stored with the
|
||||||
|
mimetype set.
|
||||||
|
|
||||||
|
The mimetype is used for serialized objects, and tells the
|
||||||
|
ResXResourceReader how to depersist the object. This is currently not
|
||||||
|
extensible. For a given mimetype the value must be set accordingly:
|
||||||
|
|
||||||
|
Note - application/x-microsoft.net.object.binary.base64 is the format
|
||||||
|
that the ResXResourceWriter will generate, however the reader can
|
||||||
|
read any of the formats listed below.
|
||||||
|
|
||||||
|
mimetype: application/x-microsoft.net.object.binary.base64
|
||||||
|
value : The object must be serialized with
|
||||||
|
: System.Serialization.Formatters.Binary.BinaryFormatter
|
||||||
|
: and then encoded with base64 encoding.
|
||||||
|
|
||||||
|
mimetype: application/x-microsoft.net.object.soap.base64
|
||||||
|
value : The object must be serialized with
|
||||||
|
: System.Runtime.Serialization.Formatters.Soap.SoapFormatter
|
||||||
|
: and then encoded with base64 encoding.
|
||||||
|
|
||||||
|
mimetype: application/x-microsoft.net.object.bytearray.base64
|
||||||
|
value : The object must be serialized into a byte array
|
||||||
|
: using a System.ComponentModel.TypeConverter
|
||||||
|
: and then encoded with base64 encoding.
|
||||||
|
-->
|
||||||
|
|
||||||
|
<xsd:schema id="root" xmlns="" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:msdata="urn:schemas-microsoft-com:xml-msdata">
|
||||||
|
<xsd:element name="root" msdata:IsDataSet="true">
|
||||||
|
<xsd:complexType>
|
||||||
|
<xsd:choice maxOccurs="unbounded">
|
||||||
|
<xsd:element name="data">
|
||||||
|
<xsd:complexType>
|
||||||
|
<xsd:sequence>
|
||||||
|
<xsd:element name="value" type="xsd:string" minOccurs="0" msdata:Ordinal="1" />
|
||||||
|
<xsd:element name="comment" type="xsd:string" minOccurs="0" msdata:Ordinal="2" />
|
||||||
|
</xsd:sequence>
|
||||||
|
<xsd:attribute name="name" type="xsd:string" msdata:Ordinal="1" />
|
||||||
|
<xsd:attribute name="type" type="xsd:string" msdata:Ordinal="3" />
|
||||||
|
<xsd:attribute name="mimetype" type="xsd:string" msdata:Ordinal="4" />
|
||||||
|
</xsd:complexType>
|
||||||
|
</xsd:element>
|
||||||
|
<xsd:element name="resheader">
|
||||||
|
<xsd:complexType>
|
||||||
|
<xsd:sequence>
|
||||||
|
<xsd:element name="value" type="xsd:string" minOccurs="0" msdata:Ordinal="1" />
|
||||||
|
</xsd:sequence>
|
||||||
|
<xsd:attribute name="name" type="xsd:string" use="required" />
|
||||||
|
</xsd:complexType>
|
||||||
|
</xsd:element>
|
||||||
|
</xsd:choice>
|
||||||
|
</xsd:complexType>
|
||||||
|
</xsd:element>
|
||||||
|
</xsd:schema>
|
||||||
|
<resheader name="resmimetype">
|
||||||
|
<value>text/microsoft-resx</value>
|
||||||
|
</resheader>
|
||||||
|
<resheader name="version">
|
||||||
|
<value>1.3</value>
|
||||||
|
</resheader>
|
||||||
|
<resheader name="reader">
|
||||||
|
<value>System.Resources.ResXResourceReader, System.Windows.Forms, Version=2.0.3500.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
|
||||||
|
</resheader>
|
||||||
|
<resheader name="writer">
|
||||||
|
<value>System.Resources.ResXResourceWriter, System.Windows.Forms, Version=2.0.3500.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
|
||||||
|
</resheader>
|
||||||
|
</root>
|
||||||
26
HMI/SubProject/tts/Properties/Settings.Designer.cs
generated
Normal file
26
HMI/SubProject/tts/Properties/Settings.Designer.cs
generated
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
//------------------------------------------------------------------------------
|
||||||
|
// <auto-generated>
|
||||||
|
// 이 코드는 도구를 사용하여 생성되었습니다.
|
||||||
|
// 런타임 버전:4.0.30319.42000
|
||||||
|
//
|
||||||
|
// 파일 내용을 변경하면 잘못된 동작이 발생할 수 있으며, 코드를 다시 생성하면
|
||||||
|
// 이러한 변경 내용이 손실됩니다.
|
||||||
|
// </auto-generated>
|
||||||
|
//------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
namespace Supertonic.WinForms.Properties {
|
||||||
|
|
||||||
|
|
||||||
|
[global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()]
[global::System.CodeDom.Compiler.GeneratedCodeAttribute("Microsoft.VisualStudio.Editors.SettingsDesigner.SettingsSingleFileGenerator", "15.9.0.0")]
internal sealed partial class Settings : global::System.Configuration.ApplicationSettingsBase {

    // Single shared instance, wrapped by ApplicationSettingsBase.Synchronized.
    private static Settings defaultInstance =
        (Settings)global::System.Configuration.ApplicationSettingsBase.Synchronized(new Settings());

    /// <summary>
    ///   Gets the shared settings instance for the application.
    /// </summary>
    public static Settings Default => defaultInstance;
}
|
||||||
|
}
|
||||||
6
HMI/SubProject/tts/Properties/Settings.settings
Normal file
6
HMI/SubProject/tts/Properties/Settings.settings
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
<?xml version='1.0' encoding='utf-8'?>
|
||||||
|
<SettingsFile xmlns="http://schemas.microsoft.com/VisualStudio/2004/01/settings" CurrentProfile="(Default)">
|
||||||
|
<Profiles>
|
||||||
|
<Profile Name="(Default)" />
|
||||||
|
</Profiles>
|
||||||
|
</SettingsFile>
|
||||||
91
HMI/SubProject/tts/Supertonic.WinForms.csproj
Normal file
91
HMI/SubProject/tts/Supertonic.WinForms.csproj
Normal file
@@ -0,0 +1,91 @@
|
|||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<Project ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||||
|
<Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
|
||||||
|
<PropertyGroup>
|
||||||
|
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
|
||||||
|
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
|
||||||
|
<ProjectGuid>{BD3E8373-C40C-4F7F-AA18-6990F1CFD21A}</ProjectGuid>
|
||||||
|
<OutputType>WinExe</OutputType>
|
||||||
|
<RootNamespace>Supertonic.WinForms</RootNamespace>
|
||||||
|
<AssemblyName>Supertonic.WinForms</AssemblyName>
|
||||||
|
<TargetFrameworkVersion>v4.8</TargetFrameworkVersion>
|
||||||
|
<FileAlignment>512</FileAlignment>
|
||||||
|
<AutoGenerateBindingRedirects>true</AutoGenerateBindingRedirects>
|
||||||
|
<Deterministic>true</Deterministic>
|
||||||
|
</PropertyGroup>
|
||||||
|
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
|
||||||
|
<PlatformTarget>x64</PlatformTarget>
|
||||||
|
<DebugSymbols>true</DebugSymbols>
|
||||||
|
<DebugType>full</DebugType>
|
||||||
|
<Optimize>false</Optimize>
|
||||||
|
<OutputPath>bin\Debug\</OutputPath>
|
||||||
|
<DefineConstants>DEBUG;TRACE</DefineConstants>
|
||||||
|
<ErrorReport>prompt</ErrorReport>
|
||||||
|
<WarningLevel>4</WarningLevel>
|
||||||
|
</PropertyGroup>
|
||||||
|
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
|
||||||
|
<PlatformTarget>AnyCPU</PlatformTarget>
|
||||||
|
<DebugType>pdbonly</DebugType>
|
||||||
|
<Optimize>true</Optimize>
|
||||||
|
<OutputPath>bin\Release\</OutputPath>
|
||||||
|
<DefineConstants>TRACE</DefineConstants>
|
||||||
|
<ErrorReport>prompt</ErrorReport>
|
||||||
|
<WarningLevel>4</WarningLevel>
|
||||||
|
</PropertyGroup>
|
||||||
|
<PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'Debug|Win32'">
|
||||||
|
<OutputPath>bin\Debug\</OutputPath>
|
||||||
|
</PropertyGroup>
|
||||||
|
<ItemGroup>
|
||||||
|
<Reference Include="System" />
|
||||||
|
<Reference Include="System.Core" />
|
||||||
|
<Reference Include="System.Xml.Linq" />
|
||||||
|
<Reference Include="System.Data.DataSetExtensions" />
|
||||||
|
<Reference Include="Microsoft.CSharp" />
|
||||||
|
<Reference Include="System.Data" />
|
||||||
|
<Reference Include="System.Deployment" />
|
||||||
|
<Reference Include="System.Drawing" />
|
||||||
|
<Reference Include="System.Net.Http" />
|
||||||
|
<Reference Include="System.Windows.Forms" />
|
||||||
|
<Reference Include="System.Xml" />
|
||||||
|
</ItemGroup>
|
||||||
|
<ItemGroup>
|
||||||
|
<Compile Include="fMain.cs">
|
||||||
|
<SubType>Form</SubType>
|
||||||
|
</Compile>
|
||||||
|
<Compile Include="fMain.Designer.cs">
|
||||||
|
<DependentUpon>fMain.cs</DependentUpon>
|
||||||
|
</Compile>
|
||||||
|
<Compile Include="Helper.cs" />
|
||||||
|
<Compile Include="Program.cs" />
|
||||||
|
<Compile Include="Properties\AssemblyInfo.cs" />
|
||||||
|
<Compile Include="Properties\Resources.Designer.cs">
|
||||||
|
<AutoGen>True</AutoGen>
|
||||||
|
<DesignTime>True</DesignTime>
|
||||||
|
<DependentUpon>Resources.resx</DependentUpon>
|
||||||
|
</Compile>
|
||||||
|
<Compile Include="Properties\Settings.Designer.cs">
|
||||||
|
<AutoGen>True</AutoGen>
|
||||||
|
<DesignTimeSharedInput>True</DesignTimeSharedInput>
|
||||||
|
<DependentUpon>Settings.settings</DependentUpon>
|
||||||
|
</Compile>
|
||||||
|
<EmbeddedResource Include="fMain.resx">
|
||||||
|
<DependentUpon>fMain.cs</DependentUpon>
|
||||||
|
</EmbeddedResource>
|
||||||
|
<EmbeddedResource Include="Properties\Resources.resx">
|
||||||
|
<Generator>ResXFileCodeGenerator</Generator>
|
||||||
|
<LastGenOutput>Resources.Designer.cs</LastGenOutput>
|
||||||
|
</EmbeddedResource>
|
||||||
|
<None Include="App.config" />
|
||||||
|
<None Include="Properties\Settings.settings">
|
||||||
|
<Generator>SettingsSingleFileGenerator</Generator>
|
||||||
|
<LastGenOutput>Settings.Designer.cs</LastGenOutput>
|
||||||
|
</None>
|
||||||
|
<PackageReference Include="Microsoft.ML.OnnxRuntime">
|
||||||
|
<Version>1.24.1</Version>
|
||||||
|
</PackageReference>
|
||||||
|
<PackageReference Include="System.Text.Json">
|
||||||
|
<Version>10.0.2</Version>
|
||||||
|
</PackageReference>
|
||||||
|
</ItemGroup>
|
||||||
|
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
|
||||||
|
</Project>
|
||||||
25
HMI/SubProject/tts/Supertonic.WinForms.sln
Normal file
25
HMI/SubProject/tts/Supertonic.WinForms.sln
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
|
||||||
|
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||||
|
# Visual Studio Express 15 for Windows Desktop
|
||||||
|
VisualStudioVersion = 15.0.36324.19
|
||||||
|
MinimumVisualStudioVersion = 10.0.40219.1
|
||||||
|
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Supertonic.WinForms", "Supertonic.WinForms.csproj", "{BD3E8373-C40C-4F7F-AA18-6990F1CFD21A}"
|
||||||
|
EndProject
|
||||||
|
Global
|
||||||
|
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||||
|
Debug|Any CPU = Debug|Any CPU
|
||||||
|
Release|Any CPU = Release|Any CPU
|
||||||
|
EndGlobalSection
|
||||||
|
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||||
|
{BD3E8373-C40C-4F7F-AA18-6990F1CFD21A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||||
|
{BD3E8373-C40C-4F7F-AA18-6990F1CFD21A}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||||
|
{BD3E8373-C40C-4F7F-AA18-6990F1CFD21A}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||||
|
{BD3E8373-C40C-4F7F-AA18-6990F1CFD21A}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||||
|
EndGlobalSection
|
||||||
|
GlobalSection(SolutionProperties) = preSolution
|
||||||
|
HideSolutionNode = FALSE
|
||||||
|
EndGlobalSection
|
||||||
|
GlobalSection(ExtensibilityGlobals) = postSolution
|
||||||
|
SolutionGuid = {1DFF2850-1C17-454F-BB29-E08B604073C4}
|
||||||
|
EndGlobalSection
|
||||||
|
EndGlobal
|
||||||
187
HMI/SubProject/tts/fMain.Designer.cs
generated
Normal file
187
HMI/SubProject/tts/fMain.Designer.cs
generated
Normal file
@@ -0,0 +1,187 @@
|
|||||||
|
namespace Supertonic.WinForms
|
||||||
|
{
|
||||||
|
partial class fMain
{
    /// <summary>
    /// Container for designer-managed components; null because none are registered.
    /// </summary>
    private System.ComponentModel.IContainer components = null;

    /// <summary>
    /// Releases the designer-managed components (when disposing) and then the form itself.
    /// </summary>
    /// <param name="disposing">True when called from Dispose() rather than a finalizer.</param>
    protected override void Dispose(bool disposing)
    {
        if (disposing)
        {
            components?.Dispose();
        }
        base.Dispose(disposing);
    }

    #region Windows Form Designer generated code

    /// <summary>
    /// Designer-owned initialization: creates the controls, sets their properties,
    /// wires the event handlers and adds everything to the form. Statement order is
    /// kept exactly as the designer emitted it.
    /// </summary>
    private void InitializeComponent()
    {
        this.txtInput = new System.Windows.Forms.TextBox();
        this.btnGenerate = new System.Windows.Forms.Button();
        this.cmbLang = new System.Windows.Forms.ComboBox();
        this.txtStylePath = new System.Windows.Forms.TextBox();
        this.numSteps = new System.Windows.Forms.NumericUpDown();
        this.numSpeed = new System.Windows.Forms.NumericUpDown();
        this.lblText = new System.Windows.Forms.Label();
        this.lblLang = new System.Windows.Forms.Label();
        this.lblStyle = new System.Windows.Forms.Label();
        this.lblSteps = new System.Windows.Forms.Label();
        this.lblSpeed = new System.Windows.Forms.Label();
        this.txtLog = new System.Windows.Forms.TextBox();
        ((System.ComponentModel.ISupportInitialize)(this.numSteps)).BeginInit();
        ((System.ComponentModel.ISupportInitialize)(this.numSpeed)).BeginInit();
        this.SuspendLayout();
        //
        // txtInput
        //
        this.txtInput.Location = new System.Drawing.Point(12, 29);
        this.txtInput.Multiline = true;
        this.txtInput.Name = "txtInput";
        this.txtInput.Size = new System.Drawing.Size(460, 60);
        this.txtInput.TabIndex = 0;
        this.txtInput.Text = "This morning, I took a walk in the park.";
        //
        // btnGenerate
        //
        this.btnGenerate.Location = new System.Drawing.Point(372, 169);
        this.btnGenerate.Name = "btnGenerate";
        this.btnGenerate.Size = new System.Drawing.Size(100, 30);
        this.btnGenerate.TabIndex = 1;
        this.btnGenerate.Text = "Generate TTS";
        this.btnGenerate.UseVisualStyleBackColor = true;
        this.btnGenerate.Click += new System.EventHandler(this.btnGenerate_Click);
        //
        // cmbLang
        //
        this.cmbLang.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList;
        this.cmbLang.FormattingEnabled = true;
        this.cmbLang.Location = new System.Drawing.Point(12, 114);
        this.cmbLang.Name = "cmbLang";
        this.cmbLang.Size = new System.Drawing.Size(80, 21);
        this.cmbLang.TabIndex = 2;
        //
        // txtStylePath
        //
        this.txtStylePath.Location = new System.Drawing.Point(110, 114);
        this.txtStylePath.Name = "txtStylePath";
        this.txtStylePath.Size = new System.Drawing.Size(362, 20);
        this.txtStylePath.TabIndex = 3;
        this.txtStylePath.Text = "assets/voice_styles/M1.json";
        //
        // numSteps
        //
        this.numSteps.Location = new System.Drawing.Point(12, 169);
        this.numSteps.Name = "numSteps";
        this.numSteps.Size = new System.Drawing.Size(80, 20);
        this.numSteps.TabIndex = 4;
        this.numSteps.Value = new decimal(new int[] { 5, 0, 0, 0 });
        //
        // numSpeed
        //
        this.numSpeed.DecimalPlaces = 2;
        this.numSpeed.Increment = new decimal(new int[] { 5, 0, 0, 131072 });
        this.numSpeed.Location = new System.Drawing.Point(110, 169);
        this.numSpeed.Name = "numSpeed";
        this.numSpeed.Size = new System.Drawing.Size(80, 20);
        this.numSpeed.TabIndex = 5;
        this.numSpeed.Value = new decimal(new int[] { 105, 0, 0, 131072 });
        //
        // lblText
        //
        this.lblText.AutoSize = true;
        this.lblText.Location = new System.Drawing.Point(12, 13);
        this.lblText.Name = "lblText";
        this.lblText.Size = new System.Drawing.Size(28, 13);
        this.lblText.TabIndex = 6;
        this.lblText.Text = "Text";
        //
        // lblLang
        //
        this.lblLang.AutoSize = true;
        this.lblLang.Location = new System.Drawing.Point(12, 98);
        this.lblLang.Name = "lblLang";
        this.lblLang.Size = new System.Drawing.Size(55, 13);
        this.lblLang.TabIndex = 7;
        this.lblLang.Text = "Language";
        //
        // lblStyle
        //
        this.lblStyle.AutoSize = true;
        this.lblStyle.Location = new System.Drawing.Point(110, 98);
        this.lblStyle.Name = "lblStyle";
        this.lblStyle.Size = new System.Drawing.Size(87, 13);
        this.lblStyle.TabIndex = 8;
        this.lblStyle.Text = "Voice Style Path";
        //
        // lblSteps
        //
        this.lblSteps.AutoSize = true;
        this.lblSteps.Location = new System.Drawing.Point(12, 153);
        this.lblSteps.Name = "lblSteps";
        this.lblSteps.Size = new System.Drawing.Size(61, 13);
        this.lblSteps.TabIndex = 9;
        this.lblSteps.Text = "Total Steps";
        //
        // lblSpeed
        //
        this.lblSpeed.AutoSize = true;
        this.lblSpeed.Location = new System.Drawing.Point(110, 153);
        this.lblSpeed.Name = "lblSpeed";
        this.lblSpeed.Size = new System.Drawing.Size(38, 13);
        this.lblSpeed.TabIndex = 10;
        this.lblSpeed.Text = "Speed";
        //
        // txtLog
        //
        this.txtLog.Location = new System.Drawing.Point(12, 214);
        this.txtLog.Multiline = true;
        this.txtLog.Name = "txtLog";
        this.txtLog.ReadOnly = true;
        this.txtLog.ScrollBars = System.Windows.Forms.ScrollBars.Vertical;
        this.txtLog.Size = new System.Drawing.Size(460, 150);
        this.txtLog.TabIndex = 11;
        //
        // fMain
        //
        this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F);
        this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Font;
        this.ClientSize = new System.Drawing.Size(484, 376);
        this.Controls.Add(this.txtLog);
        this.Controls.Add(this.lblSpeed);
        this.Controls.Add(this.lblSteps);
        this.Controls.Add(this.lblStyle);
        this.Controls.Add(this.lblLang);
        this.Controls.Add(this.lblText);
        this.Controls.Add(this.numSpeed);
        this.Controls.Add(this.numSteps);
        this.Controls.Add(this.txtStylePath);
        this.Controls.Add(this.cmbLang);
        this.Controls.Add(this.btnGenerate);
        this.Controls.Add(this.txtInput);
        this.Name = "fMain";
        this.Text = "Supertonic TTS (WinForms 4.8)";
        this.Load += new System.EventHandler(this.fMain_Load);
        ((System.ComponentModel.ISupportInitialize)(this.numSteps)).EndInit();
        ((System.ComponentModel.ISupportInitialize)(this.numSpeed)).EndInit();
        this.ResumeLayout(false);
        this.PerformLayout();

    }

    #endregion

    private System.Windows.Forms.TextBox txtInput;
    private System.Windows.Forms.Button btnGenerate;
    private System.Windows.Forms.ComboBox cmbLang;
    private System.Windows.Forms.TextBox txtStylePath;
    private System.Windows.Forms.NumericUpDown numSteps;
    private System.Windows.Forms.NumericUpDown numSpeed;
    private System.Windows.Forms.Label lblText;
    private System.Windows.Forms.Label lblLang;
    private System.Windows.Forms.Label lblStyle;
    private System.Windows.Forms.Label lblSteps;
    private System.Windows.Forms.Label lblSpeed;
    private System.Windows.Forms.TextBox txtLog;
}
|
||||||
|
}
|
||||||
89
HMI/SubProject/tts/fMain.cs
Normal file
89
HMI/SubProject/tts/fMain.cs
Normal file
@@ -0,0 +1,89 @@
|
|||||||
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.Drawing;
|
||||||
|
using System.IO;
|
||||||
|
using System.Linq;
|
||||||
|
using System.Windows.Forms;
|
||||||
|
|
||||||
|
namespace Supertonic.WinForms
|
||||||
|
{
|
||||||
|
/// <summary>
/// Main window: collects text, language, voice-style path, step count and speed,
/// runs synthesis off the UI thread and writes the result to a WAV file under "results".
/// </summary>
public partial class fMain : Form
{
    // Created on the first Generate click and reused for later clicks.
    private TextToSpeech _tts;

    public fMain()
    {
        InitializeComponent();
    }

    /// <summary>
    /// Click handler for the Generate button: loads the model on first use, synthesizes
    /// the entered text on a worker thread and saves the audio as a WAV file.
    /// Any exception is logged and shown in a message box.
    /// </summary>
    private async void btnGenerate_Click(object sender, EventArgs e)
    {
        // The handler yields at each await, so without this a second click could start a
        // second model load or a concurrent synthesis on the same _tts instance.
        btnGenerate.Enabled = false;
        try
        {
            string text = txtInput.Text;
            string lang = cmbLang.SelectedItem?.ToString() ?? "en";
            string stylePath = txtStylePath.Text;
            int totalStep = (int)numSteps.Value;
            float speed = (float)numSpeed.Value;

            if (string.IsNullOrWhiteSpace(text))
            {
                MessageBox.Show("Please enter text.");
                return;
            }

            if (_tts == null)
            {
                Log("Loading TTS model...");
                // Relative path: resolved against the process working directory.
                string onnxDir = "assets/onnx"; // This should be updated if assets are moved
                _tts = await System.Threading.Tasks.Task.Run(() => Helper.LoadTextToSpeech(onnxDir, false));
                Log("TTS model loaded.");
            }

            Log($"Generating speech: \"{text}\" ({lang})");

            var style = Helper.LoadVoiceStyle(new List<string> { stylePath }, true);

            var result = await System.Threading.Tasks.Task.Run(() => _tts.Call(text, lang, style, totalStep, speed));

            string saveDir = "results";
            // CreateDirectory is a no-op when the directory already exists.
            Directory.CreateDirectory(saveDir);

            string fname = $"{Helper.SanitizeFilename(text, 20)}_{DateTime.Now:HHmmss}.wav";
            string outputPath = Path.Combine(saveDir, fname);

            Helper.WriteWavFile(outputPath, result.wav, _tts.SampleRate);
            Log($"Saved: {outputPath}");

            MessageBox.Show($"Synthesis completed successfully!\nSaved to: {outputPath}");
        }
        catch (Exception ex)
        {
            Log($"Error: {ex.Message}");
            MessageBox.Show($"Error: {ex.Message}");
        }
        finally
        {
            // The form may have been closed while a background task was running.
            if (!IsDisposed)
            {
                btnGenerate.Enabled = true;
            }
        }
    }

    /// <summary>
    /// Appends a timestamped line to the log box and scrolls to the end. Calls from a
    /// non-UI thread are marshalled to the UI thread.
    /// </summary>
    /// <param name="msg">Message to append.</param>
    private void Log(string msg)
    {
        if (txtLog.InvokeRequired)
        {
            txtLog.Invoke(new Action(() => Log(msg)));
            return;
        }
        txtLog.AppendText($"[{DateTime.Now:HH:mm:ss}] {msg}\r\n");
        txtLog.SelectionStart = txtLog.Text.Length;
        txtLog.ScrollToCaret();
    }

    /// <summary>
    /// Form load handler: fills the language list, selects the first entry and, if the
    /// default voice-style file exists, puts its path into the style box.
    /// </summary>
    private void fMain_Load(object sender, EventArgs e)
    {
        cmbLang.Items.AddRange(Languages.Available);
        cmbLang.SelectedIndex = 0;

        // Set default style path if exists
        string defaultStyle = "assets/voice_styles/M1.json";
        if (File.Exists(defaultStyle))
        {
            txtStylePath.Text = defaultStyle;
        }
    }
}
|
||||||
|
}
|
||||||
61
HMI/SubProject/tts/fMain.resx
Normal file
61
HMI/SubProject/tts/fMain.resx
Normal file
@@ -0,0 +1,61 @@
|
|||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<root>
|
||||||
|
<xsd:schema id="root" xmlns="" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:msdata="urn:schemas-microsoft-com:xml-msdata">
|
||||||
|
<xsd:import namespace="http://www.w3.org/XML/1998/namespace" />
|
||||||
|
<xsd:element name="root" msdata:IsDataSet="true">
|
||||||
|
<xsd:complexType>
|
||||||
|
<xsd:choice maxOccurs="unbounded">
|
||||||
|
<xsd:element name="metadata">
|
||||||
|
<xsd:complexType>
|
||||||
|
<xsd:sequence>
|
||||||
|
<xsd:element name="value" type="xsd:string" minOccurs="0" />
|
||||||
|
</xsd:sequence>
|
||||||
|
<xsd:attribute name="name" use="required" type="xsd:string" />
|
||||||
|
<xsd:attribute name="type" type="xsd:string" />
|
||||||
|
<xsd:attribute name="mimetype" type="xsd:string" />
|
||||||
|
<xsd:attribute ref="xml:space" />
|
||||||
|
</xsd:complexType>
|
||||||
|
</xsd:element>
|
||||||
|
<xsd:element name="assembly">
|
||||||
|
<xsd:complexType>
|
||||||
|
<xsd:attribute name="name" type="xsd:string" />
|
||||||
|
<xsd:attribute name="alias" type="xsd:string" />
|
||||||
|
</xsd:complexType>
|
||||||
|
</xsd:element>
|
||||||
|
<xsd:element name="data">
|
||||||
|
<xsd:complexType>
|
||||||
|
<xsd:sequence>
|
||||||
|
<xsd:element name="value" type="xsd:string" minOccurs="0" msdata:Ordinal="1" />
|
||||||
|
<xsd:element name="comment" type="xsd:string" minOccurs="0" msdata:Ordinal="2" />
|
||||||
|
</xsd:sequence>
|
||||||
|
<xsd:attribute name="name" use="required" type="xsd:string" msdata:Ordinal="1" />
|
||||||
|
<xsd:attribute name="type" type="xsd:string" msdata:Ordinal="3" />
|
||||||
|
<xsd:attribute name="mimetype" type="xsd:string" msdata:Ordinal="4" />
|
||||||
|
<xsd:attribute ref="xml:space" />
|
||||||
|
</xsd:complexType>
|
||||||
|
</xsd:element>
|
||||||
|
<xsd:element name="resheader">
|
||||||
|
<xsd:complexType>
|
||||||
|
<xsd:sequence>
|
||||||
|
<xsd:element name="value" type="xsd:string" minOccurs="0" msdata:Ordinal="1" />
|
||||||
|
</xsd:sequence>
|
||||||
|
<xsd:attribute name="name" type="xsd:string" use="required" />
|
||||||
|
</xsd:complexType>
|
||||||
|
</xsd:element>
|
||||||
|
</xsd:choice>
|
||||||
|
</xsd:complexType>
|
||||||
|
</xsd:element>
|
||||||
|
</xsd:schema>
|
||||||
|
<resheader name="resmimetype">
|
||||||
|
<value>text/microsoft-resx</value>
|
||||||
|
</resheader>
|
||||||
|
<resheader name="version">
|
||||||
|
<value>2.0</value>
|
||||||
|
</resheader>
|
||||||
|
<resheader name="reader">
|
||||||
|
<value>System.Resources.ResXResourceReader, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
|
||||||
|
</resheader>
|
||||||
|
<resheader name="writer">
|
||||||
|
<value>System.Resources.ResXResourceWriter, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
|
||||||
|
</resheader>
|
||||||
|
</root>
|
||||||
Reference in New Issue
Block a user