initial commit
This commit is contained in:
10
ios/ExampleiOSApp/App.swift
Normal file
10
ios/ExampleiOSApp/App.swift
Normal file
@@ -0,0 +1,10 @@
|
||||
import SwiftUI
|
||||
|
||||
/// Application entry point.
///
/// Declares a single window scene whose root view is `ContentView`.
@main
struct ExampleiOSApp: App {
    /// The app's only scene: one window group hosting the demo screen.
    var body: some Scene {
        WindowGroup {
            ContentView()
        }
    }
}
|
||||
30
ios/ExampleiOSApp/AudioPlayer.swift
Normal file
30
ios/ExampleiOSApp/AudioPlayer.swift
Normal file
@@ -0,0 +1,30 @@
|
||||
import Foundation
|
||||
import AVFoundation
|
||||
|
||||
/// Minimal wrapper around `AVAudioPlayer` that plays one clip at a time and
/// reports back when that clip is done.
final class AudioPlayer: NSObject, AVAudioPlayerDelegate {
    /// Player for the clip most recently started; `nil` after `stop()`.
    private var player: AVAudioPlayer?
    /// Completion callback for the current clip; cleared once it fires or playback is stopped.
    private var onFinish: (() -> Void)?

    /// Reads the file at `url` into memory and starts playing it, replacing any clip
    /// that is currently loaded.
    ///
    /// - Parameters:
    ///   - url: Location of the audio file to play.
    ///   - onFinish: Called at most once when playback ends. It is also called when the
    ///     clip cannot be loaded or started, so callers can always reset their
    ///     "is playing" state from this single callback. It is not called when
    ///     playback is interrupted via `stop()` or by a later `play(url:onFinish:)`.
    func play(url: URL, onFinish: (() -> Void)? = nil) {
        // Drop the previous clip together with its callback so a stale handler can never fire.
        stop()
        self.onFinish = onFinish

        do {
            let data = try Data(contentsOf: url)
            let newPlayer = try AVAudioPlayer(data: data)
            newPlayer.delegate = self
            newPlayer.prepareToPlay()

            // `play()` reports failure through its Bool result rather than by throwing.
            guard newPlayer.play() else {
                print("Audio play error: playback could not be started")
                notifyFinished()
                return
            }
            player = newPlayer
        } catch {
            print("Audio play error: \(error)")
            // Nothing will ever play, so tell the caller instead of leaving it waiting.
            notifyFinished()
        }
    }

    /// Stops playback and discards the current clip and its pending callback.
    func stop() {
        player?.stop()
        player = nil
        onFinish = nil
    }

    /// `AVAudioPlayerDelegate` hook: forwards the end of the current clip to `onFinish`.
    func audioPlayerDidFinishPlaying(_ player: AVAudioPlayer, successfully flag: Bool) {
        // Ignore callbacks from a player that has already been replaced or stopped.
        guard player === self.player else { return }
        notifyFinished()
    }

    /// Fires the pending completion callback exactly once.
    private func notifyFinished() {
        let handler = onFinish
        // Clear before calling so a re-entrant `play` from inside the handler is not clobbered.
        onFinish = nil
        handler?()
    }
}
|
||||
99
ios/ExampleiOSApp/ContentView.swift
Normal file
99
ios/ExampleiOSApp/ContentView.swift
Normal file
@@ -0,0 +1,99 @@
|
||||
import SwiftUI
|
||||
|
||||
/// The demo's single screen: a text box, synthesis controls (NFE slider, voice and
/// language pickers), Generate / Play buttons, and status lines driven by `TTSViewModel`.
struct ContentView: View {
    @StateObject private var vm = TTSViewModel()

    var body: some View {
        ZStack {
            backdrop

            VStack(spacing: 20) {
                Spacer()

                inputPanel

                actionRow

                // Timing summary, shown only when the view model has one.
                if let rtf = vm.rtfText {
                    Text(rtf)
                        .font(.footnote.monospacedDigit())
                        .foregroundColor(.secondary)
                        .padding(.top, 2)
                }

                // Most recent error, if any.
                if let error = vm.errorMessage {
                    Text(error)
                        .foregroundColor(.red)
                        .font(.footnote)
                        .multilineTextAlignment(.center)
                        .padding(.horizontal)
                }

                Spacer()
            }
        }
        .onAppear { vm.startup() }
    }

    // MARK: - Sections

    /// Full-screen gradient drawn behind all controls.
    private var backdrop: some View {
        LinearGradient(
            gradient: Gradient(colors: [Color(.systemBackground), Color(.secondarySystemBackground)]),
            startPoint: .topLeading,
            endPoint: .bottomTrailing
        )
        .ignoresSafeArea()
    }

    /// Title, text editor, and the three synthesis settings.
    private var inputPanel: some View {
        VStack(spacing: 12) {
            Text("Supertonic 2 iOS Demo")
                .font(.title2.weight(.semibold))
                .foregroundColor(.primary)

            textInput

            nfeRow

            voicePicker

            languageRow
        }
    }

    /// Editor bound to the text that will be synthesized.
    private var textInput: some View {
        TextEditor(text: $vm.text)
            .frame(minHeight: 120, maxHeight: 180)
            .padding(8)
            .background(Color(.secondarySystemBackground))
            .cornerRadius(12)
            .overlay(
                RoundedRectangle(cornerRadius: 12)
                    .stroke(Color.secondary.opacity(0.3), lineWidth: 1)
            )
            .padding(.horizontal)
    }

    /// Slider for the NFE setting (whole steps from 2 to 15) with its current value.
    private var nfeRow: some View {
        HStack(spacing: 12) {
            Text("NFE")
                .font(.subheadline)
                .foregroundColor(.secondary)
            Slider(value: $vm.nfe, in: 2...15, step: 1)
            Text("\(Int(vm.nfe))")
                .font(.subheadline.monospacedDigit())
                .frame(width: 36)
        }
        .padding(.horizontal)
    }

    /// Segmented control switching between the male and female voice.
    private var voicePicker: some View {
        Picker("Voice", selection: $vm.voice) {
            Text("M").tag(TTSService.Voice.male)
            Text("F").tag(TTSService.Voice.female)
        }
        .pickerStyle(.segmented)
        .padding(.horizontal)
    }

    /// Menu listing every `TTSService.Language` case by display name.
    private var languageRow: some View {
        HStack(spacing: 12) {
            Text("Language")
                .font(.subheadline)
                .foregroundColor(.secondary)
            Picker("Language", selection: $vm.language) {
                ForEach(TTSService.Language.allCases, id: \.self) { lang in
                    Text(lang.displayName).tag(lang)
                }
            }
            .pickerStyle(.menu)
        }
        .padding(.horizontal)
    }

    /// Generate and Play/Stop buttons.
    private var actionRow: some View {
        HStack(spacing: 16) {
            // Disabled while a synthesis request is in flight.
            Button {
                vm.generate()
            } label: {
                Label(
                    vm.isGenerating ? "Generating..." : "Generate",
                    systemImage: vm.isGenerating ? "hourglass" : "wand.and.stars"
                )
                .labelStyle(.titleAndIcon)
            }
            .buttonStyle(.borderedProminent)
            .tint(.accentColor)
            .disabled(vm.isGenerating)

            // Disabled until there is generated audio to play.
            Button {
                vm.togglePlay()
            } label: {
                Label(vm.isPlaying ? "Stop" : "Play", systemImage: vm.isPlaying ? "stop.fill" : "play.fill")
            }
            .buttonStyle(.bordered)
            .disabled(vm.audioURL == nil)
        }
    }
}
|
||||
29
ios/ExampleiOSApp/Info.plist
Normal file
29
ios/ExampleiOSApp/Info.plist
Normal file
@@ -0,0 +1,29 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
||||
<plist version="1.0">
|
||||
<dict>
|
||||
<key>CFBundleDevelopmentRegion</key>
|
||||
<string>en</string>
|
||||
<key>CFBundleExecutable</key>
|
||||
<string>$(EXECUTABLE_NAME)</string>
|
||||
<key>CFBundleIdentifier</key>
|
||||
<string>$(PRODUCT_BUNDLE_IDENTIFIER)</string>
|
||||
<key>CFBundleInfoDictionaryVersion</key>
|
||||
<string>6.0</string>
|
||||
<key>CFBundleName</key>
|
||||
<string>ExampleiOSApp</string>
|
||||
<key>CFBundlePackageType</key>
|
||||
<string>APPL</string>
|
||||
<key>CFBundleShortVersionString</key>
|
||||
<string>1.0</string>
|
||||
<key>CFBundleVersion</key>
|
||||
<string>1</string>
|
||||
<key>UILaunchScreen</key>
|
||||
<dict/>
|
||||
<key>UIApplicationSceneManifest</key>
|
||||
<dict>
|
||||
<key>UIApplicationSupportsMultipleScenes</key>
|
||||
<false/>
|
||||
</dict>
|
||||
</dict>
|
||||
</plist>
|
||||
114
ios/ExampleiOSApp/TTSService.swift
Normal file
114
ios/ExampleiOSApp/TTSService.swift
Normal file
@@ -0,0 +1,114 @@
|
||||
import Foundation
|
||||
import OnnxRuntimeBindings
|
||||
|
||||
/// Wraps the bundled ONNX text-to-speech pipeline: locates the model files in the app
/// bundle, loads them once, and turns text into a WAV file on disk.
final class TTSService {
    /// Selectable voices; each maps to a bundled style file (`M1.json` / `F1.json`).
    enum Voice { case male, female }

    /// Languages offered to the user. The raw value is the code handed to the synthesizer.
    enum Language: String, CaseIterable {
        case en = "en"
        case ko = "ko"
        case es = "es"
        case pt = "pt"
        case fr = "fr"

        /// Name shown in the language picker.
        var displayName: String {
            switch self {
            case .en: return "English"
            case .ko: return "한국어"
            case .es: return "Español"
            case .pt: return "Português"
            case .fr: return "Français"
            }
        }
    }

    private let env: ORTEnv
    private let textToSpeech: TextToSpeech
    private let bundleOnnxDir: String
    private let sampleRate: Int

    /// Locates the model directory in the main bundle and loads the pipeline from it.
    ///
    /// - Throws: An `NSError` (domain `"TTS"`, code `-100`) when the model files are not
    ///   bundled, or whatever `ORTEnv` / `loadTextToSpeech` throw while loading.
    init() throws {
        bundleOnnxDir = try Self.locateOnnxDirInBundle()
        env = try ORTEnv(loggingLevel: .warning)
        textToSpeech = try loadTextToSpeech(bundleOnnxDir, false, env)
        sampleRate = textToSpeech.sampleRate
    }

    /// Synthesizes `text` and writes the result to a uniquely named WAV file in the
    /// temporary directory.
    ///
    /// - Parameters:
    ///   - text: Text to speak.
    ///   - nfe: Value forwarded unchanged to the synthesizer as its last argument.
    ///   - voice: Voice whose style file is loaded for this request.
    ///   - language: Language whose raw value is passed to the synthesizer.
    /// - Returns: URL of the WAV file that was written. The caller owns the file.
    /// - Throws: An `NSError` (domain `"TTS"`, code `-102`) when the style file is missing,
    ///   or any error thrown by style loading, synthesis, or WAV writing.
    func synthesize(text: String, nfe: Int, voice: Voice, language: Language) async throws -> URL {
        // 1) Load the style for the selected voice
        let styleURL = try Self.locateVoiceStyleURL(voice: voice)
        let style = try loadVoiceStyle([styleURL.path], verbose: false)

        // 2) Synthesize via packed TextToSpeech component
        let (wav, duration) = try textToSpeech.call(text, language.rawValue, style, nfe)

        // 3) Trim the buffer to the reported duration. The clamp happens in floating point
        //    before converting, because `Int(_:)` traps on NaN, infinity and out-of-range
        //    values, and a negative count would make the slice itself trap.
        let requestedSamples = Double(sampleRate) * Double(duration)
        let sampleCount: Int
        if requestedSamples.isFinite {
            sampleCount = Int(min(max(requestedSamples, 0), Double(wav.count)))
        } else {
            // No usable duration was reported; keep everything that was generated.
            sampleCount = wav.count
        }
        let wavOut = Array(wav.prefix(sampleCount))

        // 4) Persist to a fresh temporary file
        let tmpURL = FileManager.default.temporaryDirectory
            .appendingPathComponent("supertonic_tts_\(UUID().uuidString).wav")
        try writeWavFile(tmpURL.path, wavOut, sampleRate)

        return tmpURL
    }

    // MARK: - Resource location helpers

    /// Builds the `NSError` used for every missing-resource failure in this class.
    private static func resourceError(code: Int, message: String) -> NSError {
        NSError(domain: "TTS", code: code, userInfo: [NSLocalizedDescriptionKey: message])
    }

    /// Returns the path of the first bundle directory that contains every required model file.
    ///
    /// Directories are tried in this order: `onnx/` and `assets/onnx/` under the resource
    /// root, wherever the bundle resolves `tts.json` (in `onnx`, `assets/onnx`, or with no
    /// subdirectory), and finally the resource root itself.
    private static func locateOnnxDirInBundle() throws -> String {
        let bundle = Bundle.main
        let fileManager = FileManager.default
        let requiredFiles = [
            "tts.json",
            "duration_predictor.onnx",
            "text_encoder.onnx",
            "vector_estimator.onnx",
            "vocoder.onnx"
        ]

        var candidates: [URL] = []
        if let root = bundle.resourceURL {
            candidates.append(root.appendingPathComponent("onnx", isDirectory: true))
            candidates.append(root.appendingPathComponent("assets/onnx", isDirectory: true))
        }
        let configSubdirectories: [String?] = ["onnx", "assets/onnx", nil]
        for subdirectory in configSubdirectories {
            if let config = bundle.url(forResource: "tts", withExtension: "json", subdirectory: subdirectory) {
                candidates.append(config.deletingLastPathComponent())
            }
        }
        if let root = bundle.resourceURL {
            candidates.append(root)
        }

        let match = candidates.first { dir in
            requiredFiles.allSatisfy { fileManager.fileExists(atPath: dir.appendingPathComponent($0).path) }
        }
        guard let onnxDir = match else {
            throw resourceError(
                code: -100,
                message: "Could not find the onnx directory in the bundle. Please make sure the onnx folder (as a folder reference) is included in Copy Bundle Resources in Xcode."
            )
        }
        return onnxDir.path
    }

    /// Returns the URL of the bundled style JSON for `voice` (`M1.json` or `F1.json`).
    ///
    /// Bundle lookup is tried first in `voice_styles`, `assets/voice_styles`, then with no
    /// subdirectory; after that the same two folders are probed directly on disk.
    private static func locateVoiceStyleURL(voice: Voice) throws -> URL {
        // Prefer M1/F1 defaults; search common subdirectories
        let fileName: String
        switch voice {
        case .male: fileName = "M1"
        case .female: fileName = "F1"
        }

        let bundle = Bundle.main
        let lookupSubdirectories: [String?] = ["voice_styles", "assets/voice_styles", nil]
        for subdirectory in lookupSubdirectories {
            if let url = bundle.url(forResource: fileName, withExtension: "json", subdirectory: subdirectory) {
                return url
            }
        }

        // Fallback: scan folders if needed
        if let root = bundle.resourceURL {
            for folder in ["voice_styles", "assets/voice_styles"] {
                let file = root
                    .appendingPathComponent(folder, isDirectory: true)
                    .appendingPathComponent("\(fileName).json")
                if FileManager.default.fileExists(atPath: file.path) { return file }
            }
        }

        throw resourceError(
            code: -102,
            message: "Could not find the voice style JSON (\(fileName).json) in the bundle. Ensure voice_styles folder is included in Copy Bundle Resources."
        )
    }
}
|
||||
82
ios/ExampleiOSApp/TTSViewModel.swift
Normal file
82
ios/ExampleiOSApp/TTSViewModel.swift
Normal file
@@ -0,0 +1,82 @@
|
||||
import Foundation
|
||||
import AVFoundation
|
||||
|
||||
/// Main-actor view model for the demo screen. Holds the user's inputs, runs synthesis
/// through `TTSService`, plays the result through `AudioPlayer`, and publishes status
/// for the view.
@MainActor
final class TTSViewModel: ObservableObject {
    @Published var text: String = "This morning, I took a walk in the park, and the sound of the birds and the breeze was so pleasant that I stopped for a long time just to listen."
    /// Slider-backed setting; converted to `Int` before it is handed to the service.
    @Published var nfe: Double = 5
    @Published var voice: TTSService.Voice = .male
    @Published var language: TTSService.Language = .en
    @Published var isGenerating: Bool = false
    @Published var isPlaying: Bool = false
    @Published var errorMessage: String?
    /// Location of the most recently generated audio file, if any.
    @Published var audioURL: URL?
    /// Wall-clock time the last synthesis call took.
    @Published var elapsedSeconds: Double?
    /// Duration of the last generated audio file.
    @Published var audioSeconds: Double?

    private var service: TTSService?
    private var player = AudioPlayer()

    /// One-line timing summary (elapsed / audio ratio plus both values), or `nil` until
    /// both measurements exist and the audio duration is positive.
    var rtfText: String? {
        guard let e = elapsedSeconds, let a = audioSeconds, a > 0 else { return nil }
        return String(format: "RTF %.2fx · %.2fs / %.2fs", e / a, e, a)
    }

    /// Creates the TTS service if it does not exist yet; publishes an error on failure.
    ///
    /// Safe to call repeatedly (the view calls it from `onAppear`): an already loaded
    /// service is kept, and a previously failed load is retried.
    func startup() {
        guard service == nil else { return }
        do {
            service = try TTSService()
        } catch {
            errorMessage = "Failed to init TTS: \(error.localizedDescription)"
        }
    }

    /// Synthesizes the current text with the current settings, then plays the result.
    ///
    /// Does nothing when the service is not loaded or a request is already running.
    /// Publishes an error instead of calling the service when the text is blank.
    func generate() {
        guard !isGenerating, let service = service else { return }

        guard !text.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty else {
            errorMessage = "Please enter some text to synthesize."
            return
        }

        // Stop the old clip first: while `audioURL` is nil the view disables the Stop
        // button, so the user would have no way to silence it during generation.
        player.stop()
        isPlaying = false

        // Each synthesis writes a new temporary file; remove the one being replaced so
        // they do not pile up. Best effort: a failed delete must not block generation.
        if let previousURL = audioURL {
            try? FileManager.default.removeItem(at: previousURL)
        }

        isGenerating = true
        errorMessage = nil
        audioURL = nil
        elapsedSeconds = nil
        audioSeconds = nil

        // Snapshot the inputs so edits made during synthesis do not affect this request.
        let requestText = text
        let requestNFE = Int(nfe)
        let requestVoice = voice
        let requestLanguage = language

        // Started from a main-actor method, so the task body is main-actor isolated and
        // can assign published state directly once the awaited call returns.
        Task {
            let tic = Date()
            do {
                let url = try await service.synthesize(
                    text: requestText,
                    nfe: requestNFE,
                    voice: requestVoice,
                    language: requestLanguage
                )
                let elapsed = Date().timeIntervalSince(tic)
                self.audioURL = url
                self.elapsedSeconds = elapsed
                self.audioSeconds = self.audioDuration(at: url)
                self.isGenerating = false
                self.play(url: url)
            } catch {
                self.errorMessage = error.localizedDescription
                self.isGenerating = false
            }
        }
    }

    /// Stops playback when playing; otherwise replays the last generated audio, if any.
    func togglePlay() {
        if isPlaying {
            player.stop()
            isPlaying = false
        } else if let url = audioURL {
            play(url: url)
        }
    }

    /// Starts playback of `url` and flips `isPlaying` back to `false` when the player
    /// reports completion.
    private func play(url: URL) {
        player.play(url: url) { [weak self] in
            // Deferred to a later main-queue turn so it always lands after the
            // `isPlaying = true` below, even if the callback fires immediately.
            DispatchQueue.main.async { self?.isPlaying = false }
        }
        isPlaying = true
    }

    /// Returns the duration in seconds of the audio file at `url`, or `nil` when the file
    /// cannot be opened or reports a non-positive sample rate.
    private func audioDuration(at url: URL) -> Double? {
        guard let file = try? AVAudioFile(forReading: url) else { return nil }
        let sampleRate = file.fileFormat.sampleRate
        guard sampleRate > 0 else { return nil }
        return Double(file.length) / sampleRate
    }
}
|
||||
29
ios/ExampleiOSApp/project.yml
Normal file
29
ios/ExampleiOSApp/project.yml
Normal file
@@ -0,0 +1,29 @@
|
||||
name: ExampleiOSApp
|
||||
options:
|
||||
minimumXcodeGenVersion: 2.37.0
|
||||
packages:
|
||||
onnxruntime:
|
||||
url: https://github.com/microsoft/onnxruntime-swift-package-manager.git
|
||||
from: 1.16.0
|
||||
targets:
|
||||
ExampleiOSApp:
|
||||
type: application
|
||||
platform: iOS
|
||||
deploymentTarget: "15.0"
|
||||
sources:
|
||||
- path: .
|
||||
- path: ../../swift/Sources/Helper.swift
|
||||
type: file
|
||||
resources:
|
||||
- path: onnx
|
||||
type: folder
|
||||
- path: audio
|
||||
type: folder
|
||||
settings:
|
||||
base:
|
||||
PRODUCT_BUNDLE_IDENTIFIER: com.supertonic.ExampleiOSApp
|
||||
SWIFT_VERSION: 5.9
|
||||
INFOPLIST_FILE: Info.plist
|
||||
dependencies:
|
||||
- package: onnxruntime
|
||||
product: onnxruntime
|
||||
Reference in New Issue
Block a user