initial commit
This commit is contained in:
10
ios/ExampleiOSApp/App.swift
Normal file
10
ios/ExampleiOSApp/App.swift
Normal file
@@ -0,0 +1,10 @@
|
||||
import SwiftUI
|
||||
|
||||
/// Application entry point: a single window scene whose root view is `ContentView`.
@main
struct ExampleiOSApp: App {
    /// The app's only scene.
    var body: some Scene {
        WindowGroup { ContentView() }
    }
}
|
||||
30
ios/ExampleiOSApp/AudioPlayer.swift
Normal file
30
ios/ExampleiOSApp/AudioPlayer.swift
Normal file
@@ -0,0 +1,30 @@
|
||||
import Foundation
|
||||
import AVFoundation
|
||||
|
||||
/// Minimal wrapper around `AVAudioPlayer` that plays one clip at a time and
/// reports the end of a playback attempt through an optional callback.
final class AudioPlayer: NSObject, AVAudioPlayerDelegate {
    /// Strong reference that keeps the active player alive while it plays.
    private var player: AVAudioPlayer?
    /// Completion callback for the playback currently in flight; fired at most once.
    private var onFinish: (() -> Void)?

    /// Loads the audio at `url` into memory and starts playing it.
    ///
    /// Any clip that is already playing is stopped first.
    ///
    /// - Parameters:
    ///   - url: Location of the audio file to play.
    ///   - onFinish: Called when playback ends. It is also called when the clip
    ///     cannot be loaded or started, so callers that track a "playing" flag
    ///     can always reset it.
    func play(url: URL, onFinish: (() -> Void)? = nil) {
        // Drop the previous clip so two players never overlap.
        player?.stop()
        player = nil
        self.onFinish = onFinish

        do {
            let data = try Data(contentsOf: url)
            let newPlayer = try AVAudioPlayer(data: data)
            newPlayer.delegate = self
            newPlayer.prepareToPlay()
            // Retain before starting so the delegate callback always has a live owner.
            player = newPlayer
            // `play()` returns false when playback could not be started.
            guard newPlayer.play() else {
                print("Audio play error: playback could not be started")
                player = nil
                finish()
                return
            }
        } catch {
            print("Audio play error: \(error)")
            finish()
        }
    }

    /// Stops playback and releases the player. The completion callback is
    /// discarded without being invoked, because the stop was requested explicitly.
    func stop() {
        player?.stop()
        player = nil
        onFinish = nil
    }

    // MARK: - AVAudioPlayerDelegate

    /// Forwards the end of playback to the pending completion callback.
    func audioPlayerDidFinishPlaying(_ player: AVAudioPlayer, successfully flag: Bool) {
        finish()
    }

    /// Treats a decode failure as the end of playback so callers are not left waiting.
    func audioPlayerDecodeErrorDidOccur(_ player: AVAudioPlayer, error: Error?) {
        if let error = error {
            print("Audio play error: \(error)")
        }
        finish()
    }

    // MARK: - Private

    /// Invokes the pending completion callback once and clears it.
    private func finish() {
        let callback = onFinish
        onFinish = nil
        callback?()
    }
}
|
||||
99
ios/ExampleiOSApp/ContentView.swift
Normal file
99
ios/ExampleiOSApp/ContentView.swift
Normal file
@@ -0,0 +1,99 @@
|
||||
import SwiftUI
|
||||
|
||||
/// Root screen of the demo: a text editor, synthesis settings (NFE, voice, language),
/// Generate / Play buttons, and status lines, all bound to a `TTSViewModel`.
struct ContentView: View {
    /// View model owned by this view.
    @StateObject private var vm = TTSViewModel()

    var body: some View {
        ZStack {
            backdrop

            VStack(spacing: 20) {
                Spacer()

                settingsPanel
                actionButtons

                // Shown only when the view model provides an RTF string.
                if let rtf = vm.rtfText {
                    Text(rtf)
                        .font(.footnote.monospacedDigit())
                        .foregroundColor(.secondary)
                        .padding(.top, 2)
                }

                // Shown only when the view model reports an error.
                if let error = vm.errorMessage {
                    Text(error)
                        .foregroundColor(.red)
                        .font(.footnote)
                        .multilineTextAlignment(.center)
                        .padding(.horizontal)
                }

                Spacer()
            }
        }
        .onAppear { vm.startup() }
    }

    // MARK: - Sub-views

    /// Full-screen gradient drawn behind the controls.
    private var backdrop: some View {
        LinearGradient(
            gradient: Gradient(colors: [Color(.systemBackground), Color(.secondarySystemBackground)]),
            startPoint: .topLeading,
            endPoint: .bottomTrailing
        )
        .ignoresSafeArea()
    }

    /// Title, text input, and the three synthesis settings.
    private var settingsPanel: some View {
        VStack(spacing: 12) {
            Text("Supertonic 2 iOS Demo")
                .font(.title2.weight(.semibold))
                .foregroundColor(.primary)

            textInput
            nfeRow
            voicePicker
            languageRow
        }
    }

    /// Multiline editor bound to the text to synthesize.
    private var textInput: some View {
        TextEditor(text: $vm.text)
            .frame(minHeight: 120, maxHeight: 180)
            .padding(8)
            .background(Color(.secondarySystemBackground))
            .cornerRadius(12)
            .overlay(
                RoundedRectangle(cornerRadius: 12)
                    .stroke(Color.secondary.opacity(0.3), lineWidth: 1)
            )
            .padding(.horizontal)
    }

    /// Slider for the NFE value (2 to 15 in steps of 1) with a numeric readout.
    private var nfeRow: some View {
        HStack(spacing: 12) {
            Text("NFE")
                .font(.subheadline)
                .foregroundColor(.secondary)
            Slider(value: $vm.nfe, in: 2...15, step: 1)
            Text("\(Int(vm.nfe))")
                .font(.subheadline.monospacedDigit())
                .frame(width: 36)
        }
        .padding(.horizontal)
    }

    /// Segmented control choosing between the male and female voice.
    private var voicePicker: some View {
        Picker("Voice", selection: $vm.voice) {
            Text("M").tag(TTSService.Voice.male)
            Text("F").tag(TTSService.Voice.female)
        }
        .pickerStyle(.segmented)
        .padding(.horizontal)
    }

    /// Menu listing every `TTSService.Language` case.
    private var languageRow: some View {
        HStack(spacing: 12) {
            Text("Language")
                .font(.subheadline)
                .foregroundColor(.secondary)
            Picker("Language", selection: $vm.language) {
                ForEach(TTSService.Language.allCases, id: \.self) { lang in
                    Text(lang.displayName).tag(lang)
                }
            }
            .pickerStyle(.menu)
        }
        .padding(.horizontal)
    }

    /// Generate and Play/Stop buttons.
    private var actionButtons: some View {
        HStack(spacing: 16) {
            Button {
                vm.generate()
            } label: {
                Label(
                    vm.isGenerating ? "Generating..." : "Generate",
                    systemImage: vm.isGenerating ? "hourglass" : "wand.and.stars"
                )
                .labelStyle(.titleAndIcon)
            }
            .buttonStyle(.borderedProminent)
            .tint(.accentColor)
            .disabled(vm.isGenerating)

            Button {
                vm.togglePlay()
            } label: {
                Label(
                    vm.isPlaying ? "Stop" : "Play",
                    systemImage: vm.isPlaying ? "stop.fill" : "play.fill"
                )
            }
            .buttonStyle(.bordered)
            // Nothing to play until a clip has been generated.
            .disabled(vm.audioURL == nil)
        }
    }
}
|
||||
29
ios/ExampleiOSApp/Info.plist
Normal file
29
ios/ExampleiOSApp/Info.plist
Normal file
@@ -0,0 +1,29 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
    <!-- Bundle identity. The $(...) placeholders are build-setting references. -->
    <key>CFBundleDevelopmentRegion</key>
    <string>en</string>
    <key>CFBundleExecutable</key>
    <string>$(EXECUTABLE_NAME)</string>
    <key>CFBundleIdentifier</key>
    <string>$(PRODUCT_BUNDLE_IDENTIFIER)</string>
    <key>CFBundleInfoDictionaryVersion</key>
    <string>6.0</string>
    <key>CFBundleName</key>
    <string>ExampleiOSApp</string>
    <key>CFBundlePackageType</key>
    <string>APPL</string>

    <!-- Version shown to users and the build number. -->
    <key>CFBundleShortVersionString</key>
    <string>1.0</string>
    <key>CFBundleVersion</key>
    <string>1</string>

    <!-- Empty launch-screen dictionary. -->
    <key>UILaunchScreen</key>
    <dict/>

    <!-- Scene configuration: multiple scenes are disabled. -->
    <key>UIApplicationSceneManifest</key>
    <dict>
        <key>UIApplicationSupportsMultipleScenes</key>
        <false/>
    </dict>
</dict>
</plist>
|
||||
114
ios/ExampleiOSApp/TTSService.swift
Normal file
114
ios/ExampleiOSApp/TTSService.swift
Normal file
@@ -0,0 +1,114 @@
|
||||
import Foundation
|
||||
import OnnxRuntimeBindings
|
||||
|
||||
/// Text-to-speech service for the demo app: locates the bundled ONNX assets,
/// loads the `TextToSpeech` pipeline once at init, and renders text to a
/// temporary WAV file.
final class TTSService {
    /// Voice presets offered by the UI; resolved to the bundled "M1" / "F1" style files.
    enum Voice { case male, female }

    /// Languages offered by the UI. The raw value is the code passed to the synthesizer.
    enum Language: String, CaseIterable {
        case en = "en"
        case ko = "ko"
        case es = "es"
        case pt = "pt"
        case fr = "fr"

        /// Name of the language as shown in the language picker.
        var displayName: String {
            switch self {
            case .en: return "English"
            case .ko: return "한국어"
            case .es: return "Español"
            case .pt: return "Português"
            case .fr: return "Français"
            }
        }
    }

    private let env: ORTEnv
    private let textToSpeech: TextToSpeech
    private let bundleOnnxDir: String
    private let sampleRate: Int

    /// Locates the model directory in the app bundle and loads the TTS pipeline.
    ///
    /// - Throws: An `NSError` (domain "TTS", code -100) when the model files are
    ///   not in the bundle, or any error raised while creating the ONNX Runtime
    ///   environment or loading the models.
    init() throws {
        bundleOnnxDir = try Self.locateOnnxDirInBundle()
        env = try ORTEnv(loggingLevel: .warning)
        textToSpeech = try loadTextToSpeech(bundleOnnxDir, false, env)
        sampleRate = textToSpeech.sampleRate
    }

    /// Synthesizes `text` and writes the result to a uniquely named WAV file in
    /// the temporary directory.
    ///
    /// - Parameters:
    ///   - text: Text to speak; must contain at least one non-whitespace character.
    ///   - nfe: Step count forwarded to the synthesizer.
    ///   - voice: Voice preset whose style file is loaded.
    ///   - language: Language of `text`.
    /// - Returns: URL of the written WAV file.
    /// - Throws: An `NSError` (domain "TTS") for blank input (-103) or a missing
    ///   voice style (-102), or any error raised by style loading, synthesis, or
    ///   file writing.
    func synthesize(text: String, nfe: Int, voice: Voice, language: Language) async throws -> URL {
        // Reject blank input up front instead of handing the model an empty utterance.
        guard !text.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty else {
            throw NSError(
                domain: "TTS",
                code: -103,
                userInfo: [NSLocalizedDescriptionKey: "Please enter some text to synthesize."]
            )
        }

        // 1) Load style for the selected voice
        let styleURL = try Self.locateVoiceStyleURL(voice: voice)
        let style = try loadVoiceStyle([styleURL.path], verbose: false)

        // 2) Synthesize via packed TextToSpeech component
        let (wav, duration) = try textToSpeech.call(text, language.rawValue, style, nfe)

        // 3) Trim the buffer to the reported duration. The sample count is clamped
        //    to 0...wav.count so a negative duration cannot form an invalid range,
        //    and a non-finite duration (which would trap in the Int conversion)
        //    falls back to the full buffer.
        let requestedSamples = Double(sampleRate) * Double(duration)
        let clampedSamples = requestedSamples.isFinite
            ? min(max(requestedSamples, 0), Double(wav.count))
            : Double(wav.count)
        let wavOut = Array(wav.prefix(Int(clampedSamples)))

        // 4) Write the samples to a unique temporary file
        let tmpURL = FileManager.default.temporaryDirectory
            .appendingPathComponent("supertonic_tts_\(UUID().uuidString).wav")
        try writeWavFile(tmpURL.path, wavOut, sampleRate)

        return tmpURL
    }

    // MARK: - Resource location helpers

    /// Returns the path of the first bundle directory that contains every
    /// required model file.
    ///
    /// Candidates are tried in this order: `onnx/` and `assets/onnx/` under the
    /// resource root, the folder of any `tts.json` the bundle resolves in those
    /// subdirectories or at the top level, and finally the resource root itself.
    private static func locateOnnxDirInBundle() throws -> String {
        let bundle = Bundle.main
        let fm = FileManager.default
        let requiredFiles = [
            "tts.json",
            "duration_predictor.onnx",
            "text_encoder.onnx",
            "vector_estimator.onnx",
            "vocoder.onnx"
        ]

        func dirHasRequiredFiles(_ dir: URL) -> Bool {
            requiredFiles.allSatisfy { fm.fileExists(atPath: dir.appendingPathComponent($0).path) }
        }

        var candidates: [URL] = []
        if let root = bundle.resourceURL {
            candidates += ["onnx", "assets/onnx"].map {
                root.appendingPathComponent($0, isDirectory: true)
            }
        }
        let lookupSubdirectories: [String?] = ["onnx", "assets/onnx", nil]
        candidates += lookupSubdirectories.compactMap {
            bundle.url(forResource: "tts", withExtension: "json", subdirectory: $0)?
                .deletingLastPathComponent()
        }
        if let root = bundle.resourceURL {
            candidates.append(root)
        }

        guard let match = candidates.first(where: dirHasRequiredFiles) else {
            throw NSError(
                domain: "TTS",
                code: -100,
                userInfo: [NSLocalizedDescriptionKey: "Could not find the onnx directory in the bundle. Please make sure the onnx folder (as a folder reference) is included in Copy Bundle Resources in Xcode."]
            )
        }
        return match.path
    }

    /// Returns the URL of the style JSON for `voice` ("M1" for male, "F1" for female).
    ///
    /// Bundle lookups in `voice_styles/`, `assets/voice_styles/`, and the top
    /// level are tried first, followed by a direct file-existence check in the
    /// two folders.
    private static func locateVoiceStyleURL(voice: Voice) throws -> URL {
        let fileName: String
        switch voice {
        case .male: fileName = "M1"
        case .female: fileName = "F1"
        }

        let bundle = Bundle.main
        let lookupSubdirectories: [String?] = ["voice_styles", "assets/voice_styles", nil]
        for subdirectory in lookupSubdirectories {
            if let url = bundle.url(forResource: fileName, withExtension: "json", subdirectory: subdirectory) {
                return url
            }
        }

        // Fallback: check the folders on disk in case the bundle lookup misses them.
        if let root = bundle.resourceURL {
            for folder in ["voice_styles", "assets/voice_styles"] {
                let file = root
                    .appendingPathComponent(folder, isDirectory: true)
                    .appendingPathComponent("\(fileName).json")
                if FileManager.default.fileExists(atPath: file.path) { return file }
            }
        }

        throw NSError(
            domain: "TTS",
            code: -102,
            userInfo: [NSLocalizedDescriptionKey: "Could not find the voice style JSON (\(fileName).json) in the bundle. Ensure voice_styles folder is included in Copy Bundle Resources."]
        )
    }
}
|
||||
82
ios/ExampleiOSApp/TTSViewModel.swift
Normal file
82
ios/ExampleiOSApp/TTSViewModel.swift
Normal file
@@ -0,0 +1,82 @@
|
||||
import Foundation
|
||||
import AVFoundation
|
||||
|
||||
/// Main-actor view model behind `ContentView`: holds the synthesis settings,
/// runs generation through `TTSService`, and drives playback through `AudioPlayer`.
@MainActor
final class TTSViewModel: ObservableObject {
    @Published var text: String = "This morning, I took a walk in the park, and the sound of the birds and the breeze was so pleasant that I stopped for a long time just to listen."
    /// Step count chosen on the slider; stored as `Double` because `Slider` binds to one.
    @Published var nfe: Double = 5
    @Published var voice: TTSService.Voice = .male
    @Published var language: TTSService.Language = .en
    @Published var isGenerating: Bool = false
    @Published var isPlaying: Bool = false
    @Published var errorMessage: String?
    /// Location of the most recently generated clip, or nil when there is none.
    @Published var audioURL: URL?
    /// Wall-clock time the last synthesis took.
    @Published var elapsedSeconds: Double?
    /// Length of the last generated clip.
    @Published var audioSeconds: Double?

    private var service: TTSService?
    private var player = AudioPlayer()

    /// Status line with elapsed / audio seconds and their ratio, or nil until
    /// both values are known and the audio length is positive.
    var rtfText: String? {
        guard let e = elapsedSeconds, let a = audioSeconds, a > 0 else { return nil }
        return String(format: "RTF %.2fx · %.2fs / %.2fs", e / a, e, a)
    }

    /// Creates the TTS service the first time it is called.
    ///
    /// The view calls this from `onAppear`, which can fire more than once, so an
    /// existing service is reused instead of being rebuilt. After a failed
    /// attempt the next call retries.
    func startup() {
        guard service == nil else { return }
        do {
            service = try TTSService()
            // Only an init failure can be showing here; a successful retry clears it.
            errorMessage = nil
        } catch {
            errorMessage = "Failed to init TTS: \(error.localizedDescription)"
        }
    }

    /// Synthesizes the current text with the current settings, then plays the
    /// result. Does nothing when the service is not available.
    func generate() {
        guard let service = service else { return }

        // Stop the previous clip: `audioURL` is cleared below, which disables the
        // Play/Stop button, so playback left running could not be stopped.
        player.stop()
        isPlaying = false

        isGenerating = true
        errorMessage = nil
        audioURL = nil
        elapsedSeconds = nil
        audioSeconds = nil

        // The task inherits this class's main-actor isolation, so published
        // state can be updated directly once the awaited call returns.
        Task {
            let tic = Date()
            do {
                let url = try await service.synthesize(text: text, nfe: Int(nfe), voice: voice, language: language)
                let elapsed = Date().timeIntervalSince(tic)
                let audio = audioDuration(at: url)
                self.audioURL = url
                self.elapsedSeconds = elapsed
                self.audioSeconds = audio
                self.isGenerating = false
                self.play(url: url)
            } catch {
                self.errorMessage = error.localizedDescription
                self.isGenerating = false
            }
        }
    }

    /// Stops playback when a clip is playing; otherwise replays the last clip, if any.
    func togglePlay() {
        if isPlaying {
            player.stop()
            isPlaying = false
        } else if let url = audioURL {
            play(url: url)
        }
    }

    /// Starts playback of `url` and clears `isPlaying` when the player reports completion.
    private func play(url: URL) {
        player.play(url: url) { [weak self] in
            DispatchQueue.main.async { self?.isPlaying = false }
        }
        isPlaying = true
    }

    /// Returns the length in seconds of the audio file at `url`, or nil when the
    /// file cannot be opened or reports a non-positive sample rate.
    private func audioDuration(at url: URL) -> Double? {
        guard let file = try? AVAudioFile(forReading: url) else { return nil }
        let rate = file.fileFormat.sampleRate
        // Avoid producing inf/NaN from a zero sample rate.
        guard rate > 0 else { return nil }
        return Double(file.length) / rate
    }
}
|
||||
29
ios/ExampleiOSApp/project.yml
Normal file
29
ios/ExampleiOSApp/project.yml
Normal file
@@ -0,0 +1,29 @@
|
||||
# XcodeGen spec for the Supertonic iOS demo app.
name: ExampleiOSApp
options:
  minimumXcodeGenVersion: 2.37.0
packages:
  # ONNX Runtime Swift package.
  onnxruntime:
    url: https://github.com/microsoft/onnxruntime-swift-package-manager.git
    from: 1.16.0
targets:
  ExampleiOSApp:
    type: application
    platform: iOS
    deploymentTarget: "15.0"
    sources:
      - path: .
      # Helper source file from the repository's swift/ directory.
      - path: ../../swift/Sources/Helper.swift
        type: file
    resources:
      # Asset folders copied next to this file in README step 0.
      - path: onnx
        type: folder
      # TTSService looks for the M1/F1 style JSON files in `voice_styles`.
      - path: voice_styles
        type: folder
    settings:
      base:
        PRODUCT_BUNDLE_IDENTIFIER: com.supertonic.ExampleiOSApp
        SWIFT_VERSION: 5.9
        INFOPLIST_FILE: Info.plist
    dependencies:
      - package: onnxruntime
        product: onnxruntime
|
||||
78
ios/README.md
Normal file
78
ios/README.md
Normal file
@@ -0,0 +1,78 @@
|
||||
# Supertonic iOS Example App
|
||||
|
||||
A minimal iOS demo that runs Supertonic 2 (ONNX Runtime) on-device. The app shows:
|
||||
- Multiline text input
|
||||
- NFE (denoising steps) slider
|
||||
- Voice toggle (M/F)
|
||||
- Language selector (en, ko, es, pt, fr)
|
||||
- Generate & Play buttons
|
||||
- RTF display (Elapsed / Audio seconds)
|
||||
|
||||
All ONNX models/configs are reused from `Supertonic/assets/onnx`, and voice style JSON files from `Supertonic/assets/voice_styles`.
|
||||
|
||||
## 📰 Update News
|
||||
|
||||
**2026.01.06** - 🎉 **Supertonic 2** released with multilingual support! Now supports English (`en`), Korean (`ko`), Spanish (`es`), Portuguese (`pt`), and French (`fr`). [Demo](https://huggingface.co/spaces/Supertone/supertonic-2) | [Models](https://huggingface.co/Supertone/supertonic-2)
|
||||
|
||||
**2025.12.10** - Added [6 new voice styles](https://huggingface.co/Supertone/supertonic/tree/b10dbaf18b316159be75b34d24f740008fddd381) (M3, M4, M5, F3, F4, F5). See [Voices](https://supertone-inc.github.io/supertonic-py/voices/) for details.
|
||||
|
||||
**2025.12.08** - Optimized ONNX models via [OnnxSlim](https://github.com/inisis/OnnxSlim) now available on [Hugging Face Models](https://huggingface.co/Supertone/supertonic)
|
||||
|
||||
## Prerequisites
|
||||
- macOS 13+, Xcode 15+
|
||||
- Swift 5.9+
|
||||
- iOS 15+ device (recommended)
|
||||
- Homebrew, XcodeGen
|
||||
|
||||
Install tools (if needed):
|
||||
```bash
|
||||
brew install xcodegen
|
||||
```
|
||||
|
||||
## Quick Start (zero-click in Xcode)
|
||||
0) Prepare assets next to the iOS target (one-time)
|
||||
```bash
|
||||
cd ios/ExampleiOSApp
|
||||
mkdir -p onnx voice_styles
|
||||
rsync -a ../../assets/onnx/ onnx/
|
||||
rsync -a ../../assets/voice_styles/ voice_styles/
|
||||
```
|
||||
|
||||
1) Generate the Xcode project with XcodeGen
|
||||
```bash
|
||||
xcodegen generate
|
||||
open ExampleiOSApp.xcodeproj
|
||||
```
|
||||
|
||||
2) Open in Xcode and select your iPhone as the run destination
|
||||
- Targets → ExampleiOSApp → Signing & Capabilities: Select your Team
|
||||
- iOS Deployment Target: 15.0+
|
||||
|
||||
3) Build & Run on device
|
||||
- Type text → adjust NFE/Voice/Language → Tap Generate → Audio plays automatically
|
||||
- An RTF line shows like: `RTF 0.30x · 3.04s / 10.11s`
|
||||
|
||||
## What's included (generated project)
|
||||
- SwiftUI app files: `App.swift`, `ContentView.swift`, `TTSViewModel.swift`, `AudioPlayer.swift`
|
||||
- Runtime wrapper: `TTSService.swift` (includes TTS inference logic)
|
||||
- Resources (local, vendored in `ios/ExampleiOSApp/onnx` and `ios/ExampleiOSApp/voice_styles` after step 0)
|
||||
|
||||
These references are defined in `project.yml` and added to the app bundle by XcodeGen.
|
||||
|
||||
## App Controls
|
||||
- **Text**: Multiline `TextEditor`
|
||||
- **NFE**: Denoising steps (default 5)
|
||||
- **Voice**: M/F voice style selector
|
||||
- **Language**: Language selector (English, 한국어, Español, Português, Français)
|
||||
- **Generate**: Runs end-to-end synthesis
|
||||
- **Play/Stop**: Controls playback of the last output
|
||||
- **RTF**: Shows Elapsed / Audio seconds for quick performance intuition
|
||||
|
||||
## Multilingual Support
|
||||
|
||||
Supertonic 2 supports multiple languages. Select the appropriate language for your input text:
|
||||
- **English (en)**: Default language
|
||||
- **한국어 (ko)**: Korean
|
||||
- **Español (es)**: Spanish
|
||||
- **Português (pt)**: Portuguese
|
||||
- **Français (fr)**: French
|
||||
Reference in New Issue
Block a user