Skip to content

Commit ea0e73f

Browse files
committed
feat(cli): add --model and --voices options
- [feat] Add `--model` and `--voices` to `valid_options` set (kokoro_tts/__init__.py:get_valid_options():1239-1240)
- [feat] Modify `check_required_files` to accept `model_path` and `voices_path` parameters, updating `required_files` keys and adding a print statement (__init__.py:29-45)
- [feat] Update `print_usage` to include new command-line options and examples (__init__.py:165-183)
- [refactor] Update `print_supported_languages`, `print_supported_voices`, and `convert_text_to_audio` signatures to accept `model_path` and `voices_path` and pass them to `check_required_files` and `Kokoro` constructor (__init__.py:185-189, 200-204, 805-817)
- [refactor] Refactor `main` to update argument skipping logic, parse `--model` and `--voices` values, initialize default paths, and pass paths to `convert_text_to_audio` (__init__.py:1262-1351)
1 parent f5f7eb5 commit ea0e73f

File tree

3 files changed

+70
-34
lines changed

3 files changed

+70
-34
lines changed

kokoro_tts/__init__.py

Lines changed: 68 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -29,26 +29,27 @@
2929
stop_spinner = False
3030
stop_audio = False
3131

32-
def check_required_files():
32+
def check_required_files(model_path="kokoro-v1.0.onnx", voices_path="voices-v1.0.bin"):
3333
"""Check if required model files exist and provide helpful error messages."""
3434
required_files = {
35-
"kokoro-v1.0.onnx": "https://github.com/nazdridoy/kokoro-tts/releases/download/v1.0.0/kokoro-v1.0.onnx",
36-
"voices-v1.0.bin": "https://github.com/nazdridoy/kokoro-tts/releases/download/v1.0.0/voices-v1.0.bin"
35+
model_path: "https://github.com/nazdridoy/kokoro-tts/releases/download/v1.0.0/kokoro-v1.0.onnx",
36+
voices_path: "https://github.com/nazdridoy/kokoro-tts/releases/download/v1.0.0/voices-v1.0.bin"
3737
}
3838

3939
missing_files = []
40-
for filename, download_url in required_files.items():
41-
if not os.path.exists(filename):
42-
missing_files.append((filename, download_url))
40+
for filepath, download_url in required_files.items():
41+
if not os.path.exists(filepath):
42+
missing_files.append((filepath, download_url))
4343

4444
if missing_files:
4545
print("Error: Required model files are missing:")
46-
for filename, download_url in missing_files:
47-
print(f" • {filename}")
46+
for filepath, download_url in missing_files:
47+
print(f" • {filepath}")
4848
print("\nYou can download the missing files using these commands:")
49-
for filename, download_url in missing_files:
49+
for filepath, download_url in missing_files:
5050
print(f" wget {download_url}")
5151
print(f"\nPlace the downloaded files in the same directory where you run the `kokoro-tts` command.")
52+
print(f"Or specify custom paths using --model and --voices options.")
5253
sys.exit(1)
5354

5455
def spinning_wheel(message="Processing...", progress=None):
@@ -162,6 +163,8 @@ def print_usage():
162163
--split-output <dir> Save each chunk as separate file in directory
163164
--format <str> Audio format: wav or mp3 (default: wav)
164165
--debug Show detailed debug information
166+
--model <path> Path to kokoro-v1.0.onnx model file (default: ./kokoro-v1.0.onnx)
167+
--voices <path> Path to voices-v1.0.bin file (default: ./voices-v1.0.bin)
165168
166169
Input formats:
167170
.txt Text file input
@@ -180,13 +183,15 @@ def print_usage():
180183
kokoro-tts --help-voices
181184
kokoro-tts --help-languages
182185
kokoro-tts input.epub --split-output ./chunks/ --debug
186+
kokoro-tts input.txt output.wav --model /path/to/model.onnx --voices /path/to/voices.bin
187+
kokoro-tts input.txt --model ./models/kokoro-v1.0.onnx --voices ./models/voices-v1.0.bin
183188
""")
184189

185-
def print_supported_languages():
190+
def print_supported_languages(model_path="kokoro-v1.0.onnx", voices_path="voices-v1.0.bin"):
186191
"""Print all supported languages from Kokoro."""
187-
check_required_files()
192+
check_required_files(model_path, voices_path)
188193
try:
189-
kokoro = Kokoro("kokoro-v1.0.onnx", "voices-v1.0.bin")
194+
kokoro = Kokoro(model_path, voices_path)
190195
languages = sorted(kokoro.get_languages())
191196
print("\nSupported languages:")
192197
for lang in languages:
@@ -196,11 +201,11 @@ def print_supported_languages():
196201
print(f"Error loading model to get supported languages: {e}")
197202
sys.exit(1)
198203

199-
def print_supported_voices():
204+
def print_supported_voices(model_path="kokoro-v1.0.onnx", voices_path="voices-v1.0.bin"):
200205
"""Print all supported voices from Kokoro."""
201-
check_required_files()
206+
check_required_files(model_path, voices_path)
202207
try:
203-
kokoro = Kokoro("kokoro-v1.0.onnx", "voices-v1.0.bin")
208+
kokoro = Kokoro(model_path, voices_path)
204209
voices = sorted(kokoro.get_voices())
205210
print("\nSupported voices:")
206211
for idx, voice in enumerate(voices):
@@ -803,19 +808,20 @@ def process_chunk_sequential(chunk: str, kokoro: Kokoro, voice: str, speed: floa
803808
return None, None
804809

805810
def convert_text_to_audio(input_file, output_file=None, voice=None, speed=1.0, lang="en-us",
806-
stream=False, split_output=None, format="wav", debug=False, stdin_indicators=None):
811+
stream=False, split_output=None, format="wav", debug=False, stdin_indicators=None,
812+
model_path="kokoro-v1.0.onnx", voices_path="voices-v1.0.bin"):
807813
global stop_spinner
808814

809815
# Define stdin indicators if not provided
810816
if stdin_indicators is None:
811817
stdin_indicators = ['/dev/stdin', '-', 'CONIN$'] # CONIN$ is Windows stdin
812818

813819
# Check for required files first
814-
check_required_files()
820+
check_required_files(model_path, voices_path)
815821

816822
# Load Kokoro model
817823
try:
818-
kokoro = Kokoro("kokoro-v1.0.onnx", "voices-v1.0.bin")
824+
kokoro = Kokoro(model_path, voices_path)
819825

820826
# Validate language after loading model
821827
lang = validate_language(lang, kokoro)
@@ -1232,7 +1238,9 @@ def get_valid_options():
12321238
'--voice',
12331239
'--split-output',
12341240
'--format',
1235-
'--debug' # Add debug option
1241+
'--debug',
1242+
'--model',
1243+
'--voices'
12361244
}
12371245

12381246

@@ -1254,7 +1262,7 @@ def main():
12541262
if arg.startswith('--') and arg not in valid_options:
12551263
unknown_options.append(arg)
12561264
# Skip the next argument if it's a value for an option that takes parameters
1257-
elif arg in {'--speed', '--lang', '--voice', '--split-output', '--format'}:
1265+
elif arg in {'--speed', '--lang', '--voice', '--split-output', '--format', '--model', '--voices'}:
12581266
i += 1
12591267
i += 1
12601268

@@ -1271,17 +1279,38 @@ def main():
12711279
print_usage() # Show the full help text
12721280
sys.exit(1)
12731281

1274-
# Handle help commands first
1275-
if len(sys.argv) == 2:
1276-
if sys.argv[1] in ['-h', '--help']:
1277-
print_usage()
1278-
sys.exit(0)
1279-
elif sys.argv[1] == '--help-languages':
1280-
print_supported_languages()
1281-
sys.exit(0)
1282-
elif sys.argv[1] == '--help-voices':
1283-
print_supported_voices()
1284-
sys.exit(0)
1282+
# Handle help commands first (before argument parsing)
1283+
if '--help' in sys.argv or '-h' in sys.argv:
1284+
print_usage()
1285+
sys.exit(0)
1286+
elif '--help-languages' in sys.argv:
1287+
# For help commands, we need to parse model/voices paths first
1288+
model_path = "kokoro-v1.0.onnx" # default model path
1289+
voices_path = "voices-v1.0.bin" # default voices path
1290+
1291+
# Parse model/voices paths for help commands
1292+
for i, arg in enumerate(sys.argv):
1293+
if arg == '--model' and i + 1 < len(sys.argv):
1294+
model_path = sys.argv[i + 1]
1295+
elif arg == '--voices' and i + 1 < len(sys.argv):
1296+
voices_path = sys.argv[i + 1]
1297+
1298+
print_supported_languages(model_path, voices_path)
1299+
sys.exit(0)
1300+
elif '--help-voices' in sys.argv:
1301+
# For help commands, we need to parse model/voices paths first
1302+
model_path = "kokoro-v1.0.onnx" # default model path
1303+
voices_path = "voices-v1.0.bin" # default voices path
1304+
1305+
# Parse model/voices paths for help commands
1306+
for i, arg in enumerate(sys.argv):
1307+
if arg == '--model' and i + 1 < len(sys.argv):
1308+
model_path = sys.argv[i + 1]
1309+
elif arg == '--voices' and i + 1 < len(sys.argv):
1310+
voices_path = sys.argv[i + 1]
1311+
1312+
print_supported_voices(model_path, voices_path)
1313+
sys.exit(0)
12851314

12861315
# Parse arguments
12871316
input_file = None
@@ -1298,6 +1327,8 @@ def main():
12981327
split_output = None
12991328
format = "wav" # default format
13001329
merge_chunks = '--merge-chunks' in sys.argv
1330+
model_path = "kokoro-v1.0.onnx" # default model path
1331+
voices_path = "voices-v1.0.bin" # default voices path
13011332

13021333
# Parse optional arguments
13031334
for i, arg in enumerate(sys.argv):
@@ -1318,6 +1349,10 @@ def main():
13181349
if format not in ['wav', 'mp3']:
13191350
print("Error: Format must be either 'wav' or 'mp3'")
13201351
sys.exit(1)
1352+
elif arg == '--model' and i + 1 < len(sys.argv):
1353+
model_path = sys.argv[i + 1]
1354+
elif arg == '--voices' and i + 1 < len(sys.argv):
1355+
voices_path = sys.argv[i + 1]
13211356

13221357
# Handle merge chunks operation
13231358
if merge_chunks:
@@ -1349,7 +1384,8 @@ def main():
13491384
# Convert text to audio with debug flag
13501385
convert_text_to_audio(input_file, output_file, voice=voice, stream=stream,
13511386
speed=speed, lang=lang, split_output=split_output,
1352-
format=format, debug=debug, stdin_indicators=stdin_indicators)
1387+
format=format, debug=debug, stdin_indicators=stdin_indicators,
1388+
model_path=model_path, voices_path=voices_path)
13531389

13541390

13551391
if __name__ == '__main__':

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "kokoro-tts"
3-
version = "2.2.2"
3+
version = "2.3.0"
44
description = "A CLI text-to-speech tool using the Kokoro model, supporting multiple languages, voices (with blending), and various input formats including EPUB books and PDF documents."
55
readme = "README.md"
66
requires-python = ">=3.9, <3.13"

uv.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)