29
29
# Module-level flag, initially False. convert_text_to_audio declares it
# `global`; presumably it tells the progress spinner to stop — TODO confirm
# against spinning_wheel.
stop_spinner = False
30
30
# Module-level flag, initially False. NOTE(review): no reader or writer of
# this flag is visible in this excerpt; presumably it signals audio playback
# to stop — confirm against the rest of the file.
stop_audio = False
31
31
32
def check_required_files(model_path="kokoro-v1.0.onnx", voices_path="voices-v1.0.bin"):
    """Check that the model and voices files exist; exit with guidance if not.

    Args:
        model_path: Path where the Kokoro ONNX model file is expected.
        voices_path: Path where the voices data file is expected.

    Returns:
        None when both paths exist.

    Raises:
        SystemExit: With status 1 when at least one path does not exist,
            after printing the missing paths and a download command for
            each of them.
    """
    # A list of (path, url) pairs rather than a dict keyed by path: the paths
    # are caller-supplied, and identical values for both arguments would
    # collapse into a single dict entry and drop one of the download URLs.
    required_files = [
        (model_path, "https://github.com/nazdridoy/kokoro-tts/releases/download/v1.0.0/kokoro-v1.0.onnx"),
        (voices_path, "https://github.com/nazdridoy/kokoro-tts/releases/download/v1.0.0/voices-v1.0.bin"),
    ]

    missing_files = [
        (filepath, download_url)
        for filepath, download_url in required_files
        if not os.path.exists(filepath)
    ]
    if not missing_files:
        return

    print("Error: Required model files are missing:")
    for filepath, _ in missing_files:
        print(f" • {filepath} ")
    print("\n You can download the missing files using these commands:")
    for _, download_url in missing_files:
        print(f" wget {download_url} ")
    print("\n Place the downloaded files in the same directory where you run the `kokoro-tts` command.")
    print("Or specify custom paths using --model and --voices options.")
    sys.exit(1)
53
54
54
55
def spinning_wheel (message = "Processing..." , progress = None ):
@@ -162,6 +163,8 @@ def print_usage():
162
163
--split-output <dir> Save each chunk as separate file in directory
163
164
--format <str> Audio format: wav or mp3 (default: wav)
164
165
--debug Show detailed debug information
166
+ --model <path> Path to kokoro-v1.0.onnx model file (default: ./kokoro-v1.0.onnx)
167
+ --voices <path> Path to voices-v1.0.bin file (default: ./voices-v1.0.bin)
165
168
166
169
Input formats:
167
170
.txt Text file input
@@ -180,13 +183,15 @@ def print_usage():
180
183
kokoro-tts --help-voices
181
184
kokoro-tts --help-languages
182
185
kokoro-tts input.epub --split-output ./chunks/ --debug
186
+ kokoro-tts input.txt output.wav --model /path/to/model.onnx --voices /path/to/voices.bin
187
+ kokoro-tts input.txt --model ./models/kokoro-v1.0.onnx --voices ./models/voices-v1.0.bin
183
188
""" )
184
189
185
- def print_supported_languages ():
190
+ def print_supported_languages (model_path = "kokoro-v1.0.onnx" , voices_path = "voices-v1.0.bin" ):
186
191
"""Print all supported languages from Kokoro."""
187
- check_required_files ()
192
+ check_required_files (model_path , voices_path )
188
193
try :
189
- kokoro = Kokoro ("kokoro-v1.0.onnx" , "voices-v1.0.bin" )
194
+ kokoro = Kokoro (model_path , voices_path )
190
195
languages = sorted (kokoro .get_languages ())
191
196
print ("\n Supported languages:" )
192
197
for lang in languages :
@@ -196,11 +201,11 @@ def print_supported_languages():
196
201
print (f"Error loading model to get supported languages: { e } " )
197
202
sys .exit (1 )
198
203
199
- def print_supported_voices ():
204
+ def print_supported_voices (model_path = "kokoro-v1.0.onnx" , voices_path = "voices-v1.0.bin" ):
200
205
"""Print all supported voices from Kokoro."""
201
- check_required_files ()
206
+ check_required_files (model_path , voices_path )
202
207
try :
203
- kokoro = Kokoro ("kokoro-v1.0.onnx" , "voices-v1.0.bin" )
208
+ kokoro = Kokoro (model_path , voices_path )
204
209
voices = sorted (kokoro .get_voices ())
205
210
print ("\n Supported voices:" )
206
211
for idx , voice in enumerate (voices ):
@@ -803,19 +808,20 @@ def process_chunk_sequential(chunk: str, kokoro: Kokoro, voice: str, speed: floa
803
808
return None , None
804
809
805
810
def convert_text_to_audio (input_file , output_file = None , voice = None , speed = 1.0 , lang = "en-us" ,
806
- stream = False , split_output = None , format = "wav" , debug = False , stdin_indicators = None ):
811
+ stream = False , split_output = None , format = "wav" , debug = False , stdin_indicators = None ,
812
+ model_path = "kokoro-v1.0.onnx" , voices_path = "voices-v1.0.bin" ):
807
813
global stop_spinner
808
814
809
815
# Define stdin indicators if not provided
810
816
if stdin_indicators is None :
811
817
stdin_indicators = ['/dev/stdin' , '-' , 'CONIN$' ] # CONIN$ is Windows stdin
812
818
813
819
# Check for required files first
814
- check_required_files ()
820
+ check_required_files (model_path , voices_path )
815
821
816
822
# Load Kokoro model
817
823
try :
818
- kokoro = Kokoro ("kokoro-v1.0.onnx" , "voices-v1.0.bin" )
824
+ kokoro = Kokoro (model_path , voices_path )
819
825
820
826
# Validate language after loading model
821
827
lang = validate_language (lang , kokoro )
@@ -1232,7 +1238,9 @@ def get_valid_options():
1232
1238
'--voice' ,
1233
1239
'--split-output' ,
1234
1240
'--format' ,
1235
- '--debug' # Add debug option
1241
+ '--debug' ,
1242
+ '--model' ,
1243
+ '--voices'
1236
1244
}
1237
1245
1238
1246
@@ -1254,7 +1262,7 @@ def main():
1254
1262
if arg .startswith ('--' ) and arg not in valid_options :
1255
1263
unknown_options .append (arg )
1256
1264
# Skip the next argument if it's a value for an option that takes parameters
1257
- elif arg in {'--speed' , '--lang' , '--voice' , '--split-output' , '--format' }:
1265
+ elif arg in {'--speed' , '--lang' , '--voice' , '--split-output' , '--format' , '--model' , '--voices' }:
1258
1266
i += 1
1259
1267
i += 1
1260
1268
@@ -1271,17 +1279,38 @@ def main():
1271
1279
print_usage () # Show the full help text
1272
1280
sys .exit (1 )
1273
1281
1274
- # Handle help commands first
1275
- if len (sys .argv ) == 2 :
1276
- if sys .argv [1 ] in ['-h' , '--help' ]:
1277
- print_usage ()
1278
- sys .exit (0 )
1279
- elif sys .argv [1 ] == '--help-languages' :
1280
- print_supported_languages ()
1281
- sys .exit (0 )
1282
- elif sys .argv [1 ] == '--help-voices' :
1283
- print_supported_voices ()
1284
- sys .exit (0 )
1282
+ # Handle help commands first (before argument parsing)
1283
+ if '--help' in sys .argv or '-h' in sys .argv :
1284
+ print_usage ()
1285
+ sys .exit (0 )
1286
+ elif '--help-languages' in sys .argv :
1287
+ # For help commands, we need to parse model/voices paths first
1288
+ model_path = "kokoro-v1.0.onnx" # default model path
1289
+ voices_path = "voices-v1.0.bin" # default voices path
1290
+
1291
+ # Parse model/voices paths for help commands
1292
+ for i , arg in enumerate (sys .argv ):
1293
+ if arg == '--model' and i + 1 < len (sys .argv ):
1294
+ model_path = sys .argv [i + 1 ]
1295
+ elif arg == '--voices' and i + 1 < len (sys .argv ):
1296
+ voices_path = sys .argv [i + 1 ]
1297
+
1298
+ print_supported_languages (model_path , voices_path )
1299
+ sys .exit (0 )
1300
+ elif '--help-voices' in sys .argv :
1301
+ # For help commands, we need to parse model/voices paths first
1302
+ model_path = "kokoro-v1.0.onnx" # default model path
1303
+ voices_path = "voices-v1.0.bin" # default voices path
1304
+
1305
+ # Parse model/voices paths for help commands
1306
+ for i , arg in enumerate (sys .argv ):
1307
+ if arg == '--model' and i + 1 < len (sys .argv ):
1308
+ model_path = sys .argv [i + 1 ]
1309
+ elif arg == '--voices' and i + 1 < len (sys .argv ):
1310
+ voices_path = sys .argv [i + 1 ]
1311
+
1312
+ print_supported_voices (model_path , voices_path )
1313
+ sys .exit (0 )
1285
1314
1286
1315
# Parse arguments
1287
1316
input_file = None
@@ -1298,6 +1327,8 @@ def main():
1298
1327
split_output = None
1299
1328
format = "wav" # default format
1300
1329
merge_chunks = '--merge-chunks' in sys .argv
1330
+ model_path = "kokoro-v1.0.onnx" # default model path
1331
+ voices_path = "voices-v1.0.bin" # default voices path
1301
1332
1302
1333
# Parse optional arguments
1303
1334
for i , arg in enumerate (sys .argv ):
@@ -1318,6 +1349,10 @@ def main():
1318
1349
if format not in ['wav' , 'mp3' ]:
1319
1350
print ("Error: Format must be either 'wav' or 'mp3'" )
1320
1351
sys .exit (1 )
1352
+ elif arg == '--model' and i + 1 < len (sys .argv ):
1353
+ model_path = sys .argv [i + 1 ]
1354
+ elif arg == '--voices' and i + 1 < len (sys .argv ):
1355
+ voices_path = sys .argv [i + 1 ]
1321
1356
1322
1357
# Handle merge chunks operation
1323
1358
if merge_chunks :
@@ -1349,7 +1384,8 @@ def main():
1349
1384
# Convert text to audio with debug flag
1350
1385
convert_text_to_audio (input_file , output_file , voice = voice , stream = stream ,
1351
1386
speed = speed , lang = lang , split_output = split_output ,
1352
- format = format , debug = debug , stdin_indicators = stdin_indicators )
1387
+ format = format , debug = debug , stdin_indicators = stdin_indicators ,
1388
+ model_path = model_path , voices_path = voices_path )
1353
1389
1354
1390
1355
1391
if __name__ == '__main__' :
0 commit comments