|
80 | 80 | "!python ./models.py"
|
81 | 81 | ]
|
82 | 82 | },
|
83 |
| - { |
84 |
| - "cell_type": "code", |
85 |
| - "source": [ |
86 |
| - "#@title youtube downloader for separation\n", |
87 |
| - "import yt_dlp\n", |
88 |
| - "import os\n", |
89 |
| - "\n", |
90 |
| - "def download_wav(video_url, output_dir):\n", |
91 |
| - " ydl_opts = {\n", |
92 |
| - " 'format': 'bestaudio/best',\n", |
93 |
| - " 'outtmpl': os.path.join(output_dir, '%(title)s.%(ext)s'),\n", |
94 |
| - " 'noplaylist': True, # Download only a single video, not a playlist\n", |
95 |
| - " 'extract_audio': True,\n", |
96 |
| - " 'audio_format': 'wav', # Ensure WAV format\n", |
97 |
| - " 'postprocessors': [{\n", |
98 |
| - " 'key': 'FFmpegExtractAudio',\n", |
99 |
| - " 'preferredcodec': 'wav',\n", |
100 |
| - " }]\n", |
101 |
| - " }\n", |
102 |
| - "\n", |
103 |
| - " try:\n", |
104 |
| - " with yt_dlp.YoutubeDL(ydl_opts) as ydl:\n", |
105 |
| - " ydl.download([video_url])\n", |
106 |
| - " print(f\"Download completed. Audio saved in {output_dir}\")\n", |
107 |
| - " except yt_dlp.utils.DownloadError as e:\n", |
108 |
| - " print(f\"An error occurred: {e}\")\n", |
109 |
| - "\n", |
110 |
| - "# Example usage\n", |
111 |
| - "video_url = 'Video_URL' #@param {type:\"string\"}\n", |
112 |
| - " # Replace with your YouTube video URL\n", |
113 |
| - "output_directory = '/content/yt_dlp' #@param {type:\"string\"}\n", |
114 |
| - " # customize your output directory\n", |
115 |
| - "\n", |
116 |
| - "# Create the output directory if it doesn't exist.\n", |
117 |
| - "os.makedirs(output_directory, exist_ok=True)\n", |
118 |
| - "\n", |
119 |
| - "download_wav(video_url, output_directory)" |
120 |
| - ], |
121 |
| - "metadata": { |
122 |
| - "cellView": "form", |
123 |
| - "id": "hDYUwhTXJI3_" |
124 |
| - }, |
125 |
| - "execution_count": null, |
126 |
| - "outputs": [] |
127 |
| - }, |
128 | 83 | {
|
129 | 84 | "cell_type": "code",
|
130 | 85 | "source": [
|
131 | 86 | "#@title audio separator for inference\n",
|
132 | 87 | "\n",
|
133 | 88 | "\n",
|
134 | 89 | "\n",
|
135 |
| - "from audio_separator.separator import Separator\n", |
136 |
| - "\n", |
137 |
| - "aud_input = \"Input_aud\" #@param {type:\"string\"}\n", |
138 |
| - "\n", |
139 |
| - "output_dir = \"/content/separated\" #@param {type:\"string\"}\n", |
140 |
| - "\n", |
141 |
| - "# Initialize the Separator class (with optional configuration properties, below)\n", |
142 |
| - "separator = Separator(output_dir=output_dir, output_format=\"wav\")\n", |
143 |
| - "\n", |
144 |
| - "separator.load_model()\n", |
| 90 | + "import os\n", |
| 91 | + "import glob\n", |
| 92 | + "import yt_dlp\n", |
145 | 93 | "\n",
|
146 |
| - "output_names = {\n", |
147 |
| - " \"Vocals\": \"vocals_output\",\n", |
148 |
| - " \"Instrumental\": \"instrumental_output\",\n", |
149 |
| - "}\n", |
150 |
| - "output_files = separator.separate(aud_input, output_names)\n", |
151 |
| - "print(f\"Separation complete!\")" |
| 94 | + "def downloader(url):\n", |
| 95 | + " ydl_opts = {\n", |
| 96 | + " 'format': 'bestaudio/best',\n", |
| 97 | + " 'postprocessors': [{\n", |
| 98 | + " 'key': 'FFmpegExtractAudio',\n", |
| 99 | + " 'preferredcodec': 'wav',\n", |
| 100 | + " 'preferredquality': '192',\n", |
| 101 | + " }],\n", |
| 102 | + " 'outtmpl': os.path.join(f'{path}/temp', '%(title)s.%(ext)s'),\n", |
| 103 | + " }\n", |
| 104 | + "\n", |
| 105 | + " with yt_dlp.YoutubeDL(ydl_opts) as ydl:\n", |
| 106 | + " ydl.download([url])\n", |
| 107 | + "\n", |
| 108 | + "def checker(url):\n", |
| 109 | + " return \"http\" in url\n", |
| 110 | + "\n", |
| 111 | + "def uvr_cli(audio_input, output_folder, model, output_format, segment_size, overlap, batch_size, override_model_segment_size, use_autocast, extensions):\n", |
| 112 | + " found_files = []\n", |
| 113 | + "\n", |
| 114 | + " dictmodel = {\n", |
| 115 | + " 'BS-Roformer-Viperx-1297': 'model_bs_roformer_ep_317_sdr_12.9755.ckpt',\n", |
| 116 | + " 'BS-Roformer-Viperx-1296': 'model_bs_roformer_ep_368_sdr_12.9628.ckpt',\n", |
| 117 | + " 'BS-Roformer-Viperx-1053': 'model_bs_roformer_ep_937_sdr_10.5309.ckpt',\n", |
| 118 | + " 'Mel-Roformer-Viperx-1143': 'model_mel_band_roformer_ep_3005_sdr_11.4360.ckpt',\n", |
| 119 | + " 'BS-Roformer-De-Reverb': 'deverb_bs_roformer_8_384dim_10depth.ckpt',\n", |
| 120 | + " 'Mel-Roformer-Crowd-Aufr33-Viperx': 'mel_band_roformer_crowd_aufr33_viperx_sdr_8.7144.ckpt',\n", |
| 121 | + " 'Mel-Roformer-Denoise-Aufr33': 'denoise_mel_band_roformer_aufr33_sdr_27.9959.ckpt',\n", |
| 122 | + " 'Mel-Roformer-Denoise-Aufr33-Aggr' : 'denoise_mel_band_roformer_aufr33_aggr_sdr_27.9768.ckpt',\n", |
| 123 | + " 'Mel-Roformer-Karaoke-Aufr33-Viperx': 'mel_band_roformer_karaoke_aufr33_viperx_sdr_10.1956.ckpt',\n", |
| 124 | + " 'MelBand Roformer | Vocals by Kimberley Jensen' : 'vocals_mel_band_roformer.ckpt',\n", |
| 125 | + " 'MelBand Roformer Kim | FT by unwa' : 'mel_band_roformer_kim_ft_unwa.ckpt',\n", |
| 126 | + " 'MelBand Roformer Kim | Inst V1 by Unwa' : 'melband_roformer_inst_v1.ckpt',\n", |
| 127 | + " 'MelBand Roformer Kim | Inst V1 (E) by Unwa' : 'melband_roformer_inst_v1e.ckpt',\n", |
| 128 | + " 'MelBand Roformer Kim | Inst V2 by Unwa' : 'melband_roformer_inst_v2.ckpt',\n", |
| 129 | + " 'MelBand Roformer Kim | InstVoc Duality V1 by Unwa' : 'melband_roformer_instvoc_duality_v1.ckpt',\n", |
| 130 | + " 'MelBand Roformer Kim | InstVoc Duality V2 by Unwa' : 'melband_roformer_instvox_duality_v2.ckpt',\n", |
| 131 | + " 'MelBand Roformer | De-Reverb by anvuew' : 'dereverb_mel_band_roformer_anvuew_sdr_19.1729.ckpt',\n", |
| 132 | + " 'MelBand Roformer | De-Reverb Less Aggressive by anvuew' : 'dereverb_mel_band_roformer_less_aggressive_anvuew_sdr_18.8050.ckpt',\n", |
| 133 | + " 'MelBand Roformer | De-Reverb-Echo by Sucial' : 'dereverb-echo_mel_band_roformer_sdr_10.0169.ckpt',\n", |
| 134 | + " 'MelBand Roformer | De-Reverb-Echo V2 by Sucial' : 'dereverb-echo_mel_band_roformer_sdr_13.4843_v2.ckpt',\n", |
| 135 | + " 'MelBand Roformer Kim | SYHFT by SYH99999' : 'MelBandRoformerSYHFT.ckpt',\n", |
| 136 | + " 'MelBand Roformer Kim | SYHFT V2 by SYH99999' : 'MelBandRoformerSYHFTV2.ckpt',\n", |
| 137 | + " 'MelBand Roformer Kim | SYHFT V2.5 by SYH99999' : 'MelBandRoformerSYHFTV2.5.ckpt',\n", |
| 138 | + " 'MelBand Roformer Kim | SYHFT V3 by SYH99999' : 'MelBandRoformerSYHFTV3Epsilon.ckpt',\n", |
| 139 | + " 'MelBand Roformer Kim | Big SYHFT V1 by SYH99999' : 'MelBandRoformerBigSYHFTV1.ckpt',\n", |
| 140 | + " 'MelBand Roformer Kim | Big Beta 4 FT by unwa' : 'melband_roformer_big_beta4.ckpt',\n", |
| 141 | + " 'MelBand Roformer Kim | Big Beta 5e FT by unwa' : 'melband_roformer_big_beta5e.ckpt',\n", |
| 142 | + " 'BS Roformer | Chorus Male-Female by Sucial' : 'model_chorus_bs_roformer_ep_267_sdr_24.1275.ckpt',\n", |
| 143 | + " 'MelBand Roformer | Aspiration by Sucial' : 'aspiration_mel_band_roformer_sdr_18.9845.ckpt',\n", |
| 144 | + " 'MelBand Roformer | Aspiration Less Aggressive by Sucial' : 'aspiration_mel_band_roformer_less_aggr_sdr_18.1201.ckpt',\n", |
| 145 | + " 'MelBand Roformer | Bleed Suppressor V1 by unwa-97chris' : 'mel_band_roformer_bleed_suppressor_v1.ckpt'\n", |
| 146 | + " }\n", |
| 147 | + " roformer_model = dictmodel[model]\n", |
| 148 | + "\n", |
| 149 | + " if checker(audio_input):\n", |
| 150 | + " downloader(audio_input)\n", |
| 151 | + " audio_input = f\"{path}/temp\"\n", |
| 152 | + "\n", |
| 153 | + " for audio_files in os.listdir(audio_input):\n", |
| 154 | + " if audio_files.endswith(extensions):\n", |
| 155 | + " found_files.append(audio_files)\n", |
| 156 | + "\n", |
| 157 | + " total_files = len(found_files)\n", |
| 158 | + "\n", |
| 159 | + " if total_files == 0:\n", |
| 160 | + " print(\"No valid audio files found.\")\n", |
| 161 | + " else:\n", |
| 162 | + " print(f\"{total_files} audio files found\")\n", |
| 163 | + "\n", |
| 164 | + " found_files.sort()\n", |
| 165 | + "\n", |
| 166 | + " for audio_files in found_files:\n", |
| 167 | + " file_path = os.path.join(audio_input, audio_files)\n", |
| 168 | + " prompt = f'audio-separator \"{file_path}\" --model_filename {roformer_model} --output_dir={output_folder} --output_format={output_format} --mdxc_segment_size={segment_size} --mdxc_overlap={overlap} --mdxc_batch_size={batch_size} --model_file_dir=./models'\n", |
| 169 | + " if override_model_segment_size:\n", |
| 170 | + " prompt += \" --mdxc_override_model_segment_size\"\n", |
| 171 | + " if use_autocast:\n", |
| 172 | + " prompt += \" --use_autocast\"\n", |
| 173 | + " !$prompt\n", |
| 174 | + "\n", |
| 175 | + " if audio_input == f\"{path}/temp\":\n", |
| 176 | + " temp_files = glob.glob(f\"{path}/temp/*\")\n", |
| 177 | + " for file in temp_files:\n", |
| 178 | + " os.remove(file)\n", |
| 179 | + "\n", |
| 180 | + "#@markdown Input folder containing the audio files, or a link:\n", |
| 181 | + "audio_input = \"\" #@param {type:\"string\"}\n", |
| 182 | + "#@markdown You can paste a link to a video/audio from many sites; see the complete list [here](https://github.com/yt-dlp/yt-dlp/blob/master/supportedsites.md).\n", |
| 183 | + "\n", |
| 184 | + "#@markdown Output path for audio files:\n", |
| 185 | + "output_folder = \"/content/Vocales\" #@param {type:\"string\"}\n", |
| 186 | + "#@markdown Select the model:\n", |
| 187 | + "model = \"BS-Roformer-Viperx-1297\" #@param [\"BS-Roformer-Viperx-1297\", \"BS-Roformer-Viperx-1296\", \"BS-Roformer-Viperx-1053\", \"Mel-Roformer-Viperx-1143\", \"BS-Roformer-De-Reverb\", \"Mel-Roformer-Crowd-Aufr33-Viperx\", \"Mel-Roformer-Denoise-Aufr33\", \"Mel-Roformer-Denoise-Aufr33-Aggr\", \"Mel-Roformer-Karaoke-Aufr33-Viperx\", \"MelBand Roformer | Vocals by Kimberley Jensen\", \"MelBand Roformer Kim | FT by unwa\", \"MelBand Roformer Kim | Inst V1 by Unwa\", \"MelBand Roformer Kim | Inst V1 (E) by Unwa\", \"MelBand Roformer Kim | Inst V2 by Unwa\", \"MelBand Roformer Kim | InstVoc Duality V1 by Unwa\", \"MelBand Roformer Kim | InstVoc Duality V2 by Unwa\", \"MelBand Roformer | De-Reverb by anvuew\", \"MelBand Roformer | De-Reverb Less Aggressive by anvuew\", \"MelBand Roformer | De-Reverb-Echo by Sucial\", \"MelBand Roformer | De-Reverb-Echo V2 by Sucial\", \"MelBand Roformer Kim | SYHFT by SYH99999\", \"MelBand Roformer Kim | SYHFT V2 by SYH99999\", \"MelBand Roformer Kim | SYHFT V2.5 by SYH99999\", \"MelBand Roformer Kim | SYHFT V3 by SYH99999\", \"MelBand Roformer Kim | Big SYHFT V1 by SYH99999\", \"MelBand Roformer Kim | Big Beta 4 FT by unwa\", \"MelBand Roformer Kim | Big Beta 5e FT by unwa\", \"BS Roformer | Chorus Male-Female by Sucial\", \"MelBand Roformer | Aspiration by Sucial\", \"MelBand Roformer | Aspiration Less Aggressive by Sucial\", \"MelBand Roformer | Bleed Suppressor V1 by unwa-97chris\"]\n", |
| 188 | + "#@markdown Select the output format:\n", |
| 189 | + "output_format = \"wav\" #@param [\"wav\", \"flac\", \"mp3\", \"ogg\", \"opus\", \"m4a\", \"aiff\", \"ac3\"]\n", |
| 190 | + "#@markdown Segment size: larger values consume more resources, but may give better results.\n", |
| 191 | + "segment_size = 256 #@param {type:\"slider\", min:32, max:4000, step:32}\n", |
| 192 | + "#@markdown Amount of overlap between prediction windows.\n", |
| 193 | + "overlap = 8 #@param {type:\"slider\", min:2, max:10, step:1}\n", |
| 194 | + "#@markdown Batch size: larger values consume more RAM but may process slightly faster.\n", |
| 195 | + "batch_size = 1 #@param {type:\"slider\", min:1, max:16, step:1}\n", |
| 196 | + "#@markdown Override model default segment size instead of using the model default value.\n", |
| 197 | + "override_model_segment_size = False #@param {type:\"boolean\"}\n", |
| 198 | + "#@markdown Flag to use PyTorch autocast for faster inference. Do not use for CPU inference.\n", |
| 199 | + "use_autocast = True #@param {type:\"boolean\"}\n", |
| 200 | + "extensions = (\".wav\", \".flac\", \".mp3\", \".ogg\", \".opus\", \".m4a\", \".aiff\", \".ac3\")\n", |
| 201 | + "\n", |
| 202 | + "uvr_cli(audio_input, output_folder, model, output_format, segment_size, overlap, batch_size, override_model_segment_size, use_autocast, extensions)" |
152 | 203 | ],
|
153 | 204 | "metadata": {
|
154 | 205 | "cellView": "form",
|
|
227 | 278 | "\n",
|
228 | 279 | "#@title ## Inference\n",
|
229 | 280 | "import os\n",
|
230 |
| - "main_dir = \"/content/Harmonify\"\n", |
| 281 | + "main_dir = os.getcwd()\n", |
231 | 282 | "os.chdir(main_dir)\n",
|
232 | 283 | "from lib.infer import infer_audio\n",
|
233 | 284 | "from google.colab import files\n",
|
|
0 commit comments