Created using Colab

TheNeodev · TheNeodev · commit 240c81c48592 · 2025-01-30T05:51:44.000-08:00
diff --git a/Advanced-RVC.ipynb b/Advanced-RVC.ipynb
@@ -80,75 +80,126 @@
         "!python ./models.py"
       ]
     },
-    {
-      "cell_type": "code",
-      "source": [
-        "#@title youtube downloader for separation\n",
-        "import yt_dlp\n",
-        "import os\n",
-        "\n",
-        "def download_wav(video_url, output_dir):\n",
-        "  ydl_opts = {\n",
-        "      'format': 'bestaudio/best',\n",
-        "      'outtmpl': os.path.join(output_dir, '%(title)s.%(ext)s'),\n",
-        "      'noplaylist': True,  # Download only a single video, not a playlist\n",
-        "      'extract_audio': True,\n",
-        "      'audio_format': 'wav', # Ensure WAV format\n",
-        "      'postprocessors': [{\n",
-        "          'key': 'FFmpegExtractAudio',\n",
-        "          'preferredcodec': 'wav',\n",
-        "      }]\n",
-        "  }\n",
-        "\n",
-        "  try:\n",
-        "      with yt_dlp.YoutubeDL(ydl_opts) as ydl:\n",
-        "          ydl.download([video_url])\n",
-        "          print(f\"Download completed. Audio saved in {output_dir}\")\n",
-        "  except yt_dlp.utils.DownloadError as e:\n",
-        "      print(f\"An error occurred: {e}\")\n",
-        "\n",
-        "# Example usage\n",
-        "video_url = 'Video_URL' #@param {type:\"string\"}\n",
-        " # Replace with your YouTube video URL\n",
-        "output_directory = '/content/yt_dlp' #@param {type:\"string\"}\n",
-        " # customize your output directory\n",
-        "\n",
-        "# Create the output directory if it doesn't exist.\n",
-        "os.makedirs(output_directory, exist_ok=True)\n",
-        "\n",
-        "download_wav(video_url, output_directory)"
-      ],
-      "metadata": {
-        "cellView": "form",
-        "id": "hDYUwhTXJI3_"
-      },
-      "execution_count": null,
-      "outputs": []
-    },
     {
       "cell_type": "code",
       "source": [
         "#@title audio separator for inference\n",
         "\n",
         "\n",
         "\n",
-        "from audio_separator.separator import Separator\n",
-        "\n",
-        "aud_input = \"Input_aud\"  #@param {type:\"string\"}\n",
-        "\n",
-        "output_dir = \"/content/separated\"  #@param {type:\"string\"}\n",
-        "\n",
-        "# Initialize the Separator class (with optional configuration properties, below)\n",
-        "separator = Separator(output_dir=output_dir, output_format=\"wav\")\n",
-        "\n",
-        "separator.load_model()\n",
+        "import os\n",
+        "import glob\n",
+        "import yt_dlp\n",
         "\n",
-        "output_names = {\n",
-        "    \"Vocals\": \"vocals_output\",\n",
-        "    \"Instrumental\": \"instrumental_output\",\n",
-        "}\n",
-        "output_files = separator.separate(aud_input, output_names)\n",
-        "print(f\"Separation complete!\")"
+        "def downloader(url):\n",
+        "    \"\"\"Download the audio of `url` with yt-dlp into `{path}/temp`.\n",
+        "\n",
+        "    The FFmpegExtractAudio post-processor is configured with `wav` as the\n",
+        "    preferred codec, and the output file is named after the media title.\n",
+        "    NOTE(review): `path` is not defined in this cell; it is read as a global,\n",
+        "    presumably set by an earlier cell -- confirm.\n",
+        "    \"\"\"\n",
+        "    temp_dir = f'{path}/temp'\n",
+        "    output_template = os.path.join(temp_dir, '%(title)s.%(ext)s')\n",
+        "    wav_extractor = {\n",
+        "        'key': 'FFmpegExtractAudio',\n",
+        "        'preferredcodec': 'wav',\n",
+        "        'preferredquality': '192',\n",
+        "    }\n",
+        "    options = {\n",
+        "        'format': 'bestaudio/best',\n",
+        "        'postprocessors': [wav_extractor],\n",
+        "        'outtmpl': output_template,\n",
+        "    }\n",
+        "\n",
+        "    with yt_dlp.YoutubeDL(options) as session:\n",
+        "        session.download([url])\n",
+        "\n",
+        "def checker(url):\n",
+        "    \"\"\"Return True when `url` is an http(s) link rather than a local path.\n",
+        "\n",
+        "    Only the scheme prefix is inspected (case-insensitively, ignoring\n",
+        "    surrounding whitespace), so a local path that merely contains \"http\",\n",
+        "    such as \"/content/http_songs\", is not mistaken for a link.\n",
+        "    \"\"\"\n",
+        "    return url.strip().lower().startswith((\"http://\", \"https://\"))\n",
+        "\n",
+        "def uvr_cli(audio_input, output_folder, model, output_format, segment_size, overlap, batch_size, override_model_segment_size, use_autocast, extensions):\n",
+        "    \"\"\"Run the `audio-separator` CLI on every matching audio file of an input.\n",
+        "\n",
+        "    Args:\n",
+        "        audio_input: A folder of audio files, a single audio file, or an\n",
+        "            http(s) link. Links are first downloaded with `downloader`\n",
+        "            into `{path}/temp`, which then becomes the input folder.\n",
+        "        output_folder: Value passed to `--output_dir`.\n",
+        "        model: Display name; must be a key of the model table below.\n",
+        "        output_format: Value passed to `--output_format`.\n",
+        "        segment_size: Value passed to `--mdxc_segment_size`.\n",
+        "        overlap: Value passed to `--mdxc_overlap`.\n",
+        "        batch_size: Value passed to `--mdxc_batch_size`.\n",
+        "        override_model_segment_size: When truthy, appends\n",
+        "            `--mdxc_override_model_segment_size`.\n",
+        "        use_autocast: When truthy, appends `--use_autocast`.\n",
+        "        extensions: File suffix, or iterable of suffixes, selecting which\n",
+        "            files are processed (matched case-insensitively).\n",
+        "\n",
+        "    Raises:\n",
+        "        KeyError: If `model` is not a key of the model table.\n",
+        "\n",
+        "    NOTE(review): `path` is not defined in this cell; it is read as a global,\n",
+        "    presumably set by an earlier cell -- confirm.\n",
+        "    \"\"\"\n",
+        "    import shlex  # function-scope import: only used to quote shell arguments below\n",
+        "\n",
+        "    dictmodel = {\n",
+        "        'BS-Roformer-Viperx-1297': 'model_bs_roformer_ep_317_sdr_12.9755.ckpt',\n",
+        "        'BS-Roformer-Viperx-1296': 'model_bs_roformer_ep_368_sdr_12.9628.ckpt',\n",
+        "        'BS-Roformer-Viperx-1053': 'model_bs_roformer_ep_937_sdr_10.5309.ckpt',\n",
+        "        'Mel-Roformer-Viperx-1143': 'model_mel_band_roformer_ep_3005_sdr_11.4360.ckpt',\n",
+        "        'BS-Roformer-De-Reverb': 'deverb_bs_roformer_8_384dim_10depth.ckpt',\n",
+        "        'Mel-Roformer-Crowd-Aufr33-Viperx': 'mel_band_roformer_crowd_aufr33_viperx_sdr_8.7144.ckpt',\n",
+        "        'Mel-Roformer-Denoise-Aufr33': 'denoise_mel_band_roformer_aufr33_sdr_27.9959.ckpt',\n",
+        "        'Mel-Roformer-Denoise-Aufr33-Aggr' : 'denoise_mel_band_roformer_aufr33_aggr_sdr_27.9768.ckpt',\n",
+        "        'Mel-Roformer-Karaoke-Aufr33-Viperx': 'mel_band_roformer_karaoke_aufr33_viperx_sdr_10.1956.ckpt',\n",
+        "        'MelBand Roformer | Vocals by Kimberley Jensen' : 'vocals_mel_band_roformer.ckpt',\n",
+        "        'MelBand Roformer Kim | FT by unwa' : 'mel_band_roformer_kim_ft_unwa.ckpt',\n",
+        "        'MelBand Roformer Kim | Inst V1 by Unwa' : 'melband_roformer_inst_v1.ckpt',\n",
+        "        'MelBand Roformer Kim | Inst V1 (E) by Unwa' : 'melband_roformer_inst_v1e.ckpt',\n",
+        "        'MelBand Roformer Kim | Inst V2 by Unwa' : 'melband_roformer_inst_v2.ckpt',\n",
+        "        'MelBand Roformer Kim | InstVoc Duality V1 by Unwa' : 'melband_roformer_instvoc_duality_v1.ckpt',\n",
+        "        'MelBand Roformer Kim | InstVoc Duality V2 by Unwa' : 'melband_roformer_instvox_duality_v2.ckpt',\n",
+        "        'MelBand Roformer | De-Reverb by anvuew' : 'dereverb_mel_band_roformer_anvuew_sdr_19.1729.ckpt',\n",
+        "        'MelBand Roformer | De-Reverb Less Aggressive by anvuew' : 'dereverb_mel_band_roformer_less_aggressive_anvuew_sdr_18.8050.ckpt',\n",
+        "        'MelBand Roformer | De-Reverb-Echo by Sucial' : 'dereverb-echo_mel_band_roformer_sdr_10.0169.ckpt',\n",
+        "        'MelBand Roformer | De-Reverb-Echo V2 by Sucial' : 'dereverb-echo_mel_band_roformer_sdr_13.4843_v2.ckpt',\n",
+        "        'MelBand Roformer Kim | SYHFT by SYH99999' : 'MelBandRoformerSYHFT.ckpt',\n",
+        "        'MelBand Roformer Kim | SYHFT V2 by SYH99999' : 'MelBandRoformerSYHFTV2.ckpt',\n",
+        "        'MelBand Roformer Kim | SYHFT V2.5 by SYH99999' : 'MelBandRoformerSYHFTV2.5.ckpt',\n",
+        "        'MelBand Roformer Kim | SYHFT V3 by SYH99999' : 'MelBandRoformerSYHFTV3Epsilon.ckpt',\n",
+        "        'MelBand Roformer Kim | Big SYHFT V1 by SYH99999' : 'MelBandRoformerBigSYHFTV1.ckpt',\n",
+        "        'MelBand Roformer Kim | Big Beta 4 FT by unwa' : 'melband_roformer_big_beta4.ckpt',\n",
+        "        'MelBand Roformer Kim | Big Beta 5e FT by unwa' : 'melband_roformer_big_beta5e.ckpt',\n",
+        "        'BS Roformer | Chorus Male-Female by Sucial' : 'model_chorus_bs_roformer_ep_267_sdr_24.1275.ckpt',\n",
+        "        'MelBand Roformer | Aspiration by Sucial' : 'aspiration_mel_band_roformer_sdr_18.9845.ckpt',\n",
+        "        'MelBand Roformer | Aspiration Less Aggressive by Sucial' : 'aspiration_mel_band_roformer_less_aggr_sdr_18.1201.ckpt',\n",
+        "        'MelBand Roformer | Bleed Suppressor V1 by unwa-97chris' : 'mel_band_roformer_bleed_suppressor_v1.ckpt'\n",
+        "    }\n",
+        "    roformer_model = dictmodel[model]\n",
+        "\n",
+        "    # str.endswith needs a str or tuple; accept a single suffix or any iterable of them.\n",
+        "    if isinstance(extensions, str):\n",
+        "        extensions = (extensions,)\n",
+        "    suffixes = tuple(ext.lower() for ext in extensions)\n",
+        "\n",
+        "    temp_dir = f\"{path}/temp\"\n",
+        "    from_link = checker(audio_input)\n",
+        "\n",
+        "    try:\n",
+        "        if from_link:\n",
+        "            downloader(audio_input)\n",
+        "            audio_input = temp_dir\n",
+        "\n",
+        "        if os.path.isdir(audio_input):\n",
+        "            folder = audio_input\n",
+        "            names = os.listdir(folder)\n",
+        "        elif os.path.isfile(audio_input):\n",
+        "            # A single file was given: process just that file from its parent folder.\n",
+        "            folder, single_name = os.path.split(audio_input)\n",
+        "            names = [single_name]\n",
+        "        else:\n",
+        "            print(f\"Input path not found: {audio_input!r}\")\n",
+        "            return\n",
+        "\n",
+        "        found_files = sorted(name for name in names if name.lower().endswith(suffixes))\n",
+        "        total_files = len(found_files)\n",
+        "\n",
+        "        if total_files == 0:\n",
+        "            print(\"No valid audio files found.\")\n",
+        "        else:\n",
+        "            print(f\"{total_files} audio files found\")\n",
+        "\n",
+        "            for audio_files in found_files:\n",
+        "                file_path = os.path.join(folder, audio_files)\n",
+        "                # Quote every user-influenced value: downloaded titles and folder names\n",
+        "                # may contain spaces, quotes, `$` or backticks.\n",
+        "                prompt = ' '.join([\n",
+        "                    'audio-separator', shlex.quote(file_path),\n",
+        "                    '--model_filename', shlex.quote(roformer_model),\n",
+        "                    f'--output_dir={shlex.quote(str(output_folder))}',\n",
+        "                    f'--output_format={shlex.quote(str(output_format))}',\n",
+        "                    f'--mdxc_segment_size={segment_size}',\n",
+        "                    f'--mdxc_overlap={overlap}',\n",
+        "                    f'--mdxc_batch_size={batch_size}',\n",
+        "                    '--model_file_dir=./models',\n",
+        "                ])\n",
+        "                if override_model_segment_size:\n",
+        "                    prompt += \" --mdxc_override_model_segment_size\"\n",
+        "                if use_autocast:\n",
+        "                    prompt += \" --use_autocast\"\n",
+        "                # IPython expands `$prompt` from this function's locals, so keep it a local here.\n",
+        "                !$prompt\n",
+        "    finally:\n",
+        "        # Empty the temp folder even when the download, listing or separation fails.\n",
+        "        if from_link or audio_input == temp_dir:\n",
+        "            for file in glob.glob(f\"{temp_dir}/*\"):\n",
+        "                if os.path.isfile(file):\n",
+        "                    os.remove(file)\n",
+        "\n",
+        "#@markdown Input path for audio files or link:\n",
+        "audio_input = \"\" #@param {type:\"string\"}\n",
+        "#@markdown You can paste a link to a video or audio track from any site supported by yt-dlp; see the complete list [here](https://github.com/yt-dlp/yt-dlp/blob/master/supportedsites.md).\n",
+        "\n",
+        "#@markdown Output path for audio files:\n",
+        "output_folder = \"/content/Vocales\" #@param {type:\"string\"}\n",
+        "#@markdown Select the model:\n",
+        "model = \"BS-Roformer-Viperx-1297\" #@param [\"BS-Roformer-Viperx-1297\", \"BS-Roformer-Viperx-1296\", \"BS-Roformer-Viperx-1053\", \"Mel-Roformer-Viperx-1143\", \"BS-Roformer-De-Reverb\", \"Mel-Roformer-Crowd-Aufr33-Viperx\", \"Mel-Roformer-Denoise-Aufr33\", \"Mel-Roformer-Denoise-Aufr33-Aggr\", \"Mel-Roformer-Karaoke-Aufr33-Viperx\", \"MelBand Roformer | Vocals by Kimberley Jensen\", \"MelBand Roformer Kim | FT by unwa\", \"MelBand Roformer Kim | Inst V1 by Unwa\", \"MelBand Roformer Kim | Inst V1 (E) by Unwa\", \"MelBand Roformer Kim | Inst V2 by Unwa\", \"MelBand Roformer Kim | InstVoc Duality V1 by Unwa\", \"MelBand Roformer Kim | InstVoc Duality V2 by Unwa\", \"MelBand Roformer | De-Reverb by anvuew\", \"MelBand Roformer | De-Reverb Less Aggressive by anvuew\", \"MelBand Roformer | De-Reverb-Echo by Sucial\", \"MelBand Roformer | De-Reverb-Echo V2 by Sucial\", \"MelBand Roformer Kim | SYHFT by SYH99999\", \"MelBand Roformer Kim | SYHFT V2 by SYH99999\", \"MelBand Roformer Kim | SYHFT V2.5 by SYH99999\", \"MelBand Roformer Kim | SYHFT V3 by SYH99999\", \"MelBand Roformer Kim | Big SYHFT V1 by SYH99999\", \"MelBand Roformer Kim | Big Beta 4 FT by unwa\", \"MelBand Roformer Kim | Big Beta 5e FT by unwa\", \"BS Roformer | Chorus Male-Female by Sucial\", \"MelBand Roformer | Aspiration by Sucial\", \"MelBand Roformer | Aspiration Less Aggressive by Sucial\", \"MelBand Roformer | Bleed Suppressor V1 by unwa-97chris\"]\n",
+        "#@markdown Select the output format:\n",
+        "output_format = \"wav\" #@param [\"wav\", \"flac\", \"mp3\", \"ogg\", \"opus\", \"m4a\", \"aiff\", \"ac3\"]\n",
+        "#@markdown A larger segment size consumes more resources but may give better results.\n",
+        "segment_size = 256 #@param {type:\"slider\", min:32, max:4000, step:32}\n",
+        "#@markdown Amount of overlap between prediction windows.\n",
+        "overlap = 8 #@param {type:\"slider\", min:2, max:10, step:1}\n",
+        "#@markdown A larger batch size consumes more RAM but may process slightly faster.\n",
+        "batch_size = 1 #@param {type:\"slider\", min:1, max:16, step:1}\n",
+        "#@markdown Override model default segment size instead of using the model default value.\n",
+        "override_model_segment_size = False #@param {type:\"boolean\"}\n",
+        "#@markdown Flag to use PyTorch autocast for faster inference. Do not use for CPU inference.\n",
+        "use_autocast = True #@param {type:\"boolean\"}\n",
+        "extensions = (\".wav\", \".flac\", \".mp3\", \".ogg\", \".opus\", \".m4a\", \".aiff\", \".ac3\")\n",
+        "\n",
+        "uvr_cli(audio_input, output_folder, model, output_format, segment_size, overlap, batch_size, override_model_segment_size, use_autocast, extensions)"
       ],
       "metadata": {
         "cellView": "form",
@@ -227,7 +278,7 @@
         "\n",
         "#@title ## Inference\n",
         "import os\n",
-        "main_dir = \"/content/Harmonify\"\n",
+        "main_dir = os.getcwd()\n",
         "os.chdir(main_dir)\n",
         "from lib.infer import infer_audio\n",
         "from google.colab import files\n",