mesolitica
diff --git a/‎docs/README.rst
Lines changed: 1 addition & 0 deletions b/‎docs/README.rst
Lines changed: 1 addition & 0 deletions
diff --git a/‎docs/load-stt-transducer-model-mixed.ipynb
Lines changed: 4 additions & 4 deletions b/‎docs/load-stt-transducer-model-mixed.ipynb
Lines changed: 4 additions & 4 deletions
diff --git a/‎docs/load-stt-transducer-model.ipynb
Lines changed: 3 additions & 107 deletions b/‎docs/load-stt-transducer-model.ipynb
Lines changed: 3 additions & 107 deletions
diff --git a/‎example/stt-transducer-model-mixed/load-stt-transducer-model-mixed.ipynb
Lines changed: 4 additions & 4 deletions b/‎example/stt-transducer-model-mixed/load-stt-transducer-model-mixed.ipynb
Lines changed: 4 additions & 4 deletions
diff --git a/‎example/stt-transducer-model/load-stt-transducer-model.ipynb
Lines changed: 3 additions & 107 deletions b/‎example/stt-transducer-model/load-stt-transducer-model.ipynb
Lines changed: 3 additions & 107 deletions
diff --git a/‎pretrained-model/stt/conformer/README.md
Lines changed: 5 additions & 1 deletion b/‎pretrained-model/stt/conformer/README.md
Lines changed: 5 additions & 1 deletion
@@ -94,6 +94,7 @@ Malaya-Speech also released pretrained models, simply check at `malaya-speech/pr
 -  **FastSpeechSplit**, Unsupervised Speech Decomposition Via Triple Information Bottleneck using Transformer, no paper produced.
 -  **Sepformer**, Attention is All You Need in Speech Separation, https://arxiv.org/abs/2010.13154
 -  **FastSpeechSplit**, Faster and Accurate Speech Split Conversion using Transformer, no paper produced.
+-  **HuBERT**, Self-Supervised Speech Representation Learning by Masked Prediction of Hidden Units, https://arxiv.org/abs/2106.07447
 
 References
 -----------
 
@@ -145,8 +145,8 @@
        "      <th>large-conformer-mixed</th>\n",
        "      <td>404</td>\n",
        "      <td>107</td>\n",
-       "      <td>0.25903</td>\n",
-       "      <td>0.17893</td>\n",
+       "      <td>0.24829</td>\n",
+       "      <td>0.16606</td>\n",
        "      <td>[malay, singlish]</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
@@ -160,7 +160,7 @@
        "large-conformer             404                 107  0.15986  0.05937   \n",
        "alconformer                38.1                15.1  0.20703  0.08533   \n",
        "conformer-mixed             125                37.1  0.25314  0.15836   \n",
-       "large-conformer-mixed       404                 107  0.25903  0.17893   \n",
+       "large-conformer-mixed       404                 107  0.24829  0.16606   \n",
        "\n",
        "                                Language  \n",
        "small-conformer                  [malay]  \n",
@@ -184,7 +184,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Lower is better. `mixed` and `bahasa` models tested on different test set."
+    "Lower is better. Mixed models were tested on a different dataset."
    ]
   },
   {
 
@@ -38,7 +38,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -56,115 +56,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "metadata": {
     "scrolled": false
    },
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>Size (MB)</th>\n",
-       "      <th>Quantized Size (MB)</th>\n",
-       "      <th>WER</th>\n",
-       "      <th>CER</th>\n",
-       "      <th>Language</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>small-conformer</th>\n",
-       "      <td>49.2</td>\n",
-       "      <td>18.1</td>\n",
-       "      <td>0.20599</td>\n",
-       "      <td>0.08933</td>\n",
-       "      <td>[malay]</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>conformer</th>\n",
-       "      <td>125</td>\n",
-       "      <td>37.1</td>\n",
-       "      <td>0.16547</td>\n",
-       "      <td>0.0641</td>\n",
-       "      <td>[malay]</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>large-conformer</th>\n",
-       "      <td>404</td>\n",
-       "      <td>107</td>\n",
-       "      <td>0.15986</td>\n",
-       "      <td>0.05937</td>\n",
-       "      <td>[malay]</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>alconformer</th>\n",
-       "      <td>38.1</td>\n",
-       "      <td>15.1</td>\n",
-       "      <td>0.20703</td>\n",
-       "      <td>0.08533</td>\n",
-       "      <td>[malay]</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>conformer-mixed</th>\n",
-       "      <td>125</td>\n",
-       "      <td>37.1</td>\n",
-       "      <td>0.35191</td>\n",
-       "      <td>0.23667</td>\n",
-       "      <td>[malay, singlish]</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>large-conformer-mixed</th>\n",
-       "      <td>404</td>\n",
-       "      <td>107</td>\n",
-       "      <td>0.3359</td>\n",
-       "      <td>0.1989</td>\n",
-       "      <td>[malay, singlish]</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "                      Size (MB) Quantized Size (MB)      WER      CER  \\\n",
-       "small-conformer            49.2                18.1  0.20599  0.08933   \n",
-       "conformer                   125                37.1  0.16547   0.0641   \n",
-       "large-conformer             404                 107  0.15986  0.05937   \n",
-       "alconformer                38.1                15.1  0.20703  0.08533   \n",
-       "conformer-mixed             125                37.1  0.35191  0.23667   \n",
-       "large-conformer-mixed       404                 107   0.3359   0.1989   \n",
-       "\n",
-       "                                Language  \n",
-       "small-conformer                  [malay]  \n",
-       "conformer                        [malay]  \n",
-       "large-conformer                  [malay]  \n",
-       "alconformer                      [malay]  \n",
-       "conformer-mixed        [malay, singlish]  \n",
-       "large-conformer-mixed  [malay, singlish]  "
-      ]
-     },
-     "execution_count": 2,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "malaya_speech.stt.available_transducer()"
    ]
 
@@ -145,8 +145,8 @@
        "      <th>large-conformer-mixed</th>\n",
        "      <td>404</td>\n",
        "      <td>107</td>\n",
-       "      <td>0.25903</td>\n",
-       "      <td>0.17893</td>\n",
+       "      <td>0.24829</td>\n",
+       "      <td>0.16606</td>\n",
        "      <td>[malay, singlish]</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
@@ -160,7 +160,7 @@
        "large-conformer             404                 107  0.15986  0.05937   \n",
        "alconformer                38.1                15.1  0.20703  0.08533   \n",
        "conformer-mixed             125                37.1  0.25314  0.15836   \n",
-       "large-conformer-mixed       404                 107  0.25903  0.17893   \n",
+       "large-conformer-mixed       404                 107  0.24829  0.16606   \n",
        "\n",
        "                                Language  \n",
        "small-conformer                  [malay]  \n",
@@ -184,7 +184,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Lower is better. `mixed` and `bahasa` models tested on different test set."
+    "Lower is better. Mixed models were tested on a different dataset."
    ]
   },
   {
 
@@ -38,7 +38,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -56,115 +56,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "metadata": {
     "scrolled": false
    },
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>Size (MB)</th>\n",
-       "      <th>Quantized Size (MB)</th>\n",
-       "      <th>WER</th>\n",
-       "      <th>CER</th>\n",
-       "      <th>Language</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>small-conformer</th>\n",
-       "      <td>49.2</td>\n",
-       "      <td>18.1</td>\n",
-       "      <td>0.20599</td>\n",
-       "      <td>0.08933</td>\n",
-       "      <td>[malay]</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>conformer</th>\n",
-       "      <td>125</td>\n",
-       "      <td>37.1</td>\n",
-       "      <td>0.16547</td>\n",
-       "      <td>0.0641</td>\n",
-       "      <td>[malay]</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>large-conformer</th>\n",
-       "      <td>404</td>\n",
-       "      <td>107</td>\n",
-       "      <td>0.15986</td>\n",
-       "      <td>0.05937</td>\n",
-       "      <td>[malay]</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>alconformer</th>\n",
-       "      <td>38.1</td>\n",
-       "      <td>15.1</td>\n",
-       "      <td>0.20703</td>\n",
-       "      <td>0.08533</td>\n",
-       "      <td>[malay]</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>conformer-mixed</th>\n",
-       "      <td>125</td>\n",
-       "      <td>37.1</td>\n",
-       "      <td>0.35191</td>\n",
-       "      <td>0.23667</td>\n",
-       "      <td>[malay, singlish]</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>large-conformer-mixed</th>\n",
-       "      <td>404</td>\n",
-       "      <td>107</td>\n",
-       "      <td>0.3359</td>\n",
-       "      <td>0.1989</td>\n",
-       "      <td>[malay, singlish]</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "                      Size (MB) Quantized Size (MB)      WER      CER  \\\n",
-       "small-conformer            49.2                18.1  0.20599  0.08933   \n",
-       "conformer                   125                37.1  0.16547   0.0641   \n",
-       "large-conformer             404                 107  0.15986  0.05937   \n",
-       "alconformer                38.1                15.1  0.20703  0.08533   \n",
-       "conformer-mixed             125                37.1  0.35191  0.23667   \n",
-       "large-conformer-mixed       404                 107   0.3359   0.1989   \n",
-       "\n",
-       "                                Language  \n",
-       "small-conformer                  [malay]  \n",
-       "conformer                        [malay]  \n",
-       "large-conformer                  [malay]  \n",
-       "alconformer                      [malay]  \n",
-       "conformer-mixed        [malay, singlish]  \n",
-       "large-conformer-mixed  [malay, singlish]  "
-      ]
-     },
-     "execution_count": 2,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "malaya_speech.stt.available_transducer()"
    ]
 
@@ -66,4 +66,8 @@ Tensorboard, https://tensorboard.dev/experiment/1qBD7FGyS32Q8uQvhA1NnA/
 
 12. Conformer, last update 12th June 2021, [output-base-conformer-v4.tar.gz](https://f000.backblazeb2.com/file/malaya-speech-model/pretrained/output-base-conformer-v4.tar.gz)
 
-13. Large Conformer, last update 12th June 2021, [output-large-conformer-v4.tar.gz](https://f000.backblazeb2.com/file/malaya-speech-model/pretrained/output-large-conformer-v4.tar.gz)
+13. Large Conformer, last update 12th June 2021, [output-large-conformer-v4.tar.gz](https://f000.backblazeb2.com/file/malaya-speech-model/pretrained/output-large-conformer-v4.tar.gz)
+
+14. Conformer Mixed, last update 29th June 2021, [output-base-mixed-conformer-v2.tar.gz](https://f000.backblazeb2.com/file/malaya-speech-model/pretrained/output-base-mixed-conformer-v2.tar.gz)
+
+15. Large Conformer Mixed, last update 29th June 2021, [output-large-mixed-conformer-v2.tar.gz](https://f000.backblazeb2.com/file/malaya-speech-model/pretrained/output-large-conformer-mixed.tar.gz)