import pandas as pd
from tqdm import tqdm
from malayalam_asr_benchmarking.commonvoice import evaluate_whisper_model_common_voice
Benchmarking Results on Malayalam Datasets
Whisper-Event Leaderboard
The Hugging Face team conducted a Whisper event on fine-tuning Whisper models to achieve state-of-the-art performance for various languages.
During this competition, a lot of models were evaluated on datasets like Common Voice.
For Malayalam, models were ranked on the Malayalam subset of the Common Voice dataset, and there was an evaluation on the Google FLEURS Malayalam subset as well.
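Throughout this post, models are compared by word error rate (WER) and character error rate (CER). Below is a minimal sketch of how these metrics can be computed with the jiwer library (an assumption; the benchmarking package may use a different metric backend):

```python
import jiwer

reference = "a ground-truth transcript"
hypothesis = "a ground truth transcript"

# WER counts word-level substitutions/insertions/deletions;
# CER does the same at the character level. 0.0 means a perfect match.
print(f"WER: {jiwer.wer(reference, hypothesis):.2%}")
print(f"CER: {jiwer.cer(reference, hypothesis):.2%}")
```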
Benchmarking in Common Voice Dataset
ASR models to benchmark
= ["thennal/whisper-medium-ml",
asr_models "anuragshas/whisper-large-v2-ml",
"DrishtiSharma/whisper-large-v2-malayalam",
"parambharat/whisper-small-ml",
"parambharat/whisper-base-ml",
"parambharat/whisper-tiny-ml"
]
openai_models = [
    "openai/whisper-tiny",
    "openai/whisper-base",
    "openai/whisper-small",
    "openai/whisper-medium",
    "openai/whisper-large",
    "openai/whisper-large-v2",
]
Running across all ASR models
wer_list = []
cer_list = []
model_size_list = []
time_list = []

for asr in tqdm(asr_models):
    evaluate_whisper_model_common_voice(asr, wer_list, cer_list, model_size_list, time_list)
0%| | 0/7 [00:00<?, ?it/s]Found cached dataset common_voice_11_0 (/home/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/ml/11.0.0/2c65b95d99ca879b1b1074ea197b65e0497848fd697fdb0582e0f6b75b6f4da0)
Loading cached processed dataset at /home/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/ml/11.0.0/2c65b95d99ca879b1b1074ea197b65e0497848fd697fdb0582e0f6b75b6f4da0/cache-374585c2877047e3.arrow
Loading cached processed dataset at /home/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/ml/11.0.0/2c65b95d99ca879b1b1074ea197b65e0497848fd697fdb0582e0f6b75b6f4da0/cache-22670505c562e0d4.arrow
/opt/conda/lib/python3.8/site-packages/transformers/generation_utils.py:1359: UserWarning: Neither `max_length` nor `max_new_tokens` has been set, `max_length` will default to 448 (`self.config.max_length`). Controlling `max_length` via the config is deprecated and `max_length` will be removed from the config in v5 of Transformers -- we recommend using `max_new_tokens` to control the maximum length of the generation.
warnings.warn(
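The `max_length` warning above is harmless; it can be silenced by passing `max_new_tokens` explicitly when calling `generate`. A hypothetical standalone snippet (not part of the benchmarking package):

```python
import numpy as np
from transformers import WhisperForConditionalGeneration, WhisperProcessor

processor = WhisperProcessor.from_pretrained("openai/whisper-tiny")
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny")

audio = np.zeros(16000)  # one second of silence at 16 kHz, purely illustrative
inputs = processor(audio, sampling_rate=16000, return_tensors="pt")
# Setting max_new_tokens avoids falling back to the deprecated config max_length.
ids = model.generate(inputs.input_features, max_new_tokens=448)
print(processor.batch_decode(ids, skip_special_tokens=True))
```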
wer_list
[11.56, 24.46, 21.65, 26.25, 30.33, 300.7, 38.31]
Store results in a pandas DataFrame
df = pd.DataFrame({
    "models": asr_models,
    "wer": wer_list,
    "cer": cer_list,
    "model size": model_size_list,
    "time(s)": time_list,
})
df.head(7)
|   | models | wer | cer | model size | time(s) |
|---|--------|-----|-----|------------|---------|
| 0 | thennal/whisper-medium-ml | 11.56 | 5.41 | 763.86M | 924.979711 |
| 1 | anuragshas/whisper-large-v2-ml | 24.46 | 11.64 | 1.54B | 1779.561592 |
| 2 | parambharat/whisper-small-ml | 21.65 | 11.78 | 241.73M | 273.555688 |
| 3 | DrishtiSharma/whisper-large-v2-malayalam | 26.25 | 13.17 | 1.54B | 1773.661774 |
| 4 | parambharat/whisper-base-ml | 30.33 | 16.16 | 72.59M | 96.419609 |
| 5 | kurianbenoy/whisper_malayalam_largev2 | 300.70 | 292.82 | 1.54B | 5034.771624 |
| 6 | parambharat/whisper-tiny-ml | 38.31 | 21.93 | 37.76M | 59.535259 |
"/home/commonvoice_benchmarking_results.parquet") df.to_parquet(
"kurianbenoy/whisper-small-ml-gmasc", [], [], [], []) evaluate_whisper_model_common_voice(
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Total time taken: 56.87792730331421
The WER of model: 41.12
The CER of model: 21.24
The model size is: 241.73M
['kurianbenoy', 'whisper-small-ml-gmasc']
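For reference, each `evaluate_whisper_model_common_voice` call roughly follows the pipeline below. This is a sketch under assumptions (transcription via the transformers `pipeline` API, metrics via jiwer), not the package's actual implementation:

```python
import time

import jiwer
from datasets import load_dataset
from transformers import pipeline


def sketch_evaluate(model_name, wer_list, cer_list, model_size_list, time_list, bs=16):
    # Malayalam test split of Common Voice 11 (gated; requires accepting
    # the dataset terms on the Hugging Face Hub).
    ds = load_dataset("mozilla-foundation/common_voice_11_0", "ml", split="test")
    asr = pipeline("automatic-speech-recognition", model=model_name, batch_size=bs)

    start = time.time()
    predictions = [out["text"] for out in asr(ds["path"])]
    time_list.append(time.time() - start)

    references = ds["sentence"]
    wer_list.append(jiwer.wer(references, predictions) * 100)
    cer_list.append(jiwer.cer(references, predictions) * 100)

    # Rough stand-in for the model-size figure the real helper reports.
    n_params = sum(p.numel() for p in asr.model.parameters())
    model_size_list.append(f"{n_params / 1e6:.2f}M")
```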
Running OpenAI ASR models
wer_list = []
cer_list = []
model_size_list = []
time_list = []

for asr in tqdm(openai_models):
    evaluate_whisper_model_common_voice(asr, wer_list, cer_list, model_size_list, time_list)
0%| | 0/6 [00:02<?, ?it/s]
KeyboardInterrupt
Since the loop was interrupted, the lists were populated manually with the results of the individual runs:

wer_list = [154.21, 118.39, 100.06, 127.97, 125.73, 100.26]
cer_list = [180.45, 131.08, 95.04, 136.43, 139.62, 93.6]
model_size_list = ['37.76M', '72.59M', '241.73M', '763.86M', '1.54B', '1.54B']
time_list = [22.277158498764038, 22.35258674621582, 25.442846059799194, 53.88049054145813, 82.74607968330383, 71.14292621612549]
"openai/whisper-large-v2", wer_list, cer_list, model_size_list, time_list, bs=4) evaluate_whisper_model_common_voice(
Found cached dataset common_voice_11_0 (/home/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/ml/11.0.0/2c65b95d99ca879b1b1074ea197b65e0497848fd697fdb0582e0f6b75b6f4da0)
Loading cached processed dataset at /home/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/ml/11.0.0/2c65b95d99ca879b1b1074ea197b65e0497848fd697fdb0582e0f6b75b6f4da0/cache-374585c2877047e3.arrow
Loading cached processed dataset at /home/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/ml/11.0.0/2c65b95d99ca879b1b1074ea197b65e0497848fd697fdb0582e0f6b75b6f4da0/cache-22670505c562e0d4.arrow
/opt/conda/lib/python3.8/site-packages/transformers/generation_utils.py:1359: UserWarning: Neither `max_length` nor `max_new_tokens` has been set, `max_length` will default to 448 (`self.config.max_length`). Controlling `max_length` via the config is deprecated and `max_length` will be removed from the config in v5 of Transformers -- we recommend using `max_new_tokens` to control the maximum length of the generation.
warnings.warn(
Total time taken: 71.14292621612549
The WER of model: 100.26
The CER of model: 93.6
The model size is: 1.54B
['openai', 'whisper-large-v2']
openai_models
['openai/whisper-tiny',
'openai/whisper-base',
'openai/whisper-small',
'openai/whisper-medium',
'openai/whisper-large',
'openai/whisper-large-v2']
= pd.DataFrame({"models": openai_models,
df "wer": wer_list,
"cer": cer_list,
"model size": model_size_list,
"time(s)": time_list
})
df.head()
|   | models | wer | cer | model size | time(s) |
|---|--------|-----|-----|------------|---------|
| 0 | openai/whisper-tiny | 154.21 | 180.45 | 37.76M | 22.277158 |
| 1 | openai/whisper-base | 118.39 | 131.08 | 72.59M | 22.352587 |
| 2 | openai/whisper-small | 100.06 | 95.04 | 241.73M | 25.442846 |
| 3 | openai/whisper-medium | 127.97 | 136.43 | 763.86M | 53.880491 |
| 4 | openai/whisper-large | 125.73 | 139.62 | 1.54B | 82.746080 |
"/home/commonvoice_benchmarking_openai_results.parquet") df.to_parquet(
Benchmarking in MSC dataset
from malayalam_asr_benchmarking.msc import evaluate_whisper_model_msc
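The MSC dataset used here is `thennal/msc` on the Hugging Face Hub (the name is visible in the cache paths below). A minimal sketch of loading it directly; the split layout is an assumption:

```python
from datasets import load_dataset

# Dataset name inferred from the cache paths in the outputs below.
msc = load_dataset("thennal/msc")
print(msc)
```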
"openai/whisper-medium",
evaluate_whisper_model_msc(
wer_list,
cer_list,
model_size_list,
time_list,=8
bs )
Found cached dataset parquet (/home/.cache/huggingface/datasets/thennal___parquet/thennal--msc-cc9d10989b2ac4bd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Loading cached processed dataset at /home/.cache/huggingface/datasets/thennal___parquet/thennal--msc-cc9d10989b2ac4bd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec/cache-30f1618974cdefce.arrow
Loading cached processed dataset at /home/.cache/huggingface/datasets/thennal___parquet/thennal--msc-cc9d10989b2ac4bd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec/cache-e4f860ca9b159c26.arrow
/opt/conda/lib/python3.8/site-packages/transformers/generation_utils.py:1359: UserWarning: Neither `max_length` nor `max_new_tokens` has been set, `max_length` will default to 448 (`self.config.max_length`). Controlling `max_length` via the config is deprecated and `max_length` will be removed from the config in v5 of Transformers -- we recommend using `max_new_tokens` to control the maximum length of the generation.
warnings.warn(
process of calculating predictions
completed getting predictions
Total time taken: 673.2912940979004
The WER of model: 101.45
The CER of model: 104.23
The model size is: 763.86M
['openai', 'whisper-medium']
"openai/whisper-large-v2",
evaluate_whisper_model_msc(
wer_list,
cer_list,
model_size_list,
time_list,=4
bs )
Found cached dataset parquet (/home/.cache/huggingface/datasets/thennal___parquet/thennal--msc-cc9d10989b2ac4bd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Loading cached processed dataset at /home/.cache/huggingface/datasets/thennal___parquet/thennal--msc-cc9d10989b2ac4bd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec/cache-30f1618974cdefce.arrow
Loading cached processed dataset at /home/.cache/huggingface/datasets/thennal___parquet/thennal--msc-cc9d10989b2ac4bd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec/cache-e4f860ca9b159c26.arrow
/opt/conda/lib/python3.8/site-packages/transformers/generation_utils.py:1359: UserWarning: Neither `max_length` nor `max_new_tokens` has been set, `max_length` will default to 448 (`self.config.max_length`). Controlling `max_length` via the config is deprecated and `max_length` will be removed from the config in v5 of Transformers -- we recommend using `max_new_tokens` to control the maximum length of the generation.
warnings.warn(
process of calculating predictions
completed getting predictions
Total time taken: 1040.2502796649933
The WER of model: 100.27
The CER of model: 102.4
The model size is: 1.54B
['openai', 'whisper-large-v2']
"openai/whisper-large",
evaluate_whisper_model_msc(
wer_list,
cer_list,
model_size_list,
time_list,=4
bs )
Found cached dataset parquet (/home/.cache/huggingface/datasets/thennal___parquet/thennal--msc-cc9d10989b2ac4bd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Loading cached processed dataset at /home/.cache/huggingface/datasets/thennal___parquet/thennal--msc-cc9d10989b2ac4bd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec/cache-30f1618974cdefce.arrow
Loading cached processed dataset at /home/.cache/huggingface/datasets/thennal___parquet/thennal--msc-cc9d10989b2ac4bd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec/cache-e4f860ca9b159c26.arrow
/opt/conda/lib/python3.8/site-packages/transformers/generation_utils.py:1359: UserWarning: Neither `max_length` nor `max_new_tokens` has been set, `max_length` will default to 448 (`self.config.max_length`). Controlling `max_length` via the config is deprecated and `max_length` will be removed from the config in v5 of Transformers -- we recommend using `max_new_tokens` to control the maximum length of the generation.
warnings.warn(
process of calculating predictions
completed getting predictions
Total time taken: 1067.5574433803558
The WER of model: 107.01
The CER of model: 113.62
The model size is: 1.54B
['openai', 'whisper-large']
"kurianbenoy/whisper-small-ml-gmasc", [], [], [], []) evaluate_whisper_model_msc(
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
process of calculating predictions
completed getting predictions
Total time taken: 498.59665060043335
The WER of model: 32.07
The CER of model: 16.89
The model size is: 241.73M
['kurianbenoy', 'whisper-small-ml-gmasc']
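The MSC numbers above can be collected into a DataFrame and saved, mirroring the Common Voice cells. A sketch using the values printed above; the output path is illustrative:

```python
import pandas as pd

msc_df = pd.DataFrame({
    "models": ["openai/whisper-medium", "openai/whisper-large-v2",
               "openai/whisper-large", "kurianbenoy/whisper-small-ml-gmasc"],
    "wer": [101.45, 100.27, 107.01, 32.07],
    "cer": [104.23, 102.4, 113.62, 16.89],
    "model size": ["763.86M", "1.54B", "1.54B", "241.73M"],
    "time(s)": [673.291294, 1040.250280, 1067.557443, 498.596651],
})
msc_df.to_parquet("/home/msc_benchmarking_results.parquet")  # illustrative path
```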
Made by Kurian Benoy. See the code.