| import torch |
| import gradio as gr |
| from transformers import pipeline |
|
|
# Whisper checkpoint fine-tuned for German speech recognition.
model_id = "Sandiago21/whisper-large-v2-german-2"

# Run inference on the first CUDA GPU when one is available; otherwise fall
# back to the CPU so the demo still starts on GPU-less hosts.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Shared speech-recognition pipeline, loaded once at import time and reused
# for every transcription request.
pipe = pipeline("automatic-speech-recognition", model=model_id, device=device)
|
|
|
|
# Heading and Markdown blurb passed to both gr.Interface instances below.
title = "Automatic Speech Recognition (ASR)"
description = """
Demo for automatic speech recognition in German. Demo uses [Sandiago21/whisper-large-v2-german-2](https://huggingface.co/Sandiago21/whisper-large-v2-german-2) checkpoint, which is based on OpenAI's
[Whisper](https://huggingface.co/openai/whisper-large-v2) model and is fine-tuned in German Audio dataset
"""
|
|
def transcribe_speech(filepath):
    """Transcribe a German audio recording to text.

    Args:
        filepath: Path to the audio file to transcribe. May be ``None`` or
            empty when the user submits without recording or uploading
            anything.

    Returns:
        The transcription produced by the module-level ``pipe``, or an empty
        string when no audio path was supplied.
    """
    if not filepath:
        # Nothing to transcribe; avoid handing a missing path to the pipeline.
        return ""

    result = pipe(
        filepath,
        generate_kwargs={
            "task": "transcribe",
            "language": "german",
            # Supplied here rather than as a top-level pipeline argument,
            # which Transformers has deprecated.
            "max_new_tokens": 256,
        },
        # Split long recordings into 30-second chunks, processed 8 at a time.
        chunk_length_s=30,
        batch_size=8,
    )
    return result["text"]
|
|
# Top-level container; the tabbed UI is attached to it further down.
demo = gr.Blocks()


# Interface 1: record from the microphone and show the transcription.
_mic_input = gr.Audio(sources="microphone", type="filepath")
_mic_output = gr.Textbox()
mic_transcribe = gr.Interface(
    fn=transcribe_speech,
    inputs=_mic_input,
    outputs=_mic_output,
    title=title,
    description=description,
)
|
|
# Interface 2: upload an audio file and show the transcription.
# "./example.wav" is offered as a ready-made sample input.
_upload_input = gr.Audio(sources="upload", type="filepath")
_upload_output = gr.Textbox()
file_transcribe = gr.Interface(
    fn=transcribe_speech,
    inputs=_upload_input,
    outputs=_upload_output,
    examples=[["./example.wav"]],
    title=title,
    description=description,
)
|
|
# Mount both interfaces as tabs inside the Blocks container.
with demo:
    gr.TabbedInterface(
        [mic_transcribe, file_transcribe],
        ["Transcribe Microphone", "Transcribe Audio File"],
    )


# Launch the app.
demo.launch()
|
|