Spaces:
Runtime error
Runtime error
| import os | |
| import torch | |
| import gradio as gr | |
| import time | |
| from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline | |
# Table of supported languages, one per line: "<display name>\t<language code>".
# The display names populate the UI dropdowns; the codes are passed to the
# translation pipeline as src_lang / tgt_lang.
# The separator is written as an explicit "\t" escape (not a literal tab
# character) so it cannot be silently turned into spaces by editors or
# copy-paste; display names may themselves contain spaces.
codes_as_string = '''Assamese\tasm_Beng
Awadhi\tawa_Deva
Bengali\tben_Beng
Bhojpuri\tbho_Deva
Standard Tibetan\tbod_Tibt
Dzongkha\tdzo_Tibt
English\teng_Latn
Gujarati\tguj_Gujr
Hindi\thin_Deva
Chhattisgarhi\thne_Deva
Kannada\tkan_Knda
Kashmiri (Arabic script)\tkas_Arab
Kashmiri (Devanagari script)\tkas_Deva
Mizo\tlus_Latn
Magahi\tmag_Deva
Maithili\tmai_Deva
Malayalam\tmal_Mlym
Marathi\tmar_Deva
Meitei (Bengali script)\tmni_Beng
Burmese\tmya_Mymr
Nepali\tnpi_Deva
Odia\tory_Orya
Punjabi\tpan_Guru
Sanskrit\tsan_Deva
Santali\tsat_Olck
Sindhi\tsnd_Arab
Tamil\ttam_Taml
Telugu\ttel_Telu
Urdu\turd_Arab
Vietnamese\tvie_Latn'''
def load_models():
    """Load every configured translation model together with its tokenizer.

    Returns:
        dict: for each short model name ``<name>``, the loaded model under
        the key ``'<name>_model'`` and its tokenizer under
        ``'<name>_tokenizer'``.
    """
    # Short call name -> Hugging Face checkpoint identifier.
    checkpoints = {
        'nllb-1.3B': "ychenNLP/nllb-200-distilled-1.3B-easyproject",
    }

    loaded = {}
    for alias in checkpoints:
        print('\tLoading model: %s' % alias)
        loaded[alias + '_model'] = AutoModelForSeq2SeqLM.from_pretrained(checkpoints[alias])
        # The tokenizer always comes from the base NLLB checkpoint, whatever
        # model is being loaded.
        # NOTE(review): presumably the fine-tuned checkpoint shares the base
        # NLLB vocabulary -- confirm.
        loaded[alias + '_tokenizer'] = AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")
    return loaded
def translation(source, target, text):
    """Translate ``text`` from the ``source`` language into the ``target`` language.

    Args:
        source: display name of the source language; must be a key of the
            module-level ``flores_codes`` mapping.
        target: display name of the target language; must be a key of
            ``flores_codes``.
        text: the text to translate.

    Returns:
        The ``'translation_text'`` field of the first pipeline result.

    Raises:
        KeyError: if ``source`` or ``target`` is not in ``flores_codes``, or
            if the model/tokenizer entries are missing from ``model_dict``.
    """
    # Always bind the model name. Previously it was only assigned when
    # ``model_dict`` held exactly two entries, so any other size failed with
    # an UnboundLocalError instead of a meaningful lookup error.
    model_name = 'nllb-1.3B'

    src_code = flores_codes[source]
    tgt_code = flores_codes[target]

    model = model_dict[model_name + '_model']
    tokenizer = model_dict[model_name + '_tokenizer']

    # The pipeline is built per request because the language pair is chosen
    # per request; the model and tokenizer themselves are already loaded.
    translator = pipeline(
        'translation',
        model=model,
        tokenizer=tokenizer,
        src_lang=src_code,
        tgt_lang=tgt_code,
    )
    output = translator(text, max_length=400)
    return output[0]['translation_text']
if __name__ == '__main__':
    print('\tinit models')

    # Build the display-name -> language-code lookup used by translation().
    # Split on the LAST run of whitespace: codes never contain whitespace,
    # while display names may ("Standard Tibetan", "Kashmiri (Arabic script)"),
    # so this works whether the separator is a tab or spaces.
    flores_codes = {}
    for entry in codes_as_string.split('\n'):
        if not entry.strip():
            continue  # tolerate blank lines in the table
        lang, lang_code = entry.rsplit(None, 1)
        flores_codes[lang] = lang_code

    # Module-level assignment already makes this a global that translation()
    # can read; no ``global`` statement is needed at this scope.
    model_dict = load_models()

    # Define the Gradio demo. Components are taken from the top-level
    # ``gr`` namespace with ``value=`` for the initial selection; the legacy
    # ``gr.inputs`` namespace and its ``default=`` argument are not available
    # in current Gradio releases.
    lang_codes = list(flores_codes)
    inputs = [
        gr.Dropdown(lang_codes, value='English', label='Source'),
        gr.Dropdown(lang_codes, value='Hindi', label='Target'),
        gr.Textbox(lines=5, label="Input text"),
    ]
    outputs = gr.Textbox(label="Output text")

    title = "Machine Translation Demo"
    demo_status = "Machine Translation System."
    description = demo_status

    gr.Interface(
        translation,
        inputs,
        outputs,
        title=title,
        description=description,
        examples_per_page=50,
        theme="JohnSmith9982/small_and_pretty",
    ).launch()