Archime committed on
Commit
fc64c8b
·
1 Parent(s): 799a0f6

add custom style

Browse files
app/canary_speech_engine.py CHANGED
@@ -215,12 +215,12 @@ class CanarySpeechEngine(IStreamingSpeechEngine):
215
  """Helper to calculate model-specific streaming parameters."""
216
  model_cfg = self.asr_model.cfg
217
  audio_sample_rate = model_cfg.preprocessor['sample_rate']
218
- feature_stride_sec = model_cfg.preprocessor['window_stride']
219
- features_per_sec = 1.0 / feature_stride_sec
220
  self.encoder_subsampling_factor = self.asr_model.encoder.subsampling_factor
221
 
222
  self.features_frame2audio_samples = make_divisible_by(
223
- int(audio_sample_rate * feature_stride_sec), factor=self.encoder_subsampling_factor
224
  )
225
  encoder_frame2audio_samples = self.features_frame2audio_samples * self.encoder_subsampling_factor
226
 
 
215
  """Helper to calculate model-specific streaming parameters."""
216
  model_cfg = self.asr_model.cfg
217
  audio_sample_rate = model_cfg.preprocessor['sample_rate']
218
+ self.feature_stride_sec = model_cfg.preprocessor['window_stride']
219
+ features_per_sec = 1.0 / self.feature_stride_sec
220
  self.encoder_subsampling_factor = self.asr_model.encoder.subsampling_factor
221
 
222
  self.features_frame2audio_samples = make_divisible_by(
223
+ int(audio_sample_rate * self.feature_stride_sec ), factor=self.encoder_subsampling_factor
224
  )
225
  encoder_frame2audio_samples = self.features_frame2audio_samples * self.encoder_subsampling_factor
226
 
app/supported_languages.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ SUPPORTED_LANGS_MAP = {
2
+ "Bulgarian": "bg", "Croatian": "hr", "Czech": "cs", "Danish": "da",
3
+ "Dutch": "nl", "English": "en", "Estonian": "et", "Finnish": "fi",
4
+ "French": "fr", "German": "de", "Greek": "el", "Hungarian": "hu",
5
+ "Italian": "it", "Latvian": "lv", "Lithuanian": "lt", "Maltese": "mt",
6
+ "Polish": "pl", "Portuguese": "pt", "Romanian": "ro", "Slovak": "sk",
7
+ "Slovenian": "sl", "Spanish": "es", "Swedish": "sv", "Russian": "ru", "Ukrainian": "uk"
8
+ }
app/ui_utils.py CHANGED
@@ -2,6 +2,24 @@ from app.logger_config import logger as logging
2
  import gradio as gr
3
  from pathlib import Path
4
  import os
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  DEFAULT_CONFIG = {
6
  "task_type": "Transcription",
7
  "lang_source": "French",
@@ -39,14 +57,6 @@ EXAMPLE_CONFIGS = {
39
  }
40
  }
41
 
42
- SUPPORTED_LANGS_MAP = {
43
- "Bulgarian": "bg", "Croatian": "hr", "Czech": "cs", "Danish": "da",
44
- "Dutch": "nl", "English": "en", "Estonian": "et", "Finnish": "fi",
45
- "French": "fr", "German": "de", "Greek": "el", "Hungarian": "hu",
46
- "Italian": "it", "Latvian": "lv", "Lithuanian": "lt", "Maltese": "mt",
47
- "Polish": "pl", "Portuguese": "pt", "Romanian": "ro", "Slovak": "sk",
48
- "Slovenian": "sl", "Spanish": "es", "Swedish": "sv", "Russian": "ru", "Ukrainian": "uk"
49
- }
50
 
51
 
52
 
@@ -132,7 +142,7 @@ def handle_additional_outputs(webrtc_stream, msg):
132
  Controls button states, audio visibility, and progress slider.
133
  """
134
  # logging.debug(f"Additional output received: {msg}")
135
- # ui_components = [start_stream_button, stop_stream_button,go_to_task, audio_source_step, status_slider,walkthrough]
136
 
137
  progress = float(0)
138
  # Handle structured error message
@@ -144,10 +154,11 @@ def handle_additional_outputs(webrtc_stream, msg):
144
  return (
145
  gr.update(visible=True), # start_stream_button enabled
146
  gr.update(visible=False), # stop_stream_button disabled
 
147
  gr.update(visible=False), # go_to_task disabled
148
  gr.update(interactive=True), # audio_source_step re-shown
149
  gr.update(visible=False, value=0), # slider hidden
150
- gr.update(selected=1), #walkthrough
151
  gr.update(value=f"**Error:** {value}", visible=True)
152
  )
153
 
@@ -159,6 +170,7 @@ def handle_additional_outputs(webrtc_stream, msg):
159
  return (
160
  gr.update(visible=True), # start_stream_button disabled
161
  gr.update(visible=False), # stop_stream_button enabled
 
162
  gr.update(visible=True), # go_to_task enabled
163
  gr.update(interactive=True), # hide audio_source_step
164
  gr.update(visible=True, value=progress), # show progress
@@ -169,6 +181,7 @@ def handle_additional_outputs(webrtc_stream, msg):
169
  return (
170
  gr.update(visible=False), # start_stream_button disabled
171
  gr.update(visible=True), # stop_stream_button enabled
 
172
  gr.update(visible=True), # go_to_task enabled
173
  gr.update(interactive=False), # hide audio_source_step
174
  gr.update(visible=True, value=progress), # show progress
@@ -180,10 +193,11 @@ def handle_additional_outputs(webrtc_stream, msg):
180
  return (
181
  gr.update(visible=True), # start_stream_button disabled
182
  gr.update(visible=False), # stop_stream_button enabled
 
183
  gr.update(visible=False), # go_to_task enabled
184
  gr.update(interactive=True), # hide audio_source_step
185
  gr.update(visible=True, value=0), # show progress
186
- gr.update(selected=1), #walkthrough
187
  gr.update(value="ℹStream stopped by user.", visible=True)
188
  )
189
 
@@ -222,6 +236,54 @@ def get_custom_theme() :
222
 
223
 
224
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
225
  def raise_error(message="Une erreur est survenue."):
226
  raise gr.Error(message)
227
 
 
2
  import gradio as gr
3
  from pathlib import Path
4
  import os
5
+
6
+ from app.utils import (
7
+ remove_active_task_flag_file,
8
+ task_fake,
9
+ is_active_task
10
+ # task
11
+ )
12
+
13
+ # from app.utils import (
14
+ # raise_error,
15
+ # READ_SIZE,
16
+ # generate_coturn_config,
17
+ # read_and_stream_audio,
18
+ # stop_streaming,
19
+ # task,
20
+ # task_fake
21
+ # )
22
+
23
  DEFAULT_CONFIG = {
24
  "task_type": "Transcription",
25
  "lang_source": "French",
 
57
  }
58
  }
59
 
 
 
 
 
 
 
 
 
60
 
61
 
62
 
 
142
  Controls button states, audio visibility, and progress slider.
143
  """
144
  # logging.debug(f"Additional output received: {msg}")
145
+ # ui_components = [start_stream_button, stop_stream_button,start_task_button,go_to_task, audio_source_step, status_slider,walkthrough]
146
 
147
  progress = float(0)
148
  # Handle structured error message
 
154
  return (
155
  gr.update(visible=True), # start_stream_button enabled
156
  gr.update(visible=False), # stop_stream_button disabled
157
+ gr.update(visible=False), #start_task_button
158
  gr.update(visible=False), # go_to_task disabled
159
  gr.update(interactive=True), # audio_source_step re-shown
160
  gr.update(visible=False, value=0), # slider hidden
161
+ gr.update(), #walkthrough
162
  gr.update(value=f"**Error:** {value}", visible=True)
163
  )
164
 
 
170
  return (
171
  gr.update(visible=True), # start_stream_button disabled
172
  gr.update(visible=False), # stop_stream_button enabled
173
+ gr.update(visible=False), #start_task_button
174
  gr.update(visible=True), # go_to_task enabled
175
  gr.update(interactive=True), # hide audio_source_step
176
  gr.update(visible=True, value=progress), # show progress
 
181
  return (
182
  gr.update(visible=False), # start_stream_button disabled
183
  gr.update(visible=True), # stop_stream_button enabled
184
+ gr.update() if is_active_task(msg.get("session_hash_code")) else gr.update(visible=True), #start_task_button
185
  gr.update(visible=True), # go_to_task enabled
186
  gr.update(interactive=False), # hide audio_source_step
187
  gr.update(visible=True, value=progress), # show progress
 
193
  return (
194
  gr.update(visible=True), # start_stream_button disabled
195
  gr.update(visible=False), # stop_stream_button enabled
196
+ gr.update(visible=False), #start_task_button
197
  gr.update(visible=False), # go_to_task enabled
198
  gr.update(interactive=True), # hide audio_source_step
199
  gr.update(visible=True, value=0), # show progress
200
+ gr.update(), #walkthrough
201
  gr.update(value="ℹStream stopped by user.", visible=True)
202
  )
203
 
 
236
 
237
 
238
 
239
+
240
+ ########## task
241
+ def start_task_asr_ast(
242
+ session_hash_code,
243
+ task_type, lang_source, lang_target,
244
+ chunk_secs, left_context_secs, right_context_secs,
245
+ streaming_policy, alignatt_thr, waitk_lagging,
246
+ exclude_sink_frames, xatt_scores_layer, hallucinations_detector
247
+ ):
248
+ """Stream transcription or translation results in real time."""
249
+ accumulated = ""
250
+ # Boucle sur le générateur de `task2()`
251
+ # outputs=[task_output,status_message_task,start_task_button,stop_task_button,config_step]
252
+ for result, status, current_chunk in task_fake(
253
+ session_hash_code,
254
+ task_type, lang_source, lang_target,
255
+ chunk_secs, left_context_secs, right_context_secs,
256
+ streaming_policy, alignatt_thr, waitk_lagging,
257
+ exclude_sink_frames, xatt_scores_layer, hallucinations_detector
258
+ ):
259
+ if status == "success":
260
+ yield (accumulated + result, #task_output
261
+ gr.update(visible=True,value=current_chunk,elem_classes=[status]),#status_message_task
262
+ gr.update(visible=False),#start_task_button
263
+ gr.update(visible=True), #stop_task_button
264
+ gr.update(interactive=False) # config_step
265
+ )
266
+ accumulated += result
267
+ elif status in ["warning","info" ]:
268
+ yield (accumulated, #task_output
269
+ gr.update(visible=True,value=result , elem_classes=[status]),#status_message_task
270
+ gr.update(visible=False),#start_task_button
271
+ gr.update(visible=True),#stop_task_button
272
+ gr.update(interactive=False) # config_step
273
+ )
274
+ elif status in [ "done"]:
275
+ yield (accumulated, #task_output
276
+ gr.update(visible=True,value=result , elem_classes=[status]),#status_message_task
277
+ gr.update(visible=True),#start_task_button
278
+ gr.update(visible=False),#stop_task_button
279
+ gr.update(interactive=True) # config_step
280
+ )
281
+
282
+ def stop_task_fn(session_hash_code):
283
+ remove_active_task_flag_file(session_hash_code)
284
+ yield "Task stopped by user."
285
+ # # --------------------------------------------------------
286
+
287
  def raise_error(message="Une erreur est survenue."):
288
  raise gr.Error(message)
289
 
app/utils.py CHANGED
@@ -21,7 +21,7 @@ from app.session_utils import (
21
  remove_active_task_flag_file,
22
  get_session_hashe_chunks_dir
23
  )
24
- from app.ui_utils import (
25
  SUPPORTED_LANGS_MAP
26
  )
27
  from app.canary_speech_engine import CanarySpeechEngine,CanaryConfig
@@ -94,12 +94,12 @@ def read_and_stream_audio(filepath_to_stream: str, session_hash_code: str,read_s
94
  frame_rate = chunk.frame_rate
95
  samples = np.array(chunk.get_array_of_samples()).reshape(1, -1)
96
  progress = round(((i + 1) / total_chunks) * 100, 2)
97
- if _is_stop_requested(session_hash_code):
98
  logging.info(f"[{session_hash_code}] Stop signal received. Terminating stream.")
99
- yield ((frame_rate, samples), AdditionalOutputs({"stoped": True, "value": "STREAM_STOPED"} ) )
100
  break
101
 
102
- yield ((frame_rate, samples), AdditionalOutputs({"progressed": True, "value": progress} ))
103
  logging.debug(f"[{session_hash_code}] Sent chunk {i+1}/{total_chunks} ({progress}%).")
104
 
105
  time.sleep(chunk_duration_ms/1000)
@@ -130,8 +130,8 @@ def read_and_stream_audio(filepath_to_stream: str, session_hash_code: str,read_s
130
 
131
 
132
 
133
- asr_model = nemo_asr.models.ASRModel.from_pretrained("nvidia/canary-1b-v2")
134
- # asr_model = None
135
 
136
  @spaces.GPU
137
  def task_fake(session_hash_code: str,
@@ -354,7 +354,7 @@ def handle_stream_error(session_hash_code: str, error: Exception):
354
 
355
  remove_active_stream_flag_file(session_hash_code)
356
 
357
- yield ((16000,np.zeros(16000, dtype=np.float32).reshape(1, -1)), AdditionalOutputs({"errored": True, "value": msg}))
358
 
359
 
360
 
@@ -386,10 +386,13 @@ def start_streaming(session_hash_code: str):
386
  with open(active_stream_flag, "w") as f:
387
  f.write("1")
388
 
389
- def _is_stop_requested(session_hash_code) -> bool:
390
  """Check if the stop signal was requested."""
391
  return not os.path.exists(get_active_stream_flag_file(session_hash_code))
392
 
 
 
 
393
 
394
 
395
  def raise_error():
 
21
  remove_active_task_flag_file,
22
  get_session_hashe_chunks_dir
23
  )
24
+ from app.supported_languages import (
25
  SUPPORTED_LANGS_MAP
26
  )
27
  from app.canary_speech_engine import CanarySpeechEngine,CanaryConfig
 
94
  frame_rate = chunk.frame_rate
95
  samples = np.array(chunk.get_array_of_samples()).reshape(1, -1)
96
  progress = round(((i + 1) / total_chunks) * 100, 2)
97
+ if is_stop_requested(session_hash_code):
98
  logging.info(f"[{session_hash_code}] Stop signal received. Terminating stream.")
99
+ yield ((frame_rate, samples), AdditionalOutputs({"stoped": True, "value": "STREAM_STOPED", "session_hash_code" : session_hash_code } ) )
100
  break
101
 
102
+ yield ((frame_rate, samples), AdditionalOutputs({"progressed": True, "value": progress , "session_hash_code" : session_hash_code} ))
103
  logging.debug(f"[{session_hash_code}] Sent chunk {i+1}/{total_chunks} ({progress}%).")
104
 
105
  time.sleep(chunk_duration_ms/1000)
 
130
 
131
 
132
 
133
+ # asr_model = nemo_asr.models.ASRModel.from_pretrained("nvidia/canary-1b-v2")
134
+ asr_model = None
135
 
136
  @spaces.GPU
137
  def task_fake(session_hash_code: str,
 
354
 
355
  remove_active_stream_flag_file(session_hash_code)
356
 
357
+ yield ((16000,np.zeros(16000, dtype=np.float32).reshape(1, -1)), AdditionalOutputs({"errored": True, "value": msg, "session_hash_code" : session_hash_code}))
358
 
359
 
360
 
 
386
  with open(active_stream_flag, "w") as f:
387
  f.write("1")
388
 
389
+ def is_stop_requested(session_hash_code) -> bool:
390
  """Check if the stop signal was requested."""
391
  return not os.path.exists(get_active_stream_flag_file(session_hash_code))
392
 
393
+ def is_active_task(session_hash_code) -> bool:
394
+ """Check if the stop signal was requested."""
395
+ return os.path.exists(get_active_task_flag_file(session_hash_code))
396
 
397
 
398
  def raise_error():
assets/custom_style.css CHANGED
@@ -179,20 +179,55 @@ body {
179
  display: block; /* Le rend visible */
180
  }
181
 
182
- /* Styles personnalisés pour le WebRTC */
183
- #webcam-stream {
184
- border: 2px solid #007bff;
185
- border-radius: 10px;
186
- box-shadow: 0 4px 8px rgba(0,0,0,0.1);
187
- background-color: #f8f9fa;
188
- margin: 10px 0;
189
- }
190
-
191
- #webcam-stream .gr-webRTC {
192
- background-color: #e9ecef;
193
- }
194
 
195
  #task-output-box textarea {
196
  font-size: 1.15em; /* 'Moyenne taille' - ajustez au besoin */
197
  font-weight: bold; /* 'En gras' */
198
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
179
  display: block; /* Le rend visible */
180
  }
181
 
182
+ /* #live-stream {
183
+ position: relative;
184
+ display: flex;
185
+ min-height: 100px;
186
+ max-height: 128px;
187
+ justify-content: center;
188
+ align-items: center;
189
+ margin: 2rem 0;
190
+ } */
 
 
 
191
 
192
  #task-output-box textarea {
193
  font-size: 1.15em; /* 'Moyenne taille' - ajustez au besoin */
194
  font-weight: bold; /* 'En gras' */
195
+ }
196
+
197
+ .gradio-webrtc-waveContainer {
198
+ background-color :white
199
+
200
+ }
201
+
202
+ /* --- 4. L'Icône Centrale (Style Bouton) --- */
203
+ .gradio-webrtc-icon-container {
204
+ position: relative;
205
+ width: 128px;
206
+ height: 128px;
207
+ display: flex;
208
+ justify-content: center;
209
+ align-items: center;
210
+ }
211
+
212
+ .gradio-webrtc-icon {
213
+ position: relative;
214
+ width: 64px;
215
+ height: 64px;
216
+ border-radius: 0.5rem; /* Carré arrondi comme les boutons de l'exemple */
217
+ display: flex;
218
+ justify-content: center;
219
+ align-items: center;
220
+ z-index: 2;
221
+
222
+ /* Style Bouton Plein (Solid) */
223
+ background-color: var(--color-accent);
224
+ color: white;
225
+ box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -1px rgba(0, 0, 0, 0.06);
226
+ transition: all 0.2s ease;
227
+ }
228
+
229
+ /* Effet Hover */
230
+ .gradio-webrtc-icon:hover {
231
+ opacity: 0.9;
232
+ transform: translateY(-1px);
233
+ }