File size: 4,098 Bytes
f795870
 
 
1da4beb
 
398871a
f795870
 
 
 
290abed
b4d05df
933eb40
071e466
9994342
9b4d9d5
f795870
398871a
 
f795870
 
 
 
 
 
398871a
f795870
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
398871a
a5f8f3c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
const { join } = require('path');
const { whisper } = require('../../../build/Release/addon.node');
const { promisify } = require('util');

// Promise-returning wrapper around the native `whisper` export so the tests can `await` it.
const whisperAsync = promisify(whisper);

// Fixture locations, resolved relative to this test file.
const modelPath = join(__dirname, '../../../models/ggml-base.en.bin');
const samplePath = join(__dirname, '../../../samples/jfk.wav');

// Baseline options shared by every test below; each test spreads this object
// and overrides or adds only the fields it cares about.
const commonParams = {
  language: 'en',
  model: modelPath,
  fname_inp: samplePath,
  use_gpu: true,
  flash_attn: false,
  no_prints: true,
  no_timestamps: false,
  detect_language: false,
  audio_ctx: 0,
  max_len: 0
};

describe('Whisper.cpp Node.js addon with VAD support', () => {
  test('Basic whisper transcription without VAD', async () => {
    // Transcribe the shared sample with VAD explicitly switched off.
    const result = await whisperAsync({ ...commonParams, vad: false });

    expect(typeof result).toBe('object');

    const segments = result.transcription;
    expect(Array.isArray(segments)).toBe(true);
    expect(segments.length).toBeGreaterThan(0);

    // Index 2 of each segment is read as that segment's text; stitch the
    // pieces together and look for the expected phrase.
    const pieces = [];
    for (const segment of segments) {
      pieces.push(segment[2]);
    }
    const text = pieces.join(' ');

    expect(text.length).toBeGreaterThan(0);
    expect(text.toLowerCase()).toContain('ask not');
  }, 30000);

  test('VAD parameters validation', async () => {
    // Enable VAD but point it at a model file that does not exist. The test
    // expects the call to resolve (not reject) with an empty transcription.
    const result = await whisperAsync({
      ...commonParams,
      vad: true,
      vad_model: 'non-existent-model.bin',
      vad_threshold: 0.5
    });

    expect(typeof result).toBe('object');

    const segments = result.transcription;
    expect(Array.isArray(segments)).toBe(true);
    // No usable VAD model, so no segments are expected.
    expect(segments.length).toBe(0);
  }, 10000);

  test('VAD parameter parsing', async () => {
    // Supply the VAD tuning options while leaving VAD itself off, so no VAD
    // model file is needed; the call should still complete normally.
    const vadTuning = {
      vad_threshold: 0.7,
      vad_min_speech_duration_ms: 300,
      vad_min_silence_duration_ms: 150,
      vad_max_speech_duration_s: 45.0,
      vad_speech_pad_ms: 50,
      vad_samples_overlap: 0.15
    };

    const result = await whisperAsync({
      ...commonParams,
      vad: false,
      ...vadTuning
    });

    expect(typeof result).toBe('object');
    expect(Array.isArray(result.transcription)).toBe(true);
  }, 30000);

  test('Progress callback with VAD disabled', async () => {
    // The callback only records what it receives. Assertions are made after
    // the transcription has finished: an `expect` that throws inside a
    // callback invoked by the native addon is not guaranteed to be reported
    // as a failure of this test, so range violations could go unnoticed.
    const progressValues = [];

    const params = {
      ...commonParams,
      vad: false,
      progress_callback: (progress) => {
        progressValues.push(progress);
      }
    };

    const result = await whisperAsync(params);

    // The callback must have fired at least once.
    expect(progressValues.length).toBeGreaterThan(0);

    // Every reported value must be a percentage in [0, 100].
    for (const progress of progressValues) {
      expect(progress).toBeGreaterThanOrEqual(0);
      expect(progress).toBeLessThanOrEqual(100);
    }

    // The final report must signal completion.
    expect(progressValues[progressValues.length - 1]).toBe(100);
    expect(typeof result).toBe('object');
  }, 30000);

  test('Language detection without VAD', async () => {
    // Ask the addon to detect the language instead of using the fixed 'en'
    // from the shared defaults.
    const result = await whisperAsync({
      ...commonParams,
      vad: false,
      detect_language: true,
      language: 'auto'
    });

    expect(typeof result).toBe('object');

    // A non-empty language string is expected on the result.
    const detected = result.language;
    expect(typeof detected).toBe('string');
    expect(detected.length).toBeGreaterThan(0);
  }, 30000);

  test('Basic transcription with all VAD parameters set', async () => {
    // Every VAD option is populated, including an empty model path, while VAD
    // stays disabled so the run does not depend on a VAD model being present.
    const vadOptions = {
      vad: false,
      vad_model: '',
      vad_threshold: 0.6,
      vad_min_speech_duration_ms: 200,
      vad_min_silence_duration_ms: 80,
      vad_max_speech_duration_s: 25.0,
      vad_speech_pad_ms: 40,
      vad_samples_overlap: 0.08
    };

    const result = await whisperAsync({ ...commonParams, ...vadOptions });

    expect(typeof result).toBe('object');

    const segments = result.transcription;
    expect(Array.isArray(segments)).toBe(true);
    expect(segments.length).toBeGreaterThan(0);
  }, 30000);
});