//
//  ViewController.m
//  whisper.objc
//
//  Created by Georgi Gerganov on 23.10.22.
//

#import "ViewController.h"
#import "whisper.h"

#define NUM_BYTES_PER_BUFFER 16*1024
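
// NOTE: StateInp, NUM_BUFFERS, MAX_AUDIO_SEC and SAMPLE_RATE are assumed to be
// defined in ViewController.h, which is also expected to import
// <AudioToolbox/AudioQueue.h> for the AudioQueue types used below.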

// callback used to process captured audio
void AudioInputCallback(void * inUserData,
                        AudioQueueRef inAQ,
                        AudioQueueBufferRef inBuffer,
                        const AudioTimeStamp * inStartTime,
                        UInt32 inNumberPacketDescriptions,
                        const AudioStreamPacketDescription * inPacketDescs);

@interface ViewController ()

@property (weak, nonatomic) IBOutlet UILabel    *labelStatusInp;
@property (weak, nonatomic) IBOutlet UIButton   *buttonToggleCapture;
@property (weak, nonatomic) IBOutlet UIButton   *buttonTranscribe;
@property (weak, nonatomic) IBOutlet UIButton   *buttonRealtime;
@property (weak, nonatomic) IBOutlet UITextView *textviewResult;

@end

@implementation ViewController
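
// whisper.cpp expects 16 kHz, mono, signed 16-bit PCM input, so the capture
// format is pinned to WHISPER_SAMPLE_RATE with one 2-byte sample per frame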
- (void)setupAudioFormat:(AudioStreamBasicDescription*)format
{
    format->mSampleRate       = WHISPER_SAMPLE_RATE;
    format->mFormatID         = kAudioFormatLinearPCM;
    format->mFramesPerPacket  = 1;
    format->mChannelsPerFrame = 1;
    format->mBytesPerFrame    = 2;
    format->mBytesPerPacket   = 2;
    format->mBitsPerChannel   = 16;
    format->mReserved         = 0;
    format->mFormatFlags      = kLinearPCMFormatFlagIsSignedInteger;
}

- (void)viewDidLoad {
    [super viewDidLoad];

    // whisper.cpp initialization
    {
        // load the model
        NSString *modelPath = [[NSBundle mainBundle] pathForResource:@"ggml-base.en" ofType:@"bin"];

        // check if the model exists
        if (![[NSFileManager defaultManager] fileExistsAtPath:modelPath]) {
            NSLog(@"Model file not found");
            return;
        }

        NSLog(@"Loading model from %@", modelPath);

        // create ggml context
        struct whisper_context_params cparams = whisper_context_default_params();
#if TARGET_OS_SIMULATOR
        cparams.use_gpu = false;
        NSLog(@"Running on simulator, using CPU");
#endif
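        // loading the weights is the expensive one-time step; everything else
        // reuses the returned whisper_context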
        stateInp.ctx = whisper_init_from_file_with_params([modelPath UTF8String], cparams);

        // check if the model was loaded successfully
        if (stateInp.ctx == NULL) {
            NSLog(@"Failed to load model");
            return;
        }
    }
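
    // the buffers below hold up to MAX_AUDIO_SEC seconds of audio: the queue
    // callback appends raw 16-bit samples, and onTranscribe: converts them
    // into the float buffer consumed by whisper_full()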
    // initialize audio format and buffers
    {
        [self setupAudioFormat:&stateInp.dataFormat];

        stateInp.n_samples = 0;
        stateInp.audioBufferI16 = malloc(MAX_AUDIO_SEC*SAMPLE_RATE*sizeof(int16_t));
        stateInp.audioBufferF32 = malloc(MAX_AUDIO_SEC*SAMPLE_RATE*sizeof(float));
    }

    stateInp.isTranscribing = false;
    stateInp.isRealtime     = false;
}
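
// stop the audio queue, release its buffers and put the UI back to idle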
-(IBAction) stopCapturing {
    NSLog(@"Stop capturing");

    _labelStatusInp.text = @"Status: Idle";

    [_buttonToggleCapture setTitle:@"Start capturing" forState:UIControlStateNormal];
    [_buttonToggleCapture setBackgroundColor:[UIColor grayColor]];

    stateInp.isCapturing = false;

    AudioQueueStop(stateInp.queue, true);
    for (int i = 0; i < NUM_BUFFERS; i++) {
        AudioQueueFreeBuffer(stateInp.queue, stateInp.buffers[i]);
    }

    AudioQueueDispose(stateInp.queue, true);
}
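
// start/stop toggle: sets up a new input AudioQueue with NUM_BUFFERS buffers
// that AudioInputCallback recycles as they fill up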
- (IBAction)toggleCapture:(id)sender {
    if (stateInp.isCapturing) {
        // stop capturing
        [self stopCapturing];

        return;
    }

    // initiate audio capturing
    NSLog(@"Start capturing");

    stateInp.n_samples = 0;
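    // keep an unretained reference to self so the C audio callback can reach
    // back into this controller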
    stateInp.vc = (__bridge void *)(self);

    OSStatus status = AudioQueueNewInput(&stateInp.dataFormat,
                                         AudioInputCallback,
                                         &stateInp,
                                         CFRunLoopGetCurrent(),
                                         kCFRunLoopCommonModes,
                                         0,
                                         &stateInp.queue);

    if (status == 0) {
        for (int i = 0; i < NUM_BUFFERS; i++) {
            AudioQueueAllocateBuffer(stateInp.queue, NUM_BYTES_PER_BUFFER, &stateInp.buffers[i]);
            AudioQueueEnqueueBuffer (stateInp.queue, stateInp.buffers[i], 0, NULL);
        }

        stateInp.isCapturing = true;
        status = AudioQueueStart(stateInp.queue, NULL);
        if (status == 0) {
            _labelStatusInp.text = @"Status: Capturing";
            [sender setTitle:@"Stop Capturing" forState:UIControlStateNormal];
            [_buttonToggleCapture setBackgroundColor:[UIColor redColor]];
        }
    }

    if (status != 0) {
        [self stopCapturing];
    }
}
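
// invoked before a transcription is requested from the UI: leaves realtime
// mode and stops the capture so the buffered audio can be processed as a whole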
- (IBAction)onTranscribePrepare:(id)sender {
    _textviewResult.text = @"Processing - please wait ...";

    if (stateInp.isRealtime) {
        [self onRealtime:(id)sender];
    }

    if (stateInp.isCapturing) {
        [self stopCapturing];
    }
}
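
// toggle realtime mode: while enabled, AudioInputCallback triggers
// onTranscribe: after every captured buffer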
- (IBAction)onRealtime:(id)sender {
    stateInp.isRealtime = !stateInp.isRealtime;

    if (stateInp.isRealtime) {
        [_buttonRealtime setBackgroundColor:[UIColor greenColor]];
    } else {
        [_buttonRealtime setBackgroundColor:[UIColor grayColor]];
    }

    NSLog(@"Realtime: %@", stateInp.isRealtime ? @"ON" : @"OFF");
}
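
// convert the captured samples to float and run the whisper model on a
// background queue, then publish the transcription on the main thread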
- (IBAction)onTranscribe:(id)sender {
    if (stateInp.isTranscribing) {
        return;
    }

    NSLog(@"Processing %d samples", stateInp.n_samples);

    stateInp.isTranscribing = true;

    // dispatch the model to a background thread
    dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
        // process captured audio
        // convert I16 to F32
        for (int i = 0; i < self->stateInp.n_samples; i++) {
            self->stateInp.audioBufferF32[i] = (float)self->stateInp.audioBufferI16[i] / 32768.0f;
        }

        // run the model
        struct whisper_full_params params = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);

        // get maximum number of threads on this device (max 8)
        const int max_threads = MIN(8, (int)[[NSProcessInfo processInfo] processorCount]);

        params.print_realtime   = true;
        params.print_progress   = false;
        params.print_timestamps = true;
        params.print_special    = false;
        params.translate        = false;
        params.language         = "en";
        params.n_threads        = max_threads;
        params.offset_ms        = 0;
        params.no_context       = true;
        params.single_segment   = self->stateInp.isRealtime;
        params.no_timestamps    = params.single_segment;
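
        // measure wall-clock time of the full inference pass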
        CFTimeInterval startTime = CACurrentMediaTime();

        whisper_reset_timings(self->stateInp.ctx);

        if (whisper_full(self->stateInp.ctx, params, self->stateInp.audioBufferF32, self->stateInp.n_samples) != 0) {
            NSLog(@"Failed to run the model");
            // report the failure on the main thread - UIKit must not be
            // touched from this background queue
            dispatch_async(dispatch_get_main_queue(), ^{
                self->_textviewResult.text = @"Failed to run the model";
                self->stateInp.isTranscribing = false;
            });
            return;
        }

        whisper_print_timings(self->stateInp.ctx);

        CFTimeInterval endTime = CACurrentMediaTime();

        NSLog(@"\nProcessing time: %5.3f, on %d threads", endTime - startTime, params.n_threads);

        // result text
        NSString *result = @"";

        int n_segments = whisper_full_n_segments(self->stateInp.ctx);
        for (int i = 0; i < n_segments; i++) {
            const char * text_cur = whisper_full_get_segment_text(self->stateInp.ctx, i);

            // append the text to the result
            result = [result stringByAppendingString:[NSString stringWithUTF8String:text_cur]];
        }

        const float tRecording = (float)self->stateInp.n_samples / (float)self->stateInp.dataFormat.mSampleRate;

        // append processing time
        result = [result stringByAppendingString:[NSString stringWithFormat:@"\n\n[recording time: %5.3f s]", tRecording]];
        result = [result stringByAppendingString:[NSString stringWithFormat:@" \n[processing time: %5.3f s]", endTime - startTime]];

        // dispatch the result to the main thread
        dispatch_async(dispatch_get_main_queue(), ^{
            self->_textviewResult.text = result;
            self->stateInp.isTranscribing = false;
        });
    });
}

//
// Callback implementation
//

void AudioInputCallback(void * inUserData,
                        AudioQueueRef inAQ,
                        AudioQueueBufferRef inBuffer,
                        const AudioTimeStamp * inStartTime,
                        UInt32 inNumberPacketDescriptions,
                        const AudioStreamPacketDescription * inPacketDescs)
{
    StateInp * stateInp = (StateInp*)inUserData;

    if (!stateInp->isCapturing) {
        NSLog(@"Not capturing, ignoring audio");
        return;
    }
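
    // 2 bytes per sample: the queue delivers signed 16-bit mono PCM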
    const int n = inBuffer->mAudioDataByteSize / 2;

    NSLog(@"Captured %d new samples", n);

    if (stateInp->n_samples + n > MAX_AUDIO_SEC*SAMPLE_RATE) {
        NSLog(@"Too much audio data, ignoring");

        dispatch_async(dispatch_get_main_queue(), ^{
            ViewController * vc = (__bridge ViewController *)(stateInp->vc);
            [vc stopCapturing];
        });

        return;
    }

    for (int i = 0; i < n; i++) {
        stateInp->audioBufferI16[stateInp->n_samples + i] = ((short*)inBuffer->mAudioData)[i];
    }

    stateInp->n_samples += n;

    // put the buffer back in the queue
    AudioQueueEnqueueBuffer(stateInp->queue, inBuffer, 0, NULL);

    if (stateInp->isRealtime) {
        // dispatch onTranscribe() to the main thread
        dispatch_async(dispatch_get_main_queue(), ^{
            ViewController * vc = (__bridge ViewController *)(stateInp->vc);
            [vc onTranscribe:nil];
        });
    }
}

@end