
Commit 95ce6be

nchopraharitabh-z01 authored and committed
chore: decouple audio file logging + transcription svc
1 parent c71241d commit 95ce6be

File tree

3 files changed: +96 -78 lines


apps/desktop/src/main/managers/recording-manager.ts

Lines changed: 63 additions & 13 deletions
@@ -1,10 +1,13 @@
-import { ipcMain } from "electron";
+import { ipcMain, app } from "electron";
 import { EventEmitter } from "node:events";
 import { logger, logPerformance } from "../logger";
 import { ServiceManager } from "./service-manager";
 import type { RecordingState } from "../../types/recording";
 import { Mutex } from "async-mutex";
 import type { ShortcutManager } from "../services/shortcut-manager";
+import { StreamingWavWriter } from "../../utils/streaming-wav-writer";
+import * as fs from "node:fs";
+import * as path from "node:path";

 export type RecordingMode = "idle" | "ptt" | "hands-free";

@@ -17,6 +20,10 @@ export class RecordingManager extends EventEmitter {
   private recordingState: RecordingState = "idle";
   private recordingMutex = new Mutex();
   private recordingMode: RecordingMode = "idle";
+  private currentAudioRecording: {
+    audioFilePath: string;
+    wavWriter: StreamingWavWriter;
+  } | null = null;

   constructor(private serviceManager: ServiceManager) {
     super();
@@ -87,6 +94,26 @@ export class RecordingManager extends EventEmitter {
     this.emit("mode-changed", this.getRecordingMode());
   }

+  /**
+   * Create audio file for recording session
+   */
+  private async createAudioFile(sessionId: string): Promise<string> {
+    // Create audio directory in app temp path
+    const audioDir = path.join(app.getPath("temp"), "amical-audio");
+    await fs.promises.mkdir(audioDir, { recursive: true });
+
+    // Create file path
+    const timestamp = new Date().toISOString().replace(/[:.]/g, "-");
+    const filePath = path.join(audioDir, `audio-${sessionId}-${timestamp}.wav`);
+
+    logger.audio.info("Created audio file for session", {
+      sessionId,
+      filePath,
+    });
+
+    return filePath;
+  }
+
   private setupIPCHandlers(): void {
     // Handle audio data chunks from renderer
     ipcMain.handle(
@@ -148,21 +175,28 @@ export class RecordingManager extends EventEmitter {
       const timestamp = new Date().toISOString().replace(/[:.]/g, "-");
       this.currentSessionId = `session-${timestamp}`;

+      // Create audio file and WAV writer
+      const audioFilePath = await this.createAudioFile(this.currentSessionId);
+      this.currentAudioRecording = {
+        audioFilePath,
+        wavWriter: new StreamingWavWriter(audioFilePath),
+      };
+
+      logger.audio.info("Audio recording initialized", {
+        sessionId: this.currentSessionId,
+        audioFilePath,
+      });
+
       // Mute system audio
       try {
         const swiftBridge = this.serviceManager.getService("swiftIOBridge");
         if (swiftBridge) {
-          await swiftBridge.call("muteSystemAudio", {});
+          //await swiftBridge.call("muteSystemAudio", {});
         }
       } catch (error) {
         logger.main.warn("Swift bridge not available for audio muting");
       }

-      // Refresh accessibility context - fire and forget
-      // appContextStore.refreshAccessibilityData();
-
-      // TODO: Preload models if needed (Phase 2)
-
       this.setState("recording");
       logger.audio.info("Recording started successfully", {
         sessionId: this.currentSessionId,
@@ -255,8 +289,10 @@ export class RecordingManager extends EventEmitter {
     }

     // Session should already exist from startRecording
-    if (!this.currentSessionId) {
-      logger.audio.error("No session ID found while handling audio chunk");
+    if (!this.currentSessionId || !this.currentAudioRecording) {
+      logger.audio.error(
+        "No session ID or audio recording found while handling audio chunk",
+      );
       return;
     }

@@ -266,6 +302,8 @@ export class RecordingManager extends EventEmitter {
       return;
     }

+    await this.currentAudioRecording.wavWriter.appendAudio(chunk);
+
     try {
       const transcriptionService = this.serviceManager.getService(
         "transcriptionService",
@@ -275,12 +313,13 @@ export class RecordingManager extends EventEmitter {
       }
       const startTime = Date.now();

-      // Process the chunk - pass isFinal flag
+      // Process the chunk - pass isFinal flag and audio file path
      const transcriptionResult =
        await transcriptionService.processStreamingChunk({
          sessionId: this.currentSessionId,
          audioChunk: chunk,
          isFinal: isFinalChunk,
+          audioFilePath: this.currentAudioRecording.audioFilePath,
        });

      logger.audio.debug("Processed audio chunk", {
@@ -292,6 +331,14 @@ export class RecordingManager extends EventEmitter {

      // If this was the final chunk, handle completion
      if (isFinalChunk) {
+        // Finalize the WAV file
+        await this.currentAudioRecording.wavWriter.finalize();
+        logger.audio.info("Finalized WAV file", {
+          sessionId: this.currentSessionId,
+          filePath: this.currentAudioRecording.audioFilePath,
+          dataSize: this.currentAudioRecording.wavWriter.getDataSize(),
+        });
+
        logPerformance("streaming transcription complete", startTime, {
          sessionId: this.currentSessionId,
          resultLength: transcriptionResult?.length || 0,
@@ -308,8 +355,9 @@ export class RecordingManager extends EventEmitter {
          await this.pasteTranscription(transcriptionResult);
        }

-        // Clean up session
+        // Clean up session and audio recording
        this.currentSessionId = null;
+        this.currentAudioRecording = null;

        // Ensure state is idle after completion
        if (this.recordingState === "stopping") {
@@ -320,8 +368,9 @@ export class RecordingManager extends EventEmitter {
      logger.audio.error("Error processing audio chunk:", error);

      if (isFinalChunk) {
-        // Clean up session on error
+        // Clean up session and audio recording on error
        this.currentSessionId = null;
+        this.currentAudioRecording = null;
        this.setState("error");
      }
    }
@@ -363,8 +412,9 @@ export class RecordingManager extends EventEmitter {
      await this.stopRecording();
    }

-    // Clear any active session
+    // Clear any active session and audio recording
    this.currentSessionId = null;
+    this.currentAudioRecording = null;
    this.setState("idle");
  }
}

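Taken together, the hunks above move ownership of the audio file into RecordingManager: it creates the WAV file, streams every chunk to disk itself, and only hands the path to the transcription service. A condensed sketch of the new flow, assembled from the added lines (the surrounding method and session-handling structure is omitted, so the wiring here is illustrative):

  // RecordingManager creates the WAV file up front...
  const audioFilePath = await this.createAudioFile(this.currentSessionId);
  this.currentAudioRecording = {
    audioFilePath,
    wavWriter: new StreamingWavWriter(audioFilePath),
  };

  // ...writes each renderer chunk to disk before transcription...
  await this.currentAudioRecording.wavWriter.appendAudio(chunk);

  // ...and passes only the file path to the transcription service.
  await transcriptionService.processStreamingChunk({
    sessionId: this.currentSessionId,
    audioChunk: chunk,
    isFinal: isFinalChunk,
    audioFilePath: this.currentAudioRecording.audioFilePath,
  });

  // On the final chunk, the writer is finalized and the recording state cleared.
  await this.currentAudioRecording.wavWriter.finalize();
  this.currentAudioRecording = null;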
apps/desktop/src/services/transcription-service.ts

Lines changed: 29 additions & 65 deletions
@@ -13,26 +13,16 @@ import { createTranscription } from "../db/transcriptions";
 import { logger } from "../main/logger";
 import { v4 as uuid } from "uuid";
 import { VADService } from "./vad-service";
-import { app } from "electron";
-import * as fs from "node:fs";
-import * as path from "node:path";
-
-import { StreamingWavWriter } from "../utils/streaming-wav-writer";
 import { Mutex } from "async-mutex";

 /**
  * Service for audio transcription and optional formatting
  */
-interface ExtendedStreamingSession extends StreamingSession {
-  wavWriter?: StreamingWavWriter;
-  audioFilePath?: string;
-}
-
 export class TranscriptionService {
   private whisperProvider: WhisperProvider;
   private openRouterProvider: OpenRouterProvider | null = null;
   private formatterEnabled = false;
-  private streamingSessions: Map<string, ExtendedStreamingSession> = new Map();
+  private streamingSessions: Map<string, StreamingSession> = new Map();
   private vadService: VADService | null;
   private settingsService: SettingsService;
   private vadMutex: Mutex;
@@ -143,35 +133,16 @@ export class TranscriptionService {
     }
   }

-  /**
-   * Create audio file for recording session
-   */
-  private async createAudioFile(sessionId: string): Promise<string> {
-    // Create audio directory in app temp path
-    const audioDir = path.join(app.getPath("temp"), "amical-audio");
-    await fs.promises.mkdir(audioDir, { recursive: true });
-
-    // Create file path
-    const timestamp = new Date().toISOString().replace(/[:.]/g, "-");
-    const filePath = path.join(audioDir, `audio-${sessionId}-${timestamp}.wav`);
-
-    logger.transcription.info("Created audio file for session", {
-      sessionId,
-      filePath,
-    });
-
-    return filePath;
-  }
-
   /**
    * Process a single audio chunk in streaming mode
    */
   async processStreamingChunk(options: {
     sessionId: string;
     audioChunk: Float32Array;
     isFinal?: boolean;
+    audioFilePath?: string;
   }): Promise<string> {
-    const { sessionId, audioChunk, isFinal = false } = options;
+    const { sessionId, audioChunk, isFinal = false, audioFilePath } = options;

     // Run VAD on the audio chunk
     let speechProbability = 0;
@@ -198,6 +169,7 @@ export class TranscriptionService {

     // Acquire transcription mutex
     await this.transcriptionMutex.acquire();
+
     // Auto-create session if it doesn't exist
     let session = this.streamingSessions.get(sessionId);
     if (!session) {
@@ -214,31 +186,18 @@ export class TranscriptionService {
       streamingContext.sharedData.accessibilityContext =
         appContextStore.getAccessibilityContext();

-      // Create audio file for this session
-      const audioFilePath = await this.createAudioFile(sessionId);
-
-      // Create streaming WAV writer
-      const wavWriter = new StreamingWavWriter(audioFilePath);
-
       session = {
         context: streamingContext,
         transcriptionResults: [],
-        audioFilePath,
-        wavWriter,
       };

       this.streamingSessions.set(sessionId, session);
+
       logger.transcription.info("Started streaming session", {
         sessionId,
-        audioFilePath,
       });
     }

-    // Write audio chunk to WAV file immediately
-    if (audioChunk.length > 0 && session.wavWriter) {
-      await session.wavWriter.appendAudio(audioChunk);
-    }
-
     // Process chunk if it has content
     if (audioChunk.length > 0) {
       // Direct frame to Whisper - it will handle aggregation and VAD internally
@@ -285,7 +244,7 @@ export class TranscriptionService {

     // Release transcription mutex
     this.transcriptionMutex.release();
-    let completeTranscriptionTillNow = session.transcriptionResults
+    const completeTranscriptionTillNow = session.transcriptionResults
       .join(" ")
       .trim();

@@ -302,13 +261,11 @@ export class TranscriptionService {
       chunkCount: session.transcriptionResults.length,
     });

-    // Format if enabled (currently disabled with && false)
-    // Commenting out to fix TypeScript errors since this code path is never executed
-    /*
-    if (this.formatterEnabled && this.openRouterProvider && false) {
+    if (this.formatterEnabled && this.openRouterProvider) {
+      try {
        const style =
          session.context.sharedData.userPreferences?.formattingStyle;
-        completeTranscription = await this.openRouterProvider.format({
+        const formattedText = await this.openRouterProvider.format({
          text: completeTranscription,
          context: {
            style,
@@ -324,24 +281,31 @@ export class TranscriptionService {
            aggregatedTranscription: completeTranscription,
          },
        });
-    }
-    */

-    // Finalize the WAV file
-    if (session.wavWriter) {
-      await session.wavWriter.finalize();
-      logger.transcription.info("Finalized WAV file", {
-        sessionId,
-        filePath: session.audioFilePath,
-        dataSize: session.wavWriter.getDataSize(),
-      });
+        logger.transcription.info("Text formatted successfully", {
+          sessionId,
+          originalLength: completeTranscription.length,
+          formattedLength: formattedText.length,
+        });
+
+        completeTranscription = formattedText;
+      } catch (error) {
+        logger.transcription.error(
+          "Formatting failed, using unformatted text",
+          {
+            sessionId,
+            error,
+          },
+        );
+        // Continue with unformatted text
+      }
     }

     // Save directly to database
     logger.transcription.info("Saving transcription with audio file", {
       sessionId,
-      audioFilePath: session.audioFilePath,
-      hasAudioFile: !!session.audioFilePath,
+      audioFilePath,
+      hasAudioFile: !!audioFilePath,
     });

     await createTranscription({
@@ -350,7 +314,7 @@ export class TranscriptionService {
       duration: session.context.sharedData.audioMetadata?.duration,
       speechModel: "whisper-local",
       formattingModel: this.formatterEnabled ? "openrouter" : undefined,
-      audioFile: session.audioFilePath,
+      audioFile: audioFilePath,
       meta: {
         sessionId,
         source: session.context.sharedData.audioMetadata?.source,

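On the service side, sessions no longer carry a WAV writer or file path; the caller supplies the path, and the service only logs it and stores it on the transcription row via createTranscription. A sketch of the caller-facing shape, assuming the inline options type shown in the diff (the named interface and the standalone declaration below are purely illustrative; in the code the options are declared inline on the TranscriptionService method):

  interface StreamingChunkOptions {
    sessionId: string;
    audioChunk: Float32Array;
    isFinal?: boolean;
    // New: path of the WAV file the RecordingManager is writing; the service
    // never touches the file, it only records the path on the saved transcription.
    audioFilePath?: string;
  }

  declare function processStreamingChunk(options: StreamingChunkOptions): Promise<string>;

  // Example call (values are made up for illustration):
  await processStreamingChunk({
    sessionId: "session-example",
    audioChunk: new Float32Array(1600),
    isFinal: true,
    audioFilePath: "/path/to/amical-audio/audio-session-example.wav",
  });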
apps/desktop/src/utils/streaming-wav-writer.ts

Lines changed: 4 additions & 0 deletions
@@ -66,6 +66,10 @@ export class StreamingWavWriter {
    * @param audioData Float32Array of audio samples
    */
   async appendAudio(audioData: Float32Array): Promise<void> {
+    if (!audioData.length) {
+      return;
+    }
+
     if (this.isFinalized) {
       throw new Error("Cannot append to finalized WAV file");
     }

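With this guard, empty chunks are a no-op at the writer, so callers such as RecordingManager do not need to pre-filter them. A minimal usage sketch based only on the methods visible in this commit (the file path and sample values are made up):

  import { StreamingWavWriter } from "./streaming-wav-writer";

  const writer = new StreamingWavWriter("/tmp/example.wav");
  await writer.appendAudio(new Float32Array(0));                    // returns immediately now
  await writer.appendAudio(new Float32Array([0.01, -0.02, 0.03]));  // appended to the file
  await writer.finalize();                                          // closes out the WAV file
  // A further appendAudio() call would throw: "Cannot append to finalized WAV file"
  console.log(writer.getDataSize());                                // amount of audio data written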