
Commit 95ce6be

nchopraharitabh-z01 authored and committed
chore: decouple audio file logging + transcription svc
1 parent c71241d commit 95ce6be

File tree

3 files changed: +96 -78 lines


apps/desktop/src/main/managers/recording-manager.ts

Lines changed: 63 additions & 13 deletions
@@ -1,10 +1,13 @@
-import { ipcMain } from "electron";
+import { ipcMain, app } from "electron";
 import { EventEmitter } from "node:events";
 import { logger, logPerformance } from "../logger";
 import { ServiceManager } from "./service-manager";
 import type { RecordingState } from "../../types/recording";
 import { Mutex } from "async-mutex";
 import type { ShortcutManager } from "../services/shortcut-manager";
+import { StreamingWavWriter } from "../../utils/streaming-wav-writer";
+import * as fs from "node:fs";
+import * as path from "node:path";

 export type RecordingMode = "idle" | "ptt" | "hands-free";

@@ -17,6 +20,10 @@ export class RecordingManager extends EventEmitter {
   private recordingState: RecordingState = "idle";
   private recordingMutex = new Mutex();
   private recordingMode: RecordingMode = "idle";
+  private currentAudioRecording: {
+    audioFilePath: string;
+    wavWriter: StreamingWavWriter;
+  } | null = null;

   constructor(private serviceManager: ServiceManager) {
     super();
@@ -87,6 +94,26 @@ export class RecordingManager extends EventEmitter {
     this.emit("mode-changed", this.getRecordingMode());
   }

+  /**
+   * Create audio file for recording session
+   */
+  private async createAudioFile(sessionId: string): Promise<string> {
+    // Create audio directory in app temp path
+    const audioDir = path.join(app.getPath("temp"), "amical-audio");
+    await fs.promises.mkdir(audioDir, { recursive: true });
+
+    // Create file path
+    const timestamp = new Date().toISOString().replace(/[:.]/g, "-");
+    const filePath = path.join(audioDir, `audio-${sessionId}-${timestamp}.wav`);
+
+    logger.audio.info("Created audio file for session", {
+      sessionId,
+      filePath,
+    });
+
+    return filePath;
+  }
+
   private setupIPCHandlers(): void {
     // Handle audio data chunks from renderer
     ipcMain.handle(
@@ -148,21 +175,28 @@ export class RecordingManager extends EventEmitter {
       const timestamp = new Date().toISOString().replace(/[:.]/g, "-");
       this.currentSessionId = `session-${timestamp}`;

+      // Create audio file and WAV writer
+      const audioFilePath = await this.createAudioFile(this.currentSessionId);
+      this.currentAudioRecording = {
+        audioFilePath,
+        wavWriter: new StreamingWavWriter(audioFilePath),
+      };
+
+      logger.audio.info("Audio recording initialized", {
+        sessionId: this.currentSessionId,
+        audioFilePath,
+      });
+
       // Mute system audio
       try {
         const swiftBridge = this.serviceManager.getService("swiftIOBridge");
         if (swiftBridge) {
-          await swiftBridge.call("muteSystemAudio", {});
+          //await swiftBridge.call("muteSystemAudio", {});
         }
       } catch (error) {
         logger.main.warn("Swift bridge not available for audio muting");
       }

-      // Refresh accessibility context - fire and forget
-      // appContextStore.refreshAccessibilityData();
-
-      // TODO: Preload models if needed (Phase 2)
-
       this.setState("recording");
       logger.audio.info("Recording started successfully", {
         sessionId: this.currentSessionId,
@@ -255,8 +289,10 @@ export class RecordingManager extends EventEmitter {
     }

     // Session should already exist from startRecording
-    if (!this.currentSessionId) {
-      logger.audio.error("No session ID found while handling audio chunk");
+    if (!this.currentSessionId || !this.currentAudioRecording) {
+      logger.audio.error(
+        "No session ID or audio recording found while handling audio chunk",
+      );
       return;
     }

@@ -266,6 +302,8 @@ export class RecordingManager extends EventEmitter {
       return;
     }

+    await this.currentAudioRecording.wavWriter.appendAudio(chunk);
+
     try {
       const transcriptionService = this.serviceManager.getService(
         "transcriptionService",
@@ -275,12 +313,13 @@ export class RecordingManager extends EventEmitter {
       }
       const startTime = Date.now();

-      // Process the chunk - pass isFinal flag
+      // Process the chunk - pass isFinal flag and audio file path
      const transcriptionResult =
        await transcriptionService.processStreamingChunk({
          sessionId: this.currentSessionId,
          audioChunk: chunk,
          isFinal: isFinalChunk,
+          audioFilePath: this.currentAudioRecording.audioFilePath,
        });

      logger.audio.debug("Processed audio chunk", {
@@ -292,6 +331,14 @@ export class RecordingManager extends EventEmitter {

      // If this was the final chunk, handle completion
      if (isFinalChunk) {
+        // Finalize the WAV file
+        await this.currentAudioRecording.wavWriter.finalize();
+        logger.audio.info("Finalized WAV file", {
+          sessionId: this.currentSessionId,
+          filePath: this.currentAudioRecording.audioFilePath,
+          dataSize: this.currentAudioRecording.wavWriter.getDataSize(),
+        });
+
        logPerformance("streaming transcription complete", startTime, {
          sessionId: this.currentSessionId,
          resultLength: transcriptionResult?.length || 0,
@@ -308,8 +355,9 @@ export class RecordingManager extends EventEmitter {
          await this.pasteTranscription(transcriptionResult);
        }

-        // Clean up session
+        // Clean up session and audio recording
        this.currentSessionId = null;
+        this.currentAudioRecording = null;

        // Ensure state is idle after completion
        if (this.recordingState === "stopping") {
@@ -320,8 +368,9 @@ export class RecordingManager extends EventEmitter {
      logger.audio.error("Error processing audio chunk:", error);

      if (isFinalChunk) {
-        // Clean up session on error
+        // Clean up session and audio recording on error
        this.currentSessionId = null;
+        this.currentAudioRecording = null;
        this.setState("error");
      }
    }
@@ -363,8 +412,9 @@ export class RecordingManager extends EventEmitter {
      await this.stopRecording();
    }

-    // Clear any active session
+    // Clear any active session and audio recording
    this.currentSessionId = null;
+    this.currentAudioRecording = null;
    this.setState("idle");
  }
}

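Taken together, the hunks above move ownership of the audio file into RecordingManager: it creates the WAV file, streams every chunk to disk itself, and only hands the path to the transcription service. A condensed sketch of the new flow, assembled from the added lines (the surrounding method and session-handling structure is omitted, so the wiring here is illustrative):

  // RecordingManager creates the WAV file up front...
  const audioFilePath = await this.createAudioFile(this.currentSessionId);
  this.currentAudioRecording = {
    audioFilePath,
    wavWriter: new StreamingWavWriter(audioFilePath),
  };

  // ...writes each renderer chunk to disk before transcription...
  await this.currentAudioRecording.wavWriter.appendAudio(chunk);

  // ...and passes only the file path to the transcription service.
  await transcriptionService.processStreamingChunk({
    sessionId: this.currentSessionId,
    audioChunk: chunk,
    isFinal: isFinalChunk,
    audioFilePath: this.currentAudioRecording.audioFilePath,
  });

  // On the final chunk, the writer is finalized and the recording state cleared.
  await this.currentAudioRecording.wavWriter.finalize();
  this.currentAudioRecording = null;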
apps/desktop/src/services/transcription-service.ts

Lines changed: 29 additions & 65 deletions
@@ -13,26 +13,16 @@ import { createTranscription } from "../db/transcriptions";
 import { logger } from "../main/logger";
 import { v4 as uuid } from "uuid";
 import { VADService } from "./vad-service";
-import { app } from "electron";
-import * as fs from "node:fs";
-import * as path from "node:path";
-
-import { StreamingWavWriter } from "../utils/streaming-wav-writer";
 import { Mutex } from "async-mutex";

 /**
  * Service for audio transcription and optional formatting
  */
-interface ExtendedStreamingSession extends StreamingSession {
-  wavWriter?: StreamingWavWriter;
-  audioFilePath?: string;
-}
-
 export class TranscriptionService {
   private whisperProvider: WhisperProvider;
   private openRouterProvider: OpenRouterProvider | null = null;
   private formatterEnabled = false;
-  private streamingSessions: Map<string, ExtendedStreamingSession> = new Map();
+  private streamingSessions: Map<string, StreamingSession> = new Map();
   private vadService: VADService | null;
   private settingsService: SettingsService;
   private vadMutex: Mutex;
@@ -143,35 +133,16 @@ export class TranscriptionService {
     }
   }

-  /**
-   * Create audio file for recording session
-   */
-  private async createAudioFile(sessionId: string): Promise<string> {
-    // Create audio directory in app temp path
-    const audioDir = path.join(app.getPath("temp"), "amical-audio");
-    await fs.promises.mkdir(audioDir, { recursive: true });
-
-    // Create file path
-    const timestamp = new Date().toISOString().replace(/[:.]/g, "-");
-    const filePath = path.join(audioDir, `audio-${sessionId}-${timestamp}.wav`);
-
-    logger.transcription.info("Created audio file for session", {
-      sessionId,
-      filePath,
-    });
-
-    return filePath;
-  }
-
   /**
    * Process a single audio chunk in streaming mode
    */
   async processStreamingChunk(options: {
     sessionId: string;
     audioChunk: Float32Array;
     isFinal?: boolean;
+    audioFilePath?: string;
   }): Promise<string> {
-    const { sessionId, audioChunk, isFinal = false } = options;
+    const { sessionId, audioChunk, isFinal = false, audioFilePath } = options;

     // Run VAD on the audio chunk
     let speechProbability = 0;
@@ -198,6 +169,7 @@ export class TranscriptionService {

     // Acquire transcription mutex
     await this.transcriptionMutex.acquire();
+
     // Auto-create session if it doesn't exist
     let session = this.streamingSessions.get(sessionId);
     if (!session) {
@@ -214,31 +186,18 @@ export class TranscriptionService {
       streamingContext.sharedData.accessibilityContext =
         appContextStore.getAccessibilityContext();

-      // Create audio file for this session
-      const audioFilePath = await this.createAudioFile(sessionId);
-
-      // Create streaming WAV writer
-      const wavWriter = new StreamingWavWriter(audioFilePath);
-
       session = {
         context: streamingContext,
         transcriptionResults: [],
-        audioFilePath,
-        wavWriter,
       };

       this.streamingSessions.set(sessionId, session);
+
       logger.transcription.info("Started streaming session", {
         sessionId,
-        audioFilePath,
       });
     }

-    // Write audio chunk to WAV file immediately
-    if (audioChunk.length > 0 && session.wavWriter) {
-      await session.wavWriter.appendAudio(audioChunk);
-    }
-
     // Process chunk if it has content
     if (audioChunk.length > 0) {
       // Direct frame to Whisper - it will handle aggregation and VAD internally
@@ -285,7 +244,7 @@ export class TranscriptionService {

     // Release transcription mutex
     this.transcriptionMutex.release();
-    let completeTranscriptionTillNow = session.transcriptionResults
+    const completeTranscriptionTillNow = session.transcriptionResults
       .join(" ")
       .trim();

@@ -302,13 +261,11 @@ export class TranscriptionService {
       chunkCount: session.transcriptionResults.length,
     });

-    // Format if enabled (currently disabled with && false)
-    // Commenting out to fix TypeScript errors since this code path is never executed
-    /*
-    if (this.formatterEnabled && this.openRouterProvider && false) {
+    if (this.formatterEnabled && this.openRouterProvider) {
+      try {
        const style =
          session.context.sharedData.userPreferences?.formattingStyle;
-        completeTranscription = await this.openRouterProvider.format({
+        const formattedText = await this.openRouterProvider.format({
          text: completeTranscription,
          context: {
            style,
@@ -324,24 +281,31 @@ export class TranscriptionService {
            aggregatedTranscription: completeTranscription,
          },
        });
-    }
-    */

-    // Finalize the WAV file
-    if (session.wavWriter) {
-      await session.wavWriter.finalize();
-      logger.transcription.info("Finalized WAV file", {
-        sessionId,
-        filePath: session.audioFilePath,
-        dataSize: session.wavWriter.getDataSize(),
-      });
+        logger.transcription.info("Text formatted successfully", {
+          sessionId,
+          originalLength: completeTranscription.length,
+          formattedLength: formattedText.length,
+        });
+
+        completeTranscription = formattedText;
+      } catch (error) {
+        logger.transcription.error(
+          "Formatting failed, using unformatted text",
+          {
+            sessionId,
+            error,
+          },
+        );
+        // Continue with unformatted text
+      }
     }

     // Save directly to database
     logger.transcription.info("Saving transcription with audio file", {
       sessionId,
-      audioFilePath: session.audioFilePath,
-      hasAudioFile: !!session.audioFilePath,
+      audioFilePath,
+      hasAudioFile: !!audioFilePath,
     });

     await createTranscription({
@@ -350,7 +314,7 @@ export class TranscriptionService {
       duration: session.context.sharedData.audioMetadata?.duration,
       speechModel: "whisper-local",
       formattingModel: this.formatterEnabled ? "openrouter" : undefined,
-      audioFile: session.audioFilePath,
+      audioFile: audioFilePath,
       meta: {
         sessionId,
         source: session.context.sharedData.audioMetadata?.source,

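On the service side, sessions no longer carry a WAV writer or file path; the caller supplies the path, and the service only logs it and stores it on the transcription row via createTranscription. A sketch of the caller-facing shape, assuming the inline options type shown in the diff (the named interface and the standalone declaration below are purely illustrative; in the code the options are declared inline on the TranscriptionService method):

  interface StreamingChunkOptions {
    sessionId: string;
    audioChunk: Float32Array;
    isFinal?: boolean;
    // New: path of the WAV file the RecordingManager is writing; the service
    // never touches the file, it only records the path on the saved transcription.
    audioFilePath?: string;
  }

  declare function processStreamingChunk(options: StreamingChunkOptions): Promise<string>;

  // Example call (values are made up for illustration):
  await processStreamingChunk({
    sessionId: "session-example",
    audioChunk: new Float32Array(1600),
    isFinal: true,
    audioFilePath: "/path/to/amical-audio/audio-session-example.wav",
  });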
apps/desktop/src/utils/streaming-wav-writer.ts

Lines changed: 4 additions & 0 deletions
@@ -66,6 +66,10 @@ export class StreamingWavWriter {
    * @param audioData Float32Array of audio samples
    */
   async appendAudio(audioData: Float32Array): Promise<void> {
+    if (!audioData.length) {
+      return;
+    }
+
     if (this.isFinalized) {
       throw new Error("Cannot append to finalized WAV file");
     }

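With this guard, empty chunks are a no-op at the writer, so callers such as RecordingManager do not need to pre-filter them. A minimal usage sketch based only on the methods visible in this commit (the file path and sample values are made up):

  import { StreamingWavWriter } from "./streaming-wav-writer";

  const writer = new StreamingWavWriter("/tmp/example.wav");
  await writer.appendAudio(new Float32Array(0));                    // returns immediately now
  await writer.appendAudio(new Float32Array([0.01, -0.02, 0.03]));  // appended to the file
  await writer.finalize();                                          // closes out the WAV file
  // A further appendAudio() call would throw: "Cannot append to finalized WAV file"
  console.log(writer.getDataSize());                                // amount of audio data written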