Predictive-Code-Repo/whisper_test.py at main · LouisBrammer/Predictive-Code-Repo · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
# Before running, activate the virtual environment in the terminal:
#   source whisper_env/bin/activate

import whisper
import sounddevice as sd
import numpy as np
import scipy.io.wavfile as wav
import os
import time
import signal
import pandas as pd
from datetime import datetime
import glob

import sys

# Index of the input device handed to sounddevice when recording.
# Pick a value from the device list printed below.
MICROPHONE_DEVICE_INDEX = 0  # 0 = MacBook Air microphone (machine-specific; verify against the printed list)

# Configuration
MAX_RECORDINGS = 50  # Maximum number of recordings before stopping
CLEANUP_OLD_FILES = True  # Whether to delete old audio files
AUDIO_FOLDER = "backup/audio_files"  # Default folder for saved recordings; the transcription CSV is written here too

# Print available audio devices (this runs at module load, before main())
print("Available audio devices:")
print(sd.query_devices())

def cleanup_old_files(folder=AUDIO_FOLDER, keep_last=5):
    """Delete all but the most recently modified WAV files in a folder.

    Args:
        folder: Directory to scan for ``*.wav`` files. If it is not an
            existing directory, nothing happens.
        keep_last: Number of newest files (by modification time) to keep.
            Must be >= 0; 0 deletes every WAV file in the folder.

    Raises:
        ValueError: If ``keep_last`` is negative.
    """
    # A negative value would make the slice below select only the oldest
    # |keep_last| files, which is never what "keep the last N" means.
    if keep_last < 0:
        raise ValueError(f"keep_last must be >= 0, got {keep_last}")

    if not os.path.isdir(folder):
        return

    def _mtime(path):
        # A file can vanish between the glob and the stat. Rank it as the
        # oldest so the sort does not crash; the removal attempt below then
        # reports the problem for that single file.
        try:
            return os.path.getmtime(path)
        except OSError:
            return float("-inf")

    # Newest first, so everything past index keep_last is eligible for removal.
    wav_files = sorted(
        glob.glob(os.path.join(folder, "*.wav")), key=_mtime, reverse=True
    )

    for old_file in wav_files[keep_last:]:
        try:
            os.remove(old_file)
            print(f"Cleaned up old file: {old_file}")
        except OSError as e:
            # Best effort: report the failure and keep going with the rest.
            print(f"Error cleaning up {old_file}: {e}")

def signal_handler(sig, frame):
    """Announce shutdown and terminate the process with exit status 0.

    Args:
        sig: Signal number passed by the signal machinery (unused).
        frame: Stack frame passed by the signal machinery (unused).

    Raises:
        SystemExit: Always, with code 0.
    """
    print("\nStopping the recording process...")
    raise SystemExit(0)

def record_audio(duration=10, sample_rate=16000):
    """Record mono audio from the configured microphone.

    Prints a three-second countdown, then reads from the input device
    selected by ``MICROPHONE_DEVICE_INDEX`` in chunks of at most one second,
    printing the mean absolute level of each chunk. A warning is printed when
    the overall mean absolute level is below 0.01.

    Args:
        duration: Length of the recording in seconds. Fractional values are
            supported; the total is truncated to whole frames.
        sample_rate: Sampling rate in Hz (also the chunk size in frames).

    Returns:
        A float32 NumPy array of shape ``(int(duration * sample_rate), 1)``.
    """
    print(f"\nRecording for {duration} seconds...")
    print("Get ready to speak...")

    # Countdown
    for i in range(3, 0, -1):
        print(f"Starting in {i}...")
        time.sleep(1)

    print("Please speak into your microphone...")

    # Pre-allocate the full buffer; chunks are copied into it as they arrive.
    total_frames = int(duration * sample_rate)
    recording = np.zeros((total_frames, 1), dtype='float32')

    # Start recording
    with sd.InputStream(samplerate=sample_rate, channels=1, dtype='float32', device=MICROPHONE_DEVICE_INDEX) as stream:
        for start in range(0, total_frames, sample_rate):
            # The last chunk is shorter than one second when the duration is
            # fractional. Reading a full second there would not fit into the
            # remaining slice of the buffer and the assignment would fail.
            frames = min(sample_rate, total_frames - start)
            chunk, _ = stream.read(frames)
            recording[start:start + frames] = chunk

            # Calculate and print audio level ('\r' keeps it on one line)
            level = np.abs(chunk).mean()
            print(f"Audio level: {level:.4f}", end='\r')

    print("\nRecording finished!")

    # Check if we got any audio
    avg_level = np.abs(recording).mean()
    if avg_level < 0.01:
        print("\n⚠️  WARNING: Very low audio levels detected!")
        print("Please try:")
        print("1. Speaking louder")
        print("2. Moving closer to the microphone")
        print("3. Checking your system's microphone settings")
        print(f"Current audio level: {avg_level:.4f}")

    return recording

def save_audio(recording, sample_rate=16000, folder=AUDIO_FOLDER):
    """Write a float recording to a timestamped 16-bit WAV file.

    Args:
        recording: Array of float samples, nominally in ``[-1.0, 1.0]``.
            Values outside that range are clipped before conversion.
        sample_rate: Sampling rate in Hz written to the WAV header.
        folder: Destination directory; created if it does not exist.

    Returns:
        Path of the written file, ``<folder>/recording_<YYYYmmdd_HHMMSS>.wav``.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(folder, exist_ok=True)
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = os.path.join(folder, f"recording_{timestamp}.wav")
    # Convert float32 to int16. Clip first: a sample beyond +/-1.0 would
    # exceed the int16 range after scaling and wrap around in the cast,
    # producing loud clicks instead of simple saturation.
    pcm = (np.clip(recording, -1.0, 1.0) * 32767).astype(np.int16)
    wav.write(filename, sample_rate, pcm)
    return filename

def transcribe_audio(model, audio_file):
    """Run the given model on an audio file and return the recognized text.

    Args:
        model: Object exposing ``transcribe(path)`` whose result can be
            indexed with ``"text"`` (in this script, a loaded Whisper model).
        audio_file: Path of the audio file to transcribe.

    Returns:
        The ``"text"`` entry of the transcription result.
    """
    print("Transcribing...")
    return model.transcribe(audio_file)["text"]

def main():
    """Record, save and transcribe up to MAX_RECORDINGS clips, then write a CSV.

    Each iteration records 10 seconds of audio, saves it as a WAV file,
    transcribes it with the Whisper "tiny" model and remembers the
    (audio_file, transcription) pair. A failure in one iteration is reported
    and the loop moves on to the next recording. Whatever was collected is
    written to ``transcriptions.csv`` inside AUDIO_FOLDER when the loop ends
    or is interrupted.

    Unlike a ``sys.exit(0)`` inside ``finally``, this function returns
    normally, so an unexpected error is not masked by a forced exit status
    of 0.
    """
    # Set up signal handler for Ctrl+C. The handler exits via SystemExit,
    # which still passes through the finally block below.
    signal.signal(signal.SIGINT, signal_handler)

    # Load the Whisper model
    print("Loading Whisper model...")
    model = whisper.load_model("tiny")

    # Print selected audio device
    device_info = sd.query_devices(MICROPHONE_DEVICE_INDEX)
    print(f"\nUsing audio device: {device_info['name']}")
    print(f"Input channels: {device_info['max_input_channels']}")

    # Collect rows in a plain list and build the DataFrame once at the end;
    # concatenating onto a DataFrame on every iteration copies all earlier
    # rows each time and starts from an empty frame.
    rows = []

    try:
        for i in range(MAX_RECORDINGS):
            print(f"\n{'='*50}")
            print(f"Recording {i+1} of {MAX_RECORDINGS}")
            print(f"{'='*50}")

            try:
                # Record audio
                recording = record_audio(duration=10)  # Record for 10 seconds

                # Save the recording
                audio_file = save_audio(recording)
                print(f"Saved audio to: {audio_file}")

                # Transcribe the recording. transcribe_audio prints its own
                # progress message, so only the blank separator line is
                # emitted here to avoid showing "Transcribing..." twice.
                print()
                transcription = transcribe_audio(model, audio_file)

                # Print the transcription
                print("\nTranscription:", transcription if transcription.strip() else "[No speech detected]")

                # Store the result
                rows.append({"audio_file": audio_file, "transcription": transcription})
                print(f"Added transcription to DataFrame. Current size: {len(rows)}")

                # Clean up old files if enabled
                if CLEANUP_OLD_FILES:
                    cleanup_old_files(keep_last=MAX_RECORDINGS)

                # Wait a bit before next recording
                if i < MAX_RECORDINGS - 1:  # Don't wait after the last recording
                    print("\nWaiting 2 seconds before next recording...")
                    time.sleep(2)

            except Exception as e:
                # Top-level boundary for one iteration: report and move on.
                print(f"\nError during recording {i+1}: {str(e)}")
                print("Continuing with next recording...")

    except KeyboardInterrupt:
        print("\nStopping the recording process...")
    finally:
        # Save the collected transcriptions to CSV
        if rows:
            df = pd.DataFrame(rows, columns=["audio_file", "transcription"])
            os.makedirs(AUDIO_FOLDER, exist_ok=True)
            csv_path = os.path.join(AUDIO_FOLDER, "transcriptions.csv")
            df.to_csv(csv_path, index=False)
            print(f"\nTranscriptions saved to {csv_path}")
            print(f"Total recordings processed: {len(df)}")
        else:
            print("\nNo recordings were processed.")

# Start the recording loop only when this file is executed as a script.
if __name__ == "__main__":
    main()