Back to Projects
nlp
Status: active — Local Audio Transcription Pipeline — Whisper + CUDA
Local pipeline using openai-whisper with CUDA (RTX 3000) for legal-context audio transcription. Includes faster-whisper alternative for speed.
Tags: Python · Whisper · CUDA · NLP
Architecture
Audio Input → Preprocessing → Model Inference → Post-process
Code Snippet
import whisper
import os
from pathlib import Path
from datetime import datetime
# Set FFmpeg path — update to match your local installation.
FFMPEG_BIN = r"C:\ffmpeg\bin"
# Prepend FFmpeg to PATH so whisper's ffmpeg subprocess can be found.
# NOTE: this mutates the process environment as a module-level side effect.
os.environ["PATH"] = FFMPEG_BIN + os.pathsep + os.environ.get("PATH", "")
# --- User configuration ------------------------------------------------
AUDIO_PATH = r"path\to\your\audio_file.m4a"  # audio file to transcribe
MODEL_SIZE = "small" # tiny | base | small | medium | large
OUTPUT_FOLDER = "Transcriptions"  # created next to the audio file
LANGUAGE = None # None = auto-detect, or set e.g. "en", "es"
# Sentinel so callers can explicitly pass None (e.g. language=None = auto-detect)
# while omitted arguments fall back to the module-level configuration.
_MISSING = object()


def transcribe_audio(
    audio_path=_MISSING,
    model_size=_MISSING,
    output_folder=_MISSING,
    language=_MISSING,
):
    """Transcribe one audio file with openai-whisper and save the text to disk.

    All parameters default to the module-level configuration constants
    (AUDIO_PATH, MODEL_SIZE, OUTPUT_FOLDER, LANGUAGE), so calling
    ``transcribe_audio()`` with no arguments behaves exactly as before.

    Parameters
    ----------
    audio_path : str | Path
        Audio file to transcribe.
    model_size : str
        Whisper checkpoint name: tiny | base | small | medium | large.
    output_folder : str
        Folder name, created next to the audio file, for the transcript.
    language : str | None
        ISO language code, or None for Whisper's auto-detection.

    Returns
    -------
    Path | None
        Path of the written transcription file, or None if the input
        file does not exist.
    """
    if audio_path is _MISSING:
        audio_path = AUDIO_PATH
    if model_size is _MISSING:
        model_size = MODEL_SIZE
    if output_folder is _MISSING:
        output_folder = OUTPUT_FOLDER
    if language is _MISSING:
        language = LANGUAGE

    file = Path(audio_path)
    if not file.exists():
        print(f"File not found: {file}")
        return None

    print(f"Model: whisper-{model_size}")
    model = whisper.load_model(model_size)

    # fp16=False keeps inference in float32 — required on CPU and avoids
    # half-precision warnings. NOTE(review): on CUDA, fp16=True would be
    # faster; kept False to preserve existing behavior.
    result = model.transcribe(
        str(file),
        language=language,
        fp16=False,
    )
    text = result["text"].strip()

    # Transcript goes next to the source audio, under output_folder.
    output_dir = file.parent / output_folder
    output_dir.mkdir(parents=True, exist_ok=True)

    # Sanitize the stem so the output filename has no spaces or extra dots.
    stem = file.stem.replace(" ", "_").replace(".", "_")
    out_path = output_dir / f"transcription_{stem}.txt"
    with open(out_path, "w", encoding="utf-8") as f:
        f.write(f"Source file : {file.name}\n")
        f.write(f"Model : whisper-{model_size}\n")
        f.write(f"Language : {'auto-detect' if language is None else language}\n")
        f.write("=" * 60 + "\n\n")
        f.write(text + "\n")
    print(f"Transcription saved to: {out_path}")
    return out_path
if __name__ == "__main__":
    transcribe_audio()

# Detailed write-up, screenshots, and metrics coming in Phase 4.