
Here’s a ready-to-run Python GUI that does Voice → DeepL → Voice:
Record microphone audio, transcribe it (Whisper or Google Web Speech via SpeechRecognition), translate with DeepL, and speak the result with pyttsx3. It’s designed to “just work” and gracefully fall back when optional pieces aren’t installed.
pip install requests python-dotenv pyttsx3 sounddevice scipy numpy SpeechRecognition pydub
tkinter ships with most Python installs (on Windows/macOS).
Linux users: you may need sudo apt-get install python3-tk portaudio19-dev first.
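If recording silently fails, a quick sanity check of the audio stack (nothing app-specific, just sounddevice's own device listing):
import sounddevice as sd
print(sd.query_devices())   # every input/output device PortAudio can see
print(sd.default.device)    # (input, output) device indices currently in use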
Local Whisper (runs offline after a one-time model download; very accurate):
pip install -U openai-whisper  # aka "whisper"
# You also need ffmpeg:
# Windows: install https://www.gyan.dev/ffmpeg/builds/ and add to PATH
# macOS: brew install ffmpeg
# Linux: sudo apt-get install ffmpeg
If Whisper isn’t available, the app falls back to Google Web Speech via
SpeechRecognition (no key needed, internet required, lighter accuracy/limits).
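To confirm the optional Whisper path works before launching the GUI, something like this (the WAV path is a placeholder; the tiny model downloads on first use):
import whisper
print(whisper.available_models())            # model names openai-whisper knows about
model = whisper.load_model("tiny")           # smallest/fastest model
print(model.transcribe("path/to/sample.wav", fp16=False)["text"])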
Put a .env file next to the script:
DEEPL_API_KEY=your_key_here
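A one-off check that the key actually loads (the same python-dotenv call the app makes at startup):
import os
from dotenv import load_dotenv
load_dotenv()  # reads .env from the working directory
print("DEEPL_API_KEY set:", bool(os.getenv("DEEPL_API_KEY")))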
voice_deepl_voice.py:
#!/usr/bin/env python3
"""
Voice → DeepL → Voice
- Record microphone audio
- Transcribe via Whisper (if available) or Google Web Speech as fallback
- Translate via DeepL API (Free or Pro)
- Speak translated text with pyttsx3 (offline)
Requirements:
  pip install requests python-dotenv pyttsx3 sounddevice scipy numpy SpeechRecognition pydub
Optional for better STT:
  pip install -U openai-whisper
  + ffmpeg installed & in PATH
Run:
  python voice_deepl_voice.py
"""
import os
import io
import time
import queue
import threading
from pathlib import Path
from dataclasses import dataclass
import numpy as np
import sounddevice as sd
from scipy.io.wavfile import write as wav_write
import requests
from tkinter import Tk, StringVar, BooleanVar, ttk, filedialog, messagebox
from dotenv import load_dotenv
import pyttsx3
# Optional import: Whisper. SpeechRecognition (imported below) is a hard
# requirement, since it provides the fallback STT path.
HAS_WHISPER = False
try:
    import whisper  # openai-whisper
    HAS_WHISPER = True
except Exception:
    HAS_WHISPER = False
import speech_recognition as sr  # Google Web Speech fallback
# ------------- Config -------------
APP_TITLE = "Voice → DeepL → Voice"
SAMPLE_RATE = 16000
CHANNELS = 1
BLOCK_SIZE = 1024
TARGET_LANG_DEFAULT = "EN"
DEEPL_PRO = "https://api.deepl.com"
DEEPL_FREE = "https://api-free.deepl.com"
SUPPORTED_TARGETS = [
    "BG","CS","DA","DE","EL","EN","EN-GB","EN-US","ES","ET","FI","FR","HU","ID","IT","JA","KO",
    "LT","LV","NB","NL","PL","PT","PT-BR","PT-PT","RO","RU","SK","SL","SV","TR","UK","ZH"
]
@dataclass
class RecordingState:
    q: queue.Queue
    buffers: list
    recording: bool
    stream: sd.InputStream | None
# ------------- Helpers -------------
def infer_deepl_base(api_key: str, force_free: bool) -> str:
    if force_free:
        return DEEPL_FREE
    if api_key and api_key.endswith(":fx"):
        return DEEPL_FREE
    return DEEPL_PRO
def translate_deepl(text: str, api_key: str, target_lang: str, source_lang: str | None, force_free: bool=False) -> str:
    base = infer_deepl_base(api_key, force_free)
    url = f"{base}/v2/translate"
    headers = {"Authorization": f"DeepL-Auth-Key {api_key}"}
    data = {
        "text": text,
        "target_lang": target_lang
    }
    if source_lang:
        data["source_lang"] = source_lang
    r = requests.post(url, headers=headers, data=data, timeout=60)
    r.raise_for_status()
    js = r.json()
    # DeepL returns {"translations":[{"detected_source_language":"DE","text":"..."}]}
    return js["translations"][0]["text"]
def speak(text: str):
    engine = pyttsx3.init()
    engine.say(text)
    engine.runAndWait()
def ensure_wav_path(tmp_dir: Path) -> Path:
    tmp_dir.mkdir(parents=True, exist_ok=True)
    return tmp_dir / f"recording_{int(time.time())}.wav"
def transcribe_with_whisper(wav_path: Path, model_name: str="base") -> str:
    model = whisper.load_model(model_name)  # "tiny", "base", "small", "medium", "large"
    result = model.transcribe(str(wav_path), fp16=False)
    return (result.get("text") or "").strip()
def transcribe_with_google_sr(wav_path: Path) -> str:
    r = sr.Recognizer()
    with sr.AudioFile(str(wav_path)) as source:
        audio = r.record(source)
    # You can set language hint like "de-DE" here if you like:
    return r.recognize_google(audio)  # raises on failure
# ------------- GUI -------------
class App:
    def __init__(self, root: Tk):
        self.root = root
        root.title(APP_TITLE)
        root.geometry("780x560")
        root.minsize(720, 520)
        load_dotenv()
        self.api_key = StringVar(value=os.getenv("DEEPL_API_KEY", ""))
        self.use_free = BooleanVar(value=False)
        self.source_lang = StringVar(value="")        # empty → auto
        self.target_lang = StringVar(value=TARGET_LANG_DEFAULT)
        self.auto_speak = BooleanVar(value=True)
        self.use_whisper = BooleanVar(value=HAS_WHISPER)
        self.whisper_model = StringVar(value="base")
        self.status = StringVar(value="Ready.")
        self.transcript = StringVar(value="")
        self.translation = StringVar(value="")
        self.rec_state = RecordingState(q=queue.Queue(), buffers=[], recording=False, stream=None)
        self.wav_path: Path | None = None
        self._build_ui()
    def _build_ui(self):
        pad = {"padx":8, "pady":6}
        frm = ttk.Frame(self.root)
        frm.pack(fill="both", expand=True)
        # Row 0 - API key
        ttk.Label(frm, text="DeepL API Key:").grid(row=0, column=0, sticky="e", **pad)
        self.ent_api = ttk.Entry(frm, textvariable=self.api_key, width=48, show="•")
        self.ent_api.grid(row=0, column=1, columnspan=2, sticky="we", **pad)
        self.show_key = BooleanVar(value=False)
        ttk.Checkbutton(frm, text="Show", variable=self.show_key, command=self._toggle_key).grid(row=0, column=3, sticky="w", **pad)
        ttk.Checkbutton(frm, text="Use Free API (api-free.deepl.com)", variable=self.use_free).grid(row=1, column=1, sticky="w", **pad, columnspan=2)
        # Row 2 - Langs
        ttk.Label(frm, text="Source (optional):").grid(row=2, column=0, sticky="e", **pad)
        self.ent_src = ttk.Entry(frm, textvariable=self.source_lang, width=10)
        self.ent_src.grid(row=2, column=1, sticky="w", **pad)
        ttk.Label(frm, text="Target:").grid(row=2, column=2, sticky="e", **pad)
        self.cmb_tgt = ttk.Combobox(frm, state="readonly", values=SUPPORTED_TARGETS, textvariable=self.target_lang, width=10)
        self.cmb_tgt.grid(row=2, column=3, sticky="w", **pad)
        if self.target_lang.get() not in SUPPORTED_TARGETS:
            self.cmb_tgt.set(TARGET_LANG_DEFAULT)
        # Row 3 - STT settings
        ttk.Checkbutton(frm, text=f"Use Whisper (installed: {HAS_WHISPER})", variable=self.use_whisper).grid(row=3, column=1, sticky="w", **pad)
        ttk.Label(frm, text="Whisper model:").grid(row=3, column=2, sticky="e", **pad)
        self.cmb_wm = ttk.Combobox(frm, state="readonly", values=["tiny","base","small","medium","large"], textvariable=self.whisper_model, width=10)
        self.cmb_wm.grid(row=3, column=3, sticky="w", **pad)
        # Row 4 - controls
        self.btn_rec = ttk.Button(frm, text="● Record", command=self._start_record)
        self.btn_rec.grid(row=4, column=1, sticky="we", **pad)
        self.btn_stop = ttk.Button(frm, text="■ Stop", command=self._stop_record, state="disabled")
        self.btn_stop.grid(row=4, column=2, sticky="we", **pad)
        ttk.Button(frm, text="Open WAV…", command=self._open_wav).grid(row=4, column=3, sticky="we", **pad)
        # Row 5 - actions
        ttk.Button(frm, text="Transcribe", command=self._transcribe).grid(row=5, column=1, sticky="we", **pad)
        ttk.Button(frm, text="Translate", command=self._translate).grid(row=5, column=2, sticky="we", **pad)
        ttk.Checkbutton(frm, text="Auto speak after translate", variable=self.auto_speak).grid(row=5, column=3, sticky="w", **pad)
        # Row 6 - transcript
        ttk.Label(frm, text="Transcript:").grid(row=6, column=0, sticky="ne", **pad)
        self.txt_trans = ttk.Treeview(frm, columns=("t",), show="tree", height=6)
        self.txt_trans.grid(row=6, column=1, columnspan=3, sticky="nsew", **pad)
        # Row 7 - translation
        ttk.Label(frm, text="Translation:").grid(row=7, column=0, sticky="ne", **pad)
        self.txt_transl = ttk.Treeview(frm, columns=("t",), show="tree", height=6)
        self.txt_transl.grid(row=7, column=1, columnspan=3, sticky="nsew", **pad)
        # Row 8 - status
        ttk.Label(frm, text="Status:").grid(row=8, column=0, sticky="e", **pad)
        self.lbl_status = ttk.Label(frm, textvariable=self.status)
        self.lbl_status.grid(row=8, column=1, columnspan=3, sticky="w", **pad)
        # stretch
        frm.columnconfigure(1, weight=1)
        frm.columnconfigure(2, weight=1)
        frm.columnconfigure(3, weight=1)
        frm.rowconfigure(6, weight=1)
        frm.rowconfigure(7, weight=1)
    # ---------- UI helpers ----------
    def _toggle_key(self):
        self.ent_api.config(show="" if self.show_key.get() else "•")
    def _log(self, tree: ttk.Treeview, text: str):
        tree.insert("", "end", text=text)
        children = tree.get_children()
        if children:
            tree.see(children[-1])
    def _set_status(self, s: str):
        self.status.set(s)
        self.root.update_idletasks()
    # ---------- Recording ----------
    def _audio_callback(self, indata, frames, time_info, status):
        if status:
            # You can log overflows/underflows here
            pass
        self.rec_state.q.put(indata.copy())
    def _start_record(self):
        if self.rec_state.recording:
            return
        self.rec_state.buffers.clear()
        self.rec_state.q.queue.clear()
        self._set_status("Recording…")
        self.btn_rec.config(state="disabled")
        self.btn_stop.config(state="normal")
        def feeder():
            while self.rec_state.recording:
                try:
                    block = self.rec_state.q.get(timeout=0.2)
                    self.rec_state.buffers.append(block)
                except queue.Empty:
                    continue
        self.rec_state.recording = True
        self.rec_state.stream = sd.InputStream(samplerate=SAMPLE_RATE, channels=CHANNELS,
                                               blocksize=BLOCK_SIZE, dtype="float32",
                                               callback=self._audio_callback)
        self.rec_state.stream.start()
        threading.Thread(target=feeder, daemon=True).start()
    def _stop_record(self):
        if not self.rec_state.recording:
            return
        self.rec_state.recording = False
        try:
            self.rec_state.stream.stop()
            self.rec_state.stream.close()
        except Exception:
            pass
        self.rec_state.stream = None
        self._set_status("Saving WAV…")
        data = np.concatenate(self.rec_state.buffers, axis=0) if self.rec_state.buffers else np.zeros((0, CHANNELS), dtype="float32")
        data = (data * 32767).astype(np.int16)  # float32 → int16
        wav_path = ensure_wav_path(Path("./_tmp_audio"))
        wav_write(str(wav_path), SAMPLE_RATE, data)
        self.wav_path = wav_path
        self._set_status(f"Saved: {wav_path}")
        self.btn_rec.config(state="normal")
        self.btn_stop.config(state="disabled")
    def _open_wav(self):
        p = filedialog.askopenfilename(title="Choose WAV", filetypes=[("WAV", "*.wav")])
        if p:
            self.wav_path = Path(p)
            self._set_status(f"Selected WAV: {self.wav_path}")
    # ---------- STT / Translate ----------
    def _transcribe(self):
        if not self.wav_path or not self.wav_path.exists():
            messagebox.showwarning("No audio", "Record audio or choose a WAV first.")
            return
        self._set_status("Transcribing…")
        self.txt_trans.delete(*self.txt_trans.get_children())
        def worker():
            try:
                if self.use_whisper.get() and HAS_WHISPER:
                    text = transcribe_with_whisper(self.wav_path, self.whisper_model.get())
                else:
                    text = transcribe_with_google_sr(self.wav_path)
                text = text.strip()
                self.root.after(0, lambda: self._log(self.txt_trans, text or "(empty)"))
                self.root.after(0, lambda: self._set_status("Transcription OK."))
            except Exception as e:
                msg = str(e)  # bind now: `e` is unbound once the except block exits
                self.root.after(0, lambda: self._log(self.txt_trans, f"ERROR: {msg}"))
                self.root.after(0, lambda: self._set_status("Transcription failed."))
        threading.Thread(target=worker, daemon=True).start()
    def _translate(self):
        # Get last transcript line
        children = self.txt_trans.get_children()
        if not children:
            messagebox.showwarning("No transcript", "Transcribe first.")
            return
        last_text = self.txt_trans.item(children[-1], "text") or ""
        last_text = last_text.strip()
        if not last_text:
            messagebox.showwarning("Empty text", "Transcript is empty.")
            return
        api_key = self.api_key.get().strip()
        if not api_key:
            messagebox.showwarning("Missing DeepL key", "Paste your DeepL API key or put it in .env")
            return
        tgt = self.target_lang.get().strip() or TARGET_LANG_DEFAULT
        src = self.source_lang.get().strip() or None
        self._set_status("Translating with DeepL…")
        self.txt_transl.delete(*self.txt_transl.get_children())
        def worker():
            try:
                out = translate_deepl(last_text, api_key, tgt, src, self.use_free.get())
                out = out.strip()
                self.root.after(0, lambda: self._log(self.txt_transl, out or "(empty)"))
                self.root.after(0, lambda: self._set_status("Translation OK."))
                if self.auto_speak.get() and out:
                    speak(out)
            except Exception as e:
                msg = str(e)  # bind now: `e` is unbound once the except block exits
                self.root.after(0, lambda: self._log(self.txt_transl, f"ERROR: {msg}"))
                self.root.after(0, lambda: self._set_status("Translation failed."))
        threading.Thread(target=worker, daemon=True).start()
# ------------- Entry -------------
def main():
    root = Tk()
    try:
        style = ttk.Style()
        style.theme_use("clam")
    except Exception:
        pass
    App(root)
    root.mainloop()
if __name__ == "__main__":
    main()
How it works
Recording: a sounddevice InputStream at 16 kHz mono; incoming blocks are collected in a queue and appended into buffers in a background thread, then saved to _tmp_audio/recording_*.wav.
Transcription: Whisper loads the selected model (tiny/base/small/…) and calls model.transcribe(wav); without Whisper, the app uses SpeechRecognition as a fallback (recognize_google).
Translation: a POST to /v2/translate with your DeepL API key and target language (plus an optional source). Keys ending in :fx go to api-free.deepl.com; otherwise api.deepl.com.
Speech output: tweak the voice with engine.setProperty("voice", ...) or engine.setProperty("rate", 180) before speaking.

Troubleshooting
sounddevice: pip install sounddevice is usually enough (it bundles PortAudio binaries). On Linux: sudo apt-get install portaudio19-dev, then reinstall sounddevice.
Whisper missing:
pip install -U openai-whisper
# plus ffmpeg installed & in PATH
If Whisper is heavy for your machine, switch to Google Web Speech by unchecking “Use Whisper”.
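The GUI is just glue; the same pipeline can be driven from a REPL using the script's own functions (the WAV path and key below are placeholders):
from pathlib import Path
from voice_deepl_voice import transcribe_with_whisper, translate_deepl, speak

wav = Path("_tmp_audio/recording_1234567890.wav")  # placeholder: any WAV the app saved
text = transcribe_with_whisper(wav, "base")
out = translate_deepl(text, api_key="YOUR_DEEPL_KEY", target_lang="DE", source_lang=None)
print(out)
speak(out)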
DeepL errors
Bad target_lang: choose from the dropdown (e.g., EN, DE, ES, FR, JA, ZH).

Voice quality
pyttsx3 voices depend on your OS. On Windows, you can install additional SAPI5 voices; on macOS, install extra system voices.
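To see which voices your OS exposes and pick one, a minimal pyttsx3 sketch (the rate of 180 is just an example value):
import pyttsx3
engine = pyttsx3.init()
for v in engine.getProperty("voices"):  # voices installed on this OS
    print(v.id, "-", v.name)
engine.setProperty("rate", 180)         # speaking rate in words per minute
# engine.setProperty("voice", "<an id printed above>")
engine.say("Hello from pyttsx3")
engine.runAndWait()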