fix: voice loop — persistent event loop, markdown stripping, MCP noise
Three fixes from real-world testing:

1. Event loop: replaced asyncio.run() with a persistent loop so Agno's MCP sessions survive across conversation turns. No more 'Event loop is closed' errors on turn 2 and later.
2. Markdown stripping: the voice preamble tells Timmy to respond in natural spoken language, and _strip_markdown() acts as a safety net that removes **bold**, *italic*, bullets, headers, code fences, etc., so TTS no longer reads 'asterisk asterisk' aloud.
3. MCP noise: _suppress_mcp_noise() raises the mcp/agno/httpx loggers to WARNING during voice mode so the terminal shows a clean transcript only.

32 tests in total, 12 of them new (markdown stripping + persistent loop).
This commit is contained in:
@@ -15,6 +15,7 @@ Requires: sounddevice, numpy, whisper, piper-tts
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
@@ -26,6 +27,44 @@ import numpy as np
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ── Voice-mode system instruction ───────────────────────────────────────────
|
||||
# Prepended to user messages so Timmy responds naturally for TTS.
|
||||
_VOICE_PREAMBLE = (
|
||||
"[VOICE MODE] You are speaking aloud through a text-to-speech system. "
|
||||
"Respond in short, natural spoken sentences. No markdown, no bullet points, "
|
||||
"no asterisks, no numbered lists, no headers, no bold/italic formatting. "
|
||||
"Talk like a person in a conversation — concise, warm, direct. "
|
||||
"Keep responses under 3-4 sentences unless the user asks for detail."
|
||||
)
|
||||
|
||||
|
||||
def _strip_markdown(text: str) -> str:
|
||||
"""Remove markdown formatting so TTS reads naturally.
|
||||
|
||||
Strips: **bold**, *italic*, `code`, # headers, - bullets,
|
||||
numbered lists, [links](url), etc.
|
||||
"""
|
||||
if not text:
|
||||
return text
|
||||
# Remove bold/italic markers
|
||||
text = re.sub(r"\*{1,3}([^*]+)\*{1,3}", r"\1", text)
|
||||
# Remove inline code
|
||||
text = re.sub(r"`([^`]+)`", r"\1", text)
|
||||
# Remove headers (# Header)
|
||||
text = re.sub(r"^#{1,6}\s+", "", text, flags=re.MULTILINE)
|
||||
# Remove bullet points (-, *, +) at start of line
|
||||
text = re.sub(r"^[\s]*[-*+]\s+", "", text, flags=re.MULTILINE)
|
||||
# Remove numbered lists (1. 2. etc)
|
||||
text = re.sub(r"^[\s]*\d+\.\s+", "", text, flags=re.MULTILINE)
|
||||
# Remove link syntax [text](url) → text
|
||||
text = re.sub(r"\[([^\]]+)\]\([^)]+\)", r"\1", text)
|
||||
# Remove horizontal rules
|
||||
text = re.sub(r"^[-*_]{3,}\s*$", "", text, flags=re.MULTILINE)
|
||||
# Collapse multiple newlines
|
||||
text = re.sub(r"\n{3,}", "\n\n", text)
|
||||
return text.strip()
|
||||
|
||||
|
||||
# ── Defaults ────────────────────────────────────────────────────────────────
|
||||
|
||||
DEFAULT_WHISPER_MODEL = "base.en"
|
||||
@@ -75,6 +114,9 @@ class VoiceLoop:
|
||||
self._running = False
|
||||
self._speaking = False # True while TTS is playing
|
||||
self._interrupted = False # set when user talks over TTS
|
||||
# Persistent event loop — reused across all chat calls so Agno's
|
||||
# MCP sessions don't die when the loop closes.
|
||||
self._loop: asyncio.AbstractEventLoop | None = None
|
||||
|
||||
# ── Lazy initialization ─────────────────────────────────────────────
|
||||
|
||||
@@ -283,6 +325,16 @@ class VoiceLoop:
|
||||
|
||||
# ── LLM: Text → Response ───────────────────────────────────────────
|
||||
|
||||
def _get_loop(self) -> asyncio.AbstractEventLoop:
|
||||
"""Return a persistent event loop, creating one if needed.
|
||||
|
||||
A single loop is reused for the entire voice session so Agno's
|
||||
MCP tool-server connections survive across turns.
|
||||
"""
|
||||
if self._loop is None or self._loop.is_closed():
|
||||
self._loop = asyncio.new_event_loop()
|
||||
return self._loop
|
||||
|
||||
def _think(self, user_text: str) -> str:
|
||||
"""Send text to Timmy and get a response."""
|
||||
sys.stdout.write(" 💭 Thinking...\r")
|
||||
@@ -291,20 +343,29 @@ class VoiceLoop:
|
||||
t0 = time.monotonic()
|
||||
|
||||
try:
|
||||
response = asyncio.run(self._chat(user_text))
|
||||
loop = self._get_loop()
|
||||
response = loop.run_until_complete(self._chat(user_text))
|
||||
except Exception as exc:
|
||||
logger.error("Timmy chat failed: %s", exc)
|
||||
response = "I'm having trouble thinking right now. Could you try again?"
|
||||
|
||||
elapsed = time.monotonic() - t0
|
||||
logger.info("Timmy responded in %.1fs", elapsed)
|
||||
|
||||
# Strip markdown so TTS doesn't read asterisks, bullets, etc.
|
||||
response = _strip_markdown(response)
|
||||
return response
|
||||
|
||||
async def _chat(self, message: str) -> str:
|
||||
"""Async wrapper around Timmy's session.chat()."""
|
||||
"""Async wrapper around Timmy's session.chat().
|
||||
|
||||
Prepends the voice-mode instruction so Timmy responds in
|
||||
natural spoken language rather than markdown.
|
||||
"""
|
||||
from timmy.session import chat
|
||||
|
||||
return await chat(message, session_id=self.config.session_id)
|
||||
voiced = f"{_VOICE_PREAMBLE}\n\nUser said: {message}"
|
||||
return await chat(voiced, session_id=self.config.session_id)
|
||||
|
||||
# ── Main Loop ───────────────────────────────────────────────────────
|
||||
|
||||
@@ -312,6 +373,11 @@ class VoiceLoop:
|
||||
"""Run the voice loop. Blocks until Ctrl-C."""
|
||||
self._ensure_piper()
|
||||
|
||||
# Suppress MCP / Agno stderr noise during voice mode.
|
||||
# The "Secure MCP Filesystem Server running on stdio" messages
|
||||
# are distracting in a voice session.
|
||||
_suppress_mcp_noise()
|
||||
|
||||
tts_label = (
|
||||
"macOS say"
|
||||
if self.config.use_say_fallback
|
||||
@@ -381,7 +447,36 @@ class VoiceLoop:
|
||||
print("\n\n 👋 Voice loop stopped.\n")
|
||||
finally:
|
||||
self._running = False
|
||||
self._cleanup_loop()
|
||||
|
||||
def _cleanup_loop(self) -> None:
    """Shut down the persistent event loop cleanly.

    Finalizes any outstanding async generators on the loop, then closes
    the loop and clears ``self._loop``.  Does nothing when no loop exists
    or the loop is already closed.

    This is invoked from the voice loop's ``finally`` clause, so failures
    while finalizing async generators are logged instead of raised.  The
    loop is closed and the reference dropped even if finalization is
    interrupted.
    """
    loop = self._loop
    if loop is None or loop.is_closed():
        return

    try:
        loop.run_until_complete(loop.shutdown_asyncgens())
    except Exception as exc:
        # Best-effort cleanup: record the problem rather than hiding it.
        logger.debug("Event loop shutdown failed: %s", exc, exc_info=True)
    finally:
        # Runs for KeyboardInterrupt/SystemExit too, so the loop is never
        # left open with a stale reference on the instance.
        loop.close()
        self._loop = None
|
||||
|
||||
def stop(self) -> None:
    """Request that the voice loop stop.

    Meant to be called from a thread other than the one running the loop.
    The only effect here is clearing the ``_running`` flag.
    """
    self._running = False
|
||||
|
||||
|
||||
def _suppress_mcp_noise() -> None:
|
||||
"""Quiet down noisy MCP/Agno loggers during voice mode.
|
||||
|
||||
Sets specific loggers to WARNING so the terminal stays clean
|
||||
for the voice transcript.
|
||||
"""
|
||||
for name in (
|
||||
"mcp",
|
||||
"mcp.server",
|
||||
"mcp.client",
|
||||
"agno",
|
||||
"agno.mcp",
|
||||
"httpx",
|
||||
"httpcore",
|
||||
):
|
||||
logging.getLogger(name).setLevel(logging.WARNING)
|
||||
|
||||
@@ -9,7 +9,7 @@ from unittest.mock import MagicMock, patch
|
||||
|
||||
import numpy as np
|
||||
|
||||
from timmy.voice_loop import VoiceConfig, VoiceLoop
|
||||
from timmy.voice_loop import VoiceConfig, VoiceLoop, _strip_markdown
|
||||
|
||||
# ── VoiceConfig tests ──────────────────────────────────────────────────────
|
||||
|
||||
@@ -97,21 +97,72 @@ class TestTranscribe:
|
||||
assert result == ""
|
||||
|
||||
|
||||
class TestStripMarkdown:
    """Checks that ``_strip_markdown`` removes markup and keeps the words."""

    def test_strips_bold(self):
        cleaned = _strip_markdown("**hello**")
        assert cleaned == "hello"

    def test_strips_italic(self):
        cleaned = _strip_markdown("*hello*")
        assert cleaned == "hello"

    def test_strips_headers(self):
        cleaned = _strip_markdown("## Header\ntext")
        assert cleaned == "Header\ntext"

    def test_strips_bullets(self):
        cleaned = _strip_markdown("- item one\n- item two")
        assert cleaned == "item one\nitem two"

    def test_strips_numbered_lists(self):
        cleaned = _strip_markdown("1. first\n2. second")
        assert cleaned == "first\nsecond"

    def test_strips_inline_code(self):
        cleaned = _strip_markdown("use `pip install`")
        assert cleaned == "use pip install"

    def test_strips_links(self):
        cleaned = _strip_markdown("[click here](https://x.com)")
        assert cleaned == "click here"

    def test_preserves_plain_text(self):
        plain = "Hello, how are you?"
        assert _strip_markdown(plain) == plain

    def test_empty_string(self):
        assert _strip_markdown("") == ""

    def test_none_passthrough(self):
        # Falsy input is handed back untouched, including None.
        assert _strip_markdown(None) is None

    def test_complex_markdown(self):
        source = "**1. First** thing\n- use `code`\n*emphasis*"
        cleaned = _strip_markdown(source)
        # No emphasis or code markers may survive in the spoken text.
        for marker in ("**", "`", "*"):
            assert marker not in cleaned
|
||||
|
||||
|
||||
class TestThink:
|
||||
@patch("timmy.voice_loop.asyncio")
|
||||
def test_think_returns_response(self, mock_asyncio):
|
||||
mock_asyncio.run.return_value = "I am Timmy."
|
||||
def test_think_returns_response(self):
|
||||
loop = VoiceLoop()
|
||||
loop._loop = MagicMock()
|
||||
loop._loop.is_closed.return_value = False
|
||||
loop._loop.run_until_complete.return_value = "I am Timmy."
|
||||
result = loop._think("Who are you?")
|
||||
assert result == "I am Timmy."
|
||||
|
||||
@patch("timmy.voice_loop.asyncio")
|
||||
def test_think_handles_error(self, mock_asyncio):
|
||||
mock_asyncio.run.side_effect = RuntimeError("Ollama down")
|
||||
def test_think_handles_error(self):
|
||||
loop = VoiceLoop()
|
||||
loop._loop = MagicMock()
|
||||
loop._loop.is_closed.return_value = False
|
||||
loop._loop.run_until_complete.side_effect = RuntimeError("Ollama down")
|
||||
result = loop._think("test")
|
||||
assert "trouble" in result.lower()
|
||||
|
||||
def test_think_strips_markdown(self):
|
||||
loop = VoiceLoop()
|
||||
loop._loop = MagicMock()
|
||||
loop._loop.is_closed.return_value = False
|
||||
loop._loop.run_until_complete.return_value = "**Hello** from *Timmy*"
|
||||
result = loop._think("test")
|
||||
assert "**" not in result
|
||||
assert "*" not in result
|
||||
assert "Hello" in result
|
||||
|
||||
|
||||
class TestSpeakSay:
|
||||
@patch("subprocess.Popen")
|
||||
|
||||
Reference in New Issue
Block a user