[loop-cycle-6] Break thinking rumination loop — semantic dedup (#38)

Add post-generation similarity check to ThinkingEngine.think_once().

Problem: Timmy's thinking engine generates repetitive thoughts because small local models ignore 'don't repeat' instructions in the prompt. The same observation ('still no chat messages', 'Alexander's name is in profile') would appear 14+ times in a single day's journal.

Fix: After generating a thought, compare it against the last 5 thoughts using difflib.SequenceMatcher (a character-level similarity ratio on lowercased text, used as a cheap stand-in for semantic similarity). If similarity >= 0.6, retry with a new seed up to 2 times (an explicit journal prompt, when supplied, is reused on every attempt). If all retries produce repetitive content, discard rather than store. Uses stdlib difflib — no new dependencies.

Changes:
- thinking.py: Add _is_too_similar() method with SequenceMatcher
- thinking.py: Wrap generation in retry loop with dedup check
- test_thinking.py: 6 new tests covering exact match, near match, different thoughts, empty history, retry behavior, and max-retry discard

+77/-21 lines in thinking.py, +87 lines in tests.
2026-03-14 16:21:16 -04:00
parent 0d61b709da
commit b30b5c6b57
2 changed files with 164 additions and 21 deletions
--- a/src/timmy/thinking.py
+++ b/src/timmy/thinking.py
@@ -23,6 +23,7 @@ import sqlite3
 import uuid
 from dataclasses import dataclass
 from datetime import UTC, datetime, timedelta
+from difflib import SequenceMatcher
 from pathlib import Path

 from config import settings
@@ -196,33 +197,63 @@ class ThinkingEngine:
        if not settings.thinking_enabled:
            return None

-        if prompt:
-            seed_type = "prompted"
-            seed_context = f"Journal prompt: {prompt}"
-        else:
-            seed_type, seed_context = self._gather_seed()
-        continuity = self._build_continuity_context()
        memory_context = self._load_memory_context()
        system_context = self._gather_system_snapshot()
+        recent_thoughts = self.get_recent_thoughts(limit=5)

-        prompt = _THINKING_PROMPT.format(
-            memory_context=memory_context,
-            system_context=system_context,
-            seed_context=seed_context,
-            continuity_context=continuity,
-        )
+        content: str | None = None
+        seed_type: str = "freeform"

-        try:
-            content = await self._call_agent(prompt)
-        except Exception as exc:
-            logger.warning("Thinking cycle failed (Ollama likely down): %s", exc)
+        for attempt in range(self._MAX_DEDUP_RETRIES + 1):
+            if prompt:
+                seed_type = "prompted"
+                seed_context = f"Journal prompt: {prompt}"
+            else:
+                seed_type, seed_context = self._gather_seed()
+
+            continuity = self._build_continuity_context()
+
+            full_prompt = _THINKING_PROMPT.format(
+                memory_context=memory_context,
+                system_context=system_context,
+                seed_context=seed_context,
+                continuity_context=continuity,
+            )
+
+            try:
+                raw = await self._call_agent(full_prompt)
+            except Exception as exc:
+                logger.warning("Thinking cycle failed (Ollama likely down): %s", exc)
+                return None
+
+            if not raw or not raw.strip():
+                logger.debug("Thinking cycle produced empty response, skipping")
+                return None
+
+            content = raw.strip()
+
+            # Dedup: reject thoughts too similar to recent ones
+            if not self._is_too_similar(content, recent_thoughts):
+                break  # Good — novel thought
+
+            if attempt < self._MAX_DEDUP_RETRIES:
+                logger.info(
+                    "Thought too similar to recent (attempt %d/%d), retrying with new seed",
+                    attempt + 1,
+                    self._MAX_DEDUP_RETRIES + 1,
+                )
+                content = None  # Will retry
+            else:
+                logger.warning(
+                    "Thought still repetitive after %d retries, discarding",
+                    self._MAX_DEDUP_RETRIES + 1,
+                )
+                return None
+
+        if not content:
            return None

-        if not content or not content.strip():
-            logger.debug("Thinking cycle produced empty response, skipping")
-            return None
-
-        thought = self._store_thought(content.strip(), seed_type)
+        thought = self._store_thought(content, seed_type)
        self._last_thought_id = thought.id

        # Post-hook: distill facts from recent thoughts periodically
@@ -743,6 +774,31 @@ class ThinkingEngine:
            logger.debug("Observation seed data unavailable: %s", exc)
        return "\n".join(context_parts)

+    # Maximum retries after a too-similar thought (total attempts = this + 1)
+    _MAX_DEDUP_RETRIES = 2
+    # Similarity threshold (0.0 = completely different, 1.0 = identical)
+    _SIMILARITY_THRESHOLD = 0.6
+
+    def _is_too_similar(self, candidate: str, recent: list["Thought"]) -> bool:
+        """Return True if *candidate* closely matches any thought in *recent*.
+
+        Compares lowercased, stripped text with difflib.SequenceMatcher, so this
+        is a lexical (character-sequence) stand-in for semantic similarity.
+        """
+        norm_candidate = candidate.lower().strip()
+        for thought in recent:
+            norm_existing = thought.content.lower().strip()
+            ratio = SequenceMatcher(None, norm_candidate, norm_existing).ratio()
+            if ratio >= self._SIMILARITY_THRESHOLD:
+                logger.debug(
+                    "Thought rejected (%.0f%% similar to %s): %.60s",
+                    ratio * 100,
+                    thought.id[:8],
+                    candidate,
+                )
+                return True
+        return False
+
    def _build_continuity_context(self) -> str:
        """Build context from recent thoughts with anti-repetition guidance.

--- a/tests/timmy/test_thinking.py
+++ b/tests/timmy/test_thinking.py
@@ -588,6 +588,93 @@ def test_thinking_prompt_anti_confabulation():
    )


+# ---------------------------------------------------------------------------
+# Semantic dedup (anti-rumination)
+# ---------------------------------------------------------------------------
+
+
+def test_is_too_similar_exact_match(tmp_path):
+    """A candidate identical to a stored thought must be flagged as too similar."""
+    engine = _make_engine(tmp_path)
+    t1 = engine._store_thought("The swarm is quiet today.", "observation")
+    assert engine._is_too_similar("The swarm is quiet today.", [t1])
+
+
+def test_is_too_similar_near_match(tmp_path):
+    """A lightly reworded thought (punctuation, one extra word) must still be flagged."""
+    engine = _make_engine(tmp_path)
+    t1 = engine._store_thought("The swarm is quiet today, nothing happening.", "observation")
+    assert engine._is_too_similar("The swarm is quiet today. Nothing is happening.", [t1])
+
+
+def test_is_too_similar_different_thought(tmp_path):
+    """An unrelated thought must not be flagged as too similar."""
+    engine = _make_engine(tmp_path)
+    t1 = engine._store_thought("The swarm is quiet today.", "observation")
+    assert not engine._is_too_similar(
+        "Alexander's preference for YAML config reflects a deep design philosophy.", [t1]
+    )
+
+
+def test_is_too_similar_empty_recent(tmp_path):
+    """With no recent thoughts to compare against, nothing is flagged."""
+    engine = _make_engine(tmp_path)
+    assert not engine._is_too_similar("Any thought at all.", [])
+
+
+@pytest.mark.asyncio
+async def test_think_once_retries_on_similar(tmp_path):
+    """think_once retries when the first thought is too similar and stores the novel one."""
+    engine = _make_engine(tmp_path)
+
+    # Store a prior thought for the dedup check to compare against
+    engine._store_thought("Still no chat messages from Alexander.", "observation")
+
+    call_count = 0
+
+    def agent_side_effect(prompt):
+        nonlocal call_count
+        call_count += 1
+        if call_count == 1:
+            return "Still no chat messages from Alexander today."  # too similar
+        return "The sovereignty model provides independence from cloud dependencies."  # novel
+
+    with (
+        patch.object(engine, "_call_agent", side_effect=agent_side_effect),
+        patch.object(engine, "_log_event"),
+        patch.object(engine, "_update_memory"),
+        patch.object(engine, "_broadcast", new_callable=AsyncMock),
+    ):
+        thought = await engine.think_once()
+
+    assert thought is not None
+    assert "sovereignty" in thought.content.lower()
+    assert call_count == 2  # One rejected attempt, then one accepted retry
+
+
+@pytest.mark.asyncio
+async def test_think_once_discards_after_max_retries(tmp_path):
+    """think_once returns None and stores nothing when every attempt is too similar."""
+    engine = _make_engine(tmp_path)
+
+    engine._store_thought("Still no chat messages from Alexander.", "observation")
+
+    def always_similar(prompt):
+        return "Still no chat messages from Alexander today."
+
+    with (
+        patch.object(engine, "_call_agent", side_effect=always_similar),
+        patch.object(engine, "_log_event"),
+        patch.object(engine, "_update_memory"),
+        patch.object(engine, "_broadcast", new_callable=AsyncMock),
+    ):
+        thought = await engine.think_once()
+
+    assert thought is None
+    # Only the pre-stored thought should exist; rejected candidates are never stored
+    assert engine.count_thoughts() == 1
+
+
 def test_thinking_prompt_brevity_limit():
    """_THINKING_PROMPT must enforce a 2-3 sentence limit."""
    from timmy.thinking import _THINKING_PROMPT