[loop-cycle-6] Break thinking rumination loop — semantic dedup (#38)
Some checks failed
Tests / lint (push) Successful in 3s
Tests / test (push) Failing after 25s

Add post-generation similarity check to ThinkingEngine.think_once().

Problem: Timmy's thinking engine generates repetitive thoughts because
small local models ignore 'don't repeat' instructions in the prompt.
The same observation ('still no chat messages', 'Alexander's name is in
profile') would appear 14+ times in a single day's journal.

Fix: After generating a thought, compare it against the last 5 thoughts
using SequenceMatcher. If similarity >= 0.6, retry with a new seed up to
2 times. If all retries produce repetitive content, discard rather than
store. Uses stdlib difflib — no new dependencies.

Changes:
- thinking.py: Add _is_too_similar() method with SequenceMatcher
- thinking.py: Wrap generation in retry loop with dedup check
- test_thinking.py: 6 new tests covering exact match, near match,
  different thoughts, empty history, retry behavior, and max-retry discard

+96/-20 lines in thinking.py, +87 lines in tests.
This commit is contained in:
2026-03-14 16:21:16 -04:00
parent 0d61b709da
commit b30b5c6b57
2 changed files with 164 additions and 21 deletions

View File

@@ -23,6 +23,7 @@ import sqlite3
import uuid
from dataclasses import dataclass
from datetime import UTC, datetime, timedelta
from difflib import SequenceMatcher
from pathlib import Path
from config import settings
@@ -196,33 +197,63 @@ class ThinkingEngine:
if not settings.thinking_enabled:
return None
if prompt:
seed_type = "prompted"
seed_context = f"Journal prompt: {prompt}"
else:
seed_type, seed_context = self._gather_seed()
continuity = self._build_continuity_context()
memory_context = self._load_memory_context()
system_context = self._gather_system_snapshot()
recent_thoughts = self.get_recent_thoughts(limit=5)
prompt = _THINKING_PROMPT.format(
memory_context=memory_context,
system_context=system_context,
seed_context=seed_context,
continuity_context=continuity,
)
content: str | None = None
seed_type: str = "freeform"
try:
content = await self._call_agent(prompt)
except Exception as exc:
logger.warning("Thinking cycle failed (Ollama likely down): %s", exc)
for attempt in range(self._MAX_DEDUP_RETRIES + 1):
if prompt:
seed_type = "prompted"
seed_context = f"Journal prompt: {prompt}"
else:
seed_type, seed_context = self._gather_seed()
continuity = self._build_continuity_context()
full_prompt = _THINKING_PROMPT.format(
memory_context=memory_context,
system_context=system_context,
seed_context=seed_context,
continuity_context=continuity,
)
try:
raw = await self._call_agent(full_prompt)
except Exception as exc:
logger.warning("Thinking cycle failed (Ollama likely down): %s", exc)
return None
if not raw or not raw.strip():
logger.debug("Thinking cycle produced empty response, skipping")
return None
content = raw.strip()
# Dedup: reject thoughts too similar to recent ones
if not self._is_too_similar(content, recent_thoughts):
break # Good — novel thought
if attempt < self._MAX_DEDUP_RETRIES:
logger.info(
"Thought too similar to recent (attempt %d/%d), retrying with new seed",
attempt + 1,
self._MAX_DEDUP_RETRIES + 1,
)
content = None # Will retry
else:
logger.warning(
"Thought still repetitive after %d retries, discarding",
self._MAX_DEDUP_RETRIES + 1,
)
return None
if not content:
return None
if not content or not content.strip():
logger.debug("Thinking cycle produced empty response, skipping")
return None
thought = self._store_thought(content.strip(), seed_type)
thought = self._store_thought(content, seed_type)
self._last_thought_id = thought.id
# Post-hook: distill facts from recent thoughts periodically
@@ -743,6 +774,31 @@ class ThinkingEngine:
logger.debug("Observation seed data unavailable: %s", exc)
return "\n".join(context_parts)
# Maximum retries when a generated thought is too similar to recent ones.
# The generation loop runs range(_MAX_DEDUP_RETRIES + 1) times, i.e. one
# initial attempt plus this many retries before the thought is discarded.
_MAX_DEDUP_RETRIES = 2
# Similarity threshold (0.0 = completely different, 1.0 = identical).
# _is_too_similar() rejects a candidate whose SequenceMatcher ratio against
# any recent thought is greater than or equal to this value.
_SIMILARITY_THRESHOLD = 0.6
def _is_too_similar(self, candidate: str, recent: list["Thought"]) -> bool:
    """Return True if *candidate* closely repeats any thought in *recent*.

    The comparison is lexical, used as a cheap stand-in for semantic
    similarity: both texts are lower-cased and stripped, then compared
    character-by-character with ``difflib.SequenceMatcher``. A ratio at or
    above ``self._SIMILARITY_THRESHOLD`` counts as a repeat.

    Args:
        candidate: Newly generated thought text.
        recent: Previously stored thoughts to compare against; each must
            expose ``content`` and ``id`` attributes.

    Returns:
        True as soon as one sufficiently similar thought is found,
        otherwise False (including when *recent* is empty).
    """
    norm_candidate = candidate.lower().strip()
    for thought in recent:
        norm_existing = thought.content.lower().strip()
        # autojunk must be off: with the default heuristic, once the second
        # sequence reaches 200 characters every frequently occurring
        # character stops seeding matches, so a long, repetitive thought can
        # score 0.0 against an identical copy and slip past the dedup check.
        matcher = SequenceMatcher(None, norm_candidate, norm_existing, autojunk=False)
        ratio = matcher.ratio()
        if ratio >= self._SIMILARITY_THRESHOLD:
            logger.debug(
                "Thought rejected (%.0f%% similar to %s): %.60s",
                ratio * 100,
                thought.id[:8],
                candidate,
            )
            return True
    return False
def _build_continuity_context(self) -> str:
"""Build context from recent thoughts with anti-repetition guidance.

View File

@@ -588,6 +588,93 @@ def test_thinking_prompt_anti_confabulation():
)
# ---------------------------------------------------------------------------
# Semantic dedup (anti-rumination)
# ---------------------------------------------------------------------------
def test_is_too_similar_exact_match(tmp_path):
    """A verbatim repeat of a stored thought must be flagged as too similar."""
    engine = _make_engine(tmp_path)
    text = "The swarm is quiet today."
    stored = engine._store_thought(text, "observation")
    flagged = engine._is_too_similar(text, [stored])
    assert flagged
def test_is_too_similar_near_match(tmp_path):
    """A lightly reworded version of a stored thought must still be caught."""
    engine = _make_engine(tmp_path)
    earlier = engine._store_thought(
        "The swarm is quiet today, nothing happening.",
        "observation",
    )
    reworded = "The swarm is quiet today. Nothing is happening."
    flagged = engine._is_too_similar(reworded, [earlier])
    assert flagged
def test_is_too_similar_different_thought(tmp_path):
    """An unrelated thought must not be flagged as a repeat."""
    engine = _make_engine(tmp_path)
    stored = engine._store_thought("The swarm is quiet today.", "observation")
    unrelated = "Alexander's preference for YAML config reflects a deep design philosophy."
    flagged = engine._is_too_similar(unrelated, [stored])
    assert not flagged
def test_is_too_similar_empty_recent(tmp_path):
    """With no history to compare against, nothing can be a repeat."""
    engine = _make_engine(tmp_path)
    no_history = []
    flagged = engine._is_too_similar("Any thought at all.", no_history)
    assert not flagged
@pytest.mark.asyncio
async def test_think_once_retries_on_similar(tmp_path):
    """A too-similar first draft should trigger one retry that yields a novel thought."""
    engine = _make_engine(tmp_path)
    # Pre-existing thought the first generated draft will collide with.
    engine._store_thought("Still no chat messages from Alexander.", "observation")

    prompts_seen = []

    def fake_agent(prompt):
        # Record every invocation so the number of attempts can be checked.
        prompts_seen.append(prompt)
        if len(prompts_seen) == 1:
            return "Still no chat messages from Alexander today."  # too similar
        return "The sovereignty model provides independence from cloud dependencies."  # novel

    with (
        patch.object(engine, "_call_agent", side_effect=fake_agent),
        patch.object(engine, "_log_event"),
        patch.object(engine, "_update_memory"),
        patch.object(engine, "_broadcast", new_callable=AsyncMock),
    ):
        result = await engine.think_once()

    assert result is not None
    assert "sovereignty" in result.content.lower()
    # Exactly one rejected draft followed by one accepted draft.
    assert len(prompts_seen) == 2
@pytest.mark.asyncio
async def test_think_once_discards_after_max_retries(tmp_path):
    """If every attempt is repetitive, think_once returns None and stores nothing new."""
    engine = _make_engine(tmp_path)
    engine._store_thought("Still no chat messages from Alexander.", "observation")

    repeated_reply = "Still no chat messages from Alexander today."

    with (
        patch.object(engine, "_call_agent", side_effect=lambda _prompt: repeated_reply),
        patch.object(engine, "_log_event"),
        patch.object(engine, "_update_memory"),
        patch.object(engine, "_broadcast", new_callable=AsyncMock),
    ):
        result = await engine.think_once()

    assert result is None
    # Only the pre-seeded thought remains; rejected drafts were never persisted.
    assert engine.count_thoughts() == 1
def test_thinking_prompt_brevity_limit():
"""_THINKING_PROMPT must enforce a 2-3 sentence limit."""
from timmy.thinking import _THINKING_PROMPT