fix: restore task processor pipeline and eliminate /ws 403 spam (#91)

The microservices refactoring (PR #88) accidentally dropped handler
registration, zombie reconciliation, and startup drain from app.py.
Every task entering the queue was immediately backlogged with
"No handler for task type" because self._handlers stayed empty.

Restores the three critical blocks from app_backup.py:
- Register handlers for chat_response, thought, internal, bug_report,
  task_request
- Reconcile zombie RUNNING tasks from previous crashes
- Drain all pending tasks on startup before entering steady-state loop
- Re-approve tasks that were backlogged due to missing handlers

Also adds a /ws WebSocket catch-all that accepts stale connections and
closes with code 1008 instead of spamming 403 on every retry, and a
`make fresh` target for clean container rebuilds with no cached state.

Co-authored-by: Alexander Payne <apayne@MM.local>
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Alexander Whitestone
2026-02-28 12:18:18 -05:00
committed by GitHub
parent 79e8a6894a
commit 7b967d84b2
2 changed files with 88 additions and 3 deletions

View File

@@ -1,4 +1,4 @@
.PHONY: install install-bigbrain install-creative dev nuke test test-cov test-cov-html watch lint clean help \
.PHONY: install install-bigbrain install-creative dev nuke fresh test test-cov test-cov-html watch lint clean help \
up down logs \
docker-build docker-up docker-down docker-agent docker-logs docker-shell \
cloud-deploy cloud-up cloud-down cloud-logs cloud-status cloud-update
@@ -59,6 +59,18 @@ nuke:
@sleep 0.5
@echo " ✓ Port 8000 free, containers stopped, caches cleared"
# Full clean rebuild: wipe containers, images, volumes, rebuild from scratch.
# Ensures no stale code, cached layers, or old DB state persists.
fresh: nuke
docker compose down -v --rmi local 2>/dev/null || true
docker compose build --no-cache
mkdir -p data
docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d
@echo ""
@echo " ✓ Fresh rebuild complete — Timmy Time at http://localhost:8000"
@echo " Hot-reload active. Logs: make logs"
@echo ""
# Print the local IP addresses your phone can use to reach this machine.
# Connect your phone to the same hotspot your Mac is sharing from,
# then open http://<IP>:8000 in your phone browser.
@@ -254,6 +266,7 @@ help:
@echo " make install-creative install with creative extras (torch, diffusers)"
@echo " make dev clean up + start dashboard (auto-fixes errno 48)"
@echo " make nuke kill port 8000, stop containers, reset state"
@echo " make fresh full clean rebuild (no cached layers/volumes)"
@echo " make ip print local IP addresses for phone testing"
@echo " make test run all tests"
@echo " make test-cov tests + coverage report (terminal + XML)"

View File

@@ -14,7 +14,7 @@ import os
from contextlib import asynccontextmanager
from pathlib import Path
from fastapi import FastAPI, Request
from fastapi import FastAPI, Request, WebSocket
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import HTMLResponse
from fastapi.staticfiles import StaticFiles
@@ -163,7 +163,7 @@ async def _thinking_loop() -> None:
async def _task_processor_loop() -> None:
"""Background task: Timmy's task queue processor."""
from swarm.task_processor import task_processor
from swarm.task_queue.models import update_task_status, TaskStatus
from swarm.task_queue.models import update_task_status, list_tasks, TaskStatus
from timmy.session import chat as timmy_chat
from datetime import datetime
import json
@@ -255,6 +255,65 @@ async def _task_processor_loop() -> None:
pass
return f"Error: {str(e)}"
# Register handlers
task_processor.register_handler("chat_response", handle_chat_response)
task_processor.register_handler("thought", handle_thought)
task_processor.register_handler("internal", handle_thought)
task_processor.register_handler("bug_report", handle_bug_report)
task_processor.register_handler("task_request", handle_task_request)
# ── Reconcile zombie tasks from previous crash ──
zombie_count = task_processor.reconcile_zombie_tasks()
if zombie_count:
logger.info("Recycled %d zombie task(s) back to approved", zombie_count)
# ── Re-approve tasks backlogged due to missing handlers ──
stale = list_tasks(status=TaskStatus.BACKLOGGED, assigned_to="timmy")
requeued = 0
for t in stale:
if t.backlog_reason and "No handler for task type" in t.backlog_reason:
update_task_status(t.id, TaskStatus.APPROVED, result=None)
requeued += 1
if requeued:
logger.info("Re-queued %d task(s) that were backlogged due to missing handlers", requeued)
# ── Startup drain: iterate through all pending tasks immediately ──
logger.info("Draining task queue on startup...")
try:
summary = await task_processor.drain_queue()
if summary["processed"] or summary["backlogged"]:
logger.info(
"Startup drain: %d processed, %d backlogged, %d skipped, %d failed",
summary["processed"],
summary["backlogged"],
summary["skipped"],
summary["failed"],
)
# Notify via WebSocket so the dashboard updates
try:
from infrastructure.ws_manager.handler import ws_manager
asyncio.create_task(
ws_manager.broadcast_json(
{
"type": "task_event",
"event": "startup_drain_complete",
"summary": summary,
}
)
)
except Exception:
pass
except Exception as exc:
logger.error("Startup drain failed: %s", exc)
try:
from infrastructure.error_capture import capture_error
capture_error(exc, source="task_processor_startup")
except Exception:
pass
# ── Steady-state: poll for new tasks ──
logger.info("Task processor entering steady-state loop")
await task_processor.run_loop(interval_seconds=3.0)
@@ -465,6 +524,19 @@ app.include_router(bugs_router)
app.include_router(cascade_router)
@app.websocket("/ws")
async def ws_redirect(websocket: WebSocket):
"""Catch stale /ws connections and close cleanly.
Before PR #82, frontend code connected to /ws which never existed as
an endpoint. Stale browser tabs retry forever, spamming 403 errors.
Accept the connection and immediately close with a policy-violation
code so the client stops retrying.
"""
await websocket.accept()
await websocket.close(code=1008, reason="Use /swarm/live instead")
@app.get("/", response_class=HTMLResponse)
async def root(request: Request):
"""Serve the main dashboard page."""