Make Gemini Flash the default LLM everywhere (Groq kept as fallback) + fix agent error propagation

Work · claude · Work · commit 84eb24e9d3d8 · 2026-05-26T02:03:58.000+05:30
- Add lib/ai/gemini.ts: Gemini 2.0 Flash via OpenAI-compatible API (json, text, tool loop)
- Wire Gemini into all agents (TRIAGE, INVESTIGATOR, NETWORK_REASONER, DEVILS_ADVOCATE, SCRIBE) and so-what route; Groq kept as fallback when GOOGLE_AI_API_KEY is absent
- Fix failed_generation 400 errors in groq.ts: return a result with `parsed: null` instead of throwing once all retries are exhausted
- Fix graph.v2.ts: move createRunningStep inside try/catch; wrap syncClaimsFromAgentOutputs with .catch() to prevent graph crashes
- Fix orchestrator.ts: isolate persistRuntimeResultV2/handlePostAgentRun/ensureDefaultMonitors so DB errors don't mark agent runs as ERROR
- Reduce INVESTIGATOR tools from 8 to 6 to lower Groq failed_generation rate

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
diff --git a/.env.example b/.env.example
@@ -9,6 +9,8 @@ UPSTOX_ACCESS_TOKEN=
 DATABASE_URL="postgresql://localhost:5432/smallens"
 REDIS_URL="redis://localhost:6379"
 GROQ_API_KEY=
+GOOGLE_AI_API_KEY=
+GEMINI_MODEL=gemini-2.0-flash
 GROQ_MODEL="llama-3.1-8b-instant"
 GROQ_AGENT_DELAY_MS=3000
 AGENT_QUEUE_CONCURRENCY=1
diff --git a/app/api/company/[ticker]/so-what/route.ts b/app/api/company/[ticker]/so-what/route.ts
@@ -1,6 +1,7 @@
 import { NextResponse } from "next/server";
 import { auth } from "@/lib/auth/session";
 import { prisma } from "@/lib/db";
+import { runGeminiText, isGeminiAvailable } from "@/lib/ai/gemini";
 import { runGroqText, GROQ_MODELS } from "@/lib/ai/groq";
 import Redis from "ioredis";
 
@@ -61,11 +62,10 @@ Metric: ${metric}
 Recent Values (chronological): ${JSON.stringify(values)}
 `;
 
-    const response = await runGroqText({
-      system: "You are a senior equity analyst writing for serious Indian retail investors. Write 2-3 sentences maximum. Be direct. Reference specific numbers. If trend is bad, say so. Never use filler phrases. End with what the investor should watch next. Format your response using markdown bolding (**bold**) for key numbers, percentages, and important trends.",
-      user: prompt,
-      model: GROQ_MODELS.quick
-    });
+    const soWhatSystem = "You are a senior equity analyst writing for serious Indian retail investors. Write 2-3 sentences maximum. Be direct. Reference specific numbers. If trend is bad, say so. Never use filler phrases. End with what the investor should watch next. Format your response using markdown bolding (**bold**) for key numbers, percentages, and important trends.";
+    const response = isGeminiAvailable()
+      ? await runGeminiText({ system: soWhatSystem, user: prompt })
+      : await runGroqText({ system: soWhatSystem, user: prompt, model: GROQ_MODELS.quick });
 
     const resultText = response.text || "Analysis unavailable.";
 
diff --git a/lib/agents/orchestrator.ts b/lib/agents/orchestrator.ts
@@ -54,19 +54,16 @@ export async function processAgentRun(agentRunId: string) {
     }
   });
 
+  let state: Awaited<ReturnType<typeof runResearchRuntimeV2>> | null = null;
   try {
-    const state = await runResearchRuntimeV2({
+    state = await runResearchRuntimeV2({
       runId: run.id,
       companyId: run.companyId,
       ticker: run.company.ticker,
       companyName: run.company.name,
       mission: run.mission,
       eventType: inferEventType(run.mission)
     });
-
-    await persistRuntimeResultV2(state);
-    await handlePostAgentRun(state);
-    await ensureDefaultMonitors(state.ticker, state.runId);
   } catch (error) {
     await prisma.agentRun.update({
       where: { id: run.id },
@@ -78,4 +75,15 @@ export async function processAgentRun(agentRunId: string) {
     });
     throw error;
   }
+
+  // Post-graph persistence — errors here should not mark the run as ERROR
+  await persistRuntimeResultV2(state).catch((err) =>
+    console.error(`[ORCHESTRATOR] persistRuntimeResultV2 failed for ${run.id}:`, err instanceof Error ? err.message : err)
+  );
+  await handlePostAgentRun(state).catch((err) =>
+    console.error(`[ORCHESTRATOR] handlePostAgentRun failed for ${run.id}:`, err instanceof Error ? err.message : err)
+  );
+  await ensureDefaultMonitors(state.ticker, state.runId).catch((err) =>
+    console.error(`[ORCHESTRATOR] ensureDefaultMonitors failed for ${run.id}:`, err instanceof Error ? err.message : err)
+  );
 }
diff --git a/lib/agents/runtime/agents.v2.ts b/lib/agents/runtime/agents.v2.ts
@@ -1,4 +1,5 @@
 import { z } from "zod";
+import { runGeminiJson, runGeminiToolLoopJson, runGeminiText, isGeminiAvailable, GEMINI_MODEL } from "@/lib/ai/gemini";
 import { GROQ_MODELS, runGroqJson, runGroqToolLoopJson } from "@/lib/ai/groq";
 import {
   agentOutputSchema,
@@ -126,14 +127,7 @@ function createReActAgent(config: {
         null,
         2
       ).slice(0, 8000);
-      try {
-        const response = await withAgentTimeout(`${config.id}`, 180000, () =>
-          runGroqToolLoopJson<AgentOutput>({
-            model: config.model ?? GROQ_MODELS.investigator,
-            tools: getRuntimeToolDefinitions(config.tools),
-            executeTool: executeRuntimeTool,
-            maxToolCalls: Math.min(config.maxIterations, 6),
-            system: `${config.system}
+      const agentSystem = `${config.system}
 Allowed tools (use exact names only): ${config.tools.join(", ")}.
 Return strict JSON matching exactly this contract:
 {
@@ -147,9 +141,26 @@ Return strict JSON matching exactly this contract:
   "claims": ["string"],
   "flags": [{"type": "string", "severity": "HIGH", "evidenceId": "string", "confidence": 0.9}],
   "metrics": {}
-}`,
-            user: slimUser
-          })
+}`;
+      try {
+        const response = await withAgentTimeout(`${config.id}`, 180000, () =>
+          isGeminiAvailable()
+            ? runGeminiToolLoopJson<AgentOutput>({
+                model: GEMINI_MODEL,
+                tools: getRuntimeToolDefinitions(config.tools),
+                executeTool: executeRuntimeTool,
+                maxToolCalls: Math.min(config.maxIterations, 6),
+                system: agentSystem,
+                user: slimUser
+              })
+            : runGroqToolLoopJson<AgentOutput>({
+                model: config.model ?? GROQ_MODELS.investigator,
+                tools: getRuntimeToolDefinitions(config.tools),
+                executeTool: executeRuntimeTool,
+                maxToolCalls: Math.min(config.maxIterations, 6),
+                system: agentSystem,
+                user: slimUser
+              })
         );
 
         const parsed = agent.outputSchema.safeParse(response.parsed);
@@ -190,10 +201,7 @@ export const triageAgent: RuntimeAgent<RuntimeAgentInput, TriageOutput> = {
   maxIterations: 1,
   async run(ctx) {
     const input = { ticker: ctx.state.ticker, eventType: ctx.state.eventType, mission: ctx.state.mission };
-    const response = await withAgentTimeout("TRIAGE", 30000, () =>
-      runGroqJson<TriageOutput>({
-        model: GROQ_MODELS.triage,
-        system: `You are TRIAGE for an Indian smallcap forensics platform.
+    const triageSystem = `You are TRIAGE for an Indian smallcap forensics platform.
 
 Decide investigation depth and pick agents:
 - SEBI_ACTION / AUDITOR_RESIGNATION / PROMOTER_PLEDGE_CHANGE → requiredRuntimeDepth: "DEEP", executionPlan: ["INVESTIGATOR", "NETWORK_REASONER"]
@@ -206,9 +214,11 @@ Set signalSeverity based on eventType:
 - MANUAL_MISSION = "MEDIUM"
 
 confidenceFloor: always 0.55 (lower threshold allows agents to proceed with available data).
-Return strict JSON.`,
-        user: JSON.stringify(input)
-      })
+Return strict JSON.`;
+    const response = await withAgentTimeout("TRIAGE", 30000, () =>
+      isGeminiAvailable()
+        ? runGeminiJson<TriageOutput>({ system: triageSystem, user: JSON.stringify(input) })
+        : runGroqJson<TriageOutput>({ model: GROQ_MODELS.triage, system: triageSystem, user: JSON.stringify(input) })
     );
 
     const parsed = this.outputSchema.safeParse(response.parsed);
@@ -242,10 +252,8 @@ export const runtimeAgentsV2 = {
     tools: [
       "fetchQuarterRange",
       "fetchPromoterHoldingHistory",
-      "fetchPledgeHistory",
       "fetchAuditorChanges",
       "fetchSEBIActions",
-      "fetchNewsTimeline",
       "detectTrend",
       "queryMemory",
     ],
@@ -332,10 +340,7 @@ export const devilsAdvocateAgent: RuntimeAgent<RuntimeAgentInput, DevilsAdvocate
       ])
     );
     const input = { ticker: ctx.state.ticker, outputs: slimOutputs };
-    const response = await withAgentTimeout("DEVILS_ADVOCATE", 45000, () =>
-      runGroqJson<DevilsAdvocateOutput>({
-        model: GROQ_MODELS.devilsAdvocate,
-        system: `You are DEVILS_ADVOCATE for an Indian smallcap forensics platform.
+    const daSystem = `You are DEVILS_ADVOCATE for an Indian smallcap forensics platform.
 
 Your job:
 1. Review all agent outputs for internal contradictions or unsupported claims
@@ -350,9 +355,11 @@ IMPORTANT RULES:
 - overallConfidence = weighted average of agent confidences, minimum 0.45
 - If one agent found a CRITICAL flag, overall confidence should be >= 0.65
 
-Return strict JSON.`,
-        user: JSON.stringify(input).slice(0, 7000)
-      })
+Return strict JSON.`;
+    const response = await withAgentTimeout("DEVILS_ADVOCATE", 45000, () =>
+      isGeminiAvailable()
+        ? runGeminiJson<DevilsAdvocateOutput>({ system: daSystem, user: JSON.stringify(input).slice(0, 7000) })
+        : runGroqJson<DevilsAdvocateOutput>({ model: GROQ_MODELS.devilsAdvocate, system: daSystem, user: JSON.stringify(input).slice(0, 7000) })
     );
     const parsed = this.outputSchema.safeParse(response.parsed);
     const tokensUsed = response.usage?.total_tokens ?? 0;
@@ -398,10 +405,7 @@ export const scribeAgent: RuntimeAgent<RuntimeAgentInput, ScribeOutput> = {
       ])
     );
     const input = { ticker: ctx.state.ticker, companyName: ctx.state.companyName, outputs: slimOutputs };
-    const response = await withAgentTimeout("SCRIBE", 45000, () =>
-      runGroqJson<ScribeOutput>({
-        model: GROQ_MODELS.scribe,
-        system: `You are a senior equity analyst writing for serious Indian retail investors.
+    const scribeSystem = `You are a senior equity analyst writing for serious Indian retail investors.
 
 Write a definitive investment memo based on all prior agent findings.
 
@@ -420,9 +424,11 @@ REQUIRED CONTENT:
 - unresolvedRisks: items that need monitoring
 
 AVOID: Generic phrases like "company shows mixed signals" or "further analysis needed". Be specific and cite numbers.
-Return strict JSON matching the required schema.`,
-        user: JSON.stringify(input).slice(0, 7000)
-      })
+Return strict JSON matching the required schema.`;
+    const response = await withAgentTimeout("SCRIBE", 45000, () =>
+      isGeminiAvailable()
+        ? runGeminiJson<ScribeOutput>({ system: scribeSystem, user: JSON.stringify(input).slice(0, 7000) })
+        : runGroqJson<ScribeOutput>({ model: GROQ_MODELS.scribe, system: scribeSystem, user: JSON.stringify(input).slice(0, 7000) })
     );
     const parsed = this.outputSchema.safeParse(response.parsed);
     const tokensUsed = response.usage?.total_tokens ?? 0;
diff --git a/lib/agents/runtime/graph.v2.ts b/lib/agents/runtime/graph.v2.ts
@@ -281,8 +281,9 @@ async function parallelAgentMeshNode(state: RuntimeState) {
   for (const agentId of planned) {
     await pauseBetweenAgentCalls();
     const agentStart = Date.now();
-    const step = await createRunningStep(state, agentId);
+    let step: Awaited<ReturnType<typeof createRunningStep>> | null = null;
     try {
+      step = await createRunningStep(state, agentId);
       const agent =
         agentId === "NETWORK_REASONER"
           ? runtimeAgentsV2.NETWORK_REASONER
@@ -306,7 +307,7 @@ async function parallelAgentMeshNode(state: RuntimeState) {
       totalTools += output.toolCallCount ?? 0;
       results.push({ agentId, output, latencyMs: Date.now() - agentStart });
     } catch (error) {
-      await failStep(step.id, error);
+      if (step) await failStep(step.id, error).catch(() => {});
       const errMsg = error instanceof Error ? error.message : `${agentId} failed`;
       console.error(`[PARALLEL_AGENT_MESH] ${agentId} failed, using fallback:`, errMsg);
       const fallback = {
@@ -330,7 +331,7 @@ async function parallelAgentMeshNode(state: RuntimeState) {
       state.ticker,
       state.runId,
       investigatorOutputs
-    );
+    ).catch((err) => console.error("[PARALLEL_AGENT_MESH] syncClaimsFromAgentOutputs failed:", err instanceof Error ? err.message : err));
   }
 
   return {
@@ -394,12 +395,13 @@ async function rerunInvestigatorsNode(state: RuntimeState) {
     await pauseBetweenAgentCalls();
     const agentStart = Date.now();
     const reason = requests.find((request) => request.agent === agentId)?.reason ?? null;
-    const step = await createRunningStep(
-      state,
-      agentId,
-      `Rerun requested: ${reason ?? "confidence reinforcement"}`
-    );
+    let step: Awaited<ReturnType<typeof createRunningStep>> | null = null;
     try {
+      step = await createRunningStep(
+        state,
+        agentId,
+        `Rerun requested: ${reason ?? "confidence reinforcement"}`
+      );
       const agent =
         agentId === "NETWORK_REASONER"
           ? runtimeAgentsV2.NETWORK_REASONER
@@ -423,7 +425,7 @@ async function rerunInvestigatorsNode(state: RuntimeState) {
       totalTools += output.toolCallCount ?? 0;
       results.push({ agentId, output, latencyMs: Date.now() - agentStart });
     } catch (error) {
-      await failStep(step.id, error);
+      if (step) await failStep(step.id, error).catch(() => {});
       const errMsg = error instanceof Error ? error.message : `${agentId} rerun failed`;
       console.error(`[RERUN_INVESTIGATORS] ${agentId} failed, using fallback:`, errMsg);
       const fallback = {
@@ -447,7 +449,7 @@ async function rerunInvestigatorsNode(state: RuntimeState) {
       state.ticker,
       state.runId,
       investigatorOutputs
-    );
+    ).catch((err) => console.error("[RERUN_INVESTIGATORS] syncClaimsFromAgentOutputs failed:", err instanceof Error ? err.message : err));
   }
 
   return {
diff --git a/lib/ai/gemini.ts b/lib/ai/gemini.ts
diff --git a/lib/ai/groq.ts b/lib/ai/groq.ts