Skip to content

Commit 8e9e309

Browse files
review logic changed to achieve consensus accurately
1 parent d9453ba commit 8e9e309

5 files changed

Lines changed: 234 additions & 52 deletions

File tree

__tests__/review-orchestrator.test.ts

Lines changed: 59 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ import { ReviewOrchestrator } from "../src/review/review-orchestrator";
22
import { ReviewerOpinion } from "../src/utils/types";
33

44
describe("ReviewOrchestrator", () => {
5-
it("treats a 2-of-3 reviewer agreement as consensus", () => {
5+
it("requires unanimous agreement (all 3) for consensus in early rounds", () => {
66
const orchestrator = new ReviewOrchestrator("test-key", {
77
reviewerModels: [
88
"gemini-2.5-flash",
@@ -52,6 +52,64 @@ describe("ReviewOrchestrator", () => {
5252
}
5353
).evaluateConsensus(opinions);
5454

55+
// With 2-of-3 agreement (not unanimous), consensus is NOT achieved
56+
expect(result).toEqual({
57+
isConsensus: false,
58+
decision: "COMMENT",
59+
});
60+
});
61+
62+
it("achieves consensus when all 3 reviewers agree", () => {
63+
const orchestrator = new ReviewOrchestrator("test-key", {
64+
reviewerModels: [
65+
"gemini-2.5-flash",
66+
"gemini-2.5-flash-lite",
67+
"gemini-2.0-flash",
68+
],
69+
judgeModel: "gemini-2.5-pro",
70+
maxConsensusRounds: 3,
71+
});
72+
73+
const opinions: ReviewerOpinion[] = [
74+
{
75+
reviewerId: "Reviewer_1",
76+
modelName: "gemini-2.5-flash",
77+
decision: "APPROVE",
78+
reasoning: "Looks good.",
79+
findings: [],
80+
summary: "Summary",
81+
timestamp: new Date().toISOString(),
82+
},
83+
{
84+
reviewerId: "Reviewer_2",
85+
modelName: "gemini-2.5-flash-lite",
86+
decision: "APPROVE",
87+
reasoning: "Looks good.",
88+
findings: [],
89+
summary: "Summary",
90+
timestamp: new Date().toISOString(),
91+
},
92+
{
93+
reviewerId: "Reviewer_3",
94+
modelName: "gemini-2.0-flash",
95+
decision: "APPROVE",
96+
reasoning: "No issues found.",
97+
findings: [],
98+
summary: "Summary",
99+
timestamp: new Date().toISOString(),
100+
},
101+
];
102+
103+
const result = (
104+
orchestrator as unknown as {
105+
evaluateConsensus: (input: ReviewerOpinion[]) => {
106+
isConsensus: boolean;
107+
decision: string;
108+
};
109+
}
110+
).evaluateConsensus(opinions);
111+
112+
// All 3 agree on APPROVE
55113
expect(result).toEqual({
56114
isConsensus: true,
57115
decision: "APPROVE",

dist/index.js

Lines changed: 76 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1370,16 +1370,27 @@ RESPOND ONLY WITH THE JSON OBJECT. NO OTHER TEXT.`;
13701370
extractResponseText(response) {
13711371
if (this.provider === "groq") {
13721372
const groqResponse = response;
1373-
const openAIText = groqResponse.choices
1374-
?.map((choice) => choice.text || choice.message?.content || "")
1375-
.join("");
1376-
if (openAIText) {
1377-
return openAIText.trim();
1373+
// Try OpenAI-compatible chat/completions format first
1374+
if (groqResponse.choices && Array.isArray(groqResponse.choices)) {
1375+
const openAIText = groqResponse.choices
1376+
.map((choice) => choice.text || choice.message?.content || "")
1377+
.join("")
1378+
.trim();
1379+
if (openAIText) {
1380+
return openAIText;
1381+
}
1382+
}
1383+
// Fallback to legacy Groq format
1384+
if (groqResponse.output && Array.isArray(groqResponse.output)) {
1385+
const groqText = groqResponse.output
1386+
.map((out) => out.content?.map((item) => item.text || "").join(""))
1387+
.join("")
1388+
.trim();
1389+
if (groqText) {
1390+
return groqText;
1391+
}
13781392
}
1379-
const groqText = groqResponse.output
1380-
?.map((out) => out.content?.map((item) => item.text || "").join(""))
1381-
.join("") || "";
1382-
return groqText.trim();
1393+
return "";
13831394
}
13841395
const geminiResponse = response;
13851396
const text = geminiResponse.candidates?.[0]?.content?.parts
@@ -2167,8 +2178,8 @@ class ReviewOrchestrator {
21672178
allRounds.push(round_data);
21682179
break; // Exit loop, consensus achieved
21692180
}
2170-
// Step 3: Opinions differ - call judge model
2171-
this.logger.warn(`❌ No consensus yet. Reviewers differ:${opinions
2181+
// Step 3: Opinions differ - call judge model to guide next round
2182+
this.logger.warn(`❌ No full consensus yet. Reviewers differ:${opinions
21722183
.map((o) => ` ${o.reviewerId}:${o.decision}`)
21732184
.join(",")}`);
21742185
const judgeDecision = await this.llmClient.callJudgeModel(this.judgeModel, opinions, context, round);
@@ -2181,19 +2192,41 @@ class ReviewOrchestrator {
21812192
needsRetry: round < this.maxConsensusRounds,
21822193
};
21832194
allRounds.push(round_data);
2184-
// If this is the last round or judge is confident, use judge's decision
2185-
if (round === this.maxConsensusRounds || judgeDecision.confidence > 0.8) {
2186-
finalDecision = judgeDecision.decision;
2187-
finalReasoning = judgeDecision.reasoning;
2188-
if (round === this.maxConsensusRounds) {
2189-
this.logger.warn(`⚠️ Max consensus rounds reached. Using judge's final decision.`);
2195+
// If this is the last round, use fallback strategy
2196+
if (round === this.maxConsensusRounds) {
2197+
this.logger.warn(`⚠️ Max consensus rounds reached. Applying fallback decision strategy...`);
2198+
// First, check if we can use judge's decision if confident
2199+
if (judgeDecision.confidence > 0.8) {
2200+
finalDecision = judgeDecision.decision;
2201+
finalReasoning = judgeDecision.reasoning;
2202+
this.logger.success(`✅ Using judge's high-confidence final decision: ${judgeDecision.decision}`);
21902203
}
21912204
else {
2192-
this.logger.success(`✅ Judge achieved high-confidence consensus in round ${round}`);
2205+
// Otherwise, fall back to majority consensus among reviewers
2206+
const majority = this.evaluateMajorityConsensus(opinions);
2207+
if (majority.hasMajority) {
2208+
finalDecision = majority.decision;
2209+
const agreeingReviewers = opinions.filter((o) => o.decision === majority.decision);
2210+
finalReasoning = `${agreeingReviewers.length} of 3 reviewers agreed on **${majority.decision}** after ${round} rounds (fallback to majority)`;
2211+
this.logger.success(`✅ Using majority consensus on final round: ${majority.decision}`);
2212+
}
2213+
else {
2214+
// Last resort: use judge's decision
2215+
finalDecision = judgeDecision.decision;
2216+
finalReasoning = judgeDecision.reasoning;
2217+
this.logger.warn(`⚠️ No majority found. Using judge's decision as final fallback: ${judgeDecision.decision}`);
2218+
}
21932219
}
21942220
break;
21952221
}
2196-
this.logger.warn(`⏳ Low judge confidence (${(judgeDecision.confidence * 100).toFixed(0)}%). Requesting another review round...`);
2222+
// Before final round: only stop if judge is very confident
2223+
if (judgeDecision.confidence > 0.9) {
2224+
finalDecision = judgeDecision.decision;
2225+
finalReasoning = judgeDecision.reasoning;
2226+
this.logger.success(`✅ Judge achieved very high-confidence consensus in round ${round}: ${judgeDecision.decision}`);
2227+
break;
2228+
}
2229+
this.logger.warn(`⏳ Judge confidence is ${(judgeDecision.confidence * 100).toFixed(0)}%. Requesting another review round to reach consensus...`);
21972230
}
21982231
// Step 4: Consolidate findings
21992232
const inlineFindings = this.consolidateFindings(allOpinions);
@@ -2254,12 +2287,33 @@ class ReviewOrchestrator {
22542287
/**
22552288
* Evaluate if consensus is achieved
22562289
*
2257-
* Consensus = a majority (2 of 3) reviewers agree on the same decision
2290+
* Consensus = ALL 3 reviewers agree on the same decision (unanimous).
2291+
* This ensures reviewers are truly on the same page before concluding.
22582292
*/
22592293
evaluateConsensus(opinions) {
22602294
if (opinions.length !== 3) {
22612295
return { isConsensus: false, decision: "COMMENT" };
22622296
}
2297+
// Check if all 3 reviewers have the same decision
2298+
const firstDecision = opinions[0].decision;
2299+
const allAgree = opinions.every((o) => o.decision === firstDecision);
2300+
if (allAgree) {
2301+
return {
2302+
isConsensus: true,
2303+
decision: firstDecision,
2304+
};
2305+
}
2306+
return { isConsensus: false, decision: "COMMENT" };
2307+
}
2308+
/**
2309+
* Evaluate if a majority consensus is reached (fallback for final round)
2310+
*
2311+
* Majority = 2 of 3 reviewers agree on the same decision
2312+
*/
2313+
evaluateMajorityConsensus(opinions) {
2314+
if (opinions.length !== 3) {
2315+
return { hasMajority: false, decision: "COMMENT" };
2316+
}
22632317
const decisionCounts = opinions.reduce((counts, opinion) => {
22642318
counts[opinion.decision] += 1;
22652319
return counts;
@@ -2271,11 +2325,11 @@ class ReviewOrchestrator {
22712325
const majorityEntry = Object.entries(decisionCounts).find(([, count]) => count >= 2);
22722326
if (majorityEntry) {
22732327
return {
2274-
isConsensus: true,
2328+
hasMajority: true,
22752329
decision: majorityEntry[0],
22762330
};
22772331
}
2278-
return { isConsensus: false, decision: "COMMENT" };
2332+
return { hasMajority: false, decision: "COMMENT" };
22792333
}
22802334
/**
22812335
* Format reasoning when a consensus is reached

dist/index.js.map

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/llm/llm-client.ts

Lines changed: 25 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -702,18 +702,29 @@ RESPOND ONLY WITH THE JSON OBJECT. NO OTHER TEXT.`;
702702
private extractResponseText(response: LLMResponse): string {
703703
if (this.provider === "groq") {
704704
const groqResponse = response as GroqResponse;
705-
const openAIText = groqResponse.choices
706-
?.map((choice) => choice.text || choice.message?.content || "")
707-
.join("");
705+
// Try OpenAI-compatible chat/completions format first
706+
if (groqResponse.choices && Array.isArray(groqResponse.choices)) {
707+
const openAIText = groqResponse.choices
708+
.map((choice) => choice.text || choice.message?.content || "")
709+
.join("")
710+
.trim();
711+
if (openAIText) {
712+
return openAIText;
713+
}
714+
}
708715

709-
if (openAIText) {
710-
return openAIText.trim();
716+
// Fallback to legacy Groq format
717+
if (groqResponse.output && Array.isArray(groqResponse.output)) {
718+
const groqText = groqResponse.output
719+
.map((out) => out.content?.map((item) => item.text || "").join(""))
720+
.join("")
721+
.trim();
722+
if (groqText) {
723+
return groqText;
724+
}
711725
}
712726

713-
const groqText = groqResponse.output
714-
?.map((out) => out.content?.map((item) => item.text || "").join(""))
715-
.join("") || "";
716-
return groqText.trim();
727+
return "";
717728
}
718729

719730
const geminiResponse = response as GeminiResponse;
@@ -853,8 +864,8 @@ RESPOND ONLY WITH THE JSON OBJECT. NO OTHER TEXT.`;
853864
const models = Array.isArray(data.data)
854865
? (data.data as Array<Record<string, unknown>>)
855866
: Array.isArray(data.models)
856-
? (data.models as Array<Record<string, unknown>>)
857-
: [];
867+
? (data.models as Array<Record<string, unknown>>)
868+
: [];
858869

859870
for (const model of models) {
860871
const modelName = this.normalizeModelName(
@@ -872,7 +883,9 @@ RESPOND ONLY WITH THE JSON OBJECT. NO OTHER TEXT.`;
872883
FALLBACK_MODELS.groq.reviewer.forEach((model) =>
873884
availableModels.add(model)
874885
);
875-
FALLBACK_MODELS.groq.judge.forEach((model) => availableModels.add(model));
886+
FALLBACK_MODELS.groq.judge.forEach((model) =>
887+
availableModels.add(model)
888+
);
876889
}
877890

878891
this.availableGenerateContentModels = availableModels;

0 commit comments

Comments
 (0)