Skip to content

Commit 3030547

Browse files
authored
feat: force LLM output language from UI locale (#26)
Previously the main chat system prompt was majority-Chinese prose and the suggestion/title prompts relied on soft "match the anchor's language" rules. Free-tier summarizer models (llama-3.1-8b etc.) routinely ignored those directives, so English sessions drifted into Chinese titles/suggestions and occasionally Chinese assistant replies.

Replace the soft rules with a single shared directive:

- _lang_directive(lang) prepends a one-line demonstration in the target language (strongest steering signal for small models) plus an English MUST-directive (strongest signal for larger chat models).
- chat_stream / generate_title_and_suggestions / summarize / merge_summary / assess_relevance / merge_threads all accept a `lang` param and inject the directive into their system message.
- Chinese prose removed from the chat_stream system prompt and from context_builder labels; everything the LLM sees is English except the single target-language demo line.

The UI locale is the single source of truth. Plumb it through:

- ChatRequest.lang → stream.py → stream_manager → chat_stream.
- CreateThreadRequest.lang → threads.py → generate_title_and_suggestions.
- GET /threads/:id/suggest accepts ?lang=.
- Frontend callers pass useLangStore.getState().lang.

Tests:

- New TestLangDirective, TestChatStreamSystemPrompt, TestSummarizerLangDirective classes assert the directive is injected correctly and guard against non-Latin prose leaking back into the default prompt.
- Update test_context_builder's _messages_to_text assertions for the new English "User:/AI:" labels and the English long-text placeholder.
1 parent 5af89c7 commit 3030547

13 files changed

Lines changed: 404 additions & 139 deletions

File tree

backend/models/message.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,10 @@ class ChatRequest(BaseModel):
2020
# Passed by the frontend when the user has just uploaded a file, so RAG prioritizes its chunks.
2121
attachment_filename: str | None = None
2222

23+
# Current frontend UI locale; forces the output language of the assistant reply,
24+
# the META-block summary/title, and any fallback summarization that runs afterwards.
25+
lang: str | None = None
26+
2327

2428
class Message(BaseModel):
2529
"""

backend/models/thread.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,10 @@ class CreateThreadRequest(BaseModel):
3333
# Depth passed directly from the frontend to avoid an extra DB round-trip
3434
depth: int | None = None
3535

36+
# Current frontend UI locale; forces the output language of the LLM-generated
37+
# title and suggested follow-up questions for this sub-thread.
38+
lang: str | None = None
39+
3640

3741
class Thread(BaseModel):
3842
"""

backend/routers/stream.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@ async def chat(
9595
body.attachment_filename,
9696
thread_meta=thread_meta,
9797
session_id=session_id,
98+
lang=body.lang,
9899
),
99100
media_type="text/event-stream",
100101
headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"},

backend/routers/threads.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ async def create_thread(body: CreateThreadRequest, auth=Depends(get_current_user
9898
thread_id_str,
9999
body.anchor_text,
100100
str(body.anchor_message_id) if body.anchor_message_id else None,
101+
lang=body.lang,
101102
)
102103
)
103104

@@ -108,6 +109,7 @@ async def _generate_and_patch(
108109
thread_id: str,
109110
anchor_text: str,
110111
anchor_message_id: str | None,
112+
lang: str | None = None,
111113
) -> None:
112114
"""
113115
Background task: fetch the full message containing the anchor → generate title and suggestions → write back to DB.
@@ -135,7 +137,7 @@ async def _generate_and_patch(
135137

136138
llm_ok = True
137139
try:
138-
title, suggestions = await generate_title_and_suggestions(anchor_text, context_summary)
140+
title, suggestions = await generate_title_and_suggestions(anchor_text, context_summary, lang=lang)
139141
except Exception as e:
140142
# LLM failed: use first 20 chars of anchor as title; no suggestions
141143
llm_ok = False
@@ -160,7 +162,11 @@ async def _generate_and_patch(
160162

161163

162164
@router.get("/threads/{thread_id}/suggest")
163-
async def suggest_questions(thread_id: uuid.UUID, auth=Depends(get_current_user)):
165+
async def suggest_questions(
166+
thread_id: uuid.UUID,
167+
lang: str | None = None,
168+
auth=Depends(get_current_user),
169+
):
164170
"""
165171
Return suggested follow-up questions for a sub-thread (up to 3) plus the
166172
LLM-generated title.
@@ -248,7 +254,7 @@ async def suggest_questions(thread_id: uuid.UUID, auth=Depends(get_current_user)
248254

249255
sync_llm_ok = True
250256
try:
251-
new_title, questions = await generate_title_and_suggestions(anchor, context_summary)
257+
new_title, questions = await generate_title_and_suggestions(anchor, context_summary, lang=lang)
252258
if new_title:
253259
title = new_title
254260
except Exception as e:

backend/services/context_builder.py

Lines changed: 33 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,8 @@ def _messages_to_text(messages: list[dict]) -> str:
5252
"""
5353
lines = []
5454
for m in messages:
55-
role_label = "用户" if m["role"] == "user" else "AI"
56-
lines.append(f"{role_label}{m['content']}")
55+
role_label = "User" if m["role"] == "user" else "AI"
56+
lines.append(f"{role_label}: {m['content']}")
5757
return "\n".join(lines)
5858

5959

@@ -85,9 +85,10 @@ def _trim_context(messages: list[dict]) -> list[dict]:
8585
if len(m["content"]) > _MAX_SINGLE_MSG_CHARS:
8686
char_len = len(m["content"])
8787
placeholder = (
88-
f"[用户提供了长文本,共 {char_len} 字,已分块建立向量索引。"
89-
f"相关段落已由系统上下文注入,请根据上方 system 消息中的内容回答。"
90-
f"文本开头供参考:{m['content'][:200]}…]"
88+
f"[The user provided a long text of {char_len} characters; it has been chunked "
89+
f"and indexed. Relevant passages have been injected by the system context — "
90+
f"answer using the system messages above. Opening excerpt for reference: "
91+
f"{m['content'][:200]}…]"
9192
)
9293
m = {**m, "content": placeholder}
9394
result.append(m)
@@ -107,12 +108,18 @@ def _trim_context(messages: list[dict]) -> list[dict]:
107108
# _db is imported from db.supabase.run_db at the top; name preserved for test compatibility.
108109

109110

110-
async def _get_or_create_summary(thread_id: str, token_budget: int) -> str:
111+
async def _get_or_create_summary(
112+
thread_id: str,
113+
token_budget: int,
114+
lang: str | None = None,
115+
) -> str:
111116
"""
112117
Read the cached thread summary; if missing, generate it from scratch and write it to the DB.
113118
114119
In normal operation summaries are maintained by stream_manager at write time.
115120
This function only triggers full generation for historical data migration or first access (fallback path).
121+
122+
`lang` forces the output language of the summary when a new one is generated here.
116123
"""
117124
# Check cache
118125
cached = await _db(
@@ -141,7 +148,7 @@ async def _get_or_create_summary(thread_id: str, token_budget: int) -> str:
141148
if not messages:
142149
return ""
143150

144-
summary_text = await summarize(_messages_to_text(messages), token_budget)
151+
summary_text = await summarize(_messages_to_text(messages), token_budget, lang=lang)
145152

146153
await _db(
147154
lambda: get_supabase().table("thread_summaries").upsert({
@@ -159,13 +166,17 @@ async def build_context(
159166
thread_id: str,
160167
query_text: str = "",
161168
prefer_filename: str | None = None,
169+
lang: str | None = None,
162170
) -> list[dict]:
163171
"""
164172
Build the AI messages list for the specified thread.
165173
166174
query_text: The latest user message, used for RAG retrieval (passed in by stream_manager).
167175
168176
prefer_filename: Prefer RAG chunks from this file (passed when the user just uploaded it).
177+
178+
lang: UI locale forwarded to any lazy summary generation so historical
179+
cache-miss fallbacks also respect the user's language.
169180
"""
170181
thread_result = await _db(
171182
lambda: get_supabase().table("threads").select("*").eq("id", thread_id).maybe_single().execute(),
@@ -216,11 +227,14 @@ async def build_context(
216227
# Inject the main thread summary as compressed background when history exceeds the window
217228
summary_prefix: list[dict] = []
218229
if total > _THREAD_MSG_LIMIT:
219-
summary = await _get_or_create_summary(thread_id, _budget_for_depth(0))
230+
summary = await _get_or_create_summary(thread_id, _budget_for_depth(0), lang=lang)
220231
if summary:
221232
summary_prefix = [{
222233
"role": "system",
223-
"content": f"[对话历史摘要(第 {_THREAD_MSG_LIMIT + 1} 条之前)]\n{summary}",
234+
"content": (
235+
f"[Conversation history summary (older than the last {_THREAD_MSG_LIMIT} messages)]\n"
236+
f"{summary}"
237+
),
224238
}]
225239

226240
return _trim_context(summary_prefix + rag_items + history)
@@ -255,7 +269,7 @@ async def build_context(
255269

256270
# Concurrently fetch all ancestor summaries
257271
summaries: list[str] = await asyncio.gather(*[
258-
_get_or_create_summary(anc["id"], budget)
272+
_get_or_create_summary(anc["id"], budget, lang=lang)
259273
for anc, budget in zip(ancestors_root_first, budgets)
260274
])
261275

@@ -265,15 +279,22 @@ async def build_context(
265279
if not summary:
266280
continue
267281
depth_from_root = i
268-
label = "主线对话摘要" if depth_from_root == 0 else f"第 {depth_from_root} 层子线程摘要"
282+
label = (
283+
"Main-thread summary"
284+
if depth_from_root == 0
285+
else f"Sub-thread summary (depth {depth_from_root})"
286+
)
269287
prefix.append({"role": "system", "content": f"[{label}]\n{summary}"})
270288

271289
# Anchor text (kept in full; it is the core reference for the sub-thread)
272290
anchor = thread.get("anchor_text", "")
273291
if anchor:
274292
prefix.append({
275293
"role": "system",
276-
"content": f'用户在上述对话中选中了以下内容并提出追问,请围绕这段内容回答:\n"{anchor}"',
294+
"content": (
295+
"The user selected the following span in the conversation above and is asking a "
296+
f'follow-up about it. Focus your answer on this span:\n"{anchor}"'
297+
),
277298
})
278299

279300
# Most recent N messages in the current sub-thread (fetched desc, then reversed for chronological order)

0 commit comments

Comments
 (0)