zizhaof
diff --git a/backend/models/message.py b/backend/models/message.py
Lines changed: 4 additions & 0 deletions
diff --git a/backend/models/thread.py b/backend/models/thread.py
Lines changed: 4 additions & 0 deletions
diff --git a/backend/routers/stream.py b/backend/routers/stream.py
Lines changed: 1 addition & 0 deletions
diff --git a/backend/routers/threads.py b/backend/routers/threads.py
Lines changed: 9 additions & 3 deletions
diff --git a/backend/services/context_builder.py b/backend/services/context_builder.py
Lines changed: 33 additions & 12 deletions
@@ -20,6 +20,10 @@ class ChatRequest(BaseModel):
     # Passed by the frontend when the user has just uploaded a file, so RAG prioritizes its chunks.
     attachment_filename: str | None = None
 
+    # Current frontend UI locale; forces the output language of the assistant reply,
+    # the META-block summary/title, and any fallback summarization that runs afterwards.
+    lang: str | None = None
+
 
 class Message(BaseModel):
     """
 
@@ -33,6 +33,10 @@ class CreateThreadRequest(BaseModel):
     # Depth passed directly from the frontend to avoid an extra DB round-trip
     depth: int | None = None
 
+    # Current frontend UI locale; forces the output language of the LLM-generated
+    # title and suggested follow-up questions for this sub-thread.
+    lang: str | None = None
+
 
 class Thread(BaseModel):
     """
 
@@ -95,6 +95,7 @@ async def chat(
             body.attachment_filename,
             thread_meta=thread_meta,
             session_id=session_id,
+            lang=body.lang,
         ),
         media_type="text/event-stream",
         headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"},
 
@@ -98,6 +98,7 @@ async def create_thread(body: CreateThreadRequest, auth=Depends(get_current_user
                 thread_id_str,
                 body.anchor_text,
                 str(body.anchor_message_id) if body.anchor_message_id else None,
+                lang=body.lang,
             )
         )
 
@@ -108,6 +109,7 @@ async def _generate_and_patch(
     thread_id: str,
     anchor_text: str,
     anchor_message_id: str | None,
+    lang: str | None = None,
 ) -> None:
     """
     Background task: fetch the full message containing the anchor → generate title and suggestions → write back to DB.
@@ -135,7 +137,7 @@ async def _generate_and_patch(
 
     llm_ok = True
     try:
-        title, suggestions = await generate_title_and_suggestions(anchor_text, context_summary)
+        title, suggestions = await generate_title_and_suggestions(anchor_text, context_summary, lang=lang)
     except Exception as e:
         # LLM failed: use first 20 chars of anchor as title; no suggestions
         llm_ok = False
@@ -160,7 +162,11 @@ async def _generate_and_patch(
 
 
 @router.get("/threads/{thread_id}/suggest")
-async def suggest_questions(thread_id: uuid.UUID, auth=Depends(get_current_user)):
+async def suggest_questions(
+    thread_id: uuid.UUID,
+    lang: str | None = None,
+    auth=Depends(get_current_user),
+):
     """
     Return suggested follow-up questions for a sub-thread (up to 3) plus the
     LLM-generated title.
@@ -248,7 +254,7 @@ async def suggest_questions(thread_id: uuid.UUID, auth=Depends(get_current_user)
 
     sync_llm_ok = True
     try:
-        new_title, questions = await generate_title_and_suggestions(anchor, context_summary)
+        new_title, questions = await generate_title_and_suggestions(anchor, context_summary, lang=lang)
         if new_title:
             title = new_title
     except Exception as e:
 
@@ -52,8 +52,8 @@ def _messages_to_text(messages: list[dict]) -> str:
     """
     lines = []
     for m in messages:
-        role_label = "用户" if m["role"] == "user" else "AI"
-        lines.append(f"{role_label}：{m['content']}")
+        role_label = "User" if m["role"] == "user" else "AI"
+        lines.append(f"{role_label}: {m['content']}")
     return "\n".join(lines)
 
 
@@ -85,9 +85,10 @@ def _trim_context(messages: list[dict]) -> list[dict]:
             if len(m["content"]) > _MAX_SINGLE_MSG_CHARS:
                 char_len = len(m["content"])
                 placeholder = (
-                    f"[用户提供了长文本，共 {char_len} 字，已分块建立向量索引。"
-                    f"相关段落已由系统上下文注入，请根据上方 system 消息中的内容回答。"
-                    f"文本开头供参考：{m['content'][:200]}…]"
+                    f"[The user provided a long text of {char_len} characters; it has been chunked "
+                    f"and indexed. Relevant passages have been injected by the system context — "
+                    f"answer using the system messages above. Opening excerpt for reference: "
+                    f"{m['content'][:200]}…]"
                 )
                 m = {**m, "content": placeholder}
         result.append(m)
@@ -107,12 +108,18 @@ def _trim_context(messages: list[dict]) -> list[dict]:
 # _db is imported from db.supabase.run_db at the top; name preserved for test compatibility.
 
 
-async def _get_or_create_summary(thread_id: str, token_budget: int) -> str:
+async def _get_or_create_summary(
+    thread_id: str,
+    token_budget: int,
+    lang: str | None = None,
+) -> str:
     """
     Read the cached thread summary; if missing, generate it from scratch and write it to the DB.
 
     In normal operation summaries are maintained by stream_manager at write time.
     This function only triggers full generation for historical data migration or first access (fallback path).
+
+    `lang` forces the output language of the summary when a new one is generated here.
     """
     # Check cache
     cached = await _db(
@@ -141,7 +148,7 @@ async def _get_or_create_summary(thread_id: str, token_budget: int) -> str:
     if not messages:
         return ""
 
-    summary_text = await summarize(_messages_to_text(messages), token_budget)
+    summary_text = await summarize(_messages_to_text(messages), token_budget, lang=lang)
 
     await _db(
         lambda: get_supabase().table("thread_summaries").upsert({
@@ -159,13 +166,17 @@ async def build_context(
     thread_id: str,
     query_text: str = "",
     prefer_filename: str | None = None,
+    lang: str | None = None,
 ) -> list[dict]:
     """
     Build the AI messages list for the specified thread.
 
     query_text: The latest user message, used for RAG retrieval (passed in by stream_manager).
 
     prefer_filename: Prefer RAG chunks from this file (passed when the user just uploaded it).
+
+    lang: UI locale forwarded to any lazy summary generation so historical
+    cache-miss fallbacks also respect the user's language.
     """
     thread_result = await _db(
         lambda: get_supabase().table("threads").select("*").eq("id", thread_id).maybe_single().execute(),
@@ -216,11 +227,14 @@ async def build_context(
         # Inject the main thread summary as compressed background when history exceeds the window
         summary_prefix: list[dict] = []
         if total > _THREAD_MSG_LIMIT:
-            summary = await _get_or_create_summary(thread_id, _budget_for_depth(0))
+            summary = await _get_or_create_summary(thread_id, _budget_for_depth(0), lang=lang)
             if summary:
                 summary_prefix = [{
                     "role": "system",
-                    "content": f"[对话历史摘要（第 {_THREAD_MSG_LIMIT + 1} 条之前）]\n{summary}",
+                    "content": (
+                        f"[Conversation history summary (older than the last {_THREAD_MSG_LIMIT} messages)]\n"
+                        f"{summary}"
+                    ),
                 }]
 
         return _trim_context(summary_prefix + rag_items + history)
@@ -255,7 +269,7 @@ async def build_context(
 
     # Concurrently fetch all ancestor summaries
     summaries: list[str] = await asyncio.gather(*[
-        _get_or_create_summary(anc["id"], budget)
+        _get_or_create_summary(anc["id"], budget, lang=lang)
         for anc, budget in zip(ancestors_root_first, budgets)
     ])
 
@@ -265,15 +279,22 @@ async def build_context(
         if not summary:
             continue
         depth_from_root = i
-        label = "主线对话摘要" if depth_from_root == 0 else f"第 {depth_from_root} 层子线程摘要"
+        label = (
+            "Main-thread summary"
+            if depth_from_root == 0
+            else f"Sub-thread summary (depth {depth_from_root})"
+        )
         prefix.append({"role": "system", "content": f"[{label}]\n{summary}"})
 
     # Anchor text (kept in full; it is the core reference for the sub-thread)
     anchor = thread.get("anchor_text", "")
     if anchor:
         prefix.append({
             "role": "system",
-            "content": f'用户在上述对话中选中了以下内容并提出追问，请围绕这段内容回答：\n"{anchor}"',
+            "content": (
+                "The user selected the following span in the conversation above and is asking a "
+                f'follow-up about it. Focus your answer on this span:\n"{anchor}"'
+            ),
         })
 
     # Most recent N messages in the current sub-thread (fetched desc, then reversed for chronological order)