wananing
diff --git a/‎AGENTS.md‎
Lines changed: 37 additions & 0 deletions b/‎AGENTS.md‎
Lines changed: 37 additions & 0 deletions
diff --git a/‎backend/agents/vision.py‎
Lines changed: 171 additions & 0 deletions b/‎backend/agents/vision.py‎
Lines changed: 171 additions & 0 deletions
@@ -0,0 +1,37 @@
+# Repository Guidelines
+
+## Project Structure & Module Organization
+
+This is a Smart Health Assistant with a Python FastAPI/LangGraph backend and React/Vite frontend. Backend lives in `backend/`: `main.py` is the API/SSE entrypoint, `agents/` contains the router and specialist agents, `skills/` contains auto-discovered tools, and `rag/` contains retrieval code plus knowledge documents in `rag/documents/`. Backend tests (`test_*.py`) sit directly in the `backend/` root.
+
+Frontend lives in `frontend/src/`: `screens/` are app views, `components/` contains domain UI, `store/` holds shared context, `services/` contains API clients, and `types/` defines shared contracts. Assets are in `frontend/public/` and `frontend/src/assets/`. Docs are under `docs/`, `backend/docs/`, and `frontend/docs/`.
+
+## Build, Test, and Development Commands
+
+- `cd backend && uv sync`: install Python 3.13 dependencies.
+- `cd backend && uv run python -m rag.ingest`: build the local RAG index; add `--rebuild` after document edits.
+- `cd backend && uv run uvicorn main:app --reload --port 8000`: start the API.
+- `cd backend && python -m pytest test_*.py`: run pytest-compatible backend tests.
+- `cd frontend && npm install`: install dependencies.
+- `cd frontend && npm run dev`: start Vite on `http://localhost:5173`.
+- `cd frontend && npm run build`: type-check and build assets.
+- `cd frontend && npm run lint`: run ESLint.
+- `cd frontend && node test_pw.cjs`: run the Playwright smoke check.
+
+## Coding Style & Naming Conventions
+
+Use 4-space indentation and type hints for Python. Keep agent modules named by role, such as `clinic.py` or `pharmacy.py`. Add skills as `backend/skills/<skill_name>/SKILL.md`, `skill.py`, and `__init__.py`. Use Pydantic models for schemas.
+
+Use TypeScript, React function components, and PascalCase filenames such as `ChatCardRenderer.tsx`. Keep shared types in `frontend/src/types/index.ts`, domain UI in matching component folders, and Tailwind/CSS aligned with the mobile-first design.
+
+## Testing Guidelines
+
+Backend tests may require `.env` values such as `ARK_API_KEY`. Prefer pytest-compatible `test_*.py` files for deterministic logic, and keep manual async checks executable with `python test_name.py`. For frontend changes, run `npm run lint` and `npm run build`; add Playwright checks for UI flow changes.
+
+## Commit & Pull Request Guidelines
+
+Recent history uses prefixes like `docs:`, `refactor:`, and `fix`, with occasional Chinese summaries. Keep commits focused and name the area. Pull requests should include the change, commands run, linked issues, and screenshots or recordings for visible mobile UI changes.
+
+## Security & Configuration Tips
+
+Do not commit `.env`, API keys, generated vector stores such as `backend/rag/chroma_db/`, virtual environments, or frontend build output. Keep CORS and service URLs aligned with local dev ports.
@@ -0,0 +1,171 @@
+"""
+Vision input adapter for report interpretation and pharmacy photo flows.
+
+This module keeps image recognition separate from medical/pharmacy reasoning:
+the Volcengine multimodal model extracts visible facts, then the existing
+report/pharmacy agents interpret those facts.
+"""
+from __future__ import annotations
+
+import base64
+import os
+import re
+from typing import Literal
+
+
+# Closed set of scan flows accepted by this module; the same strings are the
+# keys of SCAN_TYPE_TO_AGENT below.
+VisionScanType = Literal["report", "drug_box", "trace_code"]
+
+# MIME types accepted by validate_image_upload.
+ALLOWED_IMAGE_TYPES = {"image/jpeg", "image/png", "image/webp"}
+# Upper bound on the upload size, in bytes (8 * 1024 * 1024).
+MAX_IMAGE_BYTES = 8 * 1024 * 1024
+
+# Scan type -> agent name. normalize_scan_type uses the keys as the set of
+# valid scan types; presumably callers use the values for routing (not
+# visible in this module).
+SCAN_TYPE_TO_AGENT = {
+    "report": "report_agent",
+    "drug_box": "pharmacy_agent",
+    "trace_code": "pharmacy_agent",
+}
+
+# Base URL handed to the OpenAI-compatible client in recognize_image.
+_BASE_URL = "https://ark.cn-beijing.volces.com/api/v3"
+
+
+class VisionInputError(ValueError):
+    """Signal that an uploaded image or the requested scan type was rejected."""
+
+
+def normalize_scan_type(scan_type: str) -> VisionScanType:
+    """Return the canonical scan type for a caller-supplied value.
+
+    Surrounding whitespace and letter case are ignored before the value is
+    checked against the keys of SCAN_TYPE_TO_AGENT.
+
+    Raises:
+        VisionInputError: If the value is not a string or does not name a
+            supported scan type.
+    """
+    # Non-string values (e.g. None) are reported as an input error instead of
+    # escaping as AttributeError from .strip().
+    if isinstance(scan_type, str):
+        normalized = scan_type.strip().lower()
+        if normalized in SCAN_TYPE_TO_AGENT:
+            return normalized  # type: ignore[return-value]
+    raise VisionInputError("scan_type must be one of: report, drug_box, trace_code")
+
+
+def validate_image_upload(content_type: str | None, size_bytes: int) -> None:
+    """Reject uploads with an unsupported MIME type or an out-of-range size.
+
+    Checks run in this order: MIME type, empty payload, oversized payload.
+
+    Raises:
+        VisionInputError: With a message describing the first failed check.
+    """
+    if content_type not in ALLOWED_IMAGE_TYPES:
+        problem = "Only JPEG, PNG, and WebP images are supported"
+    elif size_bytes <= 0:
+        problem = "Uploaded image is empty"
+    elif size_bytes > MAX_IMAGE_BYTES:
+        problem = "Uploaded image must be 8 MB or smaller"
+    else:
+        return
+    raise VisionInputError(problem)
+
+
+def build_vision_prompt(scan_type: VisionScanType) -> str:
+    """Return the extraction prompt sent to the vision model for *scan_type*.
+
+    "report" and "trace_code" have dedicated prompts; every other value gets
+    the drug-box prompt.
+    """
+    if scan_type == "report":
+        prompt_lines = [
+            "请识别这张检查/检验报告图片中的医学信息。",
+            "只提取报告类型、检查项目、数值、单位、参考范围、异常标记、报告日期。",
+            "不要提取或输出姓名、身份证号、手机号、住址、就诊卡号、病历号、条形码号等个人身份信息。",
+            "如果这些信息出现在图片中，请统一写为「已隐藏」。",
+            "如果字段看不清，请写「无法确认」。",
+            "不要诊断疾病，不要给治疗方案，只输出可用于后续报告解读的结构化内容。",
+        ]
+    elif scan_type == "trace_code":
+        prompt_lines = [
+            "请识别这张药品追溯码或药品包装图片中的可见信息。",
+            "提取药品名称、通用名、规格、生产厂家、批准文号、有效期、批号、追溯码可见内容。",
+            "如果字段看不清，请写「无法确认」。",
+            "不要判断真伪，不要编造监管查询结果，只输出图片中可确认的信息。",
+        ]
+    else:
+        prompt_lines = [
+            "请识别这张药品包装图片中的药品信息。",
+            "提取药品名称、通用名、规格、生产厂家、批准文号、有效期、用法用量、是否处方药。",
+            "如果字段看不清，请写「无法确认」。",
+            "不要编造说明书内容，不要给超出图片和药品知识的结论。",
+        ]
+    # Instructions are newline-separated, with no trailing newline.
+    return "\n".join(prompt_lines)
+
+
+# A known PII label, a half- or full-width colon, then the value up to the
+# next newline/comma/semicolon. Group 1 is the label so it can be kept.
+# NOTE(review): the \s* after the colon also matches newlines, so a label
+# with an empty value consumes the following line — confirm this is intended.
+_LABEL_PATTERN = re.compile(
+    r"(姓名|身份证号?|手机号|电话|住址|地址|就诊卡号|病历号|条形码号?|患者ID|门诊号|住院号)"
+    r"\s*[:：]\s*[^\n，,；;]+"
+)
+# Stand-alone 11-digit number starting with 1[3-9] (mainland mobile format).
+_PHONE_PATTERN = re.compile(r"(?<!\d)1[3-9]\d{9}(?!\d)")
+# Stand-alone resident ID number in either layout:
+#   18 chars: 6-digit region + YYYYMMDD (19xx/20xx) + 3-digit sequence + check char
+#   15 chars: 6-digit region + YYMMDD + 3-digit sequence (legacy, no check char)
+# The two layouts are separate alternatives so that the check character is
+# only required for the 18-character form.
+_ID_PATTERN = re.compile(
+    r"(?<![0-9A-Za-z])"
+    r"\d{6}"
+    r"(?:"
+    r"(?:19|20)\d{2}(?:0[1-9]|1[0-2])(?:0[1-9]|[12]\d|3[01])\d{3}[\dXx]"
+    r"|"
+    r"\d{2}(?:0[1-9]|1[0-2])(?:0[1-9]|[12]\d|3[01])\d{3}"
+    r")"
+    r"(?![0-9A-Za-z])"
+)
+
+
+def redact_sensitive_text(text: str) -> str:
+    """Mask common PII in *text* on a best-effort basis.
+
+    Labelled fields keep their label and get a fixed placeholder value; bare
+    phone numbers and ID numbers are then replaced by fixed placeholders.
+    """
+    # Applied in sequence: labelled fields first, then bare phone/ID numbers.
+    substitutions = (
+        (_LABEL_PATTERN, lambda found: found.group(1) + "：已隐藏"),
+        (_PHONE_PATTERN, "已隐藏手机号"),
+        (_ID_PATTERN, "已隐藏身份证号"),
+    )
+    cleaned = text
+    for pattern, replacement in substitutions:
+        cleaned = pattern.sub(replacement, cleaned)
+    return cleaned
+
+
+def compose_agent_message(scan_type: VisionScanType, vision_text: str) -> str:
+    """Wrap the recognised text in the user message for the downstream agent.
+
+    The text is redacted and stripped first; if nothing remains, the
+    placeholder "无法确认" is used. "report" and "trace_code" have their own
+    framing; every other scan type gets the drug-box framing.
+    """
+    body = redact_sensitive_text(vision_text).strip()
+    if not body:
+        body = "无法确认"
+
+    if scan_type == "report":
+        opening = "用户上传了一张检验报告图片，个人身份信息已隐藏。火山视觉模型识别结果："
+        request = "请基于以上内容进行报告解读。"
+    elif scan_type == "trace_code":
+        opening = "用户上传了一张药品追溯码或药品包装图片。火山视觉模型识别结果："
+        request = "请说明可识别出的药品信息、用药注意事项，并提醒用户真伪需以正规追溯平台查询为准。"
+    else:
+        opening = "用户上传了一张药盒图片。火山视觉模型识别结果："
+        request = "请说明这个药的用途、用法用量、禁忌、注意事项，以及是否适合当前用户。"
+
+    # Layout: opening line, recognised text, blank line, request.
+    return f"{opening}\n{body}\n\n{request}"
+
+
+def image_to_data_url(image_bytes: bytes, content_type: str) -> str:
+    """Return a base64 ``data:`` URL for *image_bytes* labelled with *content_type*."""
+    payload = base64.b64encode(image_bytes).decode("ascii")
+    return "data:{};base64,{}".format(content_type, payload)
+
+
+def _ark_api_key() -> str:
+    """Return the ARK_API_KEY environment variable, or "" when it is unset."""
+    return os.getenv("ARK_API_KEY", "")
+
+
+def _vision_model_id() -> str:
+    """Return the model id to use for vision requests.
+
+    Precedence: ARK_VISION_MODEL_ID, then ARK_MODEL_ID, then the built-in
+    default. A variable that is unset or empty is skipped, so a blank entry
+    in the environment never yields an empty model id.
+    """
+    return (
+        os.environ.get("ARK_VISION_MODEL_ID")
+        or os.environ.get("ARK_MODEL_ID")
+        or "doubao-seed-1-6-flash-250828"
+    )
+
+
+async def recognize_image(image_bytes: bytes, content_type: str, scan_type: VisionScanType) -> str:
+    """
+    Call the Volcengine ARK multimodal model and return redacted visible facts.
+
+    The returned content is user-provided context for downstream agents, never a
+    system prompt.
+
+    Args:
+        image_bytes: Raw bytes of the uploaded image.
+        content_type: MIME type of the upload; must be in ALLOWED_IMAGE_TYPES.
+        scan_type: Selects the extraction prompt (see build_vision_prompt).
+
+    Returns:
+        The model's text with redact_sensitive_text applied; an empty string
+        when the response has no choices or no message content.
+
+    Raises:
+        VisionInputError: If the upload fails validation or ARK_API_KEY is
+            not configured.
+    """
+    validate_image_upload(content_type, len(image_bytes))
+
+    api_key = _ark_api_key()
+    if not api_key:
+        raise VisionInputError("ARK_API_KEY is not configured")
+
+    # Local import: the openai SDK is only needed on this code path.
+    from openai import AsyncOpenAI
+
+    # A client is created per call, so close it (and its HTTP connections)
+    # as soon as the request finishes, including when the request raises.
+    async with AsyncOpenAI(api_key=api_key, base_url=_BASE_URL) as client:
+        response = await client.chat.completions.create(
+            model=_vision_model_id(),
+            temperature=0,
+            messages=[
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "text", "text": build_vision_prompt(scan_type)},
+                        {
+                            "type": "image_url",
+                            "image_url": {"url": image_to_data_url(image_bytes, content_type)},
+                        },
+                    ],
+                }
+            ],
+            # Provider-specific request field, passed through outside the SDK's typed params.
+            extra_body={"thinking": {"type": "disabled"}},
+        )
+
+    content = response.choices[0].message.content if response.choices else ""
+    if content is None:
+        # Without this, str(None) would hand the literal text "None" to the
+        # downstream agent as if it were a recognition result.
+        content = ""
+    if isinstance(content, list):
+        content = "\n".join(str(part) for part in content)
+    return redact_sensitive_text(str(content))