|
| 1 | +""" |
| 2 | +Vision input adapter for report interpretation and pharmacy photo flows. |
| 3 | +
|
| 4 | +This module keeps image recognition separate from medical/pharmacy reasoning: |
| 5 | +the Volcengine multimodal model extracts visible facts, then the existing |
| 6 | +report/pharmacy agents interpret those facts. |
| 7 | +""" |
| 8 | +from __future__ import annotations |
| 9 | + |
| 10 | +import base64 |
| 11 | +import os |
| 12 | +import re |
| 13 | +from typing import Literal |
| 14 | + |
| 15 | + |
# The kinds of photo this adapter accepts.
VisionScanType = Literal["report", "drug_box", "trace_code"]

# Upload constraints enforced by validate_image_upload().
ALLOWED_IMAGE_TYPES = {"image/jpeg", "image/png", "image/webp"}
MAX_IMAGE_BYTES = 8 * 1024 ** 2  # 8 MiB

# Name of the downstream agent associated with each scan type.
SCAN_TYPE_TO_AGENT = dict(
    report="report_agent",
    drug_box="pharmacy_agent",
    trace_code="pharmacy_agent",
)

# Base URL handed to the OpenAI-compatible client in recognize_image().
_BASE_URL = "https://ark.cn-beijing.volces.com/api/v3"
| 28 | + |
| 29 | + |
class VisionInputError(ValueError):
    """Signal that an uploaded image or the requested scan type is invalid."""
| 32 | + |
| 33 | + |
def normalize_scan_type(scan_type: str) -> VisionScanType:
    """Return *scan_type* trimmed and lower-cased if it is a known scan type.

    Raises:
        VisionInputError: if the cleaned value is not a key of
            ``SCAN_TYPE_TO_AGENT``.
    """
    candidate = scan_type.strip().lower()
    if candidate not in SCAN_TYPE_TO_AGENT:
        raise VisionInputError("scan_type must be one of: report, drug_box, trace_code")
    return candidate  # type: ignore[return-value]
| 39 | + |
| 40 | + |
def validate_image_upload(content_type: str | None, size_bytes: int) -> None:
    """Check an upload's MIME type and size before it is sent to the model.

    The checks run in a fixed order (type, emptiness, upper size limit) and
    the first one that fails determines the error message.

    Raises:
        VisionInputError: if the content type is not in
            ``ALLOWED_IMAGE_TYPES``, the image is empty, or it is larger than
            ``MAX_IMAGE_BYTES``.
    """
    problem = None
    if content_type not in ALLOWED_IMAGE_TYPES:
        problem = "Only JPEG, PNG, and WebP images are supported"
    elif size_bytes <= 0:
        problem = "Uploaded image is empty"
    elif size_bytes > MAX_IMAGE_BYTES:
        problem = "Uploaded image must be 8 MB or smaller"

    if problem is not None:
        raise VisionInputError(problem)
| 48 | + |
| 49 | + |
def build_vision_prompt(scan_type: VisionScanType) -> str:
    """Return the extraction prompt sent to the vision model for *scan_type*.

    ``"report"`` and ``"trace_code"`` have dedicated prompts; every other
    value falls back to the drug-box prompt.
    """
    # Instruction shared by all prompts: mark unreadable fields explicitly.
    unreadable_hint = "如果字段看不清,请写「无法确认」。"

    if scan_type == "report":
        lines = [
            "请识别这张检查/检验报告图片中的医学信息。",
            "只提取报告类型、检查项目、数值、单位、参考范围、异常标记、报告日期。",
            "不要提取或输出姓名、身份证号、手机号、住址、就诊卡号、病历号、条形码号等个人身份信息。",
            "如果这些信息出现在图片中,请统一写为「已隐藏」。",
            unreadable_hint,
            "不要诊断疾病,不要给治疗方案,只输出可用于后续报告解读的结构化内容。",
        ]
    elif scan_type == "trace_code":
        lines = [
            "请识别这张药品追溯码或药品包装图片中的可见信息。",
            "提取药品名称、通用名、规格、生产厂家、批准文号、有效期、批号、追溯码可见内容。",
            unreadable_hint,
            "不要判断真伪,不要编造监管查询结果,只输出图片中可确认的信息。",
        ]
    else:
        lines = [
            "请识别这张药品包装图片中的药品信息。",
            "提取药品名称、通用名、规格、生产厂家、批准文号、有效期、用法用量、是否处方药。",
            unreadable_hint,
            "不要编造说明书内容,不要给超出图片和药品知识的结论。",
        ]

    return "\n".join(lines)
| 75 | + |
| 76 | + |
# A labelled personal field such as "姓名: 张三". The value runs up to the next
# newline, comma, or semicolon. Both ASCII and full-width punctuation are
# accepted, since Chinese documents commonly use ":", "," and ";".
_LABEL_PATTERN = re.compile(
    r"(姓名|身份证号?|手机号|电话|住址|地址|就诊卡号|病历号|条形码号?|患者ID|门诊号|住院号)"
    r"\s*[::]\s*[^\n,,;;]+"
)

# A stand-alone 11-digit mainland mobile number (not embedded in a longer digit run).
_PHONE_PATTERN = re.compile(r"(?<!\d)1[3-9]\d{9}(?!\d)")

# A resident ID number that is not embedded in a longer alphanumeric token:
#   * 18 characters: 6-digit region, 19xx/20xx birth date, 3-digit sequence,
#     check character (digit or X);
#   * 15 digits (legacy format): 6-digit region, yymmdd, 3-digit sequence.
#     The trailing check character is optional here so that everything the
#     previous single-branch pattern matched is still matched.
_ID_PATTERN = re.compile(
    r"(?<![0-9A-Za-z])\d{6}"
    r"(?:"
    r"(?:19|20)\d{2}(?:0[1-9]|1[0-2])(?:0[1-9]|[12]\d|3[01])\d{3}[\dXx]"
    r"|"
    r"\d{2}(?:0[1-9]|1[0-2])(?:0[1-9]|[12]\d|3[01])\d{3}[\dXx]?"
    r")"
    r"(?![0-9A-Za-z])"
)


def redact_sensitive_text(text: str) -> str:
    """Best-effort redaction for common PII that a vision model may return.

    Three passes are applied in order:

    1. labelled fields (name, ID number, phone, address, card/record numbers)
       keep their label but have the value replaced with "已隐藏";
    2. stand-alone mobile numbers become "已隐藏手机号";
    3. stand-alone 18- or 15-digit ID numbers become "已隐藏身份证号".

    Args:
        text: Raw text returned by the vision model.

    Returns:
        The text with every match replaced; text without matches is returned
        unchanged.
    """
    # Keep the label so downstream readers can see that a field was present.
    redacted = _LABEL_PATTERN.sub(r"\g<1>:已隐藏", text)
    redacted = _PHONE_PATTERN.sub("已隐藏手机号", redacted)
    redacted = _ID_PATTERN.sub("已隐藏身份证号", redacted)
    return redacted
| 96 | + |
| 97 | + |
def compose_agent_message(scan_type: VisionScanType, vision_text: str) -> str:
    """Wrap the vision model's output in the user message for the downstream agent.

    The text is passed through ``redact_sensitive_text`` and stripped; if
    nothing remains, the placeholder "无法确认" is used instead. ``"report"``
    and ``"trace_code"`` have their own wording; any other scan type gets the
    drug-box wording.
    """
    facts = redact_sensitive_text(vision_text).strip()
    if not facts:
        facts = "无法确认"

    if scan_type == "report":
        intro = "用户上传了一张检验报告图片,个人身份信息已隐藏。火山视觉模型识别结果:"
        request = "请基于以上内容进行报告解读。"
    elif scan_type == "trace_code":
        intro = "用户上传了一张药品追溯码或药品包装图片。火山视觉模型识别结果:"
        request = "请说明可识别出的药品信息、用药注意事项,并提醒用户真伪需以正规追溯平台查询为准。"
    else:
        intro = "用户上传了一张药盒图片。火山视觉模型识别结果:"
        request = "请说明这个药的用途、用法用量、禁忌、注意事项,以及是否适合当前用户。"

    # Layout: intro line, recognised facts, blank line, instruction.
    return f"{intro}\n{facts}\n\n{request}"
| 119 | + |
| 120 | + |
def image_to_data_url(image_bytes: bytes, content_type: str) -> str:
    """Return *image_bytes* as a base64 ``data:`` URL labelled with *content_type*."""
    payload = str(base64.b64encode(image_bytes), "ascii")
    return "data:{};base64,{}".format(content_type, payload)
| 124 | + |
| 125 | + |
def _ark_api_key() -> str:
    """Return the ``ARK_API_KEY`` environment variable, or ``""`` when it is unset."""
    return os.getenv("ARK_API_KEY", "")
| 128 | + |
| 129 | + |
def _vision_model_id() -> str:
    """Return the model id to use for vision requests.

    Lookup order: ``ARK_VISION_MODEL_ID``, then ``ARK_MODEL_ID``, then the
    built-in default. A variable that is set but empty (common with ``.env``
    files and container templates) is treated the same as an unset one, so an
    empty model id is never returned.
    """
    return (
        os.environ.get("ARK_VISION_MODEL_ID")
        or os.environ.get("ARK_MODEL_ID")
        or "doubao-seed-1-6-flash-250828"
    )
| 132 | + |
| 133 | + |
async def recognize_image(image_bytes: bytes, content_type: str, scan_type: VisionScanType) -> str:
    """
    Call Volcengine ARK multimodal model and return redacted visible facts.

    The returned content is user-provided context for downstream agents, never a
    system prompt.

    Args:
        image_bytes: Raw bytes of the uploaded image.
        content_type: MIME type of the upload; it is validated and also used
            as the media type of the data URL sent to the model.
        scan_type: Selects the extraction prompt.

    Returns:
        The model's text output after ``redact_sensitive_text``. An empty
        string is returned when the response has no choices or no content.

    Raises:
        VisionInputError: if the upload fails validation or ``ARK_API_KEY`` is
            not configured.
    """
    validate_image_upload(content_type, len(image_bytes))

    api_key = _ark_api_key()
    if not api_key:
        raise VisionInputError("ARK_API_KEY is not configured")

    # Function-scope import: the SDK is only needed when a call is actually made.
    from openai import AsyncOpenAI

    user_content = [
        {"type": "text", "text": build_vision_prompt(scan_type)},
        {
            "type": "image_url",
            "image_url": {"url": image_to_data_url(image_bytes, content_type)},
        },
    ]

    # Use the client as an async context manager so its HTTP connections are
    # released after the request instead of lingering until garbage collection.
    async with AsyncOpenAI(api_key=api_key, base_url=_BASE_URL) as client:
        response = await client.chat.completions.create(
            model=_vision_model_id(),
            temperature=0,
            messages=[{"role": "user", "content": user_content}],
            # Provider-specific request field passed through unchanged;
            # presumably turns off the model's "thinking" mode — confirm
            # against the ARK API reference.
            extra_body={"thinking": {"type": "disabled"}},
        )

    content = response.choices[0].message.content if response.choices else None
    if isinstance(content, list):
        content = "\n".join(str(part) for part in content)
    # The message content may be None; str(None) would leak the literal
    # text "None" to downstream agents, so map it to an empty string.
    text = "" if content is None else str(content)
    return redact_sensitive_text(text)
0 commit comments