v0.13.1: 修两个测试中发现的 bug（利润质量字段 + 行业 ROE 映射）

cindylui479-create · claude · cindylui479-create · commit 8298bd0bf939 · 2026-05-19T11:08:41.000-04:00
实测 v0.10-v0.13 时发现两个未渲染的报告章节，均为数据接口适配问题：

bug #1：#52 利润质量深度分解节空白
- 现象：报告"应收账款 / 合同负债"节未渲染（fp.accounts_receivable 等都是 None）
- 根因：stock_financial_abstract 返回的"常用指标"DataFrame 只含 70 个指标
  （主要是周转率、ROE、毛利率等），不含资产负债表科目本身
- 修复：新增 _a_balance_sheet_enrich 用 stock_balance_sheet_by_report_em 二次拉取
  按 REPORT_DATE (YYYY-MM-DD) 匹配 period (YYYYMMDD)，填充
  ACCOUNTS_RECE / CONTRACT_LIAB / ADVANCE_RECEIVABLES
- 验证：茅台 2025 应收 ¥260 万（极低，强势）+ 合同负债 ¥80 亿（经销商打款先行）

bug #2：#51 行业 ROE 横截面分位 0 peer
- 现象：报告显示"同行业有效样本不足 5 只（实际 0）"
- 根因：baostock industry 字段是证监会大类（'C15酒、饮料和精制茶制造业'），
  与 INDUSTRYCSRC1 的细分名"白酒"完全不匹配，原 industry[:2] 关键词不命中
- 修复：新增 _BAOSTOCK_INDUSTRY_MAP（20+ 行业）+ _map_to_baostock_industry
  白酒 → '酒、饮料'；煤炭 → '煤炭开采'；汽车 → '汽车制造'；银行 → '货币金融' 等
- 验证：茅台 ROE 30% 在白酒 29 同行中排第 4 名，分位 89.7%

测试：76/76 pass。

Co-Authored-By: Claude Opus 4.7 (1M context) &lt;noreply@anthropic.com&gt;
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "stockwise"
-version = "0.13.0"
+version = "0.13.1"
 description = "巴菲特/林奇范式 A 股 + 港股价值投资分析工具：Web UI + HKEx 链接 + 主营构成 + mkdocs 文档"
 license = { text = "MIT" }
 requires-python = ">=3.10"
diff --git a/stockwise/data/fetcher.py b/stockwise/data/fetcher.py
@@ -244,6 +244,46 @@ def _a_sina_daily(code: str) -> Optional[dict]:
 }
 
 
+def _a_balance_sheet_enrich(code: str, fin: Financials) -> None:
+    """v0.11 #52 fix：从东财资产负债表接口拉应收账款 / 合同负债填充到 fin。
+
+    stock_financial_abstract 返回的"常用指标"不含资产负债表科目，需要
+    stock_balance_sheet_by_report_em（英文列名：ACCOUNTS_RECE / CONTRACT_LIAB）。
+    """
+    from stockwise.data.cache import cached_call, TTL_FINANCIALS
+    if not fin.annual:
+        return
+    try:
+        prefix = "SH" if code.startswith("6") else "SZ"
+        symbol = f"{prefix}{code}"
+        df = cached_call(
+            "em:balance_sheet", symbol, TTL_FINANCIALS,
+            lambda: ak.stock_balance_sheet_by_report_em(symbol=symbol),
+        )
+    except Exception:
+        return
+    if df is None or df.empty:
+        return
+
+    # 建立 REPORT_DATE (YYYY-MM-DD) → row 索引（只取 12-31 年报）
+    import pandas as pd
+    df = df.copy()
+    df["REPORT_DATE_STR"] = df["REPORT_DATE"].astype(str).str[:10]
+    annual_rows = df[df["REPORT_DATE_STR"].str.endswith("-12-31")]
+    by_date = {row["REPORT_DATE_STR"]: row for _, row in annual_rows.iterrows()}
+
+    for p in fin.annual:
+        # period 是 "20251231"，转换为 "2025-12-31"
+        if len(p.period) >= 8:
+            date_str = f"{p.period[:4]}-{p.period[4:6]}-{p.period[6:8]}"
+            row = by_date.get(date_str)
+            if row is None:
+                continue
+            p.accounts_receivable = _to_float(row.get("ACCOUNTS_RECE"))
+            p.contract_liabilities = _to_float(row.get("CONTRACT_LIAB"))
+            p.prepayments = _to_float(row.get("ADVANCE_RECEIVABLES"))
+
+
 def _a_financials(code: str, years: int = 10) -> Financials:
     from stockwise.data.cache import cached_call, TTL_FINANCIALS
     df = cached_call(
@@ -286,7 +326,10 @@ def _a_financials(code: str, years: int = 10) -> Financials:
             period.revenue_yoy = _yoy(by_indicator.get("revenue"), col, prev_col)
             period.profit_yoy = _yoy(by_indicator.get("net_profit"), col, prev_col)
         annual.append(period)
-    return Financials(annual=annual)
+    fin = Financials(annual=annual)
+    # v0.11 #52：补充资产负债表科目（应收账款 / 合同负债）
+    _a_balance_sheet_enrich(code, fin)
+    return fin
 
 
 def _a_valuation(code: str) -> Valuation:
diff --git a/stockwise/data/industry_roe.py b/stockwise/data/industry_roe.py
@@ -31,6 +31,51 @@ class IndustryRoeRank:
 _ENABLED_VIEWS = {"default", "bank", "insurance", "cyclical", "semi_growth", "growth"}
 
 
+# INDUSTRYCSRC1（akshare 细分名）→ baostock 证监会大类关键词（包含匹配）
+_BAOSTOCK_INDUSTRY_MAP = {
+    # 食品 / 酒
+    "白酒": "酒、饮料",
+    "饮料": "酒、饮料",
+    "食品": "食品制造",
+    "农副食品": "农副食品加工",
+    # 资源 / 周期
+    "煤炭": "煤炭开采",
+    "钢铁": "黑色金属",
+    "有色金属": "有色金属",
+    "石油": "石油",
+    "化工": "化学原料",
+    "化学制品": "化学原料",
+    # 制造
+    "汽车": "汽车制造",
+    "家电": "电气机械",
+    "家用电器": "电气机械",
+    # 医药
+    "医药": "医药制造",
+    # 金融
+    "银行": "货币金融",
+    "货币金融": "货币金融",
+    "保险": "保险",
+    "证券": "资本市场",
+    # 房地产
+    "房地产": "房地产",
+    # 公用
+    "电力": "电力",
+    "燃气": "燃气",
+}
+
+
+def _map_to_baostock_industry(industry: str) -> Optional[str]:
+    """INDUSTRYCSRC1 细分名 → baostock 证监会大类的关键词（用于 contains 匹配）。"""
+    if not industry:
+        return None
+    if industry in _BAOSTOCK_INDUSTRY_MAP:
+        return _BAOSTOCK_INDUSTRY_MAP[industry]
+    for key, mapped in _BAOSTOCK_INDUSTRY_MAP.items():
+        if key in industry:
+            return mapped
+    return None
+
+
 def fetch_industry_roe_rank(code: str, industry: Optional[str],
                               company_roe: Optional[float]) -> IndustryRoeRank:
     """对 code 计算其在行业内的 ROE 分位。
@@ -78,21 +123,27 @@ def fetch_industry_roe_rank(code: str, industry: Optional[str],
 def _peers_roe(industry: str) -> list[tuple[str, Optional[float]]]:
     """从 baostock 拉同行业成员的近 5 年 ROE 均值。
 
-    简化：先拉行业表（按 INDUSTRYCSRC1 字段匹配），取前 30 个代码（baostock 行业代码无市值），
-    然后对每只查 profit_data 取近 5 年 ROE 均值。
+    baostock industry 字段是证监会大类格式（如 'C15酒、饮料和精制茶制造业'），
+    与 INDUSTRYCSRC1 的细分名（如"白酒"）不直接匹配。
+    映射策略：用关键词匹配 INDUSTRYCSRC1 → baostock 大类，覆盖主要行业。
     """
     from stockwise.industry import _ensure_baostock_login
     import baostock as bs
     _ensure_baostock_login()
 
-    # 同行业成员
     rs = bs.query_stock_industry()
     df = rs.get_data()
     if df is None or df.empty:
         return []
-    # baostock industry 字段是简化名（如"采掘业"），需要关键词包含匹配
-    members = df[df["industry"].str.contains(industry[:2], na=False)] \
-              if len(industry) >= 2 else df[df["industry"] == industry]
+
+    # INDUSTRYCSRC1 → baostock 大类关键词映射（取 baostock industry 字符串需含的关键字）
+    bs_keyword = _map_to_baostock_industry(industry)
+    if bs_keyword:
+        members = df[df["industry"].str.contains(bs_keyword, na=False, regex=False)]
+    else:
+        # fallback：用 INDUSTRYCSRC1 头 2 字
+        members = df[df["industry"].str.contains(industry[:2], na=False, regex=False)] \
+                  if len(industry) >= 2 else df[df["industry"] == industry]
     members = members.head(30)
 
     peers: list[tuple[str, Optional[float]]] = []