Merge pull request #46 from google-ai-edge:refactor/setup-instructions-and-robust-venv

copybara-github · copybara-github · commit abe98b86be8f · 2026-05-19T13:55:13.000-07:00
PiperOrigin-RevId: 918011862
diff --git a/examples/commands/convert_test.sh b/examples/commands/convert_test.sh
@@ -59,7 +59,7 @@ run_case "Convert: Qwen1.5-0.5B-Chat without Bundle" \
 
 # 2.4 Non-CausalLM Architecture Rejection
 run_case "Convert: bert-base-uncased (Verify Non-CausalLM Rejection)" \
-    bash -c "litert convert google-bert/bert-base-uncased --output 'models/bert_fail' 2>&1 | grep -q 'Currently only AutoModelForCausalLM is supported'"
+    bash -c "litert convert google-bert/bert-base-uncased --output 'models/bert_fail' 2>&1 | grep -q 'CausalLM'"
 
 # --- Summary Report ---
 print_summary_report "Convert Commands"
diff --git a/examples/models/gemma3.sh b/examples/models/gemma3.sh
@@ -0,0 +1,55 @@
+#!/bin/bash
+# Copyright 2026 The LiteRT CLI Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+# LiteRT CLI Gemma3 LLM Demo & Test Script
+set -e
+
+# Source shared utilities, resolved relative to this script's directory
+source "$(dirname "${BASH_SOURCE[0]}")/../utils.sh"
+
+setup_test_env "gemma3" "Gemma3 LLM Demo Script"
+
+# --- Part 1: Convert from HuggingFace Hub, then Run & Benchmark ---
+# Convert HuggingFace Model google/gemma-3-1b-it
+run_case "Convert: HuggingFace google/gemma-3-1b-it" \
+    litert convert google/gemma-3-1b-it --output "models/gemma3_converted"
+
+# Run the converted model
+run_case "Run Gemma3: Converted Model" \
+    litert lm run "models/gemma3_converted/model.litertlm" --prompt="What is the capital of France?"
+
+# Benchmark the converted model
+run_case "Benchmark Gemma3: Converted Model" \
+    litert lm benchmark "models/gemma3_converted/model.litertlm" -p 128 -d 128
+
+
+# --- Part 2: Directly Download, Run, and Benchmark Pre-converted Model ---
+# Run the pre-converted Gemma3 model directly from the HuggingFace repo
+run_case "Run Gemma3: Direct HuggingFace execution" \
+    litert lm run \
+        --from-huggingface-repo=litert-community/Gemma3-1B-IT \
+        gemma3-1b-it-int4.litertlm \
+        --prompt="What is the capital of France?"
+
+# Benchmark the pre-converted Gemma3 model directly from the HuggingFace repo
+run_case "Benchmark Gemma3: Direct HuggingFace execution" \
+    litert lm benchmark \
+        --from-huggingface-repo=litert-community/Gemma3-1B-IT \
+        gemma3-1b-it-int4.litertlm \
+        -p 128 -d 128
+
+# --- Summary Report ---
+print_summary_report "Gemma3"
diff --git a/examples/models/gemma4.sh b/examples/models/gemma4.sh
@@ -23,12 +23,14 @@ source "$(dirname "${BASH_SOURCE[0]}")/../utils.sh"
 
 setup_test_env "gemma4" "Gemma4 LLM Demo Script"
 
-# --- 1. Convert HuggingFace Model google/gemma-4-E2B-it ---
-# Wait for LiteRT Torch release.
-# run_case "Convert: HuggingFace google/gemma-4-E2B-it" \
-#    litert convert google/gemma-4-E2B-it --output "models/gemma4"
+# --- 1. Convert and run HuggingFace Model google/gemma-4-E2B-it ---
+run_case "Convert: HuggingFace google/gemma-4-E2B-it" \
+    litert convert google/gemma-4-E2B-it --output "models/gemma4"
 
-# --- 2. Run Gemma4 Generative LLM Model ---
+run_case "Run converted Gemma4 model google/gemma-4-E2B-it" \
+    litert lm run models/gemma4/model.litertlm --prompt="What is the capital of France?"
+
+# --- 2. Download and Run existing Gemma4 Model from HuggingFace ---
 run_case "Run Gemma4: Generative inference with custom prompt" \
     litert lm run --from-huggingface-repo=litert-community/gemma-4-E2B-it-litert-lm gemma-4-E2B-it.litertlm --prompt="What is the capital of France?"
 
diff --git a/litert_cli/commands/convert/huggingface.py b/litert_cli/commands/convert/huggingface.py
@@ -62,6 +62,12 @@ def convert_huggingface(
   click.echo(f"Starting conversion for model '{model}''")
 
   try:
+    is_causal_lm = False
+    is_gemma3 = False
+    is_gemma3n = False
+    is_gemma4 = False
+    is_gemma_vlm = False
+
     # Verify AutoModelForCausalLM architecture
     try:
       config = transformers.AutoConfig.from_pretrained(
@@ -103,6 +109,10 @@ def convert_huggingface(
       task = "image_text_to_text"
       export_kwargs["export_vision_encoder"] = True
       export_kwargs["externalize_embedder"] = True
+      if is_gemma3 or is_gemma3n:
+        export_kwargs["vision_encoder_quantization_recipe"] = (
+            "weight_only_wi8_afp32"
+        )
       if is_gemma4:
         export_kwargs["jinja_chat_template_override"] = (
             "litert-community/gemma-4-E2B-it-litert-lm"