lulavc committed
Commit eefdc40 · verified · 1 parent: d0bcc31

Revert to GLM-4.6V (stable)

Files changed (1):
  1. app.py (+31, -29)
app.py CHANGED
@@ -1,6 +1,6 @@
 """
 BubbleScribe - AI Manga & Comic Translator
-Translate manga/comics using Qwen2-VL for OCR + Translation and LaMa for inpainting.
+Translate manga/comics using GLM-4.6V for OCR + Translation and LaMa for inpainting.
 Optimized for NVIDIA T4 GPU
 """
 
@@ -13,7 +13,7 @@ import re
 import numpy as np
 from PIL import Image, ImageDraw, ImageFont
 from io import BytesIO
-from huggingface_hub import InferenceClient
+from openai import OpenAI
 from concurrent.futures import ThreadPoolExecutor
 import threading
 
@@ -90,20 +90,20 @@ def get_font(size: int):
     return font
 
 # ============================================================
-# QWEN2-VL CLIENT (HuggingFace Inference API)
+# GLM-4.6V CLIENT (Z.ai API)
 # ============================================================
 
-_hf_client = None
+_glm_client = None
 
-def get_hf_client():
-    """Get or create HuggingFace Inference client."""
-    global _hf_client
-    if _hf_client is None:
-        api_key = os.environ.get("HF_TOKEN")
+def get_glm_client():
+    """Get or create GLM client."""
+    global _glm_client
+    if _glm_client is None:
+        api_key = os.environ.get("GLM_API_KEY")
         if not api_key:
             return None
-        _hf_client = InferenceClient(api_key=api_key)
-    return _hf_client
+        _glm_client = OpenAI(api_key=api_key, base_url="https://api.z.ai/api/paas/v4")
+    return _glm_client
 
 # ============================================================
 # IMAGE UTILITIES
@@ -225,12 +225,12 @@ def safe_parse_json(text: str) -> list:
 # ============================================================
 
 def detect_and_translate(image: Image.Image, source_lang: str, target_lang: str, progress=gr.Progress()):
-    """Use Qwen2-VL to detect text regions and translate."""
-    client = get_hf_client()
+    """Use GLM-4.6V to detect text regions and translate."""
+    client = get_glm_client()
     if not client:
-        return None, "Error: HF_TOKEN not set in Space secrets"
+        return None, "Error: GLM_API_KEY not set in Space secrets"
 
-    progress(0.1, desc="Analyzing image with Qwen2-VL...")
+    progress(0.1, desc="Analyzing image with GLM-4.6V...")
 
     original_size = image.size
 
@@ -281,7 +281,7 @@ CRITICAL: Find at least 20-50 text regions. This image has many text elements. S
 
     try:
         response = client.chat.completions.create(
-            model="Qwen/Qwen2-VL-7B-Instruct",
+            model="glm-4.6v-flash",
             messages=[
                 {
                     "role": "user",
@@ -300,17 +300,19 @@ CRITICAL: Find at least 20-50 text regions. This image has many text elements. S
         progress(0.4, desc="Processing response...")
 
         result_text = ""
-        if response.choices and len(response.choices) > 0:
-            msg = response.choices[0].message
-            if hasattr(msg, 'content') and msg.content:
-                result_text = msg.content
+        msg = response.choices[0].message
 
-        # Strip any special tokens
+        # Try multiple response fields
+        if hasattr(msg, 'content') and msg.content:
+            result_text = msg.content
+        if hasattr(msg, 'reasoning_content') and msg.reasoning_content:
+            result_text = result_text + "\n" + msg.reasoning_content if result_text else msg.reasoning_content
+
+        # Strip GLM special tokens
         result_text = result_text.replace('<|begin_of_box|>', '').replace('<|end_of_box|>', '')
-        result_text = result_text.replace('<|im_start|>', '').replace('<|im_end|>', '')
 
-        print(f"📝 Qwen2-VL Response length: {len(result_text)} chars")
-        print(f"📝 Qwen2-VL Response preview: {result_text[:500] if result_text else 'EMPTY'}...")
+        print(f"📝 GLM-4.6V Response length: {len(result_text)} chars")
+        print(f"📝 GLM-4.6V Response preview: {result_text[:500] if result_text else 'EMPTY'}...")
 
         # Parse JSON from response with robust error handling
         detections = safe_parse_json(result_text)
@@ -531,7 +533,7 @@ def translate_manga(image, source_lang, target_lang, show_boxes, apply_inpaint,
     image = image.convert('RGB')
 
     # Step 1: Detect and translate
-    progress(0.1, desc="🔍 Detecting text with Qwen2-VL...")
+    progress(0.1, desc="🔍 Detecting text with GLM-4.6V...")
     detections, status = detect_and_translate(image, source_lang, target_lang, progress)
 
     if detections is None:
@@ -652,13 +654,13 @@ with gr.Blocks(title="BubbleScribe", css=css, theme=gr.themes.Soft()) as demo:
     gr.HTML("""
     <div class="header">
         <h1>✍️ BubbleScribe</h1>
-        <p>AI-powered manga & comic translator using Qwen2-VL + LaMa</p>
+        <p>AI-powered manga & comic translator using GLM-4.6V + LaMa</p>
     </div>
     """)
 
     gr.HTML("""
    <div class="stats">
-        ⚡ <strong>Models:</strong> Qwen2-VL (OCR & Translation) + LaMa (Inpainting)
+        ⚡ <strong>Models:</strong> GLM-4.6V (OCR & Translation) + LaMa (Inpainting)
     </div>
     """)
 
@@ -754,13 +756,13 @@ with gr.Blocks(title="BubbleScribe", css=css, theme=gr.themes.Soft()) as demo:
     - Sound effects may not always be detected
 
     ### 🔧 Powered By
-    - **Qwen2-VL** - Text detection & translation (HuggingFace Inference API)
+    - **GLM-4.6V** - Text detection & translation (Z.ai API)
    - **LaMa** - Text removal inpainting (GPU-accelerated)
     """)
 
     gr.HTML("""
     <div style="text-align: center; margin-top: 20px; padding: 10px; background: rgba(0,0,0,0.05); border-radius: 8px;">
-        <strong>Models:</strong> <a href="https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct" target="_blank">Qwen2-VL-7B</a> (OCR & Translation) •
+        <strong>Models:</strong> <a href="https://huggingface.co/zai-org/GLM-4.6V" target="_blank">GLM-4.6V</a> (OCR & Translation) •
         <a href="https://github.com/advimman/lama" target="_blank">LaMa</a> (Inpainting) •
         <strong>Created by:</strong> <a href="https://huggingface.co/lulavc" target="_blank">@lulavc</a>
     </div>
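
For quick verification of the reverted code path, a minimal standalone sketch of the client setup this commit restores. It assumes the openai Python package (v1+) and a GLM_API_KEY secret, as in the diff; the text-only prompt is a stand-in for the Space's actual image message, which is omitted here.

import os

from openai import OpenAI

# Same setup as get_glm_client() in this commit: Z.ai's OpenAI-compatible endpoint.
client = OpenAI(
    api_key=os.environ["GLM_API_KEY"],
    base_url="https://api.z.ai/api/paas/v4",
)

# Hypothetical smoke test; the Space sends an image plus the OCR/translation prompt instead.
response = client.chat.completions.create(
    model="glm-4.6v-flash",
    messages=[{"role": "user", "content": "Reply with the single word: pong"}],
)

msg = response.choices[0].message
# Mirror the commit's defensive read: some GLM responses carry text in reasoning_content.
text = msg.content or getattr(msg, "reasoning_content", None) or ""
print(text)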