Spaces:
Revert to GLM-4.6V (stable)
app.py
CHANGED
@@ -1,6 +1,6 @@
 """
 BubbleScribe - AI Manga & Comic Translator
-Translate manga/comics using
+Translate manga/comics using GLM-4.6V for OCR + Translation and LaMa for inpainting.
 Optimized for NVIDIA T4 GPU
 """
 
@@ -13,7 +13,7 @@ import re
 import numpy as np
 from PIL import Image, ImageDraw, ImageFont
 from io import BytesIO
-from
+from openai import OpenAI
 from concurrent.futures import ThreadPoolExecutor
 import threading
 
@@ -90,20 +90,20 @@ def get_font(size: int):
     return font
 
 # ============================================================
-#
+# GLM-4.6V CLIENT (Z.ai API)
 # ============================================================
 
-
+_glm_client = None
 
-def
-    """Get or create
-    global
-    if
-        api_key = os.environ.get("
+def get_glm_client():
+    """Get or create GLM client."""
+    global _glm_client
+    if _glm_client is None:
+        api_key = os.environ.get("GLM_API_KEY")
         if not api_key:
             return None
-
-    return
+        _glm_client = OpenAI(api_key=api_key, base_url="https://api.z.ai/api/paas/v4")
+    return _glm_client
 
 # ============================================================
 # IMAGE UTILITIES
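For reference, the client this hunk restores is a stock OpenAI-compatible client pointed at Z.ai, so the setup can be smoke-tested outside the Space. A minimal sketch, assuming only what the diff shows (the GLM_API_KEY secret, the base_url, and the glm-4.6v-flash model name); the script name and test prompt are illustrative, not part of app.py:

# smoke_test_glm.py - illustrative, not part of app.py
import os
from openai import OpenAI

# Same secret and endpoint the Space uses
client = OpenAI(api_key=os.environ["GLM_API_KEY"], base_url="https://api.z.ai/api/paas/v4")

# Plain text request against the model name this commit reverts to
resp = client.chat.completions.create(
    model="glm-4.6v-flash",
    messages=[{"role": "user", "content": "Reply with the single word: ok"}],
)
print(resp.choices[0].message.content)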
@@ -225,12 +225,12 @@ def safe_parse_json(text: str) -> list:
 # ============================================================
 
 def detect_and_translate(image: Image.Image, source_lang: str, target_lang: str, progress=gr.Progress()):
-    """Use
-    client =
+    """Use GLM-4.6V to detect text regions and translate."""
+    client = get_glm_client()
     if not client:
-        return None, "Error:
+        return None, "Error: GLM_API_KEY not set in Space secrets"
 
-    progress(0.1, desc="Analyzing image with
+    progress(0.1, desc="Analyzing image with GLM-4.6V...")
 
     original_size = image.size
 
@@ -281,7 +281,7 @@ CRITICAL: Find at least 20-50 text regions. This image has many text elements. S
 
     try:
         response = client.chat.completions.create(
-            model="
+            model="glm-4.6v-flash",
             messages=[
                 {
                     "role": "user",
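The messages payload is truncated in this hunk. Under the OpenAI-compatible vision format, the Space presumably attaches the page as a base64 data URL next to the detection/translation prompt; the sketch below shows that shape only, and the helper names and prompt text are assumptions rather than app.py's actual code:

import base64
from io import BytesIO
from PIL import Image

def image_to_data_url(image: Image.Image) -> str:
    # Encode the PIL page as a base64 PNG data URL for the chat API
    buf = BytesIO()
    image.save(buf, format="PNG")
    return "data:image/png;base64," + base64.b64encode(buf.getvalue()).decode()

def build_messages(image: Image.Image, prompt: str) -> list:
    # Hypothetical user message: one image part plus the text prompt asking for JSON regions
    return [{
        "role": "user",
        "content": [
            {"type": "image_url", "image_url": {"url": image_to_data_url(image)}},
            {"type": "text", "text": prompt},
        ],
    }]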
@@ -300,17 +300,19 @@ CRITICAL: Find at least 20-50 text regions. This image has many text elements. S
         progress(0.4, desc="Processing response...")
 
         result_text = ""
-
-        msg = response.choices[0].message
-        if hasattr(msg, 'content') and msg.content:
-            result_text = msg.content
+        msg = response.choices[0].message
 
-        #
+        # Try multiple response fields
+        if hasattr(msg, 'content') and msg.content:
+            result_text = msg.content
+        if hasattr(msg, 'reasoning_content') and msg.reasoning_content:
+            result_text = result_text + "\n" + msg.reasoning_content if result_text else msg.reasoning_content
+
+        # Strip GLM special tokens
         result_text = result_text.replace('<|begin_of_box|>', '').replace('<|end_of_box|>', '')
-        result_text = result_text.replace('<|im_start|>', '').replace('<|im_end|>', '')
 
-        print(f"📝
-        print(f"📝
+        print(f"📝 GLM-4.6V Response length: {len(result_text)} chars")
+        print(f"📝 GLM-4.6V Response preview: {result_text[:500] if result_text else 'EMPTY'}...")
 
         # Parse JSON from response with robust error handling
         detections = safe_parse_json(result_text)
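safe_parse_json itself sits outside this diff; the call only tells us it must pull a JSON list out of free-form model text. A sketch of that kind of tolerant parsing, assuming nothing about the real implementation (the function name here is deliberately different):

import json
import re

def extract_json_list(text: str) -> list:
    # Drop Markdown code fences the model may wrap around its JSON
    cleaned = re.sub(r"```(?:json)?", "", text).strip()
    try:
        data = json.loads(cleaned)
        return data if isinstance(data, list) else []
    except json.JSONDecodeError:
        # Fall back to the outermost [...] span
        start, end = cleaned.find("["), cleaned.rfind("]")
        if start != -1 and end > start:
            try:
                return json.loads(cleaned[start:end + 1])
            except json.JSONDecodeError:
                pass
    return []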
@@ -531,7 +533,7 @@ def translate_manga(image, source_lang, target_lang, show_boxes, apply_inpaint,
     image = image.convert('RGB')
 
     # Step 1: Detect and translate
-    progress(0.1, desc="🔍 Detecting text with
+    progress(0.1, desc="🔍 Detecting text with GLM-4.6V...")
     detections, status = detect_and_translate(image, source_lang, target_lang, progress)
 
     if detections is None:
@@ -652,13 +654,13 @@ with gr.Blocks(title="BubbleScribe", css=css, theme=gr.themes.Soft()) as demo:
     gr.HTML("""
     <div class="header">
         <h1>✍️ BubbleScribe</h1>
-        <p>AI-powered manga & comic translator using
+        <p>AI-powered manga & comic translator using GLM-4.6V + LaMa</p>
     </div>
     """)
 
     gr.HTML("""
     <div class="stats">
-        ⚡ <strong>Models:</strong>
+        ⚡ <strong>Models:</strong> GLM-4.6V (OCR & Translation) + LaMa (Inpainting)
     </div>
     """)
 
@@ -754,13 +756,13 @@ with gr.Blocks(title="BubbleScribe", css=css, theme=gr.themes.Soft()) as demo:
     - Sound effects may not always be detected
 
     ### 🔧 Powered By
-    - **
+    - **GLM-4.6V** - Text detection & translation (Z.ai API)
     - **LaMa** - Text removal inpainting (GPU-accelerated)
     """)
 
     gr.HTML("""
     <div style="text-align: center; margin-top: 20px; padding: 10px; background: rgba(0,0,0,0.05); border-radius: 8px;">
-        <strong>Models:</strong> <a href="https://huggingface.co/
+        <strong>Models:</strong> <a href="https://huggingface.co/zai-org/GLM-4.6V" target="_blank">GLM-4.6V</a> (OCR & Translation) •
         <a href="https://github.com/advimman/lama" target="_blank">LaMa</a> (Inpainting) •
         <strong>Created by:</strong> <a href="https://huggingface.co/lulavc" target="_blank">@lulavc</a>
     </div>