#!/usr/bin/env python3
import os, io, re, sys, subprocess, hashlib, base64, pathlib
from typing import Optional
import requests
from PIL import Image, ImageSequence
import gradio as gr
MODEL_DIR = pathlib.Path("model")
MODEL_DIR.mkdir(exist_ok=True, parents=True)
# Public mradermacher GGUF links (no tokens)
PRIMARY_URL = "https://huggingface.co/mradermacher/llama-joycaption-beta-one-hf-llava-GGUF/resolve/main/llama-joycaption-q4_k_s.gguf"
FALLBACK_URL = "https://huggingface.co/mradermacher/llama-joycaption-beta-one-hf-llava-GGUF/resolve/main/llama-joycaption-q4_k_m.gguf"
PRIMARY_NAME = MODEL_DIR / "llama-joycaption-q4_k_s.gguf"
FALLBACK_NAME = MODEL_DIR / "llama-joycaption-q4_k_m.gguf"
# Generation params
MAX_TOKENS = 128
TEMPERATURE = 0.2
TOP_P = 0.95
STOP = ["\n"]
def download_file(url: str, dest: pathlib.Path, timeout=120):
    """Stream `url` to `dest`, printing simple percentage progress."""
    if dest.exists():
        return
    print("Downloading", url)
    with requests.get(url, stream=True, timeout=timeout) as r:
        r.raise_for_status()
        total = int(r.headers.get("content-length", 0) or 0)
        done = 0
        with open(dest, "wb") as f:
            for chunk in r.iter_content(8192):
                if not chunk:
                    continue
                f.write(chunk)
                done += len(chunk)
                if total:
                    pct = done * 100 // total
                    print(f"\r{dest.name}: {pct}% ", end="", flush=True)
    print()
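# A hedged sketch of a more robust variant: download to a temporary file, verify a
# SHA-256 checksum, and only then move it into place. This avoids the case above
# where an interrupted download leaves a partial file that `dest.exists()` later
# treats as complete. `expected_sha256` is a caller-supplied assumption; no official
# checksums are published alongside the URLs used here.
def download_file_verified(url: str, dest: pathlib.Path, expected_sha256: Optional[str] = None, timeout=120):
    tmp = dest.with_suffix(dest.suffix + ".part")
    h = hashlib.sha256()
    with requests.get(url, stream=True, timeout=timeout) as r:
        r.raise_for_status()
        with open(tmp, "wb") as f:
            for chunk in r.iter_content(8192):
                if chunk:
                    f.write(chunk)
                    h.update(chunk)
    if expected_sha256 and h.hexdigest() != expected_sha256:
        tmp.unlink(missing_ok=True)
        raise RuntimeError(f"Checksum mismatch for {dest.name}")
    tmp.replace(dest)  # atomic rename: dest only ever exists fully written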
def mp4_to_gif(mp4_bytes: bytes) -> bytes:
    """Convert MP4 to GIF by uploading to ezgif.com and scraping the result page."""
    files = {"new-file": ("video.mp4", mp4_bytes, "video/mp4")}
    resp = requests.post("https://s.ezgif.com/video-to-gif", files=files, data={"file": "video.mp4"}, timeout=120)
    resp.raise_for_status()
    # The result URL only appears in the returned HTML, so this scrape is brittle
    # and will break if ezgif changes its markup.
    m = re.search(r'<img[^>]+src="([^"]+\.gif)"', resp.text) or re.search(r'src="([^"]+?/tmp/[^"]+\.gif)"', resp.text)
    if not m:
        raise RuntimeError("GIF URL not found")
    gif_url = m.group(1)
    if gif_url.startswith("//"):
        gif_url = "https:" + gif_url
    elif gif_url.startswith("/"):
        gif_url = "https://s.ezgif.com" + gif_url
    r2 = requests.get(gif_url, timeout=60)
    r2.raise_for_status()
    return r2.content
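# A hedged local alternative: if the host image happens to ship an `ffmpeg` binary
# (an assumption, not guaranteed on every Space), the first frame can be pulled from
# the MP4 directly, skipping the ezgif round-trip entirely.
def mp4_first_frame_local(mp4_bytes: bytes) -> bytes:
    import tempfile
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
        tmp.write(mp4_bytes)
        tmp_path = tmp.name
    try:
        # -frames:v 1 grabs a single frame; PNG bytes go to stdout via pipe:1.
        out = subprocess.run(
            ["ffmpeg", "-v", "error", "-i", tmp_path, "-frames:v", "1", "-f", "image2", "-c:v", "png", "pipe:1"],
            check=True, capture_output=True,
        )
        return out.stdout
    finally:
        os.unlink(tmp_path)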
def load_first_frame(raw: bytes):
    """Open image bytes and return the first frame as an RGB PIL image."""
    img = Image.open(io.BytesIO(raw))
    if getattr(img, "is_animated", False):
        img = next(ImageSequence.Iterator(img))
    if img.mode != "RGB":
        img = img.convert("RGB")
    return img
def rebuild_llama_cpp():
    """Reinstall llama-cpp-python from source (PIP_NO_BINARY forces a local CMake build)."""
    env = os.environ.copy()
    env["PIP_NO_BINARY"] = "llama-cpp-python"
    subprocess.check_call([sys.executable, "-m", "pip", "install", "--upgrade", "pip"], env=env)
    subprocess.check_call([sys.executable, "-m", "pip", "install", "--upgrade", "cmake", "wheel", "setuptools"], env=env)
    subprocess.check_call([sys.executable, "-m", "pip", "install", "--upgrade", "llama-cpp-python"], env=env)
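# Possible shortcut (an assumption about upstream packaging, worth verifying before
# relying on it): the llama-cpp-python project has published prebuilt CPU wheels on
# an extra index, which would avoid the slow source rebuild above, e.g.:
#   pip install llama-cpp-python --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu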
_llama = None
def ensure_model():
    """Download a GGUF (primary, then fallback) and load it with llama-cpp-python.

    If both loads fail, rebuild llama-cpp-python from source once and retry the primary.
    """
    global _llama
    if _llama is not None:
        return
    # Try the primary quantization first, then the fallback.
    for url, path in ((PRIMARY_URL, PRIMARY_NAME), (FALLBACK_URL, FALLBACK_NAME)):
        try:
            download_file(url, path)
            import importlib
            llama_cpp = importlib.import_module("llama_cpp")
            Llama = getattr(llama_cpp, "Llama")
            print("Loading", path)
            _llama = Llama(model_path=str(path), n_ctx=2048, n_gpu_layers=0, verbose=False)
            print("Loaded model:", path.name)
            return
        except Exception as e:
            print("Load failed for", path.name, ":", e)
    # Both loads failed: rebuild llama-cpp-python from source once.
    try:
        print("Rebuilding llama-cpp-python from source...")
        rebuild_llama_cpp()
    except Exception as e:
        raise RuntimeError("Rebuild failed: " + str(e))
    # Retry the primary model with the freshly built module.
    try:
        import importlib
        download_file(PRIMARY_URL, PRIMARY_NAME)
        llama_cpp = importlib.reload(importlib.import_module("llama_cpp"))
        Llama = getattr(llama_cpp, "Llama")
        _llama = Llama(model_path=str(PRIMARY_NAME), n_ctx=2048, n_gpu_layers=0, verbose=False)
        print("Loaded after rebuild.")
        return
    except Exception as e:
        raise RuntimeError("Load after rebuild failed: " + str(e))
def build_prompt(img_tag: str, user_prompt: str):
    # Minimal text-only prompt: the base64 "image" is passed as plain text, which a
    # bare GGUF (no mmproj/CLIP projector) cannot actually decode as pixels.
    return f"<image>{img_tag}</image>\n{user_prompt}\nAnswer:"
def generate_caption_from_url(url: str, prompt: str = "Describe the image."):
    if not url:
        return "No URL provided."
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        raw = r.content
    except Exception as e:
        return "Download error: " + str(e)
    try:
        lower = url.lower().split("?")[0]
        # MP4 files carry an "ftyp" box near the start; check both the extension and the bytes.
        if lower.endswith(".mp4") or raw[:16].lower().find(b"ftyp") != -1:
            try:
                raw = mp4_to_gif(raw)
            except Exception as e:
                return "MP4→GIF conversion failed: " + str(e)
        img = load_first_frame(raw)
    except Exception as e:
        return "Image processing error: " + str(e)
    try:
        img = img.resize((512, 512), resample=Image.BICUBIC)
    except Exception:
        pass
    # Encode the frame as a base64 PNG to signal image presence in the text prompt.
    # Note: a 512x512 PNG encodes to far more tokens than the 2048-token context,
    # so this placeholder only works as a stub until real multimodal wiring is added.
    buf = io.BytesIO()
    img.save(buf, format="PNG")
    b64 = base64.b64encode(buf.getvalue()).decode()
    img_tag = b64  # minimal
    prompt_text = build_prompt(img_tag, prompt or "Describe the image.")
    try:
        ensure_model()
        # Run the llama-cpp model on the assembled prompt.
        out = _llama(prompt_text, max_tokens=MAX_TOKENS, temperature=TEMPERATURE, top_p=TOP_P, stop=STOP)
        text = out.get("choices", [{}])[0].get("text", "")
        return text.strip()
    except Exception as e:
        return "Inference error: " + str(e)
iface = gr.Interface(
    fn=generate_caption_from_url,
    inputs=[gr.Textbox(label="Image / GIF / MP4 URL"), gr.Textbox(label="Prompt", value="Describe the image.")],
    outputs=gr.Textbox(label="Generated caption"),
    title="JoyCaption (minimal GGUF, auto-rebuild)",
    description="No tokens required. Downloads a public GGUF and runs locally via llama-cpp.",
)

if __name__ == "__main__":
    iface.launch(server_name="0.0.0.0", server_port=7860)