# NOTE(review): the three lines that were here ("Spaces:", "Build error" x2)
# were hosting-UI build-log residue pasted into the file, not code; converted
# to this comment so the module parses.
#!/usr/bin/env python3
import hashlib
import io
import os
import pathlib
import re
import subprocess
import sys
import time
from typing import Optional

import gradio as gr
import requests
from PIL import Image, ImageSequence
# Downloaded GGUF files are cached here, next to the script.
MODEL_DIR = pathlib.Path("model")
MODEL_DIR.mkdir(exist_ok=True, parents=True)

# Public mradermacher GGUF links (no tokens)
PRIMARY_URL = "https://huggingface.co/mradermacher/llama-joycaption-beta-one-hf-llava-GGUF/resolve/main/llama-joycaption-q4_k_s.gguf"
FALLBACK_URL = "https://huggingface.co/mradermacher/llama-joycaption-beta-one-hf-llava-GGUF/resolve/main/llama-joycaption-q4_k_m.gguf"
PRIMARY_NAME = MODEL_DIR / "llama-joycaption-q4_k_s.gguf"    # q4_k_s quant (smaller)
FALLBACK_NAME = MODEL_DIR / "llama-joycaption-q4_k_m.gguf"   # q4_k_m quant (fallback)

# Generation params
MAX_TOKENS = 128      # cap on generated caption length
TEMPERATURE = 0.2     # low temperature -> mostly deterministic captions
TOP_P = 0.95
STOP = ["\n"]         # stop at first newline: captions are single-line
def download_file(url: str, dest: pathlib.Path, timeout=120):
    """Stream *url* to *dest*, printing coarse progress to stdout.

    No-op if *dest* already exists. The payload is written to a ``.part``
    temp file and renamed into place on success, so an interrupted download
    never leaves a truncated *dest* that a later run would mistake for a
    complete model file (the original wrote directly to *dest*).

    Raises requests.HTTPError on a bad status; propagates I/O errors.
    """
    if dest.exists():
        return
    tmp = dest.with_suffix(dest.suffix + ".part")
    print("Downloading", url)
    try:
        with requests.get(url, stream=True, timeout=timeout) as r:
            r.raise_for_status()
            total = int(r.headers.get("content-length", 0) or 0)
            done = 0
            with open(tmp, "wb") as f:
                for chunk in r.iter_content(8192):
                    if not chunk:
                        continue
                    f.write(chunk)
                    done += len(chunk)
                    if total:  # only show % when the server sent a length
                        pct = done * 100 // total
                        print(f"\r{dest.name}: {pct}% ", end="", flush=True)
        tmp.replace(dest)  # atomic rename: publish only a complete file
    except BaseException:
        tmp.unlink(missing_ok=True)  # drop the partial file, then re-raise
        raise
    print()
def mp4_to_gif(mp4_bytes: bytes) -> bytes:
    """Convert MP4 bytes to GIF bytes via the public ezgif.com service.

    Uploads the video, scrapes the result page for the produced GIF's URL
    (normalizing protocol- and site-relative links), and downloads it.
    Raises RuntimeError if no GIF URL can be found in the response.
    """
    upload = requests.post(
        "https://s.ezgif.com/video-to-gif",
        files={"new-file": ("video.mp4", mp4_bytes, "video/mp4")},
        data={"file": "video.mp4"},
        timeout=120,
    )
    upload.raise_for_status()
    page = upload.text
    match = re.search(r'<img[^>]+src="([^"]+\.gif)"', page)
    if match is None:
        match = re.search(r'src="([^"]+?/tmp/[^"]+\.gif)"', page)
    if match is None:
        raise RuntimeError("GIF URL not found")
    gif_url = match.group(1)
    # Normalize protocol-relative ("//...") and site-relative ("/...") URLs.
    if gif_url.startswith("//"):
        gif_url = "https:" + gif_url
    elif gif_url.startswith("/"):
        gif_url = "https://s.ezgif.com" + gif_url
    gif_resp = requests.get(gif_url, timeout=60)
    gif_resp.raise_for_status()
    return gif_resp.content
def load_first_frame(raw: bytes):
    """Decode *raw* image bytes and return the first frame as an RGB PIL image."""
    frame = Image.open(io.BytesIO(raw))
    # Animated formats (e.g. GIF): keep only the first frame.
    if getattr(frame, "is_animated", False):
        frame = next(ImageSequence.Iterator(frame))
    return frame if frame.mode == "RGB" else frame.convert("RGB")
def rebuild_llama_cpp():
    """Force a from-source reinstall of llama-cpp-python.

    PIP_NO_BINARY makes pip build the wheel locally instead of using a
    prebuilt binary; build tooling is upgraded first so the compile works.
    Raises subprocess.CalledProcessError if any pip invocation fails.
    """
    env = dict(os.environ, PIP_NO_BINARY="llama-cpp-python")
    upgrade_sets = (
        ["pip"],
        ["cmake", "wheel", "setuptools"],
        ["llama-cpp-python"],
    )
    for packages in upgrade_sets:
        subprocess.check_call(
            [sys.executable, "-m", "pip", "install", "--upgrade", *packages],
            env=env,
        )
# Lazily-initialized llama_cpp.Llama instance; populated by ensure_model().
_llama = None
def ensure_model():
    """Download a GGUF model and load it into the module-global ``_llama``.

    Idempotent: returns immediately if a model is already loaded.
    Tries the primary quantization, then the fallback; if both fail to
    load, rebuilds llama-cpp-python from source once and retries the
    primary. Raises RuntimeError if the rebuild or the post-rebuild load
    fails.
    """
    global _llama
    if _llama is not None:
        return
    import importlib  # imported once here instead of in each retry branch

    # First pass: try primary, then fallback quantization.
    for url, path in ((PRIMARY_URL, PRIMARY_NAME), (FALLBACK_URL, FALLBACK_NAME)):
        try:
            download_file(url, path)
            llama_cpp = importlib.import_module("llama_cpp")
            print("Loading", path)
            _llama = llama_cpp.Llama(
                model_path=str(path), n_ctx=2048, n_gpu_layers=0, verbose=False
            )
            print("Loaded model:", path.name)
            return
        except Exception as e:
            print("Load failed for", path.name, ":", e)
    # Both quantizations failed — rebuild the binding from source once.
    try:
        print("Rebuilding llama-cpp-python from source...")
        rebuild_llama_cpp()
    except Exception as e:
        raise RuntimeError("Rebuild failed: " + str(e)) from e
    # Retry the primary model with the freshly built binding
    # (reload picks up the newly installed extension module).
    try:
        download_file(PRIMARY_URL, PRIMARY_NAME)
        llama_cpp = importlib.reload(importlib.import_module("llama_cpp"))
        _llama = llama_cpp.Llama(
            model_path=str(PRIMARY_NAME), n_ctx=2048, n_gpu_layers=0, verbose=False
        )
        print("Loaded after rebuild.")
    except Exception as e:
        raise RuntimeError("Load after rebuild failed: " + str(e)) from e
def build_prompt(img_tag: str, user_prompt: str):
    """Assemble the minimal model prompt: image placeholder, user request, answer cue."""
    pieces = ["<image>", img_tag, "</image>\n", user_prompt, "\nAnswer:"]
    return "".join(pieces)
def generate_caption_from_url(url: str, prompt: str = "Describe the image."):
    """Fetch media at *url*, extract a frame, and ask the model to caption it.

    Accepts image/GIF/MP4 URLs (MP4 is converted to GIF via ezgif first,
    then the first frame is used). Returns the generated caption, or a
    human-readable error string on any failure — this function never
    raises, so it is safe as a Gradio callback.
    """
    if not url:
        return "No URL provided."
    try:
        resp = requests.get(url, timeout=30)
        resp.raise_for_status()
        raw = resp.content
    except Exception as e:
        return "Download error: " + str(e)
    try:
        # Detect MP4 by extension (ignoring any query string) or by the
        # "ftyp" marker in the first bytes of the payload.
        lower = url.lower().split("?")[0]
        if lower.endswith(".mp4") or b"ftyp" in raw[:16].lower():
            try:
                raw = mp4_to_gif(raw)
            except Exception as e:
                return "MP4→GIF conversion failed: " + str(e)
        img = load_first_frame(raw)
    except Exception as e:
        return "Image processing error: " + str(e)
    try:
        img = img.resize((512, 512), resample=Image.BICUBIC)
    except Exception:
        pass  # best-effort resize; caption the original size if it fails
    # NOTE(review): the image reaches the model only as a base64 string
    # inside the text prompt — a plain GGUF text model cannot actually
    # "see" it; confirm the model understands this format.
    import base64
    buf = io.BytesIO()
    img.save(buf, format="PNG")
    img_tag = base64.b64encode(buf.getvalue()).decode()
    prompt_text = build_prompt(img_tag, prompt or "Describe the image.")
    try:
        ensure_model()
        out = _llama(prompt_text, max_tokens=MAX_TOKENS, temperature=TEMPERATURE,
                     top_p=TOP_P, stop=STOP)
        text = out.get("choices", [{}])[0].get("text", "")
        return text.strip()
    except Exception as e:
        return "Inference error: " + str(e)
# Gradio UI: a URL box and a prompt box in, one caption text box out.
iface = gr.Interface(
    fn=generate_caption_from_url,
    inputs=[gr.Textbox(label="Image / GIF / MP4 URL"), gr.Textbox(label="Prompt", value="Describe the image.")],
    outputs=gr.Textbox(label="Generated caption"),
    title="JoyCaption (minimal GGUF, auto-rebuild)",
    description="No tokens required. Downloads a public GGUF and runs locally via llama-cpp."
)
# Bind to all interfaces on port 7860 (the port Hugging Face Spaces expects).
if __name__ == "__main__":
    iface.launch(server_name="0.0.0.0", server_port=7860)