#!/usr/bin/env python3
import os, io, re, sys, subprocess, hashlib, pathlib, time
from typing import Optional
import requests
from PIL import Image, ImageSequence
import gradio as gr

MODEL_DIR = pathlib.Path("model")
MODEL_DIR.mkdir(exist_ok=True, parents=True)

# Public mradermacher GGUF links (no tokens)
PRIMARY_URL = "https://huggingface.co/mradermacher/llama-joycaption-beta-one-hf-llava-GGUF/resolve/main/llama-joycaption-q4_k_s.gguf"
FALLBACK_URL = "https://huggingface.co/mradermacher/llama-joycaption-beta-one-hf-llava-GGUF/resolve/main/llama-joycaption-q4_k_m.gguf"

PRIMARY_NAME = MODEL_DIR / "llama-joycaption-q4_k_s.gguf"
FALLBACK_NAME = MODEL_DIR / "llama-joycaption-q4_k_m.gguf"

# Generation params
MAX_TOKENS = 128
TEMPERATURE = 0.2
TOP_P = 0.95
STOP = ["\n"]

def download_file(url: str, dest: pathlib.Path, timeout=120):
    if dest.exists():
        return
    print("Downloading", url)
    with requests.get(url, stream=True, timeout=timeout) as r:
        r.raise_for_status()
        total = int(r.headers.get("content-length", 0) or 0)
        done = 0
        with open(dest, "wb") as f:
            for chunk in r.iter_content(8192):
                if not chunk: continue
                f.write(chunk)
                done += len(chunk)
                if total:
                    pct = done * 100 // total
                    print(f"\r{dest.name}: {pct}% ", end="", flush=True)
    print()

def mp4_to_gif(mp4_bytes: bytes) -> bytes:
    files = {"new-file": ("video.mp4", mp4_bytes, "video/mp4")}
    resp = requests.post("https://s.ezgif.com/video-to-gif", files=files, data={"file":"video.mp4"}, timeout=120)
    resp.raise_for_status()
    m = re.search(r'<img[^>]+src="([^"]+\.gif)"', resp.text) or re.search(r'src="([^"]+?/tmp/[^"]+\.gif)"', resp.text)
    if not m:
        raise RuntimeError("GIF URL not found")
    gif_url = m.group(1)
    if gif_url.startswith("//"): gif_url = "https:" + gif_url
    elif gif_url.startswith("/"): gif_url = "https://s.ezgif.com" + gif_url
    r2 = requests.get(gif_url, timeout=60); r2.raise_for_status(); return r2.content

def load_first_frame(raw: bytes):
    img = Image.open(io.BytesIO(raw))
    if getattr(img, "is_animated", False):
        img = next(ImageSequence.Iterator(img))
    if img.mode != "RGB": img = img.convert("RGB")
    return img

def rebuild_llama_cpp():
    env = os.environ.copy()
    env["PIP_NO_BINARY"] = "llama-cpp-python"
    subprocess.check_call([sys.executable, "-m", "pip", "install", "--upgrade", "pip"], env=env)
    subprocess.check_call([sys.executable, "-m", "pip", "install", "--upgrade", "cmake", "wheel", "setuptools"], env=env)
    subprocess.check_call([sys.executable, "-m", "pip", "install", "--upgrade", "llama-cpp-python"], env=env)

_llama = None

def ensure_model():
    global _llama
    if _llama is not None:
        return
    # try primary then fallback
    for url, path in ((PRIMARY_URL, PRIMARY_NAME), (FALLBACK_URL, FALLBACK_NAME)):
        try:
            download_file(url, path)
            import importlib
            llama_cpp = importlib.import_module("llama_cpp")
            Llama = getattr(llama_cpp, "Llama")
            print("Loading", path)
            _llama = Llama(model_path=str(path), n_ctx=2048, n_gpu_layers=0, verbose=False)
            print("Loaded model:", path.name)
            return
        except Exception as e:
            print("Load failed for", path.name, ":", e)
    # rebuild once
    try:
        print("Rebuilding llama-cpp-python from source...")
        rebuild_llama_cpp()
    except Exception as e:
        raise RuntimeError("Rebuild failed: " + str(e))
    # retry primary
    try:
        import importlib
        download_file(PRIMARY_URL, PRIMARY_NAME)
        llama_cpp = importlib.reload(importlib.import_module("llama_cpp"))
        Llama = getattr(llama_cpp, "Llama")
        _llama = Llama(model_path=str(PRIMARY_NAME), n_ctx=2048, n_gpu_layers=0, verbose=False)
        print("Loaded after rebuild.")
        return
    except Exception as e:
        raise RuntimeError("Load after rebuild failed: " + str(e))

def build_prompt(img_tag: str, user_prompt: str):
    # Minimal prompt: image placeholder and the user request
    return f"<image>{img_tag}</image>\n{user_prompt}\nAnswer:"

def generate_caption_from_url(url: str, prompt: str="Describe the image."):
    if not url:
        return "No URL provided."
    try:
        r = requests.get(url, timeout=30); r.raise_for_status(); raw = r.content
    except Exception as e:
        return "Download error: " + str(e)
    try:
        lower = url.lower().split("?")[0]
        if lower.endswith(".mp4") or raw[:16].lower().find(b"ftyp") != -1:
            try:
                raw = mp4_to_gif(raw)
            except Exception as e:
                return "MP4→GIF conversion failed: " + str(e)
        img = load_first_frame(raw)
    except Exception as e:
        return "Image processing error: " + str(e)

    try:
        img = img.resize((512,512), resample=Image.BICUBIC)
    except Exception:
        pass

    # create a tiny base64 tag to signal image presence (model must understand this format)
    import base64
    buf = io.BytesIO()
    img.save(buf, format="PNG")
    b64 = base64.b64encode(buf.getvalue()).decode()
    img_tag = b64  # minimal

    prompt_text = build_prompt(img_tag, prompt or "Describe the image.")
    try:
        ensure_model()
        # call llama-cpp model
        out = _llama(prompt_text, max_tokens=MAX_TOKENS, temperature=TEMPERATURE, top_p=TOP_P, stop=STOP)
        text = out.get("choices", [{}])[0].get("text", "")
        return text.strip()
    except Exception as e:
        return "Inference error: " + str(e)

iface = gr.Interface(
    fn=generate_caption_from_url,
    inputs=[gr.Textbox(label="Image / GIF / MP4 URL"), gr.Textbox(label="Prompt", value="Describe the image.")],
    outputs=gr.Textbox(label="Generated caption"),
    title="JoyCaption (minimal GGUF, auto-rebuild)",
    description="No tokens required. Downloads a public GGUF and runs locally via llama-cpp."
)

if __name__ == "__main__":
    iface.launch(server_name="0.0.0.0", server_port=7860)