#!/usr/bin/env python3
import os, io, re, sys, subprocess, hashlib, base64, pathlib
from typing import Optional
import requests
from PIL import Image, ImageSequence
import gradio as gr
MODEL_DIR = pathlib.Path("model")
MODEL_DIR.mkdir(exist_ok=True, parents=True)
# Public mradermacher GGUF links (no tokens)
PRIMARY_URL = "https://huggingface.co/mradermacher/llama-joycaption-beta-one-hf-llava-GGUF/resolve/main/llama-joycaption-q4_k_s.gguf"
FALLBACK_URL = "https://huggingface.co/mradermacher/llama-joycaption-beta-one-hf-llava-GGUF/resolve/main/llama-joycaption-q4_k_m.gguf"
PRIMARY_NAME = MODEL_DIR / "llama-joycaption-q4_k_s.gguf"
FALLBACK_NAME = MODEL_DIR / "llama-joycaption-q4_k_m.gguf"
# Generation params
MAX_TOKENS = 128
TEMPERATURE = 0.2
TOP_P = 0.95
STOP = ["\n"]
def download_file(url: str, dest: pathlib.Path, timeout=120):
    """Stream `url` to `dest`, printing simple percentage progress."""
    if dest.exists():
        return
    print("Downloading", url)
    with requests.get(url, stream=True, timeout=timeout) as r:
        r.raise_for_status()
        total = int(r.headers.get("content-length", 0) or 0)
        done = 0
        with open(dest, "wb") as f:
            for chunk in r.iter_content(8192):
                if not chunk:
                    continue
                f.write(chunk)
                done += len(chunk)
                if total:
                    pct = done * 100 // total
                    print(f"\r{dest.name}: {pct}% ", end="", flush=True)
    print()
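# A hedged sketch of a more robust variant: download to a temporary file, verify a
# SHA-256 checksum, and only then move it into place. This avoids the case above
# where an interrupted download leaves a partial file that `dest.exists()` later
# treats as complete. `expected_sha256` is a caller-supplied assumption; no official
# checksums are published alongside the URLs used here.
def download_file_verified(url: str, dest: pathlib.Path, expected_sha256: Optional[str] = None, timeout=120):
    tmp = dest.with_suffix(dest.suffix + ".part")
    h = hashlib.sha256()
    with requests.get(url, stream=True, timeout=timeout) as r:
        r.raise_for_status()
        with open(tmp, "wb") as f:
            for chunk in r.iter_content(8192):
                if chunk:
                    f.write(chunk)
                    h.update(chunk)
    if expected_sha256 and h.hexdigest() != expected_sha256:
        tmp.unlink(missing_ok=True)
        raise RuntimeError(f"Checksum mismatch for {dest.name}")
    tmp.replace(dest)  # atomic rename: dest only ever exists fully written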
def mp4_to_gif(mp4_bytes: bytes) -> bytes:
    """Convert MP4 to GIF by uploading to ezgif.com and scraping the result page."""
    files = {"new-file": ("video.mp4", mp4_bytes, "video/mp4")}
    resp = requests.post("https://s.ezgif.com/video-to-gif", files=files, data={"file": "video.mp4"}, timeout=120)
    resp.raise_for_status()
    # The result URL only appears in the returned HTML, so this scrape is brittle
    # and will break if ezgif changes its markup.
    m = re.search(r'<img[^>]+src="([^"]+\.gif)"', resp.text) or re.search(r'src="([^"]+?/tmp/[^"]+\.gif)"', resp.text)
    if not m:
        raise RuntimeError("GIF URL not found")
    gif_url = m.group(1)
    if gif_url.startswith("//"):
        gif_url = "https:" + gif_url
    elif gif_url.startswith("/"):
        gif_url = "https://s.ezgif.com" + gif_url
    r2 = requests.get(gif_url, timeout=60)
    r2.raise_for_status()
    return r2.content
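# A hedged local alternative: if the host image happens to ship an `ffmpeg` binary
# (an assumption, not guaranteed on every Space), the first frame can be pulled from
# the MP4 directly, skipping the ezgif round-trip entirely.
def mp4_first_frame_local(mp4_bytes: bytes) -> bytes:
    import tempfile
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
        tmp.write(mp4_bytes)
        tmp_path = tmp.name
    try:
        # -frames:v 1 grabs a single frame; PNG bytes go to stdout via pipe:1.
        out = subprocess.run(
            ["ffmpeg", "-v", "error", "-i", tmp_path, "-frames:v", "1", "-f", "image2", "-c:v", "png", "pipe:1"],
            check=True, capture_output=True,
        )
        return out.stdout
    finally:
        os.unlink(tmp_path)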
def load_first_frame(raw: bytes):
    """Open image bytes and return the first frame as an RGB PIL image."""
    img = Image.open(io.BytesIO(raw))
    if getattr(img, "is_animated", False):
        img = next(ImageSequence.Iterator(img))
    if img.mode != "RGB":
        img = img.convert("RGB")
    return img
def rebuild_llama_cpp():
    """Reinstall llama-cpp-python from source (PIP_NO_BINARY forces a local CMake build)."""
    env = os.environ.copy()
    env["PIP_NO_BINARY"] = "llama-cpp-python"
    subprocess.check_call([sys.executable, "-m", "pip", "install", "--upgrade", "pip"], env=env)
    subprocess.check_call([sys.executable, "-m", "pip", "install", "--upgrade", "cmake", "wheel", "setuptools"], env=env)
    subprocess.check_call([sys.executable, "-m", "pip", "install", "--upgrade", "llama-cpp-python"], env=env)
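# Possible shortcut (an assumption about upstream packaging, worth verifying before
# relying on it): the llama-cpp-python project has published prebuilt CPU wheels on
# an extra index, which would avoid the slow source rebuild above, e.g.:
#   pip install llama-cpp-python --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu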
_llama = None
def ensure_model():
    """Download a GGUF (primary, then fallback) and load it with llama-cpp-python.

    If both loads fail, rebuild llama-cpp-python from source once and retry the primary.
    """
    global _llama
    if _llama is not None:
        return
    # Try the primary quantization first, then the fallback.
    for url, path in ((PRIMARY_URL, PRIMARY_NAME), (FALLBACK_URL, FALLBACK_NAME)):
        try:
            download_file(url, path)
            import importlib
            llama_cpp = importlib.import_module("llama_cpp")
            Llama = getattr(llama_cpp, "Llama")
            print("Loading", path)
            _llama = Llama(model_path=str(path), n_ctx=2048, n_gpu_layers=0, verbose=False)
            print("Loaded model:", path.name)
            return
        except Exception as e:
            print("Load failed for", path.name, ":", e)
    # Both loads failed: rebuild llama-cpp-python from source once.
    try:
        print("Rebuilding llama-cpp-python from source...")
        rebuild_llama_cpp()
    except Exception as e:
        raise RuntimeError("Rebuild failed: " + str(e))
    # Retry the primary model with the freshly built module.
    try:
        import importlib
        download_file(PRIMARY_URL, PRIMARY_NAME)
        llama_cpp = importlib.reload(importlib.import_module("llama_cpp"))
        Llama = getattr(llama_cpp, "Llama")
        _llama = Llama(model_path=str(PRIMARY_NAME), n_ctx=2048, n_gpu_layers=0, verbose=False)
        print("Loaded after rebuild.")
        return
    except Exception as e:
        raise RuntimeError("Load after rebuild failed: " + str(e))
def build_prompt(img_tag: str, user_prompt: str):
    # Minimal text-only prompt: the base64 "image" is passed as plain text, which a
    # bare GGUF (no mmproj/CLIP projector) cannot actually decode as pixels.
    return f"<image>{img_tag}</image>\n{user_prompt}\nAnswer:"
def generate_caption_from_url(url: str, prompt: str = "Describe the image."):
    if not url:
        return "No URL provided."
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        raw = r.content
    except Exception as e:
        return "Download error: " + str(e)
    try:
        lower = url.lower().split("?")[0]
        # MP4 files carry an "ftyp" box near the start; check both the extension and the bytes.
        if lower.endswith(".mp4") or raw[:16].lower().find(b"ftyp") != -1:
            try:
                raw = mp4_to_gif(raw)
            except Exception as e:
                return "MP4→GIF conversion failed: " + str(e)
        img = load_first_frame(raw)
    except Exception as e:
        return "Image processing error: " + str(e)
    try:
        img = img.resize((512, 512), resample=Image.BICUBIC)
    except Exception:
        pass
    # Encode the frame as a base64 PNG to signal image presence in the text prompt.
    # Note: a 512x512 PNG encodes to far more tokens than the 2048-token context,
    # so this placeholder only works as a stub until real multimodal wiring is added.
    buf = io.BytesIO()
    img.save(buf, format="PNG")
    b64 = base64.b64encode(buf.getvalue()).decode()
    img_tag = b64  # minimal
    prompt_text = build_prompt(img_tag, prompt or "Describe the image.")
    try:
        ensure_model()
        # Run the llama-cpp model on the assembled prompt.
        out = _llama(prompt_text, max_tokens=MAX_TOKENS, temperature=TEMPERATURE, top_p=TOP_P, stop=STOP)
        text = out.get("choices", [{}])[0].get("text", "")
        return text.strip()
    except Exception as e:
        return "Inference error: " + str(e)
iface = gr.Interface(
    fn=generate_caption_from_url,
    inputs=[gr.Textbox(label="Image / GIF / MP4 URL"), gr.Textbox(label="Prompt", value="Describe the image.")],
    outputs=gr.Textbox(label="Generated caption"),
    title="JoyCaption (minimal GGUF, auto-rebuild)",
    description="No tokens required. Downloads a public GGUF and runs locally via llama-cpp.",
)

if __name__ == "__main__":
    iface.launch(server_name="0.0.0.0", server_port=7860)