Hug0endob commited on
Commit
31baa74
·
verified ·
1 Parent(s): cce9dc0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +523 -877
app.py CHANGED
@@ -1,21 +1,20 @@
1
  import os
2
- import json
3
  import shutil
4
  import subprocess
5
  import tempfile
6
  import base64
7
- import asyncio
8
- import concurrent.futures
9
  from io import BytesIO
10
- from typing import List, Tuple, Optional, Callable
11
-
12
  import requests
13
  from PIL import Image, ImageFile, UnidentifiedImageError
14
  import gradio as gr
 
 
 
 
15
 
16
- # --------------------------------------------------------------------------- #
17
- # Constants & basic helpers
18
- # --------------------------------------------------------------------------- #
19
  DEFAULT_KEY = os.getenv("MISTRAL_API_KEY", "")
20
  PIXTRAL_MODEL = "pixtral-12b-2409"
21
  VIDEO_MODEL = "voxtral-mini-latest"
@@ -25,58 +24,54 @@ IMAGE_EXTS = (".jpg", ".jpeg", ".png", ".webp", ".gif")
25
  VIDEO_EXTS = (".mp4", ".mov", ".webm", ".mkv", ".avi", ".flv")
26
 
27
  SYSTEM_INSTRUCTION = (
28
- "You are a clinical visual analyst. Only analyse media actually provided (image or video). "
29
- "If analysing a video, do it as a whole, not framebyframe. Produce a concise, factual narrative "
30
- "describing observable features (skin, posture, hygiene, anatomy, movements, timestamps, notable events). "
31
- "Do not hallucinate sensory details."
 
 
32
  )
33
 
34
  ImageFile.LOAD_TRUNCATED_IMAGES = True
35
  Image.MAX_IMAGE_PIXELS = 10000 * 10000
36
 
37
- # --------------------------------------------------------------------------- #
38
- # Mistral client utilities
39
- # --------------------------------------------------------------------------- #
40
  try:
41
  from mistralai import Mistral
42
- except Exception: # pragma: no cover
43
  Mistral = None
44
 
 
 
 
 
 
 
 
 
 
 
 
 
45
 
 
 
 
46
  def get_client(key: Optional[str] = None):
47
  api_key = (key or "").strip() or DEFAULT_KEY
48
- if not api_key:
49
- raise RuntimeError("MISTRAL_API_KEY not set")
50
  if Mistral is None:
51
- raise RuntimeError("mistralai library not installed")
 
 
52
  return Mistral(api_key=api_key)
53
 
54
-
55
- def _progress(p: float, desc: str = None, fn: Callable = None):
56
- """Helper to call a Gradio progress function if supplied."""
57
- if fn is None:
58
- return
59
- try:
60
- if desc is None:
61
- fn(p)
62
- else:
63
- fn(p, desc)
64
- except Exception:
65
- pass
66
-
67
-
68
- # --------------------------------------------------------------------------- #
69
- # HTTP helpers (safe HEAD / GET)
70
- # --------------------------------------------------------------------------- #
71
  def is_remote(src: str) -> bool:
72
- return src.startswith(("http://", "https://"))
73
-
74
 
75
  def ext_from_src(src: str) -> str:
76
- _, ext = os.path.splitext(src.split("?")[0])
 
77
  return ext.lower()
78
 
79
-
80
  def safe_head(url: str, timeout: int = 6):
81
  try:
82
  r = requests.head(url, timeout=timeout, allow_redirects=True)
@@ -84,33 +79,73 @@ def safe_head(url: str, timeout: int = 6):
84
  except Exception:
85
  return None
86
 
87
-
88
  def safe_get(url: str, timeout: int = 15):
89
  r = requests.get(url, timeout=timeout)
90
  r.raise_for_status()
91
  return r
92
 
93
-
94
- # --------------------------------------------------------------------------- #
95
- # Temp‑file helpers
96
- # --------------------------------------------------------------------------- #
97
  def _temp_file(data: bytes, suffix: str) -> str:
98
- """Write *data* to a temporary file and return its path."""
 
 
 
99
  fd, path = tempfile.mkstemp(suffix=suffix)
100
  os.close(fd)
101
  with open(path, "wb") as f:
102
  f.write(data)
 
103
  return path
104
 
105
-
106
- def save_bytes_to_temp(b: bytes, suffix: str) -> str:
107
- return _temp_file(b, suffix)
108
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
 
110
  def convert_to_jpeg_bytes(img_bytes: bytes, base_h: int = 480) -> bytes:
111
  img = Image.open(BytesIO(img_bytes))
112
- if getattr(img, "is_animated", False):
113
- img.seek(0)
 
 
 
114
  if img.mode != "RGB":
115
  img = img.convert("RGB")
116
  h = base_h
@@ -120,952 +155,563 @@ def convert_to_jpeg_bytes(img_bytes: bytes, base_h: int = 480) -> bytes:
120
  img.save(buf, format="JPEG", quality=85)
121
  return buf.getvalue()
122
 
123
-
124
  def b64_bytes(b: bytes, mime: str = "image/jpeg") -> str:
125
- return f"data:{mime};base64," + base64.b64encode(b).decode()
126
-
127
-
128
- # --------------------------------------------------------------------------- #
129
- # Media‑type detection
130
- # --------------------------------------------------------------------------- #
131
- def determine_media_type(src: str, progress: Callable = None) -> Tuple[bool, bool]:
132
- is_img = ext_from_src(src) in IMAGE_EXTS
133
- is_vid = ext_from_src(src) in VIDEO_EXTS
134
- if is_remote(src):
135
- head = safe_head(src)
136
- if head:
137
- ct = (head.headers.get("content-type") or "").lower()
138
- if ct.startswith("image/"):
139
- is_img, is_vid = True, False
140
- elif ct.startswith("video/"):
141
- is_vid, is_img = True, False
142
- _progress(0.02, "Determined media type", progress)
143
- return is_img, is_vid
144
-
145
-
146
- # --------------------------------------------------------------------------- #
147
- # Download helpers – stream large files directly to a temp file
148
- # --------------------------------------------------------------------------- #
149
- def download_to_temp(src: str, progress: Callable = None) -> str:
150
- """Return a temporary file path containing the downloaded content."""
151
- _progress(0.05, "Downloading...", progress)
152
- if is_remote(src):
153
- # stream to avoid loading huge files into RAM
154
- r = requests.get(src, stream=True, timeout=60)
155
- r.raise_for_status()
156
- suffix = ext_from_src(src) or ".bin"
157
- fd, path = tempfile.mkstemp(suffix=suffix)
158
  os.close(fd)
159
- with open(path, "wb") as f:
160
- for chunk in r.iter_content(8192):
161
- if chunk:
162
- f.write(chunk)
163
- return path
164
- else:
165
- if not os.path.exists(src):
166
- raise FileNotFoundError(f"Local path does not exist: {src}")
167
- return src
168
-
169
-
170
- # --------------------------------------------------------------------------- #
171
- # ffprobe / conversion helpers (browser‑playable video)
172
- # --------------------------------------------------------------------------- #
173
- def _ffprobe_streams(path: str) -> Optional[dict]:
174
- if not FFMPEG_BIN:
175
- return None
176
- ffprobe = FFMPEG_BIN.replace("ffmpeg", "ffprobe")
177
- if not shutil.which(ffprobe):
178
- ffprobe = "ffprobe"
179
- cmd = [
180
- ffprobe,
181
- "-v",
182
- "error",
183
- "-print_format",
184
- "json",
185
- "-show_streams",
186
- "-show_format",
187
- path,
188
- ]
189
- try:
190
- out = subprocess.check_output(cmd, stderr=subprocess.DEVNULL)
191
- return json.loads(out)
192
- except Exception:
193
- return None
194
-
195
-
196
- def _is_browser_playable(path: str) -> bool:
197
- """True if the file is MP4 (or MOV) with an H.264/H.265 video stream."""
198
- if not path:
199
- return False
200
- ext = path.lower()
201
- if any(ext.endswith(e) for e in (".mp4", ".m4v", ".mov")):
202
- info = _ffprobe_streams(path)
203
- if not info:
204
- return ext.endswith(".mp4")
205
- for s in info.get("streams", []):
206
- if s.get("codec_type") == "video" and s.get("codec_name") in (
207
- "h264",
208
- "h265",
209
- "avc1",
210
- ):
211
- return True
212
- # fallback: any video stream is acceptable
213
- info = _ffprobe_streams(path)
214
- if not info:
215
- return False
216
- return any(s.get("codec_type") == "video" for s in info.get("streams", []))
217
-
218
-
219
- def _convert_video_for_preview_if_needed(path: str, progress: Callable = None) -> str:
220
- """Return a path that Gradio can play (MP4 + H.264/AAC)."""
221
- if not FFMPEG_BIN or not os.path.exists(path):
222
- return path
223
- if _is_browser_playable(path):
224
- return path
225
-
226
- _progress(0.70, "Re‑encoding video for preview", progress)
227
- out_fd, out_path = tempfile.mkstemp(suffix=".mp4")
228
- os.close(out_fd)
229
- cmd = [
230
- FFMPEG_BIN,
231
- "-y",
232
- "-i",
233
- path,
234
- "-c:v",
235
- "libx264",
236
- "-preset",
237
- "veryfast",
238
- "-crf",
239
- "28",
240
- "-c:a",
241
- "aac",
242
- "-movflags",
243
- "+faststart",
244
- out_path,
245
- ]
246
- try:
247
- subprocess.run(
248
- cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, timeout=60
249
- )
250
- return out_path
251
- except Exception:
252
  try:
253
- os.remove(out_path)
 
 
 
 
 
254
  except Exception:
255
  pass
256
- return path
257
-
258
-
259
- # --------------------------------------------------------------------------- #
260
- # Mistral interaction (upload + chat)
261
- # --------------------------------------------------------------------------- #
262
- def upload_file_to_mistral(
263
- client,
264
- path: str,
265
- filename: str | None = None,
266
- purpose: str = "batch",
267
- progress: Callable = None,
268
- ) -> str:
269
- """Upload a file via the REST endpoint and return its file‑id."""
270
- fname = filename or os.path.basename(path)
271
-
272
- # Build a short list of plausible purposes
273
- purposes = [purpose]
274
- ext = os.path.splitext(fname)[1].lower()
275
- if ext in {".png", ".jpg", ".jpeg", ".gif", ".webp", ".bmp", ".tiff"}:
276
- purposes.append("image")
277
- for p in ("batch", "fine-tune", "image"):
278
- if p not in purposes:
279
- purposes.append(p)
280
-
281
- api_key = getattr(client, "api_key", "") or DEFAULT_KEY
282
- if not api_key:
283
- raise RuntimeError("MISTRAL_API_KEY missing")
284
-
285
- url = "https://api.mistral.ai/v1/files"
286
- headers = {"Authorization": f"Bearer {api_key}"}
287
-
288
- last_err = None
289
- for cur_purpose in purposes:
290
- try:
291
- with open(path, "rb") as fh:
292
- files = {"file": (fname, fh)}
293
- data = {"purpose": cur_purpose}
294
- resp = requests.post(
295
- url, headers=headers, files=files, data=data, timeout=120
296
- )
297
- resp.raise_for_status()
298
- payload = resp.json()
299
- fid = payload.get("id") or payload.get("data", [{}])[0].get("id")
300
- if fid:
301
- _progress(0.65, f"Uploaded (purpose={cur_purpose})", progress)
302
- return fid
303
- raise RuntimeError("Upload succeeded but no file id returned")
304
- except requests.exceptions.HTTPError as he:
305
- if he.response.status_code == 422:
306
- last_err = he
307
- continue
308
- raise RuntimeError(f"Upload failed: {he}") from he
309
- except Exception as e:
310
- last_err = e
311
- raise RuntimeError(f"Upload failed: {e}") from e
312
-
313
- raise RuntimeError(f"All upload attempts failed. Last error: {last_err}")
314
-
315
 
316
- def chat_complete(client, model: str, messages, progress: Callable = None) -> str:
317
- """Send a chat request (non‑streaming) and return the model’s text response."""
318
- _progress(0.60, "Calling model", progress)
319
  try:
 
 
320
  if hasattr(client, "chat") and hasattr(client.chat, "complete"):
321
- try:
322
- res = client.chat.complete(
323
- model=model, messages=messages, timeout=120, stream=False
324
- )
325
- except TypeError:
326
- # SDK variations
327
- res = client.chat.complete(
328
- model=model, messages=messages, request_timeout=120, stream=False
329
- )
330
  else:
331
- # raw REST fallback
332
  api_key = getattr(client, "api_key", "") or DEFAULT_KEY
333
  url = "https://api.mistral.ai/v1/chat/completions"
334
- headers = {
335
- "Authorization": f"Bearer {api_key}",
336
- "Content-Type": "application/json",
337
- }
338
- r = requests.post(
339
- url,
340
- json={"model": model, "messages": messages},
341
- headers=headers,
342
- timeout=120,
343
- )
344
  r.raise_for_status()
345
  res = r.json()
 
 
 
 
 
 
 
 
 
 
 
346
  except Exception as e:
347
  return f"Error during model call: {e}"
348
 
349
- _progress(0.80, "Parsing response", progress)
350
- choices = getattr(res, "choices", None) or (
351
- res.get("choices") if isinstance(res, dict) else []
352
- )
353
- if not choices:
354
- return f"Empty response: {res}"
355
- first = choices[0]
356
- msg = (
357
- first.get("message")
358
- if isinstance(first, dict)
359
- else getattr(first, "message", {})
360
- )
361
- content = (
362
- msg.get("content") if isinstance(msg, dict) else getattr(msg, "content", "")
363
- )
364
- return content.strip() if isinstance(content, str) else str(content)
365
-
366
-
367
- # --------------------------------------------------------------------------- #
368
- # Analysis functions (image & video)
369
- # --------------------------------------------------------------------------- #
370
- def analyze_image_structured(
371
- client, img_bytes: bytes, prompt: str, progress: Callable = None
372
- ) -> str:
373
- _progress(0.30, "Preparing image", progress)
374
- jpeg = convert_to_jpeg_bytes(img_bytes, base_h=1024)
375
- tmp = save_bytes_to_temp(jpeg, ".jpg")
376
  try:
377
- fid = upload_file_to_mistral(
378
- client, tmp, filename="image.jpg", purpose="image", progress=progress
379
- )
380
- finally:
381
- try:
382
- os.remove(tmp)
383
- except Exception:
384
- pass
 
 
 
 
 
385
 
386
- messages = [
387
- {"role": "system", "content": SYSTEM_INSTRUCTION},
388
- {
389
- "role": "user",
390
- "content": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
391
  {"type": "text", "text": prompt},
392
- {"type": "file", "file_id": fid},
393
- ],
394
- },
395
- ]
396
- return chat_complete(client, PIXTRAL_MODEL, messages, progress=progress)
397
-
 
 
398
 
399
- def analyze_video_cohesive(
400
- client,
401
- video_path: str,
402
- prompt: str,
403
- progress: Callable = None,
404
- model: str = VIDEO_MODEL,
405
- ) -> str:
406
- """Full‑video analysis; falls back to frame‑based analysis on upload failure."""
407
  try:
408
- _progress(0.30, "Uploading video", progress)
409
- fid = upload_file_to_mistral(
410
- client,
411
- video_path,
412
- filename=os.path.basename(video_path),
413
- purpose="batch",
414
- progress=progress,
415
  )
416
  messages = [
417
  {"role": "system", "content": SYSTEM_INSTRUCTION},
418
- {
419
- "role": "user",
420
- "content": [
421
- {
422
- "type": "text",
423
- "text": f"{prompt}\n\nAnalyze the whole video and produce a single cohesive narrative.",
424
- },
425
- {"type": "file", "file_id": fid},
426
- ],
427
- },
428
  ]
429
- return chat_complete(client, model, messages, progress=progress)
430
-
431
- except Exception as exc:
432
- # ---- fallback: extract a few representative frames --------------------
433
- _progress(0.35, "Upload failed – extracting frames", progress)
434
- frames = extract_best_frames_bytes(
435
- video_path, sample_count=6, progress=progress
436
- )
437
  if not frames:
438
- return f"Error: upload failed and no frames could be extracted ({exc})"
439
-
440
- # upload each frame and build the message payload
441
- frame_files = []
442
- for i, raw in enumerate(frames, 1):
443
- _progress(
444
- 0.40 + i / len(frames) * 0.15,
445
- f"Uploading frame {i}/{len(frames)}",
446
- progress,
447
- )
448
- tmp = save_bytes_to_temp(convert_to_jpeg_bytes(raw, base_h=720), ".jpg")
449
  try:
450
- fid = upload_file_to_mistral(
451
- client, tmp, f"frame_{i}.jpg", purpose="image", progress=progress
 
 
 
 
 
 
 
452
  )
453
- frame_files.append({"type": "file", "file_id": fid})
454
- finally:
455
- os.remove(tmp)
456
-
457
  messages = [
458
  {"role": "system", "content": SYSTEM_INSTRUCTION},
459
- {
460
- "role": "user",
461
- "content": [
462
- {
463
- "type": "text",
464
- "text": f"{prompt}\n\nConsolidate observations across the provided frames into a single narrative.",
465
- },
466
- *frame_files,
467
- ],
468
- },
469
  ]
470
  return chat_complete(client, PIXTRAL_MODEL, messages, progress=progress)
471
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
472
 
473
- # --------------------------------------------------------------------------- #
474
- # Gradio UI helpers
475
- # --------------------------------------------------------------------------- #
476
- css = ".preview_media img, .preview_media video {max-width:100%;height:auto;border-radius:6px;}"
477
-
478
-
479
- def _make_preview(url: str, raw: bytes, cache: dict) -> str:
480
- """Create (or reuse) a preview file – image → JPEG, video → MP4 (browser‑playable)."""
481
- if url in cache:
482
- return cache[url]
483
-
484
- if determine_media_type(url)[1]: # video
485
- tmp = _temp_file(raw, suffix=ext_from_src(url) or ".mp4")
486
- preview = _convert_video_for_preview_if_needed(tmp)
487
- os.remove(tmp) # the converted file is a separate temp file
488
- else: # image
489
- preview = _temp_file(convert_to_jpeg_bytes(raw, base_h=1024), suffix=".jpg")
490
- cache[url] = preview
491
- return preview
492
 
 
 
 
 
 
 
 
493
 
494
- def apply_preview(path: str, last_path: str):
495
- """Return Gradio updates for image/video components and a status string."""
496
- if not path or path == last_path:
497
- return gr.update(), gr.update(), ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
498
 
499
- if any(path.lower().endswith(ext) for ext in IMAGE_EXTS):
500
- return (
501
- gr.update(value=path, visible=True),
502
- gr.update(value=None, visible=False),
503
- "Preview updated.",
504
- )
505
- if any(path.lower().endswith(ext) for ext in VIDEO_EXTS):
506
- return (
507
- gr.update(value=None, visible=False),
508
- gr.update(value=path, visible=True),
509
- "Preview updated.",
510
- )
511
 
512
- # fallback – try to open as image
513
  try:
514
- Image.open(path).verify()
515
- return (
516
- gr.update(value=path, visible=True),
517
- gr.update(value=None, visible=False),
518
- "Preview updated.",
519
- )
520
- except Exception:
521
- return (
522
- gr.update(value=None, visible=False),
523
- gr.update(value=None, visible=False),
524
- "",
525
- )
526
 
 
 
 
527
 
528
  def create_demo():
529
  with gr.Blocks(title="Flux Multimodal", css=css) as demo:
530
  with gr.Row():
531
  with gr.Column(scale=1):
532
- preview_image = gr.Image(
533
- label="Preview Image",
534
- type="filepath",
535
- elem_classes="preview_media",
536
- visible=False,
537
- )
538
- preview_video = gr.Video(
539
- label="Preview Video",
540
- elem_classes="preview_media",
541
- visible=False,
542
- format="mp4",
543
- )
544
- preview_status = gr.Textbox(
545
- label="Preview status",
546
- interactive=False,
547
- lines=1,
548
- value="",
549
- visible=True,
550
- )
551
  with gr.Column(scale=2):
552
- url_input = gr.Textbox(
553
- label="Image / Video URL", placeholder="https://...", lines=1
554
- )
555
  with gr.Accordion("Prompt (optional)", open=False):
556
  custom_prompt = gr.Textbox(label="Prompt", lines=4, value="")
557
  with gr.Accordion("Mistral API Key (optional)", open=False):
558
- api_key = gr.Textbox(
559
- label="Mistral API Key", type="password", max_lines=1
560
- )
561
  with gr.Row():
562
  submit_btn = gr.Button("Submit")
563
  clear_btn = gr.Button("Clear")
564
  progress_md = gr.Markdown("Idle")
565
  output_md = gr.Markdown("")
 
 
566
  status_state = gr.State("idle")
567
- # hidden state to pass preview path from worker to frontend
568
  preview_path_state = gr.State("")
569
 
570
- # small helper: fetch URL into bytes with retries and respect Retry-After
571
- def _fetch_with_retries_bytes(
572
- src: str, timeout: int = 15, max_retries: int = 3
573
- ):
574
- attempt = 0
575
- delay = 1.0
576
- while True:
577
- attempt += 1
578
- try:
579
- if is_remote(src):
580
- r = requests.get(src, timeout=timeout, stream=True)
581
- if r.status_code == 200:
582
- return r.content
583
- if r.status_code == 429:
584
- ra = r.headers.get("Retry-After")
585
- try:
586
- delay = float(ra) if ra is not None else delay
587
- except Exception:
588
- pass
589
- r.raise_for_status()
590
- else:
591
- with open(src, "rb") as fh:
592
- return fh.read()
593
- except requests.exceptions.RequestException:
594
- if attempt >= max_retries:
595
- raise
596
- time.sleep(delay)
597
- delay *= 2
598
- except FileNotFoundError:
599
- raise
600
- except Exception:
601
- if attempt >= max_retries:
602
- raise
603
- time.sleep(delay)
604
- delay *= 2
605
-
606
- # create a local temp file for a remote URL and return local path (or None)
607
- def _save_preview_local(src: str) -> Optional[str]:
608
- if not src:
609
- return None
610
- if not is_remote(src):
611
- return src if os.path.exists(src) else None
612
- try:
613
- b = _fetch_with_retries_bytes(src, timeout=15, max_retries=3)
614
- ext = ext_from_src(src) or ".bin"
615
- fd, tmp = tempfile.mkstemp(suffix=ext)
616
- os.close(fd)
617
- with open(tmp, "wb") as fh:
618
- fh.write(b)
619
- return tmp
620
- except Exception:
621
- return None
622
-
623
  def load_preview(url: str):
624
- # returns (preview_image_path, preview_video_path, status_msg)
 
 
 
625
  if not url:
626
- return (
627
- gr.update(value=None, visible=False),
628
- gr.update(value=None, visible=False),
629
- gr.update(value=""),
630
- )
631
  try:
632
- if is_remote(url):
633
- head = safe_head(url)
634
- if head:
635
- ctype = (head.headers.get("content-type") or "").lower()
636
- if ctype.startswith("video/") or any(
637
- url.lower().endswith(ext) for ext in VIDEO_EXTS
638
- ):
639
- local = _save_preview_local(url)
640
- if local:
641
- return (
642
- gr.update(value=None, visible=False),
643
- gr.update(value=local, visible=True),
644
- gr.update(
645
- value=f"Remote video detected (content-type={ctype}). Showing preview if browser-playable."
646
- ),
647
- )
648
- else:
649
- return (
650
- gr.update(value=None, visible=False),
651
- gr.update(value=None, visible=False),
652
- gr.update(
653
- value=f"Remote video detected but preview download failed (content-type={ctype})."
654
- ),
655
- )
656
- local = _save_preview_local(url)
657
- if not local:
658
- return (
659
- gr.update(value=None, visible=False),
660
- gr.update(value=None, visible=False),
661
- gr.update(
662
- value="Preview load failed: could not fetch resource."
663
- ),
664
- )
665
  try:
666
- img = Image.open(local)
667
- if getattr(img, "is_animated", False):
668
- img.seek(0)
669
- return (
670
- gr.update(value=local, visible=True),
671
- gr.update(value=None, visible=False),
672
- gr.update(value="Image preview loaded."),
673
- )
674
- except UnidentifiedImageError:
675
- if any(local.lower().endswith(ext) for ext in VIDEO_EXTS) or True:
676
- return (
677
- gr.update(value=None, visible=False),
678
- gr.update(value=local, visible=True),
679
- gr.update(
680
- value="Non-image file — showing as video preview if playable."
681
- ),
682
- )
683
- return (
684
- gr.update(value=None, visible=False),
685
- gr.update(value=None, visible=False),
686
- gr.update(
687
- value="Preview load failed: file is not a valid image."
688
- ),
689
- )
690
  except Exception as e:
691
- return (
692
- gr.update(value=None, visible=False),
693
- gr.update(value=None, visible=False),
694
- gr.update(value=f"Preview load failed: {e}"),
695
- )
696
 
 
697
  url_input.change(
698
  fn=load_preview,
699
  inputs=[url_input],
700
- outputs=[preview_image, preview_video, preview_status],
701
  )
702
 
703
- def clear_all():
704
- return "", None, None, "idle", "Idle", "", ""
 
 
 
 
 
 
 
 
 
705
 
706
  clear_btn.click(
707
  fn=clear_all,
708
- inputs=[],
709
- outputs=[
710
- url_input,
711
- preview_image,
712
- preview_video,
713
- status_state,
714
- progress_md,
715
- output_md,
716
- preview_path_state,
717
- ],
718
  )
719
 
720
- def _convert_video_for_preview(path: str) -> str:
721
- if not FFMPEG_BIN or not os.path.exists(FFMPEG_BIN):
722
- return path
723
- out_fd, out_path = tempfile.mkstemp(suffix=".mp4")
724
- os.close(out_fd)
725
- cmd = [
726
- FFMPEG_BIN,
727
- "-nostdin",
728
- "-y",
729
- "-i",
730
- path,
731
- "-c:v",
732
- "libx264",
733
- "-preset",
734
- "veryfast",
735
- "-crf",
736
- "28",
737
- "-c:a",
738
- "aac",
739
- "-movflags",
740
- "+faststart",
741
- out_path,
742
- ]
743
- try:
744
- subprocess.run(
745
- cmd,
746
- stdout=subprocess.DEVNULL,
747
- stderr=subprocess.DEVNULL,
748
- timeout=60,
749
- )
750
- return out_path
751
- except Exception:
752
- try:
753
- os.remove(out_path)
754
- except Exception:
755
- pass
756
- return path
757
-
758
- # --- Helper: probe codecs via ffprobe; returns dict with streams info or None on failure
759
- def _ffprobe_streams(path: str) -> Optional[dict]:
760
- if not FFMPEG_BIN:
761
- return None
762
- ffprobe = (
763
- FFMPEG_BIN.replace("ffmpeg", "ffprobe")
764
- if "ffmpeg" in FFMPEG_BIN
765
- else "ffprobe"
766
- )
767
- if not shutil.which(ffprobe):
768
- ffprobe = "ffprobe"
769
- cmd = [
770
- ffprobe,
771
- "-v",
772
- "error",
773
- "-print_format",
774
- "json",
775
- "-show_streams",
776
- "-show_format",
777
- path,
778
- ]
779
- try:
780
- out = subprocess.check_output(cmd, stderr=subprocess.DEVNULL)
781
- return json.loads(out)
782
- except Exception:
783
- return None
784
-
785
- # --- Helper: is file already browser-playable (mp4 container with h264 video and aac audio OR at least playable video)
786
- def _is_browser_playable(path: str) -> bool:
787
- try:
788
- ext = (path or "").lower().split("?")[0]
789
- if any(ext.endswith(e) for e in [".mp4", ".m4v", ".mov"]):
790
- info = _ffprobe_streams(path)
791
- if not info:
792
- # fallback: trust .mp4 if probe failed
793
- return ext.endswith(".mp4")
794
- streams = info.get("streams", [])
795
- v_ok = any(
796
- s.get("codec_name") in ("h264", "h265", "avc1")
797
- and s.get("codec_type") == "video"
798
- for s in streams
799
- )
800
- # audio optional for preview
801
- return bool(v_ok)
802
- # other extensions: probe and accept if any video stream present
803
- info = _ffprobe_streams(path)
804
- if not info:
805
- return False
806
- streams = info.get("streams", [])
807
- return any(s.get("codec_type") == "video" for s in streams)
808
- except Exception:
809
- return False
810
-
811
- # --- Convert only if not browser-playable
812
- def _convert_video_for_preview_if_needed(path: str) -> str:
813
  """
814
- Return a path that the Gradio video component can play.
815
- If the original file is already MP4 with H.264 (or another browser‑compatible codec),
816
- the original path is returned unchanged.
817
- Otherwise the file is re‑encoded to MP4 (H.264 + AAC) and the new path is returned.
818
  """
819
- if not FFMPEG_BIN or not os.path.exists(path):
820
- return path
821
-
822
- # Quick check: extension + ffprobe for codecs
823
- if path.lower().endswith((".mp4", ".m4v", ".mov")):
824
- info = _ffprobe_streams(path)
825
- if info:
826
- codecs = {
827
- s.get("codec_name")
828
- for s in info.get("streams", [])
829
- if s.get("codec_type") == "video"
830
- }
831
- if "h264" in codecs or "h265" in codecs:
832
- return path # already playable
833
-
834
- # Need conversion → write to a new temp MP4
835
- out_fd, out_path = tempfile.mkstemp(suffix=".mp4")
836
- os.close(out_fd)
837
- cmd = [
838
- FFMPEG_BIN,
839
- "-y",
840
- "-i",
841
- path,
842
- "-c:v",
843
- "libx264",
844
- "-preset",
845
- "veryfast",
846
- "-crf",
847
- "28",
848
- "-c:a",
849
- "aac",
850
- "-movflags",
851
- "+faststart",
852
- out_path,
853
- ]
854
- try:
855
- subprocess.run(
856
- cmd,
857
- stdout=subprocess.DEVNULL,
858
- stderr=subprocess.DEVNULL,
859
- timeout=60,
860
- )
861
- return out_path
862
- except Exception:
863
- # If conversion fails, fall back to the original (Gradio will show its own warning)
864
- try:
865
- os.remove(out_path)
866
- except Exception:
867
- pass
868
- return path
869
-
870
- def worker(url: str, prompt: str, key: str, progress=gr.Progress()):
871
- """Return (status, markdown_output, preview_path)."""
872
  try:
873
  if not url:
874
  return "error", "**Error:** No URL provided.", ""
875
 
 
876
  progress(0.02, desc="Checking URL / content‑type")
877
  is_img, is_vid = determine_media_type(url, progress=progress)
878
 
879
  client = get_client(key)
880
- preview_path = ""
881
 
 
 
 
882
  if is_vid:
883
- progress(0.05, desc="Downloading video")
884
- raw = fetch_bytes(url, timeout=120, progress=progress)
885
- if not raw:
886
  return "error", "Failed to download video bytes.", ""
887
 
888
- # write with a proper video extension
889
- tmp_video = _temp_file(
890
- raw, suffix="." + (ext_from_src(url) or "mp4")
891
- )
892
- progress(0.15, desc="Preparing preview")
893
- preview_path = _make_preview(url, raw)
894
 
895
- progress(0.25, desc="Running full‑video analysis")
896
- result = analyze_video_cohesive(
897
- client, tmp_video, prompt, progress=progress
898
- )
899
 
900
- # clean‑up the *raw* temp file (preview may be a different file)
901
- try:
902
- os.remove(tmp_video)
903
- except Exception:
904
- pass
905
 
 
906
  elif is_img:
907
- progress(0.05, desc="Downloading image")
908
- raw = fetch_bytes(url, progress=progress)
909
 
910
- # preview image (always JPEG for consistency)
911
- preview_path = _make_preview(url, raw)
912
 
913
  progress(0.20, desc="Running image analysis")
914
- result = analyze_image_structured(
915
- client, raw, prompt, progress=progress
916
- )
917
 
 
918
  else:
919
- progress(0.07, desc="Downloading unknown media")
920
- raw = fetch_bytes(url, timeout=120, progress=progress)
921
 
922
- # try to open as image
 
923
  try:
924
- Image.open(BytesIO(raw)).verify()
925
- is_img = True
926
  except Exception:
927
- is_img = False
928
-
929
- if is_img:
930
- preview_path = _temp_file(
931
- convert_to_jpeg_bytes(raw, base_h=1024), suffix=".jpg"
932
- )
933
- result = analyze_image_structured(
934
- client, raw, prompt, progress=progress
935
- )
936
- else:
937
- tmp_vid = _temp_file(raw, suffix=ext_from_src(url) or ".mp4")
938
- preview_path = _convert_video_for_preview_if_needed(tmp_vid)
939
- result = analyze_video_cohesive(
940
- client, tmp_vid, prompt, progress=progress
941
- )
942
- try:
943
- os.remove(tmp_vid)
944
- except Exception:
945
- pass
946
-
947
- status = (
948
- "done"
949
- if not (
950
- isinstance(result, str) and result.lower().startswith("error")
951
- )
952
- else "error"
953
- )
954
- return (
955
- status,
956
- result if isinstance(result, str) else str(result),
957
- preview_path or "",
958
- )
959
 
960
- except Exception as exc:
961
- return "error", f"Unexpected worker error: {exc}", ""
 
 
 
 
 
 
 
 
 
 
 
962
 
963
- def _start_processing(url, prompt, key):
964
- return "busy", None, None
965
 
 
 
 
 
 
 
 
 
 
 
 
966
  submit_btn.click(
967
  fn=worker,
968
  inputs=[url_input, custom_prompt, api_key],
969
- outputs=[status_state, output_md, preview_path_state],
970
  show_progress="full",
971
  show_progress_on=progress_md,
972
  )
973
 
974
- def _btn_label_for_status(s):
975
- labels = {
976
- "idle": "Submit",
977
- "busy": "Processing...",
978
- "done": "Done!",
979
- "error": "Retry",
980
- }
981
- return labels.get(s, "Submit")
982
-
983
- status_state.change(
984
- fn=lambda s: _btn_label_for_status(s),
985
- inputs=[status_state],
986
- outputs=[submit_btn],
987
- )
988
 
 
989
  def status_to_progress_text(s):
990
- return {
991
- "idle": "Idle",
992
- "busy": "Processing…",
993
- "done": "Completed",
994
- "error": "Error — see output",
995
- }.get(s, s)
996
-
997
- status_state.change(
998
- fn=status_to_progress_text, inputs=[status_state], outputs=[progress_md]
999
- )
1000
 
1001
- preview_cache = {}
1002
-
1003
- def _make_preview(url: str, raw: bytes) -> str:
1004
- if url in preview_cache:
1005
- return preview_cache[url][0]
1006
-
1007
- if determine_media_type(url)[1]:
1008
- tmp = _temp_file(raw, suffix=ext_from_src(url) or ".mp4")
1009
- preview = _convert_video_for_preview_if_needed(tmp)
1010
- preview_cache[url] = (preview, True)
1011
- else:
1012
- preview = _temp_file(
1013
- convert_to_jpeg_bytes(raw, base_h=1024), suffix=".jpg"
1014
- )
1015
- preview_cache[url] = (preview, False)
1016
- return preview
1017
-
1018
- preview_path_state = gr.State("")
1019
- prev_preview_state = gr.State("")
1020
-
1021
- def apply_preview(path: str, last_path: str):
1022
- if path == last_path:
1023
- return gr.update(), gr.update(), ""
1024
 
1025
- if not path:
1026
- return gr.update(), gr.update(), ""
 
1027
 
 
1028
  try:
1029
- if any(path.lower().endswith(ext) for ext in IMAGE_EXTS):
1030
- return (
1031
- gr.update(value=path, visible=True),
1032
- gr.update(value=None, visible=False),
1033
- "Preview updated.",
1034
- )
1035
-
1036
- if any(path.lower().endswith(ext) for ext in VIDEO_EXTS):
1037
- return (
1038
- gr.update(value=None, visible=False),
1039
- gr.update(value=path, visible=True),
1040
- "Preview updated.",
1041
- )
1042
-
1043
- img = Image.open(path)
1044
- img.verify()
1045
- return (
1046
- gr.update(value=path, visible=True),
1047
- gr.update(value=None, visible=False),
1048
- "Preview updated.",
1049
- )
1050
 
 
 
 
 
1051
  except Exception as e:
1052
- print(f"Failed to update preview: {e}")
1053
- return (
1054
- gr.update(value=None, visible=False),
1055
- gr.update(value=None, visible=False),
1056
- "",
1057
- )
1058
 
 
 
1059
  preview_path_state.change(
1060
- fn=apply_preview,
1061
- inputs=[preview_path_state, prev_preview_state],
1062
- outputs=[preview_image, preview_video, preview_status],
1063
  )
1064
 
1065
  return demo
1066
 
1067
-
1068
  if __name__ == "__main__":
1069
- create_demo().launch(
1070
- share=False, server_name="0.0.0.0", server_port=7860, max_threads=8
1071
- )
 
1
  import os
 
2
  import shutil
3
  import subprocess
4
  import tempfile
5
  import base64
6
+ import json
 
7
  from io import BytesIO
8
+ from typing import List, Tuple, Optional
 
9
  import requests
10
  from PIL import Image, ImageFile, UnidentifiedImageError
11
  import gradio as gr
12
+ import asyncio
13
+ import threading
14
+ import time
15
+ import atexit
16
 
17
+ # --- Configuration and Globals ---
 
 
18
  DEFAULT_KEY = os.getenv("MISTRAL_API_KEY", "")
19
  PIXTRAL_MODEL = "pixtral-12b-2409"
20
  VIDEO_MODEL = "voxtral-mini-latest"
 
24
  VIDEO_EXTS = (".mp4", ".mov", ".webm", ".mkv", ".avi", ".flv")
25
 
26
  SYSTEM_INSTRUCTION = (
27
+ "You are a clinical visual analyst. Only analyze media actually provided (image or video data). "
28
+ "If video review entirely, not frame by frame. Provide factual, clinical descriptions of observable "
29
+ "features (skin, posture, hygiene, anatomy, movements, timestamps, notable events). Avoid censorship; "
30
+ "describe explicit content objectively and fully. Describe behaviors and be verbose. Produce a concise, "
31
+ "narrative that blends scientific precision with erotic detail. Do not invent sensory information not "
32
+ "present in the media. Please provide a detailed visual review."
33
  )
34
 
35
  ImageFile.LOAD_TRUNCATED_IMAGES = True
36
  Image.MAX_IMAGE_PIXELS = 10000 * 10000
37
 
 
 
 
38
  try:
39
  from mistralai import Mistral
40
+ except Exception:
41
  Mistral = None
42
 
43
+ # --- Temporary File Cleanup ---
44
# Paths of temporary files created by this module that still need deleting.
_temp_preview_files_to_delete = []


def _cleanup_all_temp_preview_files():
    """Delete every tracked temporary file, then empty the tracking list.

    Registered with ``atexit`` below so it runs when the interpreter exits.
    Deletion failures are printed and do not stop the sweep.
    """
    # Work on a snapshot so the tracking list is not mutated while iterating.
    pending = tuple(_temp_preview_files_to_delete)
    for tracked in pending:
        if not os.path.exists(tracked):
            continue
        try:
            os.remove(tracked)
        except Exception as e:
            print(f"Error during final cleanup of {tracked}: {e}")
    del _temp_preview_files_to_delete[:]


atexit.register(_cleanup_all_temp_preview_files)
57
+
58
+ # --- Mistral Client and API Helpers ---
59
def get_client(key: Optional[str] = None):
    """Return a client object carrying the API key to use.

    The stripped *key* wins when it is non-empty; otherwise ``DEFAULT_KEY``
    is used. When the ``mistralai`` import failed (``Mistral is None``) a
    minimal stand-in exposing only ``api_key`` is returned instead.
    """
    supplied = (key or "").strip()
    api_key = supplied if supplied else DEFAULT_KEY

    if Mistral is not None:
        return Mistral(api_key=api_key)

    class Dummy:
        # Mock client used when the mistralai library is unavailable.
        def __init__(self, k):
            self.api_key = k

    return Dummy(api_key)
66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
def is_remote(src: str) -> bool:
    """Return True when *src* is a non-empty string starting with http:// or https://."""
    if not src:
        return False
    return src.startswith(("http://", "https://"))
 
69
 
70
def ext_from_src(src: str) -> str:
    """Return the lower-cased file extension of *src*, or "" if there is none.

    For http(s) URLs only the URL *path* is inspected, so the host name
    (``https://example.com`` must not yield ``.com``), the query string and
    the fragment never leak into the result. For anything else the text
    before the first ``?`` is used, as before.
    """
    if not src:
        return ""
    if src.startswith(("http://", "https://")):
        # Local import: keeps this block self-contained without touching the
        # file-level import list.
        from urllib.parse import urlsplit

        path = urlsplit(src).path
    else:
        path = src.split("?")[0]
    _, ext = os.path.splitext(path)
    return ext.lower()
74
 
 
75
  def safe_head(url: str, timeout: int = 6):
76
  try:
77
  r = requests.head(url, timeout=timeout, allow_redirects=True)
 
79
  except Exception:
80
  return None
81
 
 
82
def safe_get(url: str, timeout: int = 15):
    """GET *url* and return the response; raises for HTTP error statuses."""
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    return response
86
 
 
 
 
 
87
def _temp_file(data: bytes, suffix: str) -> str:
    """Persist *data* in a new temporary file and return the file's path.

    The path is appended to `_temp_preview_files_to_delete` so the file is
    swept by the module's cleanup routine.
    """
    handle, tmp_path = tempfile.mkstemp(suffix=suffix)
    # Reuse the descriptor from mkstemp; the with-block closes it.
    with os.fdopen(handle, "wb") as sink:
        sink.write(data)
    _temp_preview_files_to_delete.append(tmp_path)  # Track for cleanup
    return tmp_path
98
 
99
def fetch_bytes(src: str, stream_threshold: int = STREAM_THRESHOLD, timeout: int = 60, progress=None) -> bytes:
    """Return the full content of *src* (http(s) URL or local path) as bytes.

    Args:
        src: Remote URL or local filesystem path.
        stream_threshold: Remote files whose advertised Content-Length
            exceeds this are downloaded in chunks.
        timeout: Timeout in seconds passed to the HTTP requests.
        progress: Optional callable ``progress(fraction, desc=...)``.

    Raises:
        FileNotFoundError: *src* is local and does not exist.
        requests.exceptions.RequestException: the final plain download failed.
    """
    def report(fraction, desc):
        if progress is not None:
            progress(fraction, desc=desc)

    report(0.05, "Checking remote/local source...")

    if not is_remote(src):
        if not os.path.exists(src):
            raise FileNotFoundError(f"Local path does not exist: {src}")
        report(0.05, "Reading local file...")
        with open(src, "rb") as f:
            data = f.read()
        report(0.15, "Read local file")
        return data

    head = safe_head(src)
    if head is not None:
        cl = head.headers.get("content-length")
        try:
            if cl and int(cl) > stream_threshold:
                report(0.1, "Streaming large remote file...")
                with requests.get(src, timeout=timeout, stream=True) as r:
                    r.raise_for_status()
                    # The caller needs the whole payload in memory anyway, so
                    # join the chunks directly instead of round-tripping
                    # through a temporary file.
                    return b"".join(chunk for chunk in r.iter_content(8192) if chunk)
        except Exception:
            # Best effort: a bad Content-Length or a failed chunked download
            # falls through to the plain download below.
            pass

    r = safe_get(src, timeout=timeout)
    report(0.25, "Downloaded remote content")
    return r.content
141
 
142
  def convert_to_jpeg_bytes(img_bytes: bytes, base_h: int = 480) -> bytes:
143
  img = Image.open(BytesIO(img_bytes))
144
+ try:
145
+ if getattr(img, "is_animated", False):
146
+ img.seek(0)
147
+ except Exception:
148
+ pass
149
  if img.mode != "RGB":
150
  img = img.convert("RGB")
151
  h = base_h
 
155
  img.save(buf, format="JPEG", quality=85)
156
  return buf.getvalue()
157
 
 
158
def b64_bytes(b: bytes, mime: str = "image/jpeg") -> str:
    """Encode *b* as a base64 ``data:`` URL with the given MIME type."""
    encoded = base64.b64encode(b).decode("utf-8")
    return "data:{};base64,{}".format(mime, encoded)
160
+
161
def extract_best_frames_bytes(media_path: str, sample_count: int = 5, timeout_extract: int = 15, progress=None) -> List[bytes]:
    """Grab up to *sample_count* still frames from a video using ffmpeg.

    Frames are sampled at 0.5s, 1s, 2s, 3s, 4s and then at one-second steps
    (5s, 6s, ...) so that requests for more than five frames are honoured.

    Args:
        media_path: Local path of the video file.
        sample_count: Number of frames to attempt; non-positive yields [].
        timeout_extract: Per-frame ffmpeg timeout in seconds.
        progress: Optional callable ``progress(fraction, desc=...)``.

    Returns:
        JPEG bytes of each frame that ffmpeg produced (possibly fewer than
        requested, possibly empty). Never raises for extraction failures.
    """
    frames: List[bytes] = []
    if not FFMPEG_BIN or not os.path.exists(media_path) or sample_count <= 0:
        return frames
    if progress is not None:
        progress(0.05, desc="Preparing frame extraction...")

    # Keep the historical first five offsets, then continue in 1s steps.
    timestamps = [0.5, 1.0, 2.0, 3.0, 4.0][:sample_count]
    timestamps += [float(sec) for sec in range(5, sample_count)]
    total = len(timestamps)

    for i, t in enumerate(timestamps):
        fd, tmp = tempfile.mkstemp(suffix=f"_{i}.jpg")
        os.close(fd)
        cmd = [
            FFMPEG_BIN,
            "-nostdin",
            "-y",
            "-ss",
            str(t),
            "-i",
            media_path,
            "-frames:v",
            "1",
            "-q:v",
            "2",
            tmp,
        ]
        try:
            if progress is not None:
                progress(0.1 + (i / max(1, total)) * 0.2, desc=f"Extracting frame {i+1}/{total}...")
            subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, timeout=timeout_extract)
            if os.path.exists(tmp) and os.path.getsize(tmp) > 0:
                with open(tmp, "rb") as f:
                    frames.append(f.read())
        except Exception:
            # Best effort: a frame that cannot be extracted is simply skipped.
            pass
        finally:
            # The frame is already in memory, so drop the scratch file now.
            try:
                os.remove(tmp)
            except FileNotFoundError:
                pass
            except OSError:
                # Could not delete right now; leave it for the exit-time sweep.
                _temp_preview_files_to_delete.append(tmp)

    if progress is not None:
        progress(0.45, desc=f"Extracted {len(frames)} frames")
    return frames
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
205
 
206
def chat_complete(client, model: str, messages, timeout: int = 120, progress=None) -> str:
    """Run one chat completion and return the assistant text.

    Uses ``client.chat.complete`` when the client exposes it; otherwise posts
    to the Mistral REST endpoint with the client's ``api_key`` (or
    ``DEFAULT_KEY``).

    Args:
        client: SDK client, or any object with an ``api_key`` attribute.
        model: Model identifier.
        messages: Chat messages payload.
        timeout: Timeout in seconds for the REST fallback request.
        progress: Optional callable ``progress(fraction, desc=...)``.

    Returns:
        The stripped reply text. Failures are reported as a string starting
        with "Error" (or "Empty response from model" when there are no
        choices); this function does not raise.
    """
    try:
        if progress is not None:
            progress(0.6, desc="Sending request to model...")
        if hasattr(client, "chat") and hasattr(client.chat, "complete"):
            res = client.chat.complete(model=model, messages=messages, stream=False)
        else:
            api_key = getattr(client, "api_key", "") or DEFAULT_KEY
            url = "https://api.mistral.ai/v1/chat/completions"
            headers = {"Content-Type": "application/json"}
            if api_key:
                headers["Authorization"] = f"Bearer {api_key}"
            r = requests.post(url, json={"model": model, "messages": messages}, headers=headers, timeout=timeout)
            r.raise_for_status()
            res = r.json()
        if progress is not None:
            progress(0.8, desc="Model responded, parsing...")

        choices = getattr(res, "choices", None) or (res.get("choices") if isinstance(res, dict) else [])
        if not choices:
            return f"Empty response from model: {res}"
        first = choices[0]
        if hasattr(first, "message"):
            msg = first.message
        elif isinstance(first, dict):
            msg = first.get("message")
        else:
            msg = first
        content = msg.get("content") if isinstance(msg, dict) else getattr(msg, "content", None)

        if isinstance(content, str):
            return content.strip()
        if isinstance(content, (list, tuple)):
            # Content may arrive as a list of chunks; keep only the text parts.
            parts = []
            for chunk in content:
                text = chunk.get("text") if isinstance(chunk, dict) else getattr(chunk, "text", None)
                if isinstance(text, str):
                    parts.append(text)
            joined = "".join(parts).strip()
            if joined:
                return joined
            return "Error: model response contained no text content."
        if content is None:
            # Previously surfaced to the user as the literal string "None".
            return "Error: model response contained no text content."
        return str(content)
    except requests.exceptions.RequestException as e:
        return f"Error: network/API request failed: {e}"
    except Exception as e:
        return f"Error during model call: {e}"
232
 
233
def upload_file_to_mistral(client, path: str, filename: Optional[str] = None, purpose: str = "batch", timeout: int = 120, progress=None) -> str:
    """Upload the file at *path* and return the id reported by the service.

    Tries ``client.files.upload`` first; if that is unavailable or fails for
    any reason, falls back to a multipart POST against the REST endpoint.

    Args:
        client: SDK client, or any object with an ``api_key`` attribute.
        path: Local file to upload.
        filename: Name to report to the service; defaults to the basename.
        purpose: Value of the upload ``purpose`` field.
        timeout: Timeout in seconds for the REST fallback request.
        progress: Optional callable ``progress(fraction, desc=...)``.

    Raises:
        RuntimeError: the REST upload failed or returned no file id.
    """
    fname = filename or os.path.basename(path)
    try:
        if progress is not None:
            progress(0.5, desc="Uploading file to model service...")
        if hasattr(client, "files") and hasattr(client.files, "upload"):
            with open(path, "rb") as fh:
                res = client.files.upload(file={"file_name": fname, "content": fh}, purpose=purpose)
            fid = getattr(res, "id", None) or (res.get("id") if isinstance(res, dict) else None)
            if not fid:  # Older API responses might nest id in 'data'
                fid = res["data"][0]["id"]
            if progress is not None:
                progress(0.6, desc="Upload complete")
            return fid
    except Exception:
        # Deliberate fallback: any SDK-path failure retries over plain REST.
        pass

    api_key = getattr(client, "api_key", "") or DEFAULT_KEY
    url = "https://api.mistral.ai/v1/files"
    headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}
    try:
        with open(path, "rb") as fh:
            files = {"file": (fname, fh)}
            data = {"purpose": purpose}
            r = requests.post(url, headers=headers, files=files, data=data, timeout=timeout)
        r.raise_for_status()
        jr = r.json()
    except requests.exceptions.RequestException as e:
        raise RuntimeError(f"File upload failed: {e}") from e

    fid = jr.get("id") if isinstance(jr, dict) else None
    if not fid and isinstance(jr, dict):
        # Tolerate an empty or missing "data" list instead of IndexError.
        entries = jr.get("data") or [{}]
        first = entries[0] if isinstance(entries[0], dict) else {}
        fid = first.get("id")
    if not fid:
        # Returning None here would be interpolated into the prompt downstream.
        raise RuntimeError("File upload failed: response did not contain a file id")
    if progress is not None:
        progress(0.65, desc="Upload complete (REST)")
    return fid
265
+
266
def determine_media_type(src: str, progress=None) -> Tuple[bool, bool]:
    """Classify *src* as image and/or video.

    The file extension gives a first guess. For remote sources a HEAD
    request is made, and an ``image/*`` or ``video/*`` content type
    overrides the guess.

    Returns:
        ``(is_image, is_video)``.
    """
    suffix = ext_from_src(src)
    looks_image = suffix in IMAGE_EXTS
    looks_video = suffix in VIDEO_EXTS

    response = safe_head(src) if is_remote(src) else None
    if response:
        declared = (response.headers.get("content-type") or "").lower()
        if declared.startswith("image/"):
            looks_image, looks_video = True, False
        elif declared.startswith("video/"):
            looks_image, looks_video = False, True

    if progress is not None:
        progress(0.02, desc="Determined media type")
    return looks_image, looks_video
285
+
286
def analyze_image_structured(client, img_bytes: bytes, prompt: str, progress=None) -> str:
    """Send one image plus *prompt* to the image model and return its reply.

    The image is re-encoded to JPEG (height 1024 requested) and embedded as
    a base64 data URL. Failures are returned as a string starting with
    "Error" rather than raised.
    """
    try:
        if progress is not None:
            progress(0.3, desc="Preparing image for analysis...")
        encoded = b64_bytes(convert_to_jpeg_bytes(img_bytes, base_h=1024), mime="image/jpeg")
        user_parts = [
            {"type": "text", "text": prompt},
            {"type": "image_url", "image_url": encoded},
        ]
        conversation = [
            {"role": "system", "content": SYSTEM_INSTRUCTION},
            {"role": "user", "content": user_parts},
        ]
        return chat_complete(client, PIXTRAL_MODEL, conversation, progress=progress)
    except UnidentifiedImageError:
        return "Error: provided file is not a valid image."
    except Exception as e:
        return f"Error analyzing image: {e}"
304
 
305
def analyze_video_cohesive(client, video_path: str, prompt: str, progress=None) -> str:
    """Analyse a video as a whole and return the model's narrative.

    First path: upload the file and ask the video model about the uploaded
    file id. If anything in that path raises, fall back to extracting still
    frames and sending them to the image model in a single request.
    """
    def notify(fraction, desc):
        if progress is not None:
            progress(fraction, desc=desc)

    upload_error = None
    try:
        notify(0.3, "Uploading video for full analysis...")
        file_id = upload_file_to_mistral(client, video_path, filename=os.path.basename(video_path), progress=progress)
        extra_msg = (
            f"Uploaded video file id: {file_id}\n\n"
            "Instruction: Analyze the entire video and produce a single cohesive narrative describing consistent observations."
        )
        upload_messages = [
            {"role": "system", "content": SYSTEM_INSTRUCTION},
            {"role": "user", "content": extra_msg + "\n\n" + prompt},
        ]
        return chat_complete(client, VIDEO_MODEL, upload_messages, progress=progress)
    except Exception as e:
        upload_error = e

    # Fallback: sample frames and describe them together.
    notify(0.35, "Upload failed, extracting frames as fallback...")
    frames = extract_best_frames_bytes(video_path, sample_count=6, progress=progress)
    if not frames:
        return f"Error: could not upload video and no frames could be extracted. ({upload_error})"

    total = len(frames)
    image_entries = []
    for position, frame_bytes in enumerate(frames, start=1):
        try:
            notify(0.4 + (position / total) * 0.2, f"Preparing frame {position}/{total}...")
            jpeg = convert_to_jpeg_bytes(frame_bytes, base_h=720)
            image_entries.append(
                {
                    "type": "image_url",
                    "image_url": b64_bytes(jpeg, mime="image/jpeg"),
                    "meta": {"frame_index": position},
                }
            )
        except Exception:
            # A frame that cannot be prepared is left out of the request.
            continue

    text_part = {"type": "text", "text": prompt + "\n\nPlease consolidate observations across these frames into a single cohesive narrative."}
    frame_messages = [
        {"role": "system", "content": SYSTEM_INSTRUCTION},
        {"role": "user", "content": [text_part] + image_entries},
    ]
    return chat_complete(client, PIXTRAL_MODEL, frame_messages, progress=progress)
346
 
347
+ # --- FFmpeg Helpers for Preview ---
348
def _ffprobe_streams(path: str) -> Optional[dict]:
    """Probe *path* with ffprobe and return its parsed JSON report.

    The ffprobe executable is looked up next to the configured ffmpeg binary
    first and on the system PATH otherwise.

    Returns:
        The decoded ``-show_streams -show_format`` JSON, or None when ffmpeg
        is not configured or the probe fails or times out.
    """
    if not FFMPEG_BIN:
        return None

    # Rename only the executable itself: replacing "ffmpeg" across the whole
    # path would also rewrite directory names such as /opt/ffmpeg/bin.
    bin_dir, bin_name = os.path.split(FFMPEG_BIN)
    if "ffmpeg" in bin_name:
        ffprobe = os.path.join(bin_dir, bin_name.replace("ffmpeg", "ffprobe"))
    else:
        ffprobe = "ffprobe"
    if not shutil.which(ffprobe):
        ffprobe = "ffprobe"  # Try system PATH

    cmd = [
        ffprobe, "-v", "error", "-print_format", "json", "-show_streams", "-show_format", path
    ]
    try:
        # Bounded so a stuck probe cannot hang the preview request.
        out = subprocess.check_output(cmd, stderr=subprocess.DEVNULL, timeout=30)
        return json.loads(out)
    except Exception:
        return None
363
 
364
def _convert_video_for_preview_if_needed(path: str) -> str:
    """Return a path the Gradio video component can be pointed at.

    If *path* is an .mp4/.m4v/.mov whose probed video codec is in the
    accepted list, it is returned unchanged. Otherwise the file is
    re-encoded to MP4 (libx264 + AAC, metadata stripped) in a new tracked
    temporary file and that path is returned.

    When ffmpeg is not configured, the input does not exist, or the
    conversion fails, the original *path* is returned.
    """
    if not FFMPEG_BIN or not os.path.exists(path):
        return path  # Cannot convert, return original

    # Quick check for MP4 and common codecs
    if path.lower().endswith((".mp4", ".m4v", ".mov")):
        info = _ffprobe_streams(path)
        if info:
            video_streams = [s for s in info.get("streams", []) if s.get("codec_type") == "video"]
            if video_streams and any(s.get("codec_name") in ("h264", "h265", "avc1") for s in video_streams):
                return path  # Already playable

    # Need conversion: write to a new temp MP4 (created empty and tracked).
    out_path = _temp_file(b"", suffix=".mp4")
    cmd = [
        FFMPEG_BIN, "-nostdin", "-y", "-i", path,
        # Output options must come before the output file; placed after it
        # they are trailing options and are not applied.
        "-map_metadata", "-1",
        "-c:v", "libx264", "-preset", "veryfast", "-crf", "28",
        "-c:a", "aac", "-movflags", "+faststart",
        out_path,
    ]
    try:
        # check=True: a non-zero ffmpeg exit must not be mistaken for success.
        subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, timeout=60, check=True)
        if os.path.getsize(out_path) > 0:
            return out_path
    except Exception:
        pass

    # Conversion failed: drop the temp file and fall back to the original.
    if out_path in _temp_preview_files_to_delete:
        _temp_preview_files_to_delete.remove(out_path)
    try:
        os.remove(out_path)
    except Exception:
        pass
    return path  # Gradio will show its own warning if not playable
400
+
401
+ # --- Preview Generation Logic ---
402
def _get_playable_preview_path_from_raw(src_url: str, raw_bytes: bytes) -> str:
    """Write *raw_bytes* to a tracked temp file suitable for preview.

    Videos (as classified from *src_url*) are saved and passed through the
    preview converter; everything else is treated as an image and stored as
    a JPEG.
    """
    _, treat_as_video = determine_media_type(src_url)

    if not treat_as_video:
        # Image, or unknown type handled as an image.
        jpeg = convert_to_jpeg_bytes(raw_bytes, base_h=1024)
        return _temp_file(jpeg, suffix=".jpg")

    staged = _temp_file(raw_bytes, suffix=ext_from_src(src_url) or ".mp4")
    playable = _convert_video_for_preview_if_needed(staged)

    # When conversion produced a different file, the staged copy is obsolete.
    converted = playable != staged
    if converted and os.path.exists(staged):
        if staged in _temp_preview_files_to_delete:
            _temp_preview_files_to_delete.remove(staged)
        try:
            os.remove(staged)
        except Exception:
            pass
    return playable
427
+
428
def _fetch_with_retries_bytes(src: str, timeout: int = 15, max_retries: int = 3):
    """Return the bytes of *src*, retrying failures with exponential backoff.

    Remote sources are fetched over HTTP; a 429 response's numeric
    ``Retry-After`` header, when present, sets the next delay. Local sources
    are read from disk.

    Args:
        src: http(s) URL or local path.
        timeout: Per-request timeout in seconds.
        max_retries: Maximum number of attempts before the error propagates.

    Raises:
        FileNotFoundError: immediately, for a missing local file.
        Exception: the last error once *max_retries* attempts have failed.
    """
    attempt = 0
    delay = 1.0
    while True:
        attempt += 1
        try:
            if not is_remote(src):
                with open(src, "rb") as fh:
                    return fh.read()

            # Context manager releases the connection on every path.
            with requests.get(src, timeout=timeout) as r:
                if r.status_code == 429:  # Rate limit
                    ra = r.headers.get("Retry-After")
                    try:
                        delay = float(ra) if ra is not None else delay
                    except (TypeError, ValueError):
                        pass  # e.g. an HTTP-date value; keep current delay
                r.raise_for_status()
                # Any status that is not an error counts as success. Only
                # accepting 200 made other accepted codes loop forever
                # without sleeping or counting toward max_retries.
                return r.content
        except FileNotFoundError:
            raise
        except Exception:
            if attempt >= max_retries:
                raise
            time.sleep(delay)
            delay *= 2
456
+
457
def _save_local_playable_preview(src: str) -> Optional[str]:
    """Resolve *src* to a local file path usable by the preview components.

    Local videos go through the preview converter and other existing local
    files are returned as-is. Remote sources are downloaded (with retries)
    and written to a temp file.

    Returns:
        The preview path, or None when *src* is empty, missing locally, or
        could not be fetched/prepared.
    """
    if not src:
        return None

    if is_remote(src):
        try:
            payload = _fetch_with_retries_bytes(src, timeout=15, max_retries=3)
            if not payload:
                return None
            return _get_playable_preview_path_from_raw(src, payload)
        except Exception as e:
            print(f"Error creating local playable preview from {src}: {e}")
            return None

    # Local source
    if not os.path.exists(src):
        return None
    _, local_is_video = determine_media_type(src)
    if local_is_video:
        return _convert_video_for_preview_if_needed(src)
    return src
481
+
482
+ # --- Gradio Interface Logic ---
483
+ css = ".preview_media img, .preview_media video { max-width: 100%; height: auto; border-radius:6px; }"
 
 
 
484
 
485
def _btn_label_for_status(status: str) -> str:
    """Map a processing status to the submit button's label ("Submit" if unknown)."""
    labels = dict(idle="Submit", busy="Processing…", done="Done!", error="Retry")
    if status in labels:
        return labels[status]
    return "Submit"
488
 
489
def create_demo():
    """Build and return the Gradio Blocks app.

    The UI has a preview column (image, video, status line) and a control
    column (URL, optional prompt and API key, Submit/Clear, progress and
    output). Typing a URL loads a preview; Submit runs the analysis worker.

    Returns:
        The constructed ``gr.Blocks`` instance (not yet launched).
    """

    def _hidden():
        """Update that clears and hides a preview component."""
        return gr.update(value=None, visible=False)

    def _shown(path):
        """Update that shows *path* in a preview component."""
        return gr.update(value=path, visible=True)

    def _is_image_file(path: str) -> bool:
        """Return True when PIL can open and verify *path* as an image."""
        try:
            # The with-block closes the file handle that Image.open acquires.
            with Image.open(path) as img:
                img.verify()
            return True
        except Exception:
            return False

    def _discard_tracked(path: str, error_label: str) -> None:
        """Delete *path* only if it is one of our tracked temp files."""
        if not path or path not in _temp_preview_files_to_delete:
            return
        try:
            os.remove(path)
        except FileNotFoundError:
            pass
        except Exception as e:
            print(f"{error_label}: {e}")
            return  # Still on disk: keep it tracked for the exit-time sweep.
        _temp_preview_files_to_delete.remove(path)

    with gr.Blocks(title="Flux Multimodal", css=css) as demo:
        with gr.Row():
            with gr.Column(scale=1):
                preview_image = gr.Image(label="Preview Image", type="filepath", elem_classes="preview_media", visible=False)
                preview_video = gr.Video(label="Preview Video", elem_classes="preview_media", visible=False, format="mp4")
                preview_status = gr.Textbox(label="Preview status", interactive=False, lines=1, value="", visible=True)
            with gr.Column(scale=2):
                url_input = gr.Textbox(label="Image / Video URL", placeholder="https://...", lines=1)
                with gr.Accordion("Prompt (optional)", open=False):
                    custom_prompt = gr.Textbox(label="Prompt", lines=4, value="")
                with gr.Accordion("Mistral API Key (optional)", open=False):
                    api_key = gr.Textbox(label="Mistral API Key", type="password", max_lines=1)
                with gr.Row():
                    submit_btn = gr.Button("Submit")
                    clear_btn = gr.Button("Clear")
                progress_md = gr.Markdown("Idle")
                output_md = gr.Markdown("")

        # Overall processing status (idle, busy, done, error).
        status_state = gr.State("idle")
        # Path of the file currently used for preview (from URL input or worker).
        preview_path_state = gr.State("")
        # Path that was previewed before the latest change. Gradio passes only
        # the declared inputs to a handler, so the previous value has to be
        # carried in its own State for old-file cleanup.
        prev_preview_path_state = gr.State("")

        def load_preview(url: str):
            """
            Load a preview for *url* and update the preview components.
            Returns (image_update, video_update, status_message, new_preview_path_for_state).
            """
            if not url:
                return _hidden(), _hidden(), gr.update(value=""), ""

            try:
                local_playable_path = _save_local_playable_preview(url)
                if not local_playable_path:
                    return _hidden(), _hidden(), gr.update(value="Preview load failed: could not fetch resource or make playable."), ""

                if _is_image_file(local_playable_path):
                    return _shown(local_playable_path), _hidden(), gr.update(value="Image preview loaded."), local_playable_path
                # Not an image: assume video (Gradio will render if playable).
                return _hidden(), _shown(local_playable_path), gr.update(value="Video preview loaded."), local_playable_path
            except Exception as e:
                return _hidden(), _hidden(), gr.update(value=f"Preview load failed: {e}"), ""

        url_input.change(
            fn=load_preview,
            inputs=[url_input],
            outputs=[preview_image, preview_video, preview_status, preview_path_state],
        )

        def clear_all(current_preview_path: str):
            """Clear all inputs/outputs and delete the currently previewed temp file."""
            _discard_tracked(current_preview_path, "Error cleaning up on clear")
            # url_input, preview_image, preview_video, status_state, progress_md, output_md, preview_path_state
            return "", None, None, "idle", "Idle", "", ""

        clear_btn.click(
            fn=clear_all,
            inputs=[preview_path_state],  # Current preview path, for cleanup
            outputs=[url_input, preview_image, preview_video, status_state, progress_md, output_md, preview_path_state],
        )

        def worker(url: str, prompt: str, key: str, progress=gr.Progress()):
            """
            Perform the media analysis.
            Returns (status, markdown_output, new_preview_path_for_state).
            """
            temp_media_file_for_analysis = None  # Temp copy handed to the video analysis
            generated_preview_path = ""  # Path for the Gradio preview components

            try:
                if not url:
                    return "error", "**Error:** No URL provided.", ""

                progress(0.01, desc="Starting media processing")
                progress(0.02, desc="Checking URL / content‑type")
                is_img, is_vid = determine_media_type(url, progress=progress)

                client = get_client(key)

                # --- Video Processing Path ---
                if is_vid:
                    progress(0.05, desc="Downloading video for analysis")
                    raw_bytes = fetch_bytes(url, timeout=120, progress=progress)
                    if not raw_bytes:
                        return "error", "Failed to download video bytes.", ""

                    # The video analysis works from a file path.
                    temp_media_file_for_analysis = _temp_file(raw_bytes, suffix=ext_from_src(url) or ".mp4")

                    progress(0.15, desc="Preparing video preview")
                    generated_preview_path = _get_playable_preview_path_from_raw(url, raw_bytes)

                    progress(0.25, desc="Running full‑video analysis")
                    result = analyze_video_cohesive(client, temp_media_file_for_analysis, prompt, progress=progress)

                # --- Image Processing Path ---
                elif is_img:
                    progress(0.05, desc="Downloading image for analysis")
                    raw_bytes = fetch_bytes(url, progress=progress)

                    progress(0.15, desc="Preparing image preview")
                    generated_preview_path = _get_playable_preview_path_from_raw(url, raw_bytes)

                    progress(0.20, desc="Running image analysis")
                    result = analyze_image_structured(client, raw_bytes, prompt, progress=progress)

                # --- Unknown Media Type (Fallback) ---
                else:
                    progress(0.07, desc="Downloading unknown media for type determination")
                    raw_bytes = fetch_bytes(url, timeout=120, progress=progress)

                    # Try to open as image first
                    is_definitely_img = False
                    try:
                        with Image.open(BytesIO(raw_bytes)) as probe:
                            probe.verify()
                        is_definitely_img = True
                    except Exception:
                        pass

                    if is_definitely_img:
                        progress(0.15, desc="Preparing image preview (fallback)")
                        generated_preview_path = _get_playable_preview_path_from_raw(url, raw_bytes)
                        progress(0.20, desc="Running image analysis (fallback)")
                        result = analyze_image_structured(client, raw_bytes, prompt, progress=progress)
                    else:  # Treat as video fallback
                        progress(0.15, desc="Preparing video preview (fallback)")
                        suffix = ext_from_src(url) or ".mp4"
                        temp_media_file_for_analysis = _temp_file(raw_bytes, suffix=suffix)
                        # The URL was not recognised as video, so the generic
                        # preview helper would push these bytes through the
                        # JPEG converter and raise. Build the video preview
                        # directly from a separate copy instead.
                        preview_source = _temp_file(raw_bytes, suffix=suffix)
                        generated_preview_path = _convert_video_for_preview_if_needed(preview_source)
                        if generated_preview_path != preview_source:
                            _discard_tracked(preview_source, f"Error cleaning up preview source {preview_source}")
                        progress(0.25, desc="Running video analysis (fallback)")
                        result = analyze_video_cohesive(client, temp_media_file_for_analysis, prompt, progress=progress)

                status = "done" if not (isinstance(result, str) and result.lower().startswith("error")) else "error"

                return status, result if isinstance(result, str) else str(result), generated_preview_path

            except Exception as exc:
                return "error", f"Unexpected worker error: {exc}", ""
            finally:
                # Remove the analysis copy; the preview file is kept separately.
                if temp_media_file_for_analysis:
                    _discard_tracked(
                        temp_media_file_for_analysis,
                        f"Error cleaning up analysis temp file {temp_media_file_for_analysis}",
                    )

        submit_btn.click(
            fn=worker,
            inputs=[url_input, custom_prompt, api_key],
            outputs=[status_state, output_md, preview_path_state],  # Worker updates preview_path_state
            show_progress="full",
            show_progress_on=progress_md,
        )

        # Dynamic update of submit button label based on status
        status_state.change(fn=_btn_label_for_status, inputs=[status_state], outputs=[submit_btn])

        # Dynamic update of progress text based on status
        def status_to_progress_text(s):
            return {"idle": "Idle", "busy": "Processing…", "done": "Completed", "error": "Error — see output"}.get(s, s)

        status_state.change(fn=status_to_progress_text, inputs=[status_state], outputs=[progress_md])

        def apply_preview_change(new_path: str, old_path: str):
            """
            Update the preview components for *new_path* and delete the
            previously previewed temp file.
            Returns (image_update, video_update, status_message, path_to_remember).
            """
            # Delete the OLD preview file if it is a temp file managed by us.
            if old_path and old_path != new_path:
                _discard_tracked(old_path, f"Error cleaning up old preview file {old_path}")

            # If new_path is empty, clear both components and status
            if not new_path:
                return _hidden(), _hidden(), gr.update(value=""), ""

            try:
                if _is_image_file(new_path):
                    return _shown(new_path), _hidden(), gr.update(value="Preview updated."), new_path
                # Not an image: assume video (Gradio will render if playable).
                return _hidden(), _shown(new_path), gr.update(value="Preview updated."), new_path
            except Exception as e:
                print(f"Error applying new preview from path {new_path}: {e}")
                return _hidden(), _hidden(), gr.update(value=f"Preview failed for path: {e}"), new_path

        # The previous path is supplied explicitly and refreshed on every
        # change; writing it back does not re-trigger this listener because it
        # is bound to preview_path_state only.
        preview_path_state.change(
            fn=apply_preview_change,
            inputs=[preview_path_state, prev_preview_path_state],
            outputs=[preview_image, preview_video, preview_status, prev_preview_path_state],
        )

    return demo
715
 
 
716
if __name__ == "__main__":
    # Script entry point: build the UI and serve it on all interfaces.
    demo_app = create_demo()
    demo_app.launch(
        share=False,
        server_name="0.0.0.0",
        server_port=7860,
        max_threads=8,
    )