GradeM8 Deploy committed on
Commit
28b7952
·
1 Parent(s): 997fd5a

refactor: replace DeepInfra with HuggingFace-only backend

Browse files
Files changed (4) hide show
  1. .gitignore +3 -0
  2. ai_router/__init__.py +3 -3
  3. ai_router/client.py +56 -35
  4. config.py +10 -10
.gitignore CHANGED
@@ -141,3 +141,6 @@ _types_backup.py
141
 
142
  # Gradio/Hugging Face
143
  gradio_cached_examples/
 
 
 
 
141
 
142
  # Gradio/Hugging Face
143
  gradio_cached_examples/
144
+
145
+ # Local secrets
146
+ .env
ai_router/__init__.py CHANGED
@@ -1,12 +1,12 @@
1
  """
2
- AI routing package for grading homework using DeepInfra.
3
 
4
- This package provides AI grading functionality using DeepInfra as the sole provider,
5
  with detailed, structured feedback including rubric breakdowns, strengths, and
6
  actionable improvements. Also includes OCR capabilities for scanned documents.
7
 
8
  Modules:
9
- client: DeepInfra HTTP client and API configuration
10
  prompt: Prompt building functions for grading requests
11
  parsing: Response parsing and validation utilities
12
  orchestration: Batch grading with concurrent processing
 
1
  """
2
+ AI routing package for grading homework using HuggingFace Inference API.
3
 
4
+ This package provides AI grading functionality using HuggingFace as the provider,
5
  with detailed, structured feedback including rubric breakdowns, strengths, and
6
  actionable improvements. Also includes OCR capabilities for scanned documents.
7
 
8
  Modules:
9
+ client: HuggingFace HTTP client and API configuration
10
  prompt: Prompt building functions for grading requests
11
  parsing: Response parsing and validation utilities
12
  orchestration: Batch grading with concurrent processing
ai_router/client.py CHANGED
@@ -1,8 +1,8 @@
1
  """
2
- DeepInfra API client module for AI grading.
3
 
4
  This module provides HTTP client functionality for communicating with
5
- the DeepInfra API, including configuration management and request handling.
6
  """
7
 
8
  from __future__ import annotations
@@ -30,35 +30,40 @@ MAX_RETRIES = 3
30
  INITIAL_BACKOFF_SECONDS = 1.0
31
  BACKOFF_MULTIPLIER = 2.0
32
 
 
 
 
33
 
34
  def _get_deepinfra_config() -> tuple[str, str, int, float]:
35
- """Get DeepInfra API configuration from environment or defaults.
 
 
36
 
37
  Returns:
38
  Tuple of (api_key, model, max_tokens, temperature)
39
 
40
  Raises:
41
- APIKeyError: If DEEPINFRA_API_KEY is not set.
42
  """
43
- api_key = os.getenv("DEEPINFRA_API_KEY")
44
  if not api_key:
45
  raise APIKeyError(
46
- "DEEPINFRA_API_KEY environment variable is not set. "
47
- "Please set your DeepInfra API key to use the grading feature."
48
  )
49
 
50
- model = os.getenv("DEEPINFRA_MODEL", config.DEEPINFRA_MODEL_DEFAULT)
51
- max_tokens = int(os.getenv("DEEPINFRA_MAX_TOKENS", config.MAX_TOKENS))
52
- temperature = float(os.getenv("DEEPINFRA_TEMPERATURE", config.TEMPERATURE))
53
 
54
  return api_key, model, max_tokens, temperature
55
 
56
 
57
  async def generate_grading(content: str, rubric: str) -> GradingResult:
58
  """
59
- Generate grading feedback using DeepInfra API with automatic retry logic.
60
 
61
- This function sends the submission to DeepInfra for evaluation and
62
  returns a structured grading result with detailed feedback.
63
 
64
  Implements exponential backoff retry on transient failures (429, 5xx).
@@ -78,7 +83,7 @@ async def generate_grading(content: str, rubric: str) -> GradingResult:
78
  - details: Additional context
79
 
80
  Raises:
81
- APIKeyError: If DeepInfra API key is not configured
82
  AIServiceError: If API returns error status or times out
83
  ResponseParseError: If response cannot be parsed
84
  InvalidResponseError: If response is missing required fields
@@ -89,23 +94,26 @@ async def generate_grading(content: str, rubric: str) -> GradingResult:
89
 
90
  # Build the prompt
91
  prompt = build_grading_prompt(content, rubric)
 
 
 
92
 
93
- # Prepare the API request
94
  headers = {
95
  "Authorization": f"Bearer {api_key}",
96
  "Content-Type": "application/json",
97
  }
98
 
99
  payload = {
100
- "model": model,
101
- "messages": [
102
- {"role": "system", "content": config.GRADING_SYSTEM_PROMPT},
103
- {"role": "user", "content": prompt},
104
- ],
105
- "max_tokens": max_tokens,
106
- "temperature": temperature,
107
- "response_format": {"type": "json_object"},
108
  }
 
 
109
 
110
  # Implement retry logic with exponential backoff
111
  backoff_seconds = INITIAL_BACKOFF_SECONDS
@@ -114,7 +122,7 @@ async def generate_grading(content: str, rubric: str) -> GradingResult:
114
  try:
115
  async with httpx.AsyncClient(timeout=config.HTTP_TIMEOUT_SECONDS) as client:
116
  response = await client.post(
117
- config.DEEPINFRA_API_URL,
118
  headers=headers,
119
  json=payload,
120
  )
@@ -122,28 +130,29 @@ async def generate_grading(content: str, rubric: str) -> GradingResult:
122
 
123
  except httpx.HTTPStatusError as e:
124
  status_code = e.response.status_code
125
- logger.warning(f"DeepInfra API error (attempt {attempt + 1}/{MAX_RETRIES}): {status_code}")
126
 
127
- # Determine if error is retryable
128
  is_retryable = status_code in (429, 500, 502, 503, 504)
129
 
130
  if not is_retryable or attempt == MAX_RETRIES - 1:
131
- logger.error(f"DeepInfra API error: {status_code} - {e.response.text}")
132
  raise AIServiceError(
133
- f"DeepInfra API error: {status_code}",
134
  status_code,
135
  ) from e
136
 
137
- # Backoff before retry
138
- logger.info(f"Retrying in {backoff_seconds:.1f}s...")
139
- await asyncio.sleep(backoff_seconds)
 
140
  backoff_seconds *= BACKOFF_MULTIPLIER
141
  continue
142
 
143
  except httpx.RequestError as e:
144
- logger.error(f"DeepInfra request error (attempt {attempt + 1}/{MAX_RETRIES}): {e}")
145
  if attempt == MAX_RETRIES - 1:
146
- raise AIServiceError(f"Failed to connect to DeepInfra API: {e}") from e
147
  await asyncio.sleep(backoff_seconds)
148
  backoff_seconds *= BACKOFF_MULTIPLIER
149
  continue
@@ -151,14 +160,26 @@ async def generate_grading(content: str, rubric: str) -> GradingResult:
151
  # Successful response - parse it
152
  try:
153
  result = response.json()
154
- generated_text = result["choices"][0]["message"]["content"]
 
 
 
 
 
 
155
  except (KeyError, IndexError, json.JSONDecodeError) as e:
156
- logger.error(f"Unexpected API response format: {e}")
157
  raise AIServiceError(f"Invalid API response format: {e}") from e
158
 
 
 
 
 
 
 
159
  # Parse the grading response
160
  parsed_result = parse_grading_response(generated_text)
161
- parsed_result["details"] = f"Graded using {model} via DeepInfra"
162
 
163
  return parsed_result
164
 
 
1
  """
2
+ HuggingFace API client module for AI grading.
3
 
4
  This module provides HTTP client functionality for communicating with
5
+ the HuggingFace Inference API for AI-powered grading.
6
  """
7
 
8
  from __future__ import annotations
 
30
  INITIAL_BACKOFF_SECONDS = 1.0
31
  BACKOFF_MULTIPLIER = 2.0
32
 
33
+ # HuggingFace Inference API URL
34
+ HF_API_URL = "https://api-inference.huggingface.co/models"
35
+
36
 
37
  def _get_deepinfra_config() -> tuple[str, str, int, float]:
38
+ """Get HuggingFace API configuration from environment or defaults.
39
+
40
+ Legacy name kept for backwards compatibility with tests.
41
 
42
  Returns:
43
  Tuple of (api_key, model, max_tokens, temperature)
44
 
45
  Raises:
46
+ APIKeyError: If HUGGINGFACE_API_KEY is not set.
47
  """
48
+ api_key = os.getenv("HUGGINGFACE_API_KEY")
49
  if not api_key:
50
  raise APIKeyError(
51
+ "HUGGINGFACE_API_KEY environment variable is not set. "
52
+ "Please set your HuggingFace API key to use the grading feature."
53
  )
54
 
55
+ model = os.getenv("HF_MODEL_PRIMARY", config.HF_MODEL_DEFAULT)
56
+ max_tokens = int(os.getenv("HF_MAX_TOKENS", config.MAX_TOKENS))
57
+ temperature = float(os.getenv("HF_TEMPERATURE", config.TEMPERATURE))
58
 
59
  return api_key, model, max_tokens, temperature
60
 
61
 
62
  async def generate_grading(content: str, rubric: str) -> GradingResult:
63
  """
64
+ Generate grading feedback using HuggingFace Inference API with automatic retry logic.
65
 
66
+ This function sends the submission to HuggingFace for evaluation and
67
  returns a structured grading result with detailed feedback.
68
 
69
  Implements exponential backoff retry on transient failures (429, 5xx).
 
83
  - details: Additional context
84
 
85
  Raises:
86
+ APIKeyError: If HuggingFace API key is not configured
87
  AIServiceError: If API returns error status or times out
88
  ResponseParseError: If response cannot be parsed
89
  InvalidResponseError: If response is missing required fields
 
94
 
95
  # Build the prompt
96
  prompt = build_grading_prompt(content, rubric)
97
+
98
+ # Build full prompt with system message
99
+ full_prompt = f"{config.GRADING_SYSTEM_PROMPT}\n\n{prompt}"
100
 
101
+ # Prepare the API request for HuggingFace Inference API
102
  headers = {
103
  "Authorization": f"Bearer {api_key}",
104
  "Content-Type": "application/json",
105
  }
106
 
107
  payload = {
108
+ "inputs": full_prompt,
109
+ "parameters": {
110
+ "max_new_tokens": max_tokens,
111
+ "temperature": temperature,
112
+ "return_full_text": False,
113
+ },
 
 
114
  }
115
+
116
+ api_url = f"{HF_API_URL}/{model}"
117
 
118
  # Implement retry logic with exponential backoff
119
  backoff_seconds = INITIAL_BACKOFF_SECONDS
 
122
  try:
123
  async with httpx.AsyncClient(timeout=config.HTTP_TIMEOUT_SECONDS) as client:
124
  response = await client.post(
125
+ api_url,
126
  headers=headers,
127
  json=payload,
128
  )
 
130
 
131
  except httpx.HTTPStatusError as e:
132
  status_code = e.response.status_code
133
+ logger.warning("HuggingFace API error (attempt %d/%d): %s", attempt + 1, MAX_RETRIES, status_code)
134
 
135
+ # Determine if error is retryable (including 503 for model loading)
136
  is_retryable = status_code in (429, 500, 502, 503, 504)
137
 
138
  if not is_retryable or attempt == MAX_RETRIES - 1:
139
+ logger.error("HuggingFace API error: %s - %s", status_code, e.response.text)
140
  raise AIServiceError(
141
+ f"HuggingFace API error: {status_code}",
142
  status_code,
143
  ) from e
144
 
145
+ # Backoff before retry (longer for 503 model loading)
146
+ wait_time = backoff_seconds * 2 if status_code == 503 else backoff_seconds
147
+ logger.info("Retrying in %.1fs...", wait_time)
148
+ await asyncio.sleep(wait_time)
149
  backoff_seconds *= BACKOFF_MULTIPLIER
150
  continue
151
 
152
  except httpx.RequestError as e:
153
+ logger.error("HuggingFace request error (attempt %d/%d): %s", attempt + 1, MAX_RETRIES, e)
154
  if attempt == MAX_RETRIES - 1:
155
+ raise AIServiceError(f"Failed to connect to HuggingFace API: {e}") from e
156
  await asyncio.sleep(backoff_seconds)
157
  backoff_seconds *= BACKOFF_MULTIPLIER
158
  continue
 
160
  # Successful response - parse it
161
  try:
162
  result = response.json()
163
+ # HuggingFace returns a list with generated_text
164
+ if isinstance(result, list) and len(result) > 0:
165
+ generated_text = result[0].get("generated_text", "")
166
+ elif isinstance(result, dict):
167
+ generated_text = result.get("generated_text", result.get("text", ""))
168
+ else:
169
+ generated_text = str(result)
170
  except (KeyError, IndexError, json.JSONDecodeError) as e:
171
+ logger.error("Unexpected API response format: %s", e)
172
  raise AIServiceError(f"Invalid API response format: {e}") from e
173
 
174
+ # Extract JSON from response if wrapped in text
175
+ if "{" in generated_text and "}" in generated_text:
176
+ json_start = generated_text.find("{")
177
+ json_end = generated_text.rfind("}") + 1
178
+ generated_text = generated_text[json_start:json_end]
179
+
180
  # Parse the grading response
181
  parsed_result = parse_grading_response(generated_text)
182
+ parsed_result["details"] = f"Graded using {model} via HuggingFace"
183
 
184
  return parsed_result
185
 
config.py CHANGED
@@ -7,16 +7,16 @@ All values can be overridden via environment variables where applicable.
7
  """
8
 
9
  # =============================================================================
10
- # AI Model Configuration (DeepInfra Only)
11
  # =============================================================================
12
 
13
- # Default DeepInfra model to use for grading.
14
- # Using Llama 3.1 70B for high-quality grading with good instruction following.
15
  # Alternative options:
16
- # - "meta-llama/Meta-Llama-3.1-8B-Instruct" (faster, cheaper)
17
- # - "meta-llama/Meta-Llama-3.1-70B-Instruct" (better quality)
18
- # - "microsoft/WizardLM-2-8x22B" (excellent for long-form feedback)
19
- DEEPINFRA_MODEL_DEFAULT: str = "meta-llama/Meta-Llama-3.1-70B-Instruct"
20
 
21
  # DeepSeek OCR model for extracting text from images and scanned documents
22
  DEEPSK_OCR_MODEL: str = "deepseek-ai/DeepSeek-OCR"
@@ -36,15 +36,15 @@ TEMPERATURE: float = 0.2
36
  # HTTP Client Settings
37
  # =============================================================================
38
 
39
- # Timeout for HTTP requests to DeepInfra API (in seconds).
40
  # Set to 180s to accommodate larger models and detailed responses.
41
  HTTP_TIMEOUT_SECONDS: float = 180.0
42
 
43
  # Timeout for OCR requests (longer due to image processing)
44
  OCR_TIMEOUT_SECONDS: float = 300.0
45
 
46
- # DeepInfra API endpoint URL
47
- DEEPINFRA_API_URL: str = "https://api.deepinfra.com/v1/openai/chat/completions"
48
 
49
  # =============================================================================
50
  # Concurrency Settings
 
7
  """
8
 
9
  # =============================================================================
10
+ # AI Model Configuration (HuggingFace)
11
  # =============================================================================
12
 
13
+ # Default HuggingFace model to use for grading.
14
+ # Using Llama 2 70B for high-quality grading with good instruction following.
15
  # Alternative options:
16
+ # - "mistralai/Mistral-7B-Instruct-v0.1" (faster, smaller)
17
+ # - "meta-llama/Llama-2-70b-chat-hf" (high quality)
18
+ # - "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO" (excellent for long-form)
19
+ HF_MODEL_DEFAULT: str = "meta-llama/Llama-2-70b-chat-hf"
20
 
21
  # DeepSeek OCR model for extracting text from images and scanned documents
22
  DEEPSK_OCR_MODEL: str = "deepseek-ai/DeepSeek-OCR"
 
36
  # HTTP Client Settings
37
  # =============================================================================
38
 
39
+ # Timeout for HTTP requests to HuggingFace API (in seconds).
40
  # Set to 180s to accommodate larger models and detailed responses.
41
  HTTP_TIMEOUT_SECONDS: float = 180.0
42
 
43
  # Timeout for OCR requests (longer due to image processing)
44
  OCR_TIMEOUT_SECONDS: float = 300.0
45
 
46
+ # HuggingFace Inference API endpoint URL
47
+ HF_API_URL: str = "https://api-inference.huggingface.co/models"
48
 
49
  # =============================================================================
50
  # Concurrency Settings