Spaces:

langtech-innovation
/

wirag

Sleeping

App Files Files Community

nurasaki committed on May 2, 2025

Commit

a880965

1 Parent(s): d51834f

Added retrieval num chunks options

Browse files

Files changed (5) hide show

.gitignore +1 -0
app.py +24 -82
handler.py +0 -14
input_reader.py +0 -22
rag.py +12 -4

.gitignore CHANGED Viewed

@@ -3,3 +3,4 @@
 .env
 __pycache__
 __pycache__/*

 .env
 __pycache__
 __pycache__/*
+__DELETE__*

app.py CHANGED Viewed

@@ -65,6 +65,8 @@ def submit_input(input_, num_chunks, max_new_tokens, repetition_penalty, top_k,
         "temperature": temperature
     }
     output, context, source = generate(input_, model_parameters)
     sources_markup = ""
@@ -87,13 +89,7 @@ def clear():
         None,
         None,
         None,
-        gr.Slider(value=2.0),
-        gr.Slider(value=MAX_NEW_TOKENS),
-        gr.Slider(value=1.0),
-        gr.Slider(value=50),
-        gr.Slider(value=0.99),
-        gr.Checkbox(value=False),
-        gr.Slider(value=0.35),
     )
@@ -102,25 +98,12 @@ def gradio_app():
         # App Description
         # =====================================================================================================================================
         with gr.Row():
-            with gr.Column():
-                gr.Markdown(
-                    # """# Demo de Retrieval-Augmented Generation per la Viquipèdia
-                    # 🔍 **Retrieval-Augmented Generation** (RAG) és una tecnologia d'IA que permet interrogar un repositori de documents amb preguntes
-                    # en llenguatge natural, i combina tècniques de recuperació d'informació avançades amb models generatius per redactar una resposta
-                    # fent servir només la informació existent en els documents del repositori.
-                    # 🎯 **Objectiu:** Aquest és un demostrador amb Viquipèdia i genera la resposta fent servir el model salamandra-7b-instruct.
-                    # ⚠️ **Advertencies**: Aquesta versió és experimental. El contingut generat per aquest model no està supervisat i pot ser incorrecte.
-                    # Si us plau, tingueu-ho en compte quan exploreu aquest recurs.  El model en inferencia asociat a aquesta demo de desenvolupament no funciona continuament. Si vol fer proves,
-                    # contacteu amb nosaltres a Langtech.
-                    # """
-                )
-        # with gr.Row(equal_height=True):
         with gr.Row(equal_height=False):
             # User Input
             # =====================================================================================================================================
             with gr.Column(scale=2, variant="panel"):
@@ -131,69 +114,25 @@ def gradio_app():
                     placeholder="Qui va crear la guerra de les Galaxies ?",
                 )
-                # with gr.Column(variant="panel"):
                 with gr.Row(variant="default"):
-                # with gr.Row(variant="panel"):
                     clear_btn = Button("Clear",)
                     submit_btn = Button("Submit", variant="primary", interactive=False)
-                # with gr.Row(variant="panel"):
-                with gr.Row(variant="default"):
-                    with gr.Accordion("Model parameters (not used)", open=False, visible=SHOW_MODEL_PARAMETERS_IN_UI):
-                        num_chunks = Slider(
-                            minimum=1,
-                            maximum=6,
-                            step=1,
-                            value=5,
-                            label="Number of chunks"
-                        )
-                        max_new_tokens = Slider(
-                            minimum=50,
-                            maximum=2000,
-                            step=1,
-                            value=MAX_NEW_TOKENS,
-                            label="Max tokens"
-                        )
-                        repetition_penalty = Slider(
-                            minimum=0.1,
-                            maximum=2.0,
-                            step=0.1,
-                            value=1.0,
-                            label="Repetition penalty"
-                        )
-                        top_k = Slider(
-                            minimum=1,
-                            maximum=100,
-                            step=1,
-                            value=50,
-                            label="Top k"
-                        )
-                        top_p = Slider(
-                            minimum=0.01,
-                            maximum=0.99,
-                            value=0.99,
-                            label="Top p"
-                        )
-                        do_sample = Checkbox(
-                            value=False,
-                            label="Do sample"
-                        )
-                        temperature = Slider(
-                            minimum=0.1,
-                            maximum=1,
-                            value=0.35,
-                            label="Temperature"
-                        )
-                        parameters_compontents = [num_chunks, max_new_tokens, repetition_penalty, top_k, top_p, do_sample, temperature]
                 # Add Examples manually
-                gr.Examples(
-                    examples=[
                         ["Qui va crear la guerra de les Galaxies?"],
                         ["Quin era el nom real de Voltaire?"],
-                        ["Què fan al BSC?"]
                     ],
                     inputs=[input_],  # only inputs
                 )
@@ -246,14 +185,16 @@ def gradio_app():
         clear_btn.click(
             fn=clear,
             inputs=[],
-            outputs=[input_, output, source_context, context_evaluation] + parameters_compontents,
-              queue=False,
-              api_name=False
         )
         submit_btn.click(
             fn=submit_input,
-            inputs=[input_]+ parameters_compontents,
             outputs=[output, source_context, context_evaluation],
             api_name="get-results"
         )
@@ -269,6 +210,7 @@ def gradio_app():
         #             fn=submit_input,
         #         )
         demo.launch(show_api=True)

         "temperature": temperature
     }
+    print("Model parameters: ", model_parameters)
     output, context, source = generate(input_, model_parameters)
     sources_markup = ""
         None,
         None,
         None,
+        gr.Number(value=5, label="Num. Retrieved Chunks", minimum=1, interactive=True)
     )
         # App Description
         # =====================================================================================================================================
         with gr.Row():
+            with gr.Column():
+                gr.Markdown("""# Demo de Retrieval (only) Viquipèdia""")
         with gr.Row(equal_height=False):
             # User Input
             # =====================================================================================================================================
             with gr.Column(scale=2, variant="panel"):
                     placeholder="Qui va crear la guerra de les Galaxies ?",
                 )
                 with gr.Row(variant="default"):
                     clear_btn = Button("Clear",)
                     submit_btn = Button("Submit", variant="primary", interactive=False)
+                with gr.Row(variant="default"):
+                    num_chunks = gr.Number(value=5, label="Num. Retrieved Chunks", minimum=1, interactive=True)
                 # Add Examples manually
+                gr.Examples( examples=[
                         ["Qui va crear la guerra de les Galaxies?"],
                         ["Quin era el nom real de Voltaire?"],
+                        ["Què fan al BSC?"],
+                        # No existèix aquesta entrada a la VDB
+                        # https://ca.wikipedia.org/wiki/Imperi_Gal%C3%A0ctic
+                        # ["Què és un Imperi Galàctic?"],
+                        # ["Què és l'Imperi Galàctic d'Isaac Asimov?"],
+                        # ["Què és l'Imperi Galàctic de la Guerra de les Galàxies?"]
                     ],
                     inputs=[input_],  # only inputs
                 )
         clear_btn.click(
             fn=clear,
             inputs=[],
+            outputs=[input_, output, source_context, context_evaluation, num_chunks],
+            # outputs=[input_, output, source_context, context_evaluation] + parameters_compontents,
+            queue=False,
+            api_name=False
         )
         submit_btn.click(
             fn=submit_input,
+            # inputs=[input_] + parameters_compontents,
+            inputs=[input_] + [num_chunks],
             outputs=[output, source_context, context_evaluation],
             api_name="get-results"
         )
         #             fn=submit_input,
         #         )
+        # input_, output, source_context, context_evaluation, num_chunks = clear()
         demo.launch(show_api=True)

handler.py DELETED Viewed

@@ -1,14 +0,0 @@
-import json
-class ContentHandler():
-    content_type = "application/json"
-    accepts = "application/json"
-    def transform_input(self, prompt: str, model_kwargs: dict) -> bytes:
-        input_str = json.dumps({'inputs': prompt, 'parameters': model_kwargs})
-        return input_str.encode('utf-8')
-    def transform_output(self, output: bytes) -> str:
-        response_json = json.loads(output.read().decode("utf-8"))
-        return response_json[0]["generated_text"]

input_reader.py DELETED Viewed

@@ -1,22 +0,0 @@
-from typing import List
-from llama_index.core.constants import DEFAULT_CHUNK_OVERLAP, DEFAULT_CHUNK_SIZE
-from llama_index.core.readers import SimpleDirectoryReader
-from llama_index.core.schema import Document
-from llama_index.core import Settings
-class InputReader:
-    def __init__(self, input_dir: str) -> None:
-        self.reader = SimpleDirectoryReader(input_dir=input_dir)
-    def parse_documents(
-        self,
-        show_progress: bool = True,
-        chunk_size: int = DEFAULT_CHUNK_SIZE,
-        chunk_overlap: int = DEFAULT_CHUNK_OVERLAP,
-    ) -> List[Document]:
-        Settings.chunk_size = chunk_size
-        Settings.chunk_overlap = chunk_overlap
-        documents = self.reader.load_data(show_progress=show_progress)
-        return documents

rag.py CHANGED Viewed

@@ -42,6 +42,7 @@ class RAG:
         logging.info("RAG loaded!")
         logging.info( self.vectore_store)
     def rerank_contexts(self, instruction, contexts, number_of_contexts=1):
         """
         Rerank the contexts based on their relevance to the given instruction.
@@ -86,21 +87,28 @@ class RAG:
         logging.info("RETRIEVE DOCUMENTS")
         logging.info(f"Instruction: {instruction}")
         embedding = self.vectore_store._embed_query(instruction)
         logging.info(f"Query embedding generated: {len(embedding)}")
-        documents_retrieved = self.vectore_store.similarity_search_with_score_by_vector(
-            embedding,
-            k=self.rerank_number_contexts)
         logging.info(f"Documents retrieved: {len(documents_retrieved)}")
-        # documents_retrieved = self.vectore_store.similarity_search_with_score(instruction, k=self.rerank_number_contexts)
         if self.rerank_model:
             logging.info("RERANK DOCUMENTS")
             documents_reranked = self.rerank_contexts(instruction, documents_retrieved, number_of_contexts=number_of_contexts)
         else:
             logging.info("NO RERANKING")
             documents_reranked = documents_retrieved[:number_of_contexts]
         return documents_reranked

         logging.info("RAG loaded!")
         logging.info( self.vectore_store)
     def rerank_contexts(self, instruction, contexts, number_of_contexts=1):
         """
         Rerank the contexts based on their relevance to the given instruction.
         logging.info("RETRIEVE DOCUMENTS")
         logging.info(f"Instruction: {instruction}")
+        # Embed the query
+        # ==============================================================================================================
         embedding = self.vectore_store._embed_query(instruction)
         logging.info(f"Query embedding generated: {len(embedding)}")
+        # Retrieve documents
+        # ==============================================================================================================
+        documents_retrieved = self.vectore_store.similarity_search_with_score_by_vector(embedding, k=number_of_contexts)
         logging.info(f"Documents retrieved: {len(documents_retrieved)}")
+        # Reranking
+        # ==============================================================================================================
         if self.rerank_model:
             logging.info("RERANK DOCUMENTS")
             documents_reranked = self.rerank_contexts(instruction, documents_retrieved, number_of_contexts=number_of_contexts)
         else:
             logging.info("NO RERANKING")
             documents_reranked = documents_retrieved[:number_of_contexts]
+        # ==============================================================================================================
         return documents_reranked