# Chat with your PDFs!

# Run this notebook from the terminal using:
#
# voila chat-with-docs.ipynb
#
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
import ipywidgets as widgets
from IPython.display import display
import numpy as np
local_model = False # TODO: the following local setup doesn't work yet.
if local_model:
    # Optional fully-local stack (local embeddings + local LLM via Ollama)
    # instead of the default remote models. Requires:
    # pip install llama-index-embeddings-huggingface llama-index-llms-ollama
    from llama_index.core import Settings
    from llama_index.embeddings.huggingface import HuggingFaceEmbedding
    from llama_index.llms.ollama import Ollama
    
    # bge-base embedding model
    Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5")
    
    # ollama
    Settings.llm = Ollama(model="llama3", request_timeout=360.0)
class Chat():
    """Indexes a folder of PDFs and answers questions against the index.

    One instance is meant to be shared by the UI: ``load()`` (re)builds the
    vector index, ``query()`` asks a question against it.
    """

    def __init__(self):
        # Remember the last folder loaded so repeated loads of the same
        # folder are cheap no-ops.
        self.former_folder = None
        # Initialize state so count_documents()/query() are safe to call
        # before the first successful load() instead of raising AttributeError.
        self._documents = []
        self._query_engine = None

    def load(self, folder):
        """Index every ``*.pdf`` in *folder* and return the file count.

        Reloading the folder that is already loaded skips the (expensive)
        re-indexing and just returns the current count.
        """
        if self.former_folder == folder:
            return self.count_documents()
        self.former_folder = folder

        documents = SimpleDirectoryReader(folder, required_exts=[".pdf"]).load_data()
        index = VectorStoreIndex.from_documents(documents)
        self._query_engine = index.as_query_engine()
        self._documents = documents
        return self.count_documents()

    def count_documents(self):
        """Return the number of distinct source files among loaded documents."""
        # One PDF yields many document chunks; a set of file names counts
        # unique files (no need for numpy here).
        return len({d.metadata["file_name"] for d in self._documents})

    def query(self, question):
        """Send *question* to the query engine and return its response.

        Raises:
            RuntimeError: if no documents have been loaded yet.
        """
        if self._query_engine is None:
            raise RuntimeError("No documents loaded yet; call load() first.")
        return self._query_engine.query(question)
# Create user interface
# Folder picker: text field holding the PDF directory, plus a "Load" button.
docs_input = widgets.Text(value="./data", placeholder="Enter a directory here")
load_button = widgets.Button(description="Load")

# Transcript area: accumulates status messages and question/answer HTML.
output_label = widgets.HTML(value="")

# Question entry: text field plus a "Submit" button.
text_input = widgets.Text(placeholder="Enter a question here")
submit_button = widgets.Button(description="Submit")

# Single backend instance shared by the event handlers below.
chat = Chat()

def on_load(e):
    """(Re)index the folder named in ``docs_input`` and report the count."""
    number_of_documents = chat.load(docs_input.value)
    output_label.value = f"""
    <div style='text-align:left; color: darkgrey; font-size: 20px'>{number_of_documents} documents loaded.</div>
    """
    # Enable submitting only while there is something to query; this also
    # re-disables the button if a reload finds no documents (the original
    # left it enabled from a previous successful load).
    submit_button.disabled = number_of_documents == 0

def on_submit(e):
    """Send the typed question to the LLM and append the Q/A to the transcript."""
    question = text_input.value
    # Ignore empty submissions (also fired when we programmatically clear
    # the field below, since the observer sees that value change too).
    if not question:
        return

    # submit prompt to LLM first; clearing the field only afterwards means
    # the user's question is not lost if the query raises.
    answer = chat.query(question)
    text_input.value = ""

    # Append question and answer to the existing HTML content
    output_label.value += f"""
    <div style='text-align:right; color: blue; font-size: 20px'>{question}</div>
    <div style='text-align:left; color: darkgreen; font-size: 20px'>{answer}</div>
    """

# Attach the event handlers to the text fields and the buttons.
# continuous_update=False makes the Text widgets commit their value on
# Enter/blur instead of on every keystroke.
docs_input.continuous_update = False
# Observe only the "value" trait: a bare observe() would invoke the handler
# for every trait change on the widget, not just edits to the text.
docs_input.observe(on_load, names="value")
load_button.on_click(on_load)

text_input.continuous_update = False
text_input.observe(on_submit, names="value")
submit_button.on_click(on_submit)
# Disabled until at least one document has been loaded (see on_load).
submit_button.disabled = True

# Arrange the widgets for display: folder row on top, transcript in the
# middle, question row at the bottom.
folder_row = widgets.HBox([docs_input, load_button])
question_row = widgets.HBox([text_input, submit_button])
display(folder_row, output_label, question_row)