From 2e68f1b2e101559de9bd62fddfabe7c691b84123 Mon Sep 17 00:00:00 2001
From: David Giffin <david@giffin.org>
Date: Tue, 11 Jun 2024 10:33:29 -0400
Subject: [PATCH 01/18] Adding new file

---
 release_docs_api.py | 161 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 161 insertions(+)
 create mode 100644 release_docs_api.py

diff --git a/release_docs_api.py b/release_docs_api.py
new file mode 100644
index 000000000..37271d967
--- /dev/null
+++ b/release_docs_api.py
@@ -0,0 +1,161 @@
+import os
+
+from langchain_community.graphs import Neo4jGraph
+from dotenv import load_dotenv
+from utils import (
+    create_vector_index,
+    BaseLogger,
+)
+from chains import (
+    load_embedding_model,
+    load_llm,
+    configure_llm_only_chain,
+    configure_qa_rag_chain,
+    generate_ticket,
+)
+from fastapi import FastAPI, Depends
+from pydantic import BaseModel
+from langchain.callbacks.base import BaseCallbackHandler
+from threading import Thread
+from queue import Queue, Empty
+from collections.abc import Generator
+from sse_starlette.sse import EventSourceResponse
+from fastapi.middleware.cors import CORSMiddleware
+import json
+
+load_dotenv(".env")
+
+url = os.getenv("NEO4J_URI")
+username = os.getenv("NEO4J_USERNAME")
+password = os.getenv("NEO4J_PASSWORD")
+ollama_base_url = os.getenv("OLLAMA_BASE_URL")
+embedding_model_name = os.getenv("EMBEDDING_MODEL")
+llm_name = os.getenv("LLM")
+# Remapping for Langchain Neo4j integration
+os.environ["NEO4J_URL"] = url
+
+embeddings, dimension = load_embedding_model(
+    embedding_model_name,
+    config={"ollama_base_url": ollama_base_url},
+    logger=BaseLogger(),
+)
+
+# if Neo4j is local, you can go to http://localhost:7474/ to browse the database
+neo4j_graph = Neo4jGraph(url=url, username=username, password=password)
+create_vector_index(neo4j_graph, dimension)
+
+llm = load_llm(
+    llm_name, logger=BaseLogger(), config={"ollama_base_url": ollama_base_url}
+)
+
+llm_chain = configure_llm_only_chain(llm)
+rag_chain = configure_qa_rag_chain(
+    llm, embeddings, embeddings_store_url=url, username=username, password=password
+)
+
+
+class QueueCallback(BaseCallbackHandler):
+    """Callback handler that forwards each streamed LLM token to a queue."""
+
+    def __init__(self, q):
+        self.q = q  # queue that receives the tokens; read by whoever consumes the stream
+
+    def on_llm_new_token(self, token: str, **kwargs) -> None:
+        self.q.put(token)  # publish the token as soon as the LLM emits it
+
+    def on_llm_end(self, *args, **kwargs) -> None:
+        return None  # nothing to flush; Queue.empty() only reports state and was a no-op here
+
+
+def stream(cb, q) -> Generator:
+    """Run cb() on a worker thread and yield (token, text_so_far) for each item it queues."""
+    job_done = object()
+
+    def task():
+        try:
+            cb()
+        finally:
+            q.put(job_done)  # always signal the end, or a failing cb() leaves the loop polling forever
+
+    Thread(target=task).start()
+    content = ""
+
+    while True:
+        try:
+            next_token = q.get(True, timeout=1)
+        except Empty:
+            continue  # nothing arrived within the timeout; keep waiting for the worker
+        if next_token is job_done:
+            break
+        content += next_token
+        yield next_token, content
+
+
+app = FastAPI()
+origins = ["*"]
+
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=origins,
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+
+@app.get("/")
+async def root():
+    return {"message": "Hello World"}
+
+
+class Question(BaseModel):
+    text: str
+    rag: bool = False
+
+
+class BaseTicket(BaseModel):
+    text: str
+
+
+@app.get("/query-stream")
+def qstream(question: Question = Depends()):
+    output_function = llm_chain
+    if question.rag:
+        output_function = rag_chain
+
+    q = Queue()
+
+    def cb():
+        output_function(
+            {"question": question.text, "chat_history": []},
+            callbacks=[QueueCallback(q)],
+        )
+
+    def generate():
+        yield json.dumps({"init": True, "model": llm_name})
+        for token, _ in stream(cb, q):
+            yield json.dumps({"token": token})
+
+    return EventSourceResponse(generate(), media_type="text/event-stream")
+
+
+@app.get("/query")
+async def ask(question: Question = Depends()):
+    output_function = llm_chain
+    if question.rag:
+        output_function = rag_chain
+    result = output_function(
+        {"question": question.text, "chat_history": []}, callbacks=[]
+    )
+
+    return {"result": result["answer"], "model": llm_name}
+
+
+@app.get("/generate-ticket")
+async def generate_ticket_api(question: BaseTicket = Depends()):
+    new_title, new_question = generate_ticket(
+        neo4j_graph=neo4j_graph,
+        llm_chain=llm_chain,
+        input_question=question.text,
+    )
+    return {"result": {"title": new_title, "text": new_question}, "model": llm_name}

From ffa8331bdd6a7970c05bd3d0176a6c1273cc49b2 Mon Sep 17 00:00:00 2001
From: David Giffin <david@giffin.org>
Date: Tue, 11 Jun 2024 11:16:13 -0400
Subject: [PATCH 02/18] Migrating to chroma for release docs

---
 release_docs_api.py | 97 +++++++++++++++++++++++++++++++++------------
 1 file changed, 72 insertions(+), 25 deletions(-)

diff --git a/release_docs_api.py b/release_docs_api.py
index 37271d967..753efea7d 100644
--- a/release_docs_api.py
+++ b/release_docs_api.py
@@ -1,18 +1,27 @@
 import os
 
-from langchain_community.graphs import Neo4jGraph
+import chromadb
+from chromadb.config import DEFAULT_TENANT, DEFAULT_DATABASE, Settings
+from langchain.vectorstores import Chroma
+
 from dotenv import load_dotenv
-from utils import (
-    create_vector_index,
-    BaseLogger,
-)
+from utils import BaseLogger
 from chains import (
     load_embedding_model,
     load_llm,
     configure_llm_only_chain,
-    configure_qa_rag_chain,
-    generate_ticket,
+    RetrievalQAWithSourcesChain
+)
+
+from langchain.chains.qa_with_sources import load_qa_with_sources_chain
+
+from langchain.prompts import (
+    ChatPromptTemplate,
+    HumanMessagePromptTemplate,
+    SystemMessagePromptTemplate
 )
+
+
 from fastapi import FastAPI, Depends
 from pydantic import BaseModel
 from langchain.callbacks.base import BaseCallbackHandler
@@ -25,14 +34,12 @@
 
 load_dotenv(".env")
 
-url = os.getenv("NEO4J_URI")
-username = os.getenv("NEO4J_USERNAME")
-password = os.getenv("NEO4J_PASSWORD")
+chroma_collection = os.getenv("CHROMA_COLLECTION", "release-docs")
+chroma_host = os.getenv("CHROMA_HOST", "localhost") 
+chroma_port = int(os.getenv("CHROMA_PORT", 8000))
 ollama_base_url = os.getenv("OLLAMA_BASE_URL")
 embedding_model_name = os.getenv("EMBEDDING_MODEL")
 llm_name = os.getenv("LLM")
-# Remapping for Langchain Neo4j integration
-os.environ["NEO4J_URL"] = url
 
 embeddings, dimension = load_embedding_model(
     embedding_model_name,
@@ -40,17 +47,65 @@
     logger=BaseLogger(),
 )
 
-# if Neo4j is local, you can go to http://localhost:7474/ to browse the database
-neo4j_graph = Neo4jGraph(url=url, username=username, password=password)
-create_vector_index(neo4j_graph, dimension)
+# Initialize Chroma client
+chroma_client = chromadb.HttpClient(
+    host=chroma_host,
+    port=chroma_port,
+    ssl=False,
+    headers=None,
+    settings=Settings(),
+    tenant=DEFAULT_TENANT,
+    database=DEFAULT_DATABASE,
+)
+
+# create vector database if it doesn't exist
+chroma_client.get_or_create_collection(chroma_collection, metadata={"key": "value"})
 
 llm = load_llm(
     llm_name, logger=BaseLogger(), config={"ollama_base_url": ollama_base_url}
 )
 
 llm_chain = configure_llm_only_chain(llm)
-rag_chain = configure_qa_rag_chain(
-    llm, embeddings, embeddings_store_url=url, username=username, password=password
+
+# PROMPT TEMPLATE
+general_system_template = """
+Use the following pieces of context to answer the question at the end.
+The context contains question-answer pairs and their links from Stackoverflow.
+You should prefer information from accepted or more upvoted answers.
+Make sure to rely on information from the answers and not on questions to provide accurate responses.
+When you find particular answer in the context useful, make sure to cite it in the answer using the link.
+If you don't know the answer, just say that you don't know, don't try to make up an answer.
+----
+{summaries}
+----
+Each answer you generate should contain a section at the end of links to
+Stackoverflow questions and answers you found useful, which are described under Source value.
+You can only use links to StackOverflow questions that are present in the context and always
+add links to the end of the answer in the style of citations.
+Generate concise answers with references sources section of links to
+relevant StackOverflow questions only at the end of the answer.
+"""
+general_user_template = "Question:```{question}```"
+messages = [
+    SystemMessagePromptTemplate.from_template(general_system_template),
+    HumanMessagePromptTemplate.from_template(general_user_template),
+]
+qa_prompt = ChatPromptTemplate.from_messages(messages)
+
+qa_chain = load_qa_with_sources_chain(
+    llm,
+    chain_type="stuff",
+    prompt=qa_prompt,
+)
+
+
+langchainChroma = Chroma(client=chroma_client, collection_name=chroma_collection, embedding_function=embeddings)
+
+rag_chain = RetrievalQAWithSourcesChain(
+    combine_documents_chain=qa_chain,
+    retriever=langchainChroma.as_retriever(search_kwargs={"k": 2}),
+    reduce_k_below_max_tokens=False,
+    max_tokens_limit=3375,
 )
 
 
@@ -151,11 +206,3 @@ async def ask(question: Question = Depends()):
     return {"result": result["answer"], "model": llm_name}
 
 
-@app.get("/generate-ticket")
-async def generate_ticket_api(question: BaseTicket = Depends()):
-    new_title, new_question = generate_ticket(
-        neo4j_graph=neo4j_graph,
-        llm_chain=llm_chain,
-        input_question=question.text,
-    )
-    return {"result": {"title": new_title, "text": new_question}, "model": llm_name}

From a8306559eb64c0c21e23c384ffd49bca90cabbf1 Mon Sep 17 00:00:00 2001
From: David Giffin <david@giffin.org>
Date: Tue, 11 Jun 2024 14:27:42 -0400
Subject: [PATCH 03/18] Adding ingest script

---
 release-docs-ingest.py | 170 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 170 insertions(+)
 create mode 100644 release-docs-ingest.py

diff --git a/release-docs-ingest.py b/release-docs-ingest.py
new file mode 100644
index 000000000..0f71ccf07
--- /dev/null
+++ b/release-docs-ingest.py
@@ -0,0 +1,170 @@
+#!/usr/bin/env python3
+import os
+import glob
+from typing import List
+from multiprocessing import Pool
+from tqdm import tqdm
+
+from langchain.document_loaders import (
+    CSVLoader,
+    EverNoteLoader,
+    PyMuPDFLoader,
+    TextLoader,
+    UnstructuredEmailLoader,
+    UnstructuredEPubLoader,
+    UnstructuredHTMLLoader,
+    UnstructuredMarkdownLoader,
+    UnstructuredODTLoader,
+    UnstructuredPowerPointLoader,
+    UnstructuredWordDocumentLoader,
+)
+
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.vectorstores import Chroma
+from langchain.embeddings import HuggingFaceEmbeddings
+from langchain.docstore.document import Document
+from constants import CHROMA_SETTINGS
+
+
+# Load environment variables
+persist_directory = os.environ.get('PERSIST_DIRECTORY', 'db')
+source_directory = os.environ.get('SOURCE_DIRECTORY', 'source_documents')
+embeddings_model_name = os.environ.get('EMBEDDINGS_MODEL_NAME', 'all-MiniLM-L6-v2')
+chunk_size = 500
+chunk_overlap = 50
+
+# Custom document loaders
+class MyElmLoader(UnstructuredEmailLoader):
+    """Wrapper to fallback to text/plain when default does not work"""
+
+    def load(self) -> List[Document]:
+        """Wrapper adding fallback for elm without html"""
+        try:
+            try:
+                doc = UnstructuredEmailLoader.load(self)
+            except ValueError as e:
+                if 'text/html content not found in email' in str(e):
+                    # Try plain text
+                    self.unstructured_kwargs["content_source"]="text/plain"
+                    doc = UnstructuredEmailLoader.load(self)
+                else:
+                    raise
+        except Exception as e:
+            # Add file_path to exception message
+            raise type(e)(f"{self.file_path}: {e}") from e
+
+        return doc
+
+
+# Map file extensions to document loaders and their arguments
+LOADER_MAPPING = {
+    ".csv": (CSVLoader, {}),
+    # ".docx": (Docx2txtLoader, {}),
+    ".doc": (UnstructuredWordDocumentLoader, {}),
+    ".docx": (UnstructuredWordDocumentLoader, {}),
+    ".enex": (EverNoteLoader, {}),
+    ".eml": (MyElmLoader, {}),
+    ".epub": (UnstructuredEPubLoader, {}),
+    ".html": (UnstructuredHTMLLoader, {}),
+    ".md": (UnstructuredMarkdownLoader, {}),
+    ".odt": (UnstructuredODTLoader, {}),
+    ".pdf": (PyMuPDFLoader, {}),
+    ".ppt": (UnstructuredPowerPointLoader, {}),
+    ".pptx": (UnstructuredPowerPointLoader, {}),
+    ".txt": (TextLoader, {"encoding": "utf8"}),
+    # Add more mappings for other file extensions and loaders as needed
+}
+
+
+def load_single_document(file_path: str) -> List[Document]:
+    """Load one file with the loader mapped to its extension; return [] when it is skipped."""
+    if os.path.getsize(file_path) == 0:
+        print(f"Empty file {file_path}. Ignoring it.")
+        return []
+    ext = os.path.splitext(file_path)[1]
+    if ext not in LOADER_MAPPING:
+        print(f"Unsupported file {file_path}. Ignoring it.")
+        return []
+    loader_class, loader_args = LOADER_MAPPING[ext]
+    try:
+        return loader_class(file_path, **loader_args).load()
+    except Exception:  # not a bare except: KeyboardInterrupt/SystemExit must still stop the worker
+        print(f"Corrupted file {file_path}. Ignoring it.")
+        return []
+
+
+def load_documents(source_dir: str, ignored_files: List[str] = []) -> List[Document]:
+    """
+    Loads all documents under source_dir whose extension is in LOADER_MAPPING,
+    skipping ignored_files; results follow pool completion order, not file order.
+    """
+    ignored = set(ignored_files)  # O(1) membership; the argument is only read, never mutated
+    filtered_files = [
+        file_path
+        for ext in LOADER_MAPPING
+        for file_path in glob.glob(os.path.join(source_dir, f"**/*{ext}"), recursive=True)
+        if file_path not in ignored
+    ]
+    results = []
+    with Pool(processes=os.cpu_count()) as pool:
+        with tqdm(total=len(filtered_files), desc='Loading new documents', ncols=80) as pbar:
+            for docs in pool.imap_unordered(load_single_document, filtered_files):
+                results.extend(docs or [])  # skipped files yield None or an empty list
+                pbar.update()
+
+    return results
+
+def process_documents(ignored_files: List[str] = []) -> List[Document]:
+    """
+    Load documents and split in chunks
+    """
+    print(f"Loading documents from {source_directory}")
+    documents = load_documents(source_directory, ignored_files)
+    if not documents:
+        print("No new documents to load")
+        exit(0)
+    print(f"Loaded {len(documents)} new documents from {source_directory}")
+    text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
+    texts = text_splitter.split_documents(documents)
+    print(f"Split into {len(texts)} chunks of text (max. {chunk_size} tokens each)")
+    return texts
+
+def does_vectorstore_exist(persist_directory: str) -> bool:
+    """
+    Checks if vectorstore exists
+    """
+    if os.path.exists(os.path.join(persist_directory, 'index')):
+        if os.path.exists(os.path.join(persist_directory, 'chroma-collections.parquet')) and os.path.exists(os.path.join(persist_directory, 'chroma-embeddings.parquet')):
+            list_index_files = glob.glob(os.path.join(persist_directory, 'index/*.bin'))
+            list_index_files += glob.glob(os.path.join(persist_directory, 'index/*.pkl'))
+            # At least 3 documents are needed in a working vectorstore
+            if len(list_index_files) > 3:
+                return True
+    return False
+
+def main():
+    # Create embeddings
+    embeddings = HuggingFaceEmbeddings(model_name=embeddings_model_name)
+
+    if does_vectorstore_exist(persist_directory):
+        # Update and store locally vectorstore
+        print(f"Appending to existing vectorstore at {persist_directory}")
+        db = Chroma(persist_directory=persist_directory, embedding_function=embeddings, client_settings=CHROMA_SETTINGS)
+        collection = db.get()
+        texts = process_documents([metadata['source'] for metadata in collection['metadatas']])
+        print(f"Creating embeddings. May take some minutes...")
+        db.add_documents(texts)
+    else:
+        # Create and store locally vectorstore
+        print("Creating new vectorstore")
+        texts = process_documents()
+        print(f"Creating embeddings. May take some minutes...")
+        db = Chroma.from_documents(texts, embeddings, persist_directory=persist_directory)
+    db.persist()
+    db = None
+
+    print(f"Ingestion complete! You can now run privateGPT.py to query your documents")
+
+
+if __name__ == "__main__":
+    main()

From dba3c993782d15a898d907dceed5a11d05184ebf Mon Sep 17 00:00:00 2001
From: David Giffin <david@giffin.org>
Date: Tue, 11 Jun 2024 16:54:19 -0400
Subject: [PATCH 04/18] Adding dockerfile

---
 release_docs_api.Dockerfile | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)
 create mode 100644 release_docs_api.Dockerfile

diff --git a/release_docs_api.Dockerfile b/release_docs_api.Dockerfile
new file mode 100644
index 000000000..e126b4027
--- /dev/null
+++ b/release_docs_api.Dockerfile
@@ -0,0 +1,21 @@
+FROM langchain/langchain
+
+WORKDIR /app
+
+RUN apt-get update && apt-get install -y \
+    build-essential \
+    curl \
+    software-properties-common \
+    && rm -rf /var/lib/apt/lists/*
+
+COPY requirements.txt .
+
+RUN pip install --upgrade -r requirements.txt
+
+COPY release_docs_api.py .
+COPY utils.py .
+COPY chains.py .
+
+HEALTHCHECK CMD curl --fail http://localhost:8504
+
+ENTRYPOINT [ "uvicorn", "release_docs_api:app", "--host", "0.0.0.0", "--port", "8504" ]

From 5ddc935be7c3cc4fe0f667bc20237502c6af11f3 Mon Sep 17 00:00:00 2001
From: David Giffin <david@giffin.org>
Date: Tue, 11 Jun 2024 21:01:59 -0400
Subject: [PATCH 05/18] Making things work

---
 chains.py                  |  13 +++-
 release-docker-compose.yml | 118 +++++++++++++++++++++++++++++++++++++
 requirements.txt           |   1 +
 3 files changed, 129 insertions(+), 3 deletions(-)
 create mode 100644 release-docker-compose.yml

diff --git a/chains.py b/chains.py
index 9ad10f406..068b43be1 100644
--- a/chains.py
+++ b/chains.py
@@ -1,3 +1,4 @@
+import os
 
 from langchain_openai import OpenAIEmbeddings
 from langchain_community.embeddings import OllamaEmbeddings
@@ -48,9 +49,15 @@ def load_embedding_model(embedding_model_name: str, logger=BaseLogger(), config=
         dimension = 768
         logger.info("Embedding: Using Google Generative AI Embeddings")
     else:
-        embeddings = SentenceTransformerEmbeddings(
-            model_name="all-MiniLM-L6-v2", cache_folder="/embedding_model"
-        )
+        if os.path.exists("/embedding_model"):
+            embeddings = SentenceTransformerEmbeddings(
+                model_name="all-MiniLM-L6-v2", cache_folder="/embedding_model"
+            )
+        else:
+            embeddings = SentenceTransformerEmbeddings(
+                model_name="all-MiniLM-L6-v2", cache_folder="./embedding_model"
+            )
+
         dimension = 384
         logger.info("Embedding: Using SentenceTransformer")
     return embeddings, dimension
diff --git a/release-docker-compose.yml b/release-docker-compose.yml
new file mode 100644
index 000000000..82d6deed1
--- /dev/null
+++ b/release-docker-compose.yml
@@ -0,0 +1,118 @@
+services:
+
+  llm: &llm
+    image: ollama/ollama:latest
+    profiles: ["linux"]
+    networks:
+      - net
+
+  llm-gpu:
+    <<: *llm
+    profiles: ["linux-gpu"]
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: all
+              capabilities: [gpu]
+
+  chroma:
+    image: chromadb/chroma
+    volumes:
+      # Be aware that indexed data are located in "/chroma/chroma/"
+      # Default configuration for persist_directory in chromadb/config.py
+      # Read more about deployments: https://docs.trychroma.com/deployment
+      - chroma-data:/chroma/chroma
+    command: "--workers 1 --host 0.0.0.0 --port 8000 --proxy-headers --log-config chromadb/log_config.yml --timeout-keep-alive 30"
+    environment:
+      - IS_PERSISTENT=TRUE
+      - CHROMA_SERVER_AUTHN_PROVIDER=${CHROMA_SERVER_AUTHN_PROVIDER}
+      - CHROMA_SERVER_AUTHN_CREDENTIALS_FILE=${CHROMA_SERVER_AUTHN_CREDENTIALS_FILE}
+      - CHROMA_SERVER_AUTHN_CREDENTIALS=${CHROMA_SERVER_AUTHN_CREDENTIALS}
+      - CHROMA_AUTH_TOKEN_TRANSPORT_HEADER=${CHROMA_AUTH_TOKEN_TRANSPORT_HEADER}
+      - PERSIST_DIRECTORY=${PERSIST_DIRECTORY:-/chroma/chroma}
+      - CHROMA_OTEL_EXPORTER_ENDPOINT=${CHROMA_OTEL_EXPORTER_ENDPOINT}
+      - CHROMA_OTEL_EXPORTER_HEADERS=${CHROMA_OTEL_EXPORTER_HEADERS}
+      - CHROMA_OTEL_SERVICE_NAME=${CHROMA_OTEL_SERVICE_NAME}
+      - CHROMA_OTEL_GRANULARITY=${CHROMA_OTEL_GRANULARITY}
+      - CHROMA_SERVER_NOFILE=${CHROMA_SERVER_NOFILE}
+    restart: unless-stopped # possible values are: "no", "always", "on-failure", "unless-stopped"
+    ports:
+      - "8000:8000"
+    healthcheck:
+      # Adjust below to match your container port
+      test: [ "CMD", "curl", "-f", "http://localhost:8000/api/v1/heartbeat" ]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+    networks:
+      - net
+
+  api:
+    build:
+      context: .
+      dockerfile: release_docs_api.Dockerfile
+    volumes:
+      - $PWD/embedding_model:/embedding_model
+    environment:
+      - OPENAI_API_KEY=${OPENAI_API_KEY}
+      - GOOGLE_API_KEY=${GOOGLE_API_KEY}  
+      - OLLAMA_BASE_URL=${OLLAMA_BASE_URL-http://host.docker.internal:11434}
+      - LLM=${LLM-llama2}
+      - EMBEDDING_MODEL=${EMBEDDING_MODEL-sentence_transformer}
+      - LANGCHAIN_ENDPOINT=${LANGCHAIN_ENDPOINT-"https://api.smith.langchain.com"}
+      - LANGCHAIN_TRACING_V2=${LANGCHAIN_TRACING_V2-false}
+      - LANGCHAIN_PROJECT=${LANGCHAIN_PROJECT}
+      - LANGCHAIN_API_KEY=${LANGCHAIN_API_KEY}
+    networks:
+      - net
+    depends_on:
+      chroma:
+        condition: service_healthy
+#      pull-model:
+#        condition: service_completed_successfully
+    x-develop:
+      watch:
+        - action: rebuild
+          path: .
+          ignore:
+            - loader.py
+            - bot.py
+            - pdf_bot.py
+            - front-end/
+    ports:
+      - 8504:8504
+    healthcheck:
+      test: ["CMD-SHELL", "wget --no-verbose --tries=1 http://localhost:8504/ || exit 1"]
+      interval: 5s
+      timeout: 3s
+      retries: 5
+
+  front-end:
+    build:
+      context: .
+      dockerfile: front-end.Dockerfile
+    x-develop:
+      watch:
+        - action: sync
+          path: ./front-end
+          target: /app
+          ignore:
+            - ./front-end/node_modules/
+        - action: rebuild
+          path: ./front-end/package.json
+    depends_on:
+      api:
+        condition: service_healthy
+    networks:
+      - net
+    ports:
+      - 8505:8505
+
+volumes:
+  chroma-data:
+    driver: local
+
+networks:
+  net:
diff --git a/requirements.txt b/requirements.txt
index ad0f6f905..a62b27e50 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -17,3 +17,4 @@ boto3
 langchain-openai
 langchain-community
 langchain-google-genai
+chromadb==0.5.0

From 59bd3344a75237bf3933ce35605cb1d3c026a69d Mon Sep 17 00:00:00 2001
From: David Giffin <david@giffin.org>
Date: Tue, 11 Jun 2024 22:41:41 -0400
Subject: [PATCH 06/18] Making the ingest script work

---
 release-docs-ingest.py | 96 +++++++++++++++++++++++++++---------------
 1 file changed, 63 insertions(+), 33 deletions(-)
 mode change 100644 => 100755 release-docs-ingest.py

diff --git a/release-docs-ingest.py b/release-docs-ingest.py
old mode 100644
new mode 100755
index 0f71ccf07..1199ad03a
--- a/release-docs-ingest.py
+++ b/release-docs-ingest.py
@@ -1,11 +1,18 @@
 #!/usr/bin/env python3
 import os
+
+import chromadb
+from chromadb.config import DEFAULT_TENANT, DEFAULT_DATABASE, Settings
+
+from dotenv import load_dotenv
+from utils import BaseLogger
+
 import glob
 from typing import List
 from multiprocessing import Pool
 from tqdm import tqdm
 
-from langchain.document_loaders import (
+from langchain_community.document_loaders import (
     CSVLoader,
     EverNoteLoader,
     PyMuPDFLoader,
@@ -21,18 +28,52 @@
 
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.vectorstores import Chroma
-from langchain.embeddings import HuggingFaceEmbeddings
 from langchain.docstore.document import Document
-from constants import CHROMA_SETTINGS
+
+from chains import (
+    load_embedding_model,
+    load_llm,
+)
+
+
+load_dotenv(".env")
+
+chroma_collection = os.getenv("CHROMA_COLLECTION", "release-docs")
+chroma_host = os.getenv("CHROMA_HOST", "localhost")
+chroma_port = int(os.getenv("CHROMA_PORT", 8000))
+ollama_base_url = os.getenv("OLLAMA_BASE_URL")
+embedding_model_name = os.getenv("EMBEDDING_MODEL")
+llm_name = os.getenv("LLM")
+
+embeddings, dimension = load_embedding_model(
+    embedding_model_name,
+    config={"ollama_base_url": ollama_base_url},
+    logger=BaseLogger(),
+)
 
 
 # Load environment variables
-persist_directory = os.environ.get('PERSIST_DIRECTORY', 'db')
-source_directory = os.environ.get('SOURCE_DIRECTORY', 'source_documents')
-embeddings_model_name = os.environ.get('EMBEDDINGS_MODEL_NAME', 'all-MiniLM-L6-v2')
+source_directory = os.environ.get('SOURCE_DIRECTORY', 'documents')
 chunk_size = 500
 chunk_overlap = 50
 
+
+# Initialize Chroma client
+chroma_client = chromadb.HttpClient(
+    host=chroma_host,
+    port=chroma_port,
+    ssl=False,
+    headers=None,
+    settings=Settings(),
+    tenant=DEFAULT_TENANT,
+    database=DEFAULT_DATABASE,
+)
+
+# create vector database if it doesn't exist
+chroma_client.get_or_create_collection(chroma_collection, metadata={"key": "value"})
+
+
+
 # Custom document loaders
 class MyElmLoader(UnstructuredEmailLoader):
     """Wrapper to fallback to text/plain when default does not work"""
@@ -66,7 +107,7 @@ def load(self) -> List[Document]:
     ".eml": (MyElmLoader, {}),
     ".epub": (UnstructuredEPubLoader, {}),
     ".html": (UnstructuredHTMLLoader, {}),
-    ".md": (UnstructuredMarkdownLoader, {}),
+    ".md": (UnstructuredMarkdownLoader, { "mode": "elements" }),
     ".odt": (UnstructuredODTLoader, {}),
     ".pdf": (PyMuPDFLoader, {}),
     ".ppt": (UnstructuredPowerPointLoader, {}),
@@ -129,37 +170,26 @@ def process_documents(ignored_files: List[str] = []) -> List[Document]:
     print(f"Split into {len(texts)} chunks of text (max. {chunk_size} tokens each)")
     return texts
 
-def does_vectorstore_exist(persist_directory: str) -> bool:
+def does_vectorstore_exist() -> bool:
     """
     Checks if vectorstore exists
     """
-    if os.path.exists(os.path.join(persist_directory, 'index')):
-        if os.path.exists(os.path.join(persist_directory, 'chroma-collections.parquet')) and os.path.exists(os.path.join(persist_directory, 'chroma-embeddings.parquet')):
-            list_index_files = glob.glob(os.path.join(persist_directory, 'index/*.bin'))
-            list_index_files += glob.glob(os.path.join(persist_directory, 'index/*.pkl'))
-            # At least 3 documents are needed in a working vectorstore
-            if len(list_index_files) > 3:
-                return True
-    return False
+    chroma_client.get_or_create_collection(chroma_collection, metadata={"key": "value"})
+    return True
 
 def main():
-    # Create embeddings
-    embeddings = HuggingFaceEmbeddings(model_name=embeddings_model_name)
-
-    if does_vectorstore_exist(persist_directory):
-        # Update and store locally vectorstore
-        print(f"Appending to existing vectorstore at {persist_directory}")
-        db = Chroma(persist_directory=persist_directory, embedding_function=embeddings, client_settings=CHROMA_SETTINGS)
-        collection = db.get()
-        texts = process_documents([metadata['source'] for metadata in collection['metadatas']])
-        print(f"Creating embeddings. May take some minutes...")
-        db.add_documents(texts)
-    else:
-        # Create and store locally vectorstore
-        print("Creating new vectorstore")
-        texts = process_documents()
-        print(f"Creating embeddings. May take some minutes...")
-        db = Chroma.from_documents(texts, embeddings, persist_directory=persist_directory)
+
+    does_vectorstore_exist()
+
+    # Update and store locally vectorstore
+    print(f"Appending to existing vectorstore")
+    db = Chroma(client=chroma_client, collection_name=chroma_collection, embedding_function=embeddings)
+    collection = db.get()
+
+    texts = process_documents([metadata['source'] for metadata in collection['metadatas']])
+    print(f"Creating embeddings. May take some minutes...")
+    db.add_documents(texts)
+
     db.persist()
     db = None
 

From 7a101ae0af91fe14ae8c5fcba74da867fadf0420 Mon Sep 17 00:00:00 2001
From: David Giffin <david@giffin.org>
Date: Wed, 12 Jun 2024 11:00:26 -0400
Subject: [PATCH 07/18] Updating ingest script

---
 release-docs-ingest.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/release-docs-ingest.py b/release-docs-ingest.py
index 1199ad03a..c96fe19fd 100755
--- a/release-docs-ingest.py
+++ b/release-docs-ingest.py
@@ -107,7 +107,8 @@ def load(self) -> List[Document]:
     ".eml": (MyElmLoader, {}),
     ".epub": (UnstructuredEPubLoader, {}),
     ".html": (UnstructuredHTMLLoader, {}),
-    ".md": (UnstructuredMarkdownLoader, { "mode": "elements" }),
+    # ".md": (UnstructuredMarkdownLoader, { "mode": "elements", "encoding": "utf8" }),
+    ".md": (TextLoader, {"encoding": "utf8"}),
     ".odt": (UnstructuredODTLoader, {}),
     ".pdf": (PyMuPDFLoader, {}),
     ".ppt": (UnstructuredPowerPointLoader, {}),
@@ -190,7 +191,6 @@ def main():
     print(f"Creating embeddings. May take some minutes...")
     db.add_documents(texts)
 
-    db.persist()
     db = None
 
     print(f"Ingestion complete! You can now run privateGPT.py to query your documents")

From 8b99fbb4dffbe7ea54e6b76a6cd437ec9aa70009 Mon Sep 17 00:00:00 2001
From: David Giffin <david@giffin.org>
Date: Wed, 12 Jun 2024 13:36:50 -0400
Subject: [PATCH 08/18] Adding gitbook ingest script

---
 release-docs-gitbook-ingest.py | 110 +++++++++++++++++++++++++++++++++
 1 file changed, 110 insertions(+)
 create mode 100755 release-docs-gitbook-ingest.py

diff --git a/release-docs-gitbook-ingest.py b/release-docs-gitbook-ingest.py
new file mode 100755
index 000000000..7a3e1206b
--- /dev/null
+++ b/release-docs-gitbook-ingest.py
@@ -0,0 +1,110 @@
+#!/usr/bin/env python3
+import os
+
+import chromadb
+from chromadb.config import DEFAULT_TENANT, DEFAULT_DATABASE, Settings
+
+from dotenv import load_dotenv
+from utils import BaseLogger
+
+import glob
+from typing import List
+from multiprocessing import Pool
+from tqdm import tqdm
+
+from langchain_community.document_loaders import (
+    GitbookLoader,
+)
+
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.vectorstores import Chroma
+from langchain.docstore.document import Document
+
+from chains import (
+    load_embedding_model,
+    load_llm,
+)
+
+
+load_dotenv(".env")
+
+chroma_collection = os.getenv("CHROMA_COLLECTION", "release-docs")
+chroma_host = os.getenv("CHROMA_HOST", "localhost")
+chroma_port = int(os.getenv("CHROMA_PORT", 8000))
+ollama_base_url = os.getenv("OLLAMA_BASE_URL")
+embedding_model_name = os.getenv("EMBEDDING_MODEL")
+llm_name = os.getenv("LLM")  # NOTE(review): not referenced anywhere else in this script
+
+embeddings, dimension = load_embedding_model(
+    embedding_model_name,
+    config={"ollama_base_url": ollama_base_url},
+    logger=BaseLogger(),
+)
+
+
+# GitBook source URL (overridable via GITBOOK_URL) and text-splitter chunking parameters
+gitbook_url = os.environ.get('GITBOOK_URL', 'https://docs.release.com')
+chunk_size = 500
+chunk_overlap = 50
+
+
+# Initialize the HTTP client for the Chroma server at chroma_host:chroma_port (ssl disabled)
+chroma_client = chromadb.HttpClient(
+    host=chroma_host,
+    port=chroma_port,
+    ssl=False,
+    headers=None,
+    settings=Settings(),
+    tenant=DEFAULT_TENANT,
+    database=DEFAULT_DATABASE,
+)
+
+# Create the target collection on the Chroma server if it doesn't exist yet
+chroma_client.get_or_create_collection(chroma_collection, metadata={"key": "value"})
+
+
+def process_documents(ignored_files: List[str] = []) -> List[Document]:
+    """
+    Load the GitBook site at gitbook_url and return its pages split into chunks (exits the process if nothing loads); ignored_files is never read.
+    """
+    print(f"Loading documents from {gitbook_url}")
+
+    loader = GitbookLoader(gitbook_url, load_all_paths=True)
+    documents = loader.load()
+
+    if not documents:
+        print("No new documents to load")
+        exit(0)
+    print(f"Loaded {len(documents)} new documents from {source_directory}")
+    text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
+    texts = text_splitter.split_documents(documents)
+    print(f"Split into {len(texts)} chunks of text (max. {chunk_size} tokens each)")
+    return texts
+
+def does_vectorstore_exist() -> bool:
+    """
+    Ensure the Chroma collection exists (get_or_create_collection creates it if missing); always returns True
+    """
+    chroma_client.get_or_create_collection(chroma_collection, metadata={"key": "value"})
+    return True
+
+def main():
+
+    does_vectorstore_exist()
+
+    # Append the freshly loaded chunks to the collection on the Chroma server
+    print(f"Appending to existing vectorstore")
+    db = Chroma(client=chroma_client, collection_name=chroma_collection, embedding_function=embeddings)
+    collection = db.get()  # only the 'metadatas' of this result are used below
+
+    texts = process_documents([metadata['source'] for metadata in collection['metadatas']])  # NOTE(review): process_documents never reads this list
+    print(f"Creating embeddings. May take some minutes...")
+    db.add_documents(texts)
+
+    db = None
+
+    print(f"Ingestion complete! You can now run privateGPT.py to query your documents")
+
+
+if __name__ == "__main__":
+    main()

From 69900b2ee267814ee0067c3dcf028b41fe769a43 Mon Sep 17 00:00:00 2001
From: David Giffin <david@giffin.org>
Date: Wed, 12 Jun 2024 13:48:26 -0400
Subject: [PATCH 09/18] fixing bug

---
 release-docs-gitbook-ingest.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/release-docs-gitbook-ingest.py b/release-docs-gitbook-ingest.py
index 7a3e1206b..8c6b33b4f 100755
--- a/release-docs-gitbook-ingest.py
+++ b/release-docs-gitbook-ingest.py
@@ -75,7 +75,7 @@ def process_documents(ignored_files: List[str] = []) -> List[Document]:
     if not documents:
         print("No new documents to load")
         exit(0)
-    print(f"Loaded {len(documents)} new documents from {source_directory}")
+    print(f"Loaded {len(documents)} new documents from {gitbook_url}")
     text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
     texts = text_splitter.split_documents(documents)
     print(f"Split into {len(texts)} chunks of text (max. {chunk_size} tokens each)")

From f00228b25492a2cbef08b453dcdd5b2a69ec9845 Mon Sep 17 00:00:00 2001
From: David Giffin <david@giffin.org>
Date: Wed, 12 Jun 2024 13:51:07 -0400
Subject: [PATCH 10/18] Adding dockerfile

---
 release_docs_gitbook_ingest.Dockerfile | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)
 create mode 100644 release_docs_gitbook_ingest.Dockerfile

diff --git a/release_docs_gitbook_ingest.Dockerfile b/release_docs_gitbook_ingest.Dockerfile
new file mode 100644
index 000000000..fa7b240e5
--- /dev/null
+++ b/release_docs_gitbook_ingest.Dockerfile
@@ -0,0 +1,20 @@
+FROM langchain/langchain
+
+WORKDIR /app
+
+RUN apt-get update && apt-get install -y \
+    build-essential \
+    curl \
+    software-properties-common \
+    && rm -rf /var/lib/apt/lists/*
+
+COPY requirements.txt .
+
+RUN pip install --upgrade -r requirements.txt
+
+COPY release-docs-gitbook-ingest.py .
+COPY utils.py .
+COPY chains.py .
+COPY images ./images
+
+ENTRYPOINT ["python", "release-docs-gitbook-ingest.py"]

From 61bf1465950f1e01321b5304bbce7d84e19f09be Mon Sep 17 00:00:00 2001
From: David Giffin <david@giffin.org>
Date: Wed, 12 Jun 2024 14:22:17 -0400
Subject: [PATCH 11/18] updating log message

---
 release-docs-gitbook-ingest.py | 2 +-
 release-docs-ingest.py         | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/release-docs-gitbook-ingest.py b/release-docs-gitbook-ingest.py
index 8c6b33b4f..39f4281d5 100755
--- a/release-docs-gitbook-ingest.py
+++ b/release-docs-gitbook-ingest.py
@@ -103,7 +103,7 @@ def main():
 
     db = None
 
-    print(f"Ingestion complete! You can now run privateGPT.py to query your documents")
+    print(f"Ingestion complete! You can now query your documents")
 
 
 if __name__ == "__main__":
diff --git a/release-docs-ingest.py b/release-docs-ingest.py
index c96fe19fd..60129316a 100755
--- a/release-docs-ingest.py
+++ b/release-docs-ingest.py
@@ -193,7 +193,7 @@ def main():
 
     db = None
 
-    print(f"Ingestion complete! You can now run privateGPT.py to query your documents")
+    print(f"Ingestion complete! You can now query your documents")
 
 
 if __name__ == "__main__":

From 454d00985c8065877ad864d14208c5180b9ae365 Mon Sep 17 00:00:00 2001
From: David Giffin <david@giffin.org>
Date: Wed, 12 Jun 2024 15:03:32 -0400
Subject: [PATCH 12/18] white space

---
 release_docs_gitbook_ingest.Dockerfile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/release_docs_gitbook_ingest.Dockerfile b/release_docs_gitbook_ingest.Dockerfile
index fa7b240e5..102bbe8a3 100644
--- a/release_docs_gitbook_ingest.Dockerfile
+++ b/release_docs_gitbook_ingest.Dockerfile
@@ -18,3 +18,4 @@ COPY chains.py .
 COPY images ./images
 
 ENTRYPOINT ["python", "release-docs-gitbook-ingest.py"]
+

From a51a6ea0f7b94ecaad3a0fc3fa3bf40c2884e711 Mon Sep 17 00:00:00 2001
From: David Giffin <david@giffin.org>
Date: Wed, 12 Jun 2024 15:07:38 -0400
Subject: [PATCH 13/18] remove white space

---
 release_docs_gitbook_ingest.Dockerfile | 1 -
 1 file changed, 1 deletion(-)

diff --git a/release_docs_gitbook_ingest.Dockerfile b/release_docs_gitbook_ingest.Dockerfile
index 102bbe8a3..fa7b240e5 100644
--- a/release_docs_gitbook_ingest.Dockerfile
+++ b/release_docs_gitbook_ingest.Dockerfile
@@ -18,4 +18,3 @@ COPY chains.py .
 COPY images ./images
 
 ENTRYPOINT ["python", "release-docs-gitbook-ingest.py"]
-

From be36e71bfad9a5c4cb4a854199a1e4ad31e0c0cb Mon Sep 17 00:00:00 2001
From: David Giffin <david@giffin.org>
Date: Wed, 12 Jun 2024 17:51:23 -0400
Subject: [PATCH 14/18] Updating prompt

---
 release_docs_api.py | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/release_docs_api.py b/release_docs_api.py
index 753efea7d..82483807c 100644
--- a/release_docs_api.py
+++ b/release_docs_api.py
@@ -70,20 +70,17 @@
 # PROMPT TEMPLATE
 general_system_template = """
 Use the following pieces of context to answer the question at the end.
-The context contains question-answer pairs and their links from Stackoverflow.
-You should prefer information from accepted or more upvoted answers.
-Make sure to rely on information from the answers and not on questions to provide accurate responses.
-When you find particular answer in the context useful, make sure to cite it in the answer using the link.
+The context contains snippets from docs.release.com the Release documentation.
+When you find particular documentation snippet useful, make sure to cite it in the answer using the link.
 If you don't know the answer, just say that you don't know, don't try to make up an answer.
 ----
 {summaries}
 ----
 Each answer you generate should contain a section at the end of links to
-Stackoverflow questions and answers you found useful, which are described under Source value.
-You can only use links to StackOverflow questions that are present in the context and always
+You can only use links to docs.release.com that are present in the context and always
 add links to the end of the answer in the style of citations.
 Generate concise answers with references sources section of links to
-relevant StackOverflow questions only at the end of the answer.
+relevant docs.release.com documentation only at the end of the answer.
 """
 general_user_template = "Question:```{question}```"
 messages = [

From 48ae5d206a52ddf5cd6c239390497839650127d1 Mon Sep 17 00:00:00 2001
From: David Giffin <david@giffin.org>
Date: Sat, 3 Aug 2024 14:15:40 -0400
Subject: [PATCH 15/18] remove prompt for tommy to do it live!

---
 release_docs_api.py | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/release_docs_api.py b/release_docs_api.py
index 82483807c..f0401ec77 100644
--- a/release_docs_api.py
+++ b/release_docs_api.py
@@ -69,18 +69,9 @@
 
 # PROMPT TEMPLATE
 general_system_template = """
-Use the following pieces of context to answer the question at the end.
-The context contains snippets from docs.release.com the Release documentation.
-When you find particular documentation snippet useful, make sure to cite it in the answer using the link.
-If you don't know the answer, just say that you don't know, don't try to make up an answer.
 ----
 {summaries}
 ----
-Each answer you generate should contain a section at the end of links to
-You can only use links to docs.release.com that are present in the context and always
-add links to the end of the answer in the style of citations.
-Generate concise answers with references sources section of links to
-relevant docs.release.com documentation only at the end of the answer.
 """
 general_user_template = "Question:```{question}```"
 messages = [

From f47dce175fc34cffabc92b0f648e6641a7fc5a8d Mon Sep 17 00:00:00 2001
From: David Giffin <david@giffin.org>
Date: Sat, 3 Aug 2024 15:19:19 -0400
Subject: [PATCH 16/18] add rawness

---
 release_docs_api.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/release_docs_api.py b/release_docs_api.py
index f0401ec77..2013d21a4 100644
--- a/release_docs_api.py
+++ b/release_docs_api.py
@@ -68,6 +68,7 @@
 llm_chain = configure_llm_only_chain(llm)
 
 # PROMPT TEMPLATE
+print("the dog is really raw!")
 general_system_template = """
 ----
 {summaries}

From 1e503fd2cf292f4319b3b887a47c835622d19807 Mon Sep 17 00:00:00 2001
From: David Giffin <david@giffin.org>
Date: Sat, 3 Aug 2024 16:48:23 -0400
Subject: [PATCH 17/18] Updating app template

---
 .release/application_template.yaml | 295 +++++++++++++----------------
 1 file changed, 134 insertions(+), 161 deletions(-)

diff --git a/.release/application_template.yaml b/.release/application_template.yaml
index 3687c98fd..a091c4ab8 100644
--- a/.release/application_template.yaml
+++ b/.release/application_template.yaml
@@ -1,15 +1,12 @@
 ---
-auto_deploy: true
-context: release-ry6clz
+execution_type: server
+context: release-ai
 domain: ai-playground.releaseapp.io
 repo_name: awesome-release/genai-stack
 hostnames:
-- api: api-${env_id}.${domain}
-- bot: bot-${env_id}.${domain}
-- front-end: front-end-${env_id}.${domain}
-- loader: loader-${env_id}.${domain}
-- pdf-bot: pdf-bot-${env_id}.${domain}
-- ollama: ollama-${env_id}.${domain}
+- api: api-release-docs-rag-${env_id}.${domain}
+- chroma: chroma-release-docs-rag-${env_id}.${domain}
+- front-end: front-end-release-docs-rag-${env_id}.${domain}
 environment_templates:
 - name: ephemeral
 - name: permanent
@@ -21,85 +18,39 @@ resources:
     limits: 1Gi
     requests: 100Mi
   replicas: 1
-ingress:
-  proxy_body_size: 30m
-  proxy_buffer_size: 64k
-  proxy_buffering: true
-  proxy_buffers_number: 4
-  proxy_max_temp_file_size: 1024m
-  proxy_read_timeout: '180'
-  proxy_send_timeout: '180'
 shared_volumes:
 - name: models
-  size: 40Gi
+  size: 60Gi
   type: persistent
 parameters:
 - name: llm
   type: string
   description: Can be any Ollama model tag, or gpt-4 or gpt-3.5 or claudev2
-  default: llama2
-- name: embedding_model
-  type: string
-  description: Can be sentence_transformer, openai, aws, ollama or google-genai-embedding-001
-  default: sentence_transformer
+  default: llama3:8b
+  optional: true
 services:
-- name: ollama
-  image: ollama/ollama
-  command:
-  - "/bin/ollama"
-  args:
-  - serve
-  envs:
-  - key: LLM
-    value: "${parameters.llm}"
-  - key: EMBEDDING_MODEL
-    value: "${parameters.embedding_model}"
-  memory:
-    limits: 64Gi
-    requests: 16Gi
-  cpu:
-    limits: 128
-    requests: 12
-  ports:
-  - type: node_port
-    target_port: '11434'
-    port: '11434'
-    loadbalancer: false
-  node_selector:
-  - key: nvidia.com/gpu
-    value: 'true'
-  - key: beta.kubernetes.io/instance-type
-    value: g5.12xlarge
-  volumes:
-  - name: shmem
-    type: shmem
-    size: 16Gi
-    mount_path: "/dev/shm"
-  - claim: models
-    mount_path: "/models"
 - name: api
   image: awesome-release/genai-stack/api
   build:
     context: "."
-    dockerfile: api.Dockerfile
+    dockerfile: release_docs_api.Dockerfile
   has_repo: true
   volumes: []
   command:
   - uvicorn
-  - api:app
+  - release_docs_api:app
   - "--host"
   - 0.0.0.0
   - "--port"
   - '8504'
-  envs:
-  - key: LLM
-    value: "${parameters.llm}"
-  - key: EMBEDDING_MODEL
-    value: "${parameters.embedding_model}"
+  depends_on:
+  - chroma
   readiness_probe:
-    http_get:
-      path: "/"
-      port: 8504
+    exec:
+      command:
+      - curl
+      - "-f"
+      - http://localhost:8504/
     period_seconds: 5
     timeout_seconds: 3
     failure_threshold: 5
@@ -109,152 +60,174 @@ services:
     target_port: '8504'
     port: '8504'
     loadbalancer: false
-- name: bot
-  image: awesome-release/genai-stack/bot
-  build:
-    context: "."
-    dockerfile: bot.Dockerfile
-  has_repo: true
-  volumes: []
-  command:
-  - streamlit
-  - run
-  - bot.py
-  - "--server.port=8501"
-  - "--server.address=0.0.0.0"
   envs:
+  - key: CHROMA_HOST
+    value: chroma
+  - key: CHROMA_PORT
+    value: 8000
   - key: LLM
     value: "${parameters.llm}"
   - key: EMBEDDING_MODEL
-    value: "${parameters.embedding_model}"
-  ports:
-  - type: node_port
-    target_port: '8501'
-    port: '8501'
-    loadbalancer: false
-- name: database
-  image: neo4j:5.11
+    value: sentence_transformer
+  - key: OLLAMA_BASE_URL
+    value: http://ollama:11434
+  - key: LANGCHAIN_API_KEY
+    value: "${LANGCHAIN_API_KEY}"
+  - key: LANGCHAIN_PROJECT
+    value: "${LANGCHAIN_PROJECT}"
+  - key: LANGCHAIN_ENDPOINT
+    value: https://api.smith.langchain.com
+  - key: LANGCHAIN_TRACING_V2
+    value: false
+- name: chroma
+  image: chromadb/chroma
   has_repo: false
-  volumes: []
+  args:
+  - "--workers"
+  - '1'
+  - "--host"
+  - 0.0.0.0
+  - "--port"
+  - '8000'
+  - "--proxy-headers"
+  - "--log-config"
+  - chromadb/log_config.yml
+  - "--timeout-keep-alive"
+  - '30'
   readiness_probe:
-    http_get:
-      path: "/"
-      port: 7474
-    period_seconds: 15
-    timeout_seconds: 30
-    failure_threshold: 10
+    exec:
+      command:
+      - curl
+      - "-f"
+      - http://localhost:8000/api/v1/heartbeat
+    period_seconds: 30
+    timeout_seconds: 10
+    failure_threshold: 3
     initial_delay_seconds: 0
   ports:
   - type: node_port
-    target_port: '7474'
-    port: '7474'
-    loadbalancer: false
-  - type: node_port
-    target_port: '7687'
-    port: '7687'
+    target_port: '8000'
+    port: '8000'
     loadbalancer: false
 - name: front-end
-  static: true
-  build_base: front-end
-  build_output_directory: dist/
-  build_package_install_command: npm install
-  build_command: npm run build
-- name: loader
-  image: awesome-release/genai-stack/loader
+  image: awesome-release/genai-stack/front-end
   build:
     context: "."
-    dockerfile: loader.Dockerfile
+    dockerfile: front-end.Dockerfile
   has_repo: true
-  volumes: []
   command:
-  - streamlit
+  - npm
   - run
-  - loader.py
-  - "--server.port=8502"
-  - "--server.address=0.0.0.0"
+  - dev
+  depends_on:
+  - api
   ports:
   - type: node_port
-    target_port: '8502'
-    port: '8502'
+    target_port: '8505'
+    port: '8505'
     loadbalancer: false
+  envs:
+  - key: VITE_API_BASE_URL
+    value: "${API_INGRESS_URL}"
+- name: ollama
+  image: ollama/ollama:latest
+  has_repo: false
+  memory:
+    limits: 64Gi
+    requests: 4Gi
+  cpu:
+    limits: 128
+    requests: 2
+  command:
+  - ollama
+  args:
+  - serve
+  ports:
   - type: node_port
-    target_port: '8080'
-    port: '8081'
+    target_port: '11434'
+    port: '11434'
     loadbalancer: false
-- name: pdf-bot
-  image: awesome-release/genai-stack/pdf_bot
+  node_selector:
+  - key: nvidia.com/gpu
+    value: 'true'
+  volumes:
+  - name: shmem
+    type: shmem
+    size: 16Gi
+    mount_path: "/dev/shm"
+  - claim: models
+    mount_path: "/root/.ollama"
+- name: gitbook-ingest
   build:
     context: "."
-    dockerfile: pdf_bot.Dockerfile
+    dockerfile: release_docs_gitbook_ingest.Dockerfile
   has_repo: true
+jobs:
+- name: gitbook-ingest
+  from_services: gitbook-ingest
   command:
-  - streamlit
-  - run
-  - pdf_bot.py
-  - "--server.port=8503"
-  - "--server.address=0.0.0.0"
+  - python
+  - release-docs-gitbook-ingest.py
   envs:
-  - key: LLM
-    value: "${parameters.llm}"
-  - key: EMBEDDING_MODEL
-    value: "${parameters.embedding_model}"
-  ports:
-  - type: node_port
-    target_port: '8503'
-    port: '8503'
-    loadbalancer: false
+  - key: CHROMA_HOST
+    value: chroma
+  - key: CHROMA_PORT
+    value: 8000
+  - key: OLLAMA_BASE_URL
+    value: http://ollama:11434
+  - key: LANGCHAIN_API_KEY
+    value: NONE
+  - key: LANGCHAIN_PROJECT
+    value: LANGCHAIN_PROJECT
+  - key: LANGCHAIN_ENDPOINT
+    value: https://api.smith.langchain.com
+  - key: LANGCHAIN_TRACING_V2
+    value: 'false'
+  memory:
+    limits: 4Gi
+    requests: 100Mi
+  cpu:
+    limits: 2000m
+    requests: 100m
 - name: pull-model
-  image: awesome-release/genai-stack/pull-model
-  build:
-    context: "."
-    dockerfile: pull_model.Dockerfile
-  has_repo: true
+  image: releaseai/genai-stack-pull-model
   command:
   - bb
   - "-f"
   - pull_model.clj
-jobs:
-- name: pull-model
-  from_services: pull-model
   envs:
+  - key: OLLAMA_BASE_URL
+    value: http://ollama:11434
   - key: LLM
     value: "${parameters.llm}"
-  - key: EMBEDDING_MODEL
-    value: "${parameters.embedding_model}"
 workflows:
 - name: setup
   parallelize:
-  - step: services-parallel
-    wait_for_finish: false
-    tasks:
-    - services.front-end
   - step: services-0
     tasks:
-    - services.database
+    - services.chroma
     - services.ollama
   - step: services-1
     tasks:
+    - services.api
     - jobs.pull-model
   - step: services-2
     tasks:
-    - services.api
-    - services.bot
-    - services.loader
-    - services.pdf-bot
+    - services.front-end
 - name: patch
   parallelize:
-  - step: services-parallel
-    wait_for_finish: false
-    tasks:
-    - services.front-end
-  - step: services-1
+  - step: services-0
     tasks:
     - services.api
-    - services.bot
-    - services.loader
-    - services.pdf-bot
+- name: gitbook-ingest
+  parallelize:
+  - step: gitbook-ingest
+    tasks:
+    - jobs.gitbook-ingest
 - name: teardown
   parallelize:
   - step: remove-environment
     tasks:
     - release.remove_environment
+
+

From d56c9206e6e5ab75c3d081184f47a44b62527823 Mon Sep 17 00:00:00 2001
From: David Giffin <david@giffin.org>
Date: Sat, 3 Aug 2024 18:24:11 -0400
Subject: [PATCH 18/18] Making image for release docs api

---
 .github/workflows/build-and-push-images.yml | 85 +--------------------
 .release/application_template.yaml          | 44 +----------
 2 files changed, 5 insertions(+), 124 deletions(-)

diff --git a/.github/workflows/build-and-push-images.yml b/.github/workflows/build-and-push-images.yml
index e535efc67..06c8d7f88 100644
--- a/.github/workflows/build-and-push-images.yml
+++ b/.github/workflows/build-and-push-images.yml
@@ -9,7 +9,7 @@ concurrency:
 
 on:
   push:
-    branches: [release-docker-build]
+    branches: [release-docs]
     paths-ignore:
       - '**.md'
       - 'images/**/*'
@@ -77,93 +77,16 @@ jobs:
             type=ref,event=tag
             type=ref,event=pr
 
-      - name: "Build and push multi-platform Docker image: genai-stack/pull-model"
+      - name: "Build and push multi-platform Docker image: genai-stack/release-api"
         uses: docker/build-push-action@v5
         with:
           context: .
-          file: ./pull_model.Dockerfile
+          file: ./release_docs_api.Dockerfile
           push: true
           platforms: linux/amd64,linux/arm64
           # tags: ${{ steps.meta.outputs.tags }}
-          tags: releaseai/genai-stack-pull-model:latest
+          tags: releaseai/genai-stack-release-api:latest
           labels: ${{ steps.meta.outputs.labels }}
           cache-from: type=gha
           cache-to: type=gha,mode=max
 
-      - name: "Build and push multi-platform Docker image: genai-stack/loader"
-        uses: docker/build-push-action@v5
-        with:
-          context: .
-          file: ./loader.Dockerfile
-          push: true
-          platforms: linux/amd64,linux/arm64
-          # tags: ${{ steps.meta.outputs.tags }}
-          tags: releaseai/genai-stack-loader:latest
-          labels: ${{ steps.meta.outputs.labels }}
-          cache-from: type=gha
-          cache-to: type=gha,mode=max
-
-      - name: "Build and push multi-platform Docker image: genai-stack/custom-loader"
-        uses: docker/build-push-action@v5
-        with:
-          context: .
-          file: ./custom_loader.Dockerfile
-          push: true
-          platforms: linux/amd64,linux/arm64
-          # tags: ${{ steps.meta.outputs.tags }}
-          tags: releaseai/genai-stack-custom-loader:latest
-          labels: ${{ steps.meta.outputs.labels }}
-          cache-from: type=gha
-          cache-to: type=gha,mode=max
-
-      - name: "Build and push multi-platform Docker image: genai-stack/bot"
-        uses: docker/build-push-action@v5
-        with:
-          context: .
-          file: ./bot.Dockerfile
-          push: true
-          platforms: linux/amd64,linux/arm64
-          # tags: ${{ steps.meta.outputs.tags }}
-          tags: releaseai/genai-stack-bot:latest
-          labels: ${{ steps.meta.outputs.labels }}
-          cache-from: type=gha
-          cache-to: type=gha,mode=max
-
-      - name: "Build and push multi-platform Docker image: genai-stack/pdf-bot"
-        uses: docker/build-push-action@v5
-        with:
-          context: .
-          file: ./pdf_bot.Dockerfile
-          push: true
-          platforms: linux/amd64,linux/arm64
-          # tags: ${{ steps.meta.outputs.tags }}
-          tags: releaseai/genai-stack-pdf-bot:latest
-          labels: ${{ steps.meta.outputs.labels }}
-          cache-from: type=gha
-          cache-to: type=gha,mode=max
-
-      - name: "Build and push multi-platform Docker image: genai-stack/api"
-        uses: docker/build-push-action@v5
-        with:
-          context: .
-          file: ./api.Dockerfile
-          push: true
-          platforms: linux/amd64,linux/arm64
-          # tags: ${{ steps.meta.outputs.tags }}
-          tags: releaseai/genai-stack-api:latest
-          labels: ${{ steps.meta.outputs.labels }}
-          cache-from: type=gha
-          cache-to: type=gha,mode=max
-
-      - name: "Build and push multi-platform Docker image: genai-stack/front-end"
-        uses: docker/build-push-action@v5
-        with:
-          context: .
-          file: ./front-end.Dockerfile
-          push: true
-          platforms: linux/amd64,linux/arm64
-          # tags: ${{ steps.meta.outputs.tags }}
-          tags: releaseai/genai-stack-front-end:latest
-          labels: ${{ steps.meta.outputs.labels }}
-          cache-from: type=gha
-          cache-to: type=gha,mode=max
diff --git a/.release/application_template.yaml b/.release/application_template.yaml
index a091c4ab8..f52677d30 100644
--- a/.release/application_template.yaml
+++ b/.release/application_template.yaml
@@ -1,7 +1,5 @@
 ---
 execution_type: server
-context: release-ai
-domain: ai-playground.releaseapp.io
 repo_name: awesome-release/genai-stack
 hostnames:
 - api: api-release-docs-rag-${env_id}.${domain}
@@ -30,11 +28,7 @@ parameters:
   optional: true
 services:
 - name: api
-  image: awesome-release/genai-stack/api
-  build:
-    context: "."
-    dockerfile: release_docs_api.Dockerfile
-  has_repo: true
+  image: awesome-release/genai-stack/release-api
   volumes: []
   command:
   - uvicorn
@@ -157,38 +151,7 @@ services:
     mount_path: "/dev/shm"
   - claim: models
     mount_path: "/root/.ollama"
-- name: gitbook-ingest
-  build:
-    context: "."
-    dockerfile: release_docs_gitbook_ingest.Dockerfile
-  has_repo: true
 jobs:
-- name: gitbook-ingest
-  from_services: gitbook-ingest
-  command:
-  - python
-  - release-docs-gitbook-ingest.py
-  envs:
-  - key: CHROMA_HOST
-    value: chroma
-  - key: CHROMA_PORT
-    value: 8000
-  - key: OLLAMA_BASE_URL
-    value: http://ollama:11434
-  - key: LANGCHAIN_API_KEY
-    value: NONE
-  - key: LANGCHAIN_PROJECT
-    value: LANGCHAIN_PROJECT
-  - key: LANGCHAIN_ENDPOINT
-    value: https://api.smith.langchain.com
-  - key: LANGCHAIN_TRACING_V2
-    value: 'false'
-  memory:
-    limits: 4Gi
-    requests: 100Mi
-  cpu:
-    limits: 2000m
-    requests: 100m
 - name: pull-model
   image: releaseai/genai-stack-pull-model
   command:
@@ -219,11 +182,6 @@ workflows:
   - step: services-0
     tasks:
     - services.api
-- name: gitbook-ingest
-  parallelize:
-  - step: gitbook-ingest
-    tasks:
-    - jobs.gitbook-ingest
 - name: teardown
   parallelize:
   - step: remove-environment