Merge pull request #34 from pattern-tech/feat/llms-support

yasinfakhar · web-flow · commit 35eeb7af9472 · 2025-02-20T16:43:44.000+03:30
Feat/llms support
diff --git a/api/.env.sample b/api/.env.sample
@@ -1,5 +1,3 @@
-OPENAI_API_KEY=
-
 POSTGRES_HOST=postgres
 POSTGRES_PORT=5432
 POSTGRES_DB=pattern-core
@@ -11,7 +9,7 @@ JWT_SECRET_KEY=mysecretkey
 QDRANT_URL=http://qdrant:6333
 QDRANT_COLLECTION=pattern-core
 
-LANGCHAIN_API_KEY=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
+LANGCHAIN_API_KEY=
 
 #functions
 GOOGLE_SEARCH_URL=https://google.serper.dev/search
@@ -24,4 +22,24 @@ EXA_URL=https://api.exa.ai
 PERPLEXITY_URL=https://api.perplexity.ai
 TAVILY_URL=https://api.tavily.com
 
-SECRET_KEY=
+SECRET_KEY=
+
+#llm
+# +-------------+------------------------------------------------+
+# | Provider    | Model                                          |
+# +-------------+------------------------------------------------+
+# | openai      | gpt-4o-mini                                    |
+# | google      | gemini-2.0-flash                               |
+# | together    | deepseek-ai/DeepSeek-R1-Distill-Llama-70B-free |
+# | ollama      | llama3.3                                       |
+# | groq        | llama-3.3-70b-versatile                        |
+# | fireworks   | accounts/fireworks/models/firefunction-v2      |
+# | huggingface | meta-llama/Llama-3.3-70B-Instruct              |
+# +-------------+------------------------------------------------+
+
+LLM_SERVICE=openai
+LLM_MODEL=gpt-4o-mini
+LLM_API_KEY=
+
+OLLAMA_HOST=
+OLLAMA_MODELS=
diff --git a/api/.gitignore b/api/.gitignore
@@ -213,4 +213,6 @@ cython_debug/
 **/.DS_Store
 
 data_processing/*
-!data_processing/*.py
+!data_processing/*.py
+
+/.vscode
diff --git a/api/requirements.txt b/api/requirements.txt
@@ -3,19 +3,18 @@ fastapi==0.115.5
 uvicorn==0.32.0
 python-dotenv==1.0.1
 bcrypt==4.2.0
-pydantic==2.9.2
-pydantic[email]==2.9.2
+pydantic==2.9.0
+pydantic[email]==2.9.0
 sqlalchemy==2.0.35
 python-jose==3.3.0
 passlib==1.7.4
 langgraph==0.2.53
 langchain-community==0.3.7
-langchain-openai==0.2.9
 langchain-qdrant==0.2.0
 langchain-postgres==0.0.12
 psycopg==3.2.3
 psycopg-pool==3.2.4
-psycopg2-binary==2.9.10 
+psycopg2-binary==2.9.10
 newsapi-python==0.2.7
 cryptography==44.0.0
 beautifulsoup4==4.12.3
@@ -24,3 +23,11 @@ web3==7.6.1
 moralis==0.1.49
 scalar_fastapi==1.0.3
 python-multipart==0.0.19
+langchain-openai==0.3.0
+langchain-ollama==0.2.3
+langchain-together==0.3.0
+langchain-fireworks==0.2.7
+langchain-google-genai==2.0.7
+langchain-google-vertexai==2.0.9
+langchain-groq==0.2.4
+langchain-huggingface==0.1.2
diff --git a/api/src/agent/services/agent_service.py b/api/src/agent/services/agent_service.py
@@ -1,16 +1,22 @@
+import os
 import json
 import asyncio
 
 from typing import List
 from langchain import hub
 from pydantic import BaseModel, Field
+from langchain_ollama import ChatOllama
 from langchain_openai import ChatOpenAI
-from langgraph.prebuilt import create_react_agent
+from langchain.agents import create_react_agent
 from langchain_core.prompts import ChatPromptTemplate
 from langchain.callbacks.base import BaseCallbackHandler
 from langchain_core.callbacks import StdOutCallbackHandler
 from langchain_core.runnables.history import RunnableWithMessageHistory
-from langchain.agents import AgentExecutor, create_openai_functions_agent
+from langchain.agents import (AgentExecutor,
+                              create_openai_functions_agent,
+                              create_tool_calling_agent)
+
+from src.agent.tools.shared_tools import init_llm
 
 
 class PlanStep(BaseModel):
@@ -127,20 +133,28 @@ def __init__(self, tools, memory=None, streaming: bool = True):
         # Set up the streaming callback if streaming is enabled.
         if streaming:
             self.streaming_handler = StreamingCallbackHandler()
-            self.llm = ChatOpenAI(
-                model="gpt-4o-mini",
-                streaming=True,
-                callbacks=[self.streaming_handler]
-            )
+
+        self.llm = init_llm(service=os.environ["LLM_SERVICE"],
+                            model_name=os.environ["LLM_MODEL"],
+                            api_key=os.environ["LLM_API_KEY"],
+                            stream=streaming,
+                            callbacks=[self.streaming_handler])
+
+        if isinstance(self.llm, ChatOpenAI):
+            self.prompt = hub.pull("pattern-agent/pattern-agent")
+
+            self.agent = create_openai_functions_agent(
+                self.llm, self.tools, self.prompt)
+        elif isinstance(self.llm, ChatOllama):
+            self.prompt = hub.pull("hwchase17/react")
+
+            self.agent = create_react_agent(
+                llm=self.llm, tools=self.tools, prompt=self.prompt)
         else:
-            self.llm = ChatOpenAI(model="gpt-4o-mini")
+            self.prompt = hub.pull("pattern-agent/pattern-agent")
 
-        self.prompt = hub.pull("pattern-agent/pattern-agent")
-        self.agent = create_openai_functions_agent(
-            self.llm,
-            self.tools,
-            self.prompt
-        )
+            self.agent = create_tool_calling_agent(
+                llm=self.llm, tools=self.tools, prompt=self.prompt)
 
         if streaming:
             self.agent_executor = AgentExecutor(
@@ -168,7 +182,19 @@ def __init__(self, tools, memory=None, streaming: bool = True):
 
     async def stream(self, message: str):
         """
-        Asynchronously stream the agent’s response token-by-token.
+        Args:
+            message (str): The input message to be processed by the agent.
+
+        Yields:
+            str: Tokens of the agent's response as they become available.
+
+        Raises:
+            asyncio.TimeoutError: If waiting for a token from the queue times out.
+
+        Notes:
+            - If memory is enabled, the agent's response is invoked synchronously using `run_in_executor`.
+            - If memory is not enabled, the agent's response is invoked asynchronously using `arun`.
+            - The method clears any leftover tokens in the queue before starting to stream the response.
         """
         # Clear any leftover tokens.
         while not self.streaming_handler.queue.empty():
@@ -200,6 +226,18 @@ async def stream(self, message: str):
         result = await task
 
     def ask(self, message: str):
+        """
+        Sends a message to the agent and returns the response.
+
+        Args:
+            message (str): The message to send to the agent.
+
+        Returns:
+            The response from the agent.
+
+        If the agent has memory, it uses the agent with chat history to invoke the response.
+        Otherwise, it uses the agent executor to invoke the response.
+        """
         if self.memory:
             return self.agent_with_chat_history.invoke(
                 input={"input": message},
diff --git a/api/src/agent/tools/agentic/eth_blockchain_tool.py b/api/src/agent/tools/agentic/eth_blockchain_tool.py
@@ -1,11 +1,17 @@
-from langchain.tools import tool
+import os
 
 from langchain import hub
+from langchain.tools import tool
+from langchain_ollama import ChatOllama
 from langchain_openai import ChatOpenAI
-from langchain.agents import AgentExecutor, create_openai_functions_agent
+from langchain.agents import (
+    AgentExecutor,
+    create_openai_functions_agent,
+    create_tool_calling_agent)
 
-from src.agent.tools.shared_tools import handle_exceptions, timeout
+from src.agent.tools.shared_tools import init_llm
 from src.agent.tools.tools_index import get_all_tools
+from src.agent.tools.shared_tools import handle_exceptions, timeout
 
 
 @tool
@@ -35,14 +41,29 @@ def ethereum_blockchain_tool(query: str):
         ValueError: If the query cannot be parsed or contract address is invalid
     """
 
-    llm = ChatOpenAI(model="gpt-4o-mini")
-    prompt = hub.pull("pattern-agent/eth-agent")
+    llm = init_llm(service=os.environ["LLM_SERVICE"],
+                   model_name=os.environ["LLM_MODEL"],
+                   api_key=os.environ["LLM_API_KEY"],
+                   stream=False)
+
     tools = get_all_tools(tools_path="eth_blockchain_function")
 
-    agent = create_openai_functions_agent(
-        llm,
-        tools,
-        prompt)
+    if isinstance(llm, ChatOpenAI):
+        prompt = hub.pull("pattern-agent/eth-agent")
+
+        agent = create_openai_functions_agent(
+            llm, tools, prompt)
+    elif isinstance(llm, ChatOllama):
+        prompt = hub.pull("hwchase17/react")
+
+        agent = create_react_agent(
+            llm=llm, tools=tools, prompt=prompt)
+    else:
+        prompt = hub.pull("pattern-agent/eth-agent")
+
+        agent = create_tool_calling_agent(
+            llm=llm, tools=tools, prompt=prompt)
+
 
     agent_executor = AgentExecutor(
         agent=agent,
diff --git a/api/src/agent/tools/shared_tools.py b/api/src/agent/tools/shared_tools.py
@@ -2,7 +2,14 @@
 
 from typing import TypeVar
 from functools import wraps
+from langchain_groq import ChatGroq
+from langchain_openai import ChatOpenAI
+from langchain_ollama import ChatOllama
 from multiprocessing import Process, Queue
+from langchain_together import ChatTogether
+from langchain_fireworks import ChatFireworks
+from langchain_google_genai import ChatGoogleGenerativeAI
+from langchain_huggingface import ChatHuggingFace, HuggingFacePipeline
 
 T = TypeVar('T')
 
@@ -116,3 +123,79 @@ def wrapper(*args, **kwargs):
         except Exception as e:
             return f"Error: {str(e)}, Class: {e.__class__.__name__}"
     return wrapper
+
+
+def init_llm(service: str, model_name: str, api_key: str, stream: bool = False, callbacks=None):
+    """
+    Returns an instance of a chat model for the specified service.
+
+    Args:
+        service (str): Provider name: "openai", "google", "groq", "fireworks",
+            "together", "huggingface" or "ollama".
+        model_name (str): The name of the model to use.
+        api_key (str): API key for the service (not passed to "huggingface"/"ollama").
+        stream (bool, optional): Enable streaming (not used by "huggingface"). Defaults to False.
+        callbacks (list, optional): Callback handlers attached to the model. Defaults to None.
+
+    Returns:
+        An instance of the specified language model.
+
+    Raises:
+        NotImplementedError: If the specified service is not supported.
+    """
+    if service == "openai":
+        return ChatOpenAI(
+            model=model_name,
+            streaming=stream,
+            api_key=api_key,
+            callbacks=callbacks
+        )
+    elif service == "google":
+        return ChatGoogleGenerativeAI(
+            model=model_name,
+            api_key=api_key,
+            streaming=stream,
+            callbacks=callbacks
+        )
+    elif service == "groq":
+        return ChatGroq(
+            model=model_name,
+            api_key=api_key,
+            streaming=stream,
+            callbacks=callbacks
+        )
+    elif service == "fireworks":
+        return ChatFireworks(
+            model=model_name,
+            api_key=api_key,
+            streaming=stream,
+            callbacks=callbacks
+        )
+    elif service == "together":
+        return ChatTogether(
+            model=model_name,
+            together_api_key=api_key,
+            streaming=stream,
+            callbacks=callbacks
+        )
+    elif service == "huggingface":
+        pipeline_kwargs = {
+            "max_new_tokens": 512,
+            "do_sample": False,
+            "repetition_penalty": 1.03,
+        }
+        model = HuggingFacePipeline.from_model_id(
+            model_id=model_name,
+            task="text-generation",
+            pipeline_kwargs=pipeline_kwargs,
+            device_map="cpu"
+        )
+        return ChatHuggingFace(llm=model, callbacks=callbacks)
+    elif service == "ollama":
+        return ChatOllama(
+            model=model_name,
+            streaming=stream,
+            callbacks=callbacks
+        )
+    else:
+        raise NotImplementedError(f"Service {service} is not supported.")