-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathapp.py
More file actions
274 lines (224 loc) · 9.31 KB
/
app.py
File metadata and controls
274 lines (224 loc) · 9.31 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
"""
Streamlit Web UI for RAG System.
Provides file upload, document processing, and chat interface.
"""
import streamlit as st
import os
import tempfile
from typing import List, Tuple
from src.config import Config
from src.rag import DocumentProcessor, RAGChain
from src.utils.helpers import setup_logger, format_context
logger = setup_logger(__name__)
# ---------------------------------------------------------------------------
# Page-level setup: configure the Streamlit page, then inject custom CSS.
# Runs once at import time, before any widgets are rendered.
# ---------------------------------------------------------------------------
st.set_page_config(
    page_title="RAG Document Assistant",
    page_icon="📚",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Style rules used by the HTML fragments rendered elsewhere in the app.
_CUSTOM_CSS = """
<style>
    .main-header {
        font-size: 2.5rem;
        font-weight: bold;
        color: #1f77b4;
        margin-bottom: 1rem;
    }
    .response-box {
        background-color: #f0f2f6;
        padding: 1.5rem;
        border-radius: 0.5rem;
        margin: 1rem 0;
    }
    .source-box {
        background-color: #e8f4f8;
        padding: 1rem;
        border-left: 4px solid #1f77b4;
        margin: 0.5rem 0;
    }
</style>
"""
# unsafe_allow_html is required for Streamlit to render raw <style> tags.
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
def initialize_session_state():
    """Ensure every session-state key used by the app exists.

    Each key is created with an empty default only when missing, so
    Streamlit reruns never clobber live state.
    """
    defaults = {
        "processor": None,            # DocumentProcessor instance (lazy)
        "rag_chain": None,            # RAGChain instance (lazy)
        "documents_processed": 0,     # total chunk count
        "chat_history": [],           # list of {"role", "content"} dicts
        "uploaded_files": [],         # names of processed files
    }
    for key, default in defaults.items():
        if key not in st.session_state:
            st.session_state[key] = default
def validate_configuration():
    """Verify that the required configuration (API keys, etc.) is present.

    Returns:
        bool: True when ``Config.validate()`` succeeds; otherwise an error
        is shown in the UI and False is returned.
    """
    try:
        Config.validate()
    except ValueError as e:
        # Surface the problem in the UI instead of crashing the app.
        st.error(f"❌ Configuration Error: {str(e)}")
        st.info("Please create a `.env` file with the required API keys")
        return False
    return True
def process_uploaded_files(uploaded_files) -> int:
    """
    Process uploaded files and prepare them for RAG.

    Each file is written into a temporary directory (the processor reads
    from disk), chunked by the DocumentProcessor, and recorded in session
    state. Chunk counts accumulate across batches so the statistics stay
    consistent with the growing list of uploaded file names.

    Args:
        uploaded_files: List of uploaded files from Streamlit

    Returns:
        int: Number of chunks processed in this batch (0 on failure)
    """
    if not uploaded_files:
        return 0
    try:
        with st.spinner("Processing documents..."):
            # Lazily create the processor so it persists across reruns.
            if st.session_state.processor is None:
                st.session_state.processor = DocumentProcessor()
            processor = st.session_state.processor
            total_chunks = 0
            # Uploaded files exist only in memory; persist them to a temp
            # directory so the processor can read them from a path.
            with tempfile.TemporaryDirectory() as temp_dir:
                for uploaded_file in uploaded_files:
                    file_path = os.path.join(temp_dir, uploaded_file.name)
                    with open(file_path, "wb") as f:
                        f.write(uploaded_file.getbuffer())
                    chunks = processor.process_file(file_path, uploaded_file.name)
                    total_chunks += chunks
                    # BUGFIX: guard against duplicate entries when the same
                    # file name is processed again in a later batch.
                    if uploaded_file.name not in st.session_state.uploaded_files:
                        st.session_state.uploaded_files.append(uploaded_file.name)
            # Initialize the RAG chain once documents are available.
            if st.session_state.rag_chain is None:
                st.session_state.rag_chain = RAGChain()
            # BUGFIX: accumulate (was: overwrite) so multi-batch uploads
            # report the true total chunk count alongside the file list.
            st.session_state.documents_processed += total_chunks
            return total_chunks
    except Exception as e:
        message = str(e)
        # Distinguish embedding-quota errors so the user gets an
        # actionable message instead of a raw stack trace.
        if "429" in message or "rate" in message.lower() or "quota" in message.lower():
            st.error(
                "Rate limit hit while generating embeddings. Wait a moment or use a key with higher quota."
            )
        else:
            st.error(f"Error processing documents: {message}")
        logger.error(f"Error processing documents: {e}")
        return 0
def display_chat_interface():
    """Display the chat interface.

    Renders the existing chat history, accepts a new question once
    documents have been processed, queries the RAG chain, and shows the
    answer together with its source documents.
    """
    st.subheader("💬 Chat Interface")
    # Replay previous turns so the conversation survives Streamlit reruns.
    if st.session_state.chat_history:
        for message in st.session_state.chat_history:
            with st.chat_message(message["role"]):
                st.markdown(message["content"])
    # Only accept questions after at least one document has been indexed.
    if st.session_state.documents_processed > 0:
        user_input = st.chat_input(
            "Ask a question about your documents...",
            disabled=st.session_state.rag_chain is None
        )
        if user_input:
            # Record and echo the user's message before querying.
            st.session_state.chat_history.append({
                "role": "user",
                "content": user_input
            })
            with st.chat_message("user"):
                st.markdown(user_input)
            # Get response from RAG chain
            with st.spinner("Processing your question..."):
                try:
                    result = st.session_state.rag_chain.query(user_input)
                    answer = result.get("answer", "No answer generated")
                    source_docs = result.get("source_documents", [])
                    # History stores the answer with inlined source snippets
                    # (truncated to 200 chars) so replays keep the citations.
                    response_content = answer
                    if source_docs:
                        response_content += "\n\n**📖 Source Documents:**"
                        for i, doc in enumerate(source_docs, 1):
                            source_text = doc.page_content[:200] + "..." if len(doc.page_content) > 200 else doc.page_content
                            source_file = doc.metadata.get("source", "Unknown")
                            response_content += f"\n\n**Document {i}** ({source_file}):\n{source_text}"
                    # Add assistant response to history
                    st.session_state.chat_history.append({
                        "role": "assistant",
                        "content": response_content
                    })
                    # Live rendering shows sources in collapsible expanders
                    # (truncated to 500 chars) rather than inline text.
                    with st.chat_message("assistant"):
                        st.markdown(answer)
                        if source_docs:
                            st.divider()
                            st.markdown("**📖 Source Documents:**")
                            for i, doc in enumerate(source_docs, 1):
                                with st.expander(f"Document {i}: {doc.metadata.get('source', 'Unknown')}"):
                                    st.markdown(doc.page_content[:500])
                except Exception as e:
                    st.error(f"Error processing query: {str(e)}")
                    logger.error(f"Error processing query: {e}")
    else:
        st.info("📤 Please upload documents first to start chatting.")
def main():
    """Entry point: render the full RAG Document Assistant page.

    Layout: header, a narrow left column for document upload/statistics,
    a wide right column for the chat interface, and a footer.
    """
    # Page header rendered as raw HTML so the .main-header CSS applies.
    st.markdown('<h1 class="main-header">📚 RAG Document Assistant</h1>', unsafe_allow_html=True)
    st.write("Upload your documents and ask questions about them using AI-powered retrieval.")

    initialize_session_state()

    # Bail out early when API keys / config are missing.
    if not validate_configuration():
        return

    # 1:2 split — document management on the left, chat on the right.
    left_col, right_col = st.columns([1, 2])

    with left_col:
        st.subheader("📤 Document Management")
        selected_files = st.file_uploader(
            "Upload documents (.txt, .pdf, .docx)",
            type=["txt", "pdf", "docx"],
            accept_multiple_files=True,
            key="file_uploader"
        )
        if selected_files:
            st.success(f"✅ {len(selected_files)} file(s) selected")
            if st.button("📤 Process Documents", key="process_btn"):
                chunk_count = process_uploaded_files(selected_files)
                if chunk_count > 0:
                    st.success(f"✅ Documents processed! Created {chunk_count} chunks.")
                    st.balloons()

        # Statistics panel, shown once any documents have been indexed.
        if st.session_state.documents_processed > 0:
            st.divider()
            st.subheader("📊 Statistics")
            stats_left, stats_right = st.columns(2)
            with stats_left:
                st.metric("Chunks Processed", st.session_state.documents_processed)
            with stats_right:
                st.metric("Files Uploaded", len(st.session_state.uploaded_files))
            if st.session_state.uploaded_files:
                st.subheader("📋 Uploaded Files")
                for name in st.session_state.uploaded_files:
                    st.write(f"• {name}")

        # Reset everything back to a fresh session.
        if st.button("🗑️ Clear All Data"):
            st.session_state.processor = None
            st.session_state.rag_chain = None
            st.session_state.documents_processed = 0
            st.session_state.chat_history = []
            st.session_state.uploaded_files = []
            st.success("All data cleared!")
            st.rerun()

    with right_col:
        display_chat_interface()

    # Footer
    st.divider()
    st.markdown("""
    <div style="text-align: center; color: gray; margin-top: 2rem;">
        <small>RAG System powered by LangChain, Pinecone, and Google Gemini</small>
    </div>
    """, unsafe_allow_html=True)
# Standard script entry guard: run the app only when executed directly.
if __name__ == "__main__":
    main()