Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
202 changes: 202 additions & 0 deletions FIXED.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@
import requests
import json
import os
from geopy.distance import geodesic
from flask import Flask, request, jsonify
from flask_cors import CORS
import logging
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.text_splitter import CharacterTextSplitter
from langchain.document_loaders import PyPDFLoader
import openai
import wikipedia

import API_KEYS

# Logging: DEBUG is convenient during development; switch to logging.INFO or
# logging.ERROR in production.
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

# OpenAI and Google API keys, read from the project-local API_KEYS module.
OPENAI_API_KEY = API_KEYS.OPENAI_API_KEY
GOOGLE_PLACES_API_KEY = API_KEYS.GOOGLE_MAPS_API_KEY

# Flask application, with CORS enabled so requests from any origin are allowed.
app = Flask(__name__)
CORS(app)

# Load the PDF that backs retrieval; fail fast when the file is missing.
pdf_path = 'seoul.pdf'
if not os.path.exists(pdf_path):
    logger.error(f"{pdf_path} file not found.")
    raise FileNotFoundError(f"{pdf_path} file not found.")

pdf_loader = PyPDFLoader(pdf_path)
pdf_docs = pdf_loader.load()

# Split every page into overlapping chunks that will be embedded.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
texts = []
for doc in pdf_docs:
    page_content = getattr(doc, 'page_content', None)
    if isinstance(page_content, str):
        texts.extend(text_splitter.split_text(page_content))
    else:
        logger.warning("Document does not have valid page content.")

# A vector index cannot be built from zero chunks; report the real cause
# here instead of letting index construction fail with an obscure error.
if not texts:
    logger.error(f"No text could be extracted from {pdf_path}.")
    raise ValueError(f"No text could be extracted from {pdf_path}.")

embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)
vectorstore = FAISS.from_texts(texts, embeddings)

# Wikipedia lookups use the Korean-language edition.
wikipedia.set_lang('ko')


# RAG-based description generation function
def generate_description_with_rag(name, place_type):
    """Generate a short, inviting description of a place with retrieval + LLM.

    Chunks similar to ``"{name} {place_type}"`` are fetched from the module's
    vector store and passed, together with a Korean prompt, to a "stuff"
    question-answering chain. The prompt asks for at most two sentences and
    instructs the model to answer with only ``False`` when it has no
    information about the place.

    Args:
        name: Name of the place.
        place_type: Type label of the place (callers in this file pass a
            Google Places type such as ``"museum"``).

    Returns:
        The chain's answer text. When the OpenAI API responds with an error
        status, a string of the form ``"OpenAI API error: ..."`` is returned
        instead of raising.
    """
    try:
        relevant_docs = vectorstore.similarity_search(f"{name} {place_type}")
        qa_chain = load_qa_chain(
            llm=ChatOpenAI(openai_api_key=OPENAI_API_KEY, temperature=0.7),
            chain_type="stuff",
        )

        # The template must be a plain string, not an f-string: substitution
        # is PromptTemplate's job. Pre-interpolating the values made the
        # declared input variables meaningless and broke formatting for any
        # place name that itself contains '{' or '}'.
        prompt = PromptTemplate(
            input_variables=["name", "place_type"],
            template="당신은 유능한 관광지 전문가입니다. '{name}'({place_type})에 대해 두 문장 이내로, 그곳에 가고싶게 설명해주세요. 정보가 없으면, 고객을 절대 속이지 말고 'False'만 반환하세요.",
        )

        formatted_prompt = prompt.format(name=name, place_type=place_type)
        response = qa_chain.run(input_documents=relevant_docs, question=formatted_prompt)

        logger.debug(f"Generated description for {name}: {response}")
        return response
    except openai.APIStatusError as e:
        logger.error(f"OpenAI API error: {str(e)}")
        return f"OpenAI API error: {str(e)}"


# Google Places API function
def search_nearby_places(lat, lon, user_preferences):
    """Query Google Places Nearby Search for the place types a user prefers.

    Args:
        lat: Latitude of the search centre.
        lon: Longitude of the search centre.
        user_preferences: Mapping of preference category (a key of
            ``category_mapping`` below) to a numeric weight. Only categories
            whose weight is strictly greater than 0.5 are searched; unknown
            categories and non-numeric weights are skipped.

    Returns:
        A list of dicts with the keys ``name``, ``type``, ``distance``
        (geodesic kilometres from the search centre), ``source`` and
        ``place_id``. Places typed as lodging/hotel/motel and places with
        fewer than 15 user ratings are left out. Each place appears at most
        once, under the first searched type that returned it.
    """
    category_mapping = {
        "culturalTourism": ["museum", "art_gallery"],
        "historicalTourism": ["church", "historical_place"],
        "shopping": ["shopping_mall"],
        "leisureSports": ["gym", "stadium"],
        "restaurant": ["restaurant", "cafe"],
        "natureTourism": ["park"]
    }
    excluded_types = {"lodging", "hotel", "motel"}
    endpoint = "https://maps.googleapis.com/maps/api/place/nearbysearch/json"
    request_timeout = 10  # seconds; keeps a stalled connection from hanging the request

    results = []
    seen_place_ids = set()

    for preference, weight in user_preferences.items():
        if not isinstance(weight, (int, float)):
            logger.warning(f"Ignoring non-numeric preference weight for {preference}: {weight!r}")
            continue
        if not weight > 0.5:
            continue

        for place_type in category_mapping.get(preference, []):
            params = {
                "location": f"{lat},{lon}",
                "radius": 1500,
                "type": place_type,
                "language": "ko",
                "key": GOOGLE_PLACES_API_KEY,
            }

            # Log the query without the API key so the secret never reaches
            # the log output.
            logger.debug(f"Requesting nearby places of type {place_type} around ({lat}, {lon})")

            try:
                response = requests.get(endpoint, params=params, timeout=request_timeout)
            except requests.RequestException as e:
                logger.error(f"Google Places API request failed for {place_type}: {e}")
                continue

            logger.debug(f"Response status: {response.status_code}")
            logger.debug(f"Response content: {response.text}")

            if response.status_code != 200:
                logger.error(f"Google Places API returned an error for {place_type}: {response.status_code}")
                continue

            try:
                data = response.json()
            except ValueError:
                logger.error(f"Google Places API returned a non-JSON body for {place_type}")
                continue

            if data.get("status") != "OK":
                logger.warning(f"Google Places API returned status {data.get('status')} for {place_type}")
                continue

            for place in data.get('results', []):
                if any(ptype in excluded_types for ptype in place.get("types", [])):
                    continue
                if place.get('user_ratings_total', 0) < 15:
                    continue

                # A place can match several searched types (e.g. restaurant
                # and cafe); keep only its first occurrence.
                place_id = place['place_id']
                if place_id in seen_place_ids:
                    continue
                seen_place_ids.add(place_id)

                location = place['geometry']['location']
                results.append({
                    'name': place['name'],
                    'type': place_type,
                    'distance': geodesic((lat, lon), (location['lat'], location['lng'])).kilometers,
                    'source': 'Google',
                    'place_id': place_id
                })
    return results


# Recommendation function
def recommend_spots(user_location, user_preferences, max_distance=2, top_n=5):
    """Return the nearest matching places, each with a description attached.

    Args:
        user_location: ``(latitude, longitude)`` pair of the user.
        user_preferences: Mapping of preference category to weight, passed on
            to ``search_nearby_places``.
        max_distance: Maximum distance from the user in kilometres; places
            farther away are dropped. ``None`` disables the limit.
        top_n: Maximum number of places to return.

    Returns:
        Up to ``top_n`` place dicts sorted by ascending distance, each with a
        ``description`` key added. The description comes from
        ``generate_description_with_rag``; when that answers ``False`` the
        Wikipedia summary is used, and when no Wikipedia page can be resolved
        a fixed placeholder text is used.
    """
    google_places = search_nearby_places(user_location[0], user_location[1], user_preferences)
    logger.info(f"Google Places Results: {google_places}")

    # Apply the distance limit; the parameter used to be accepted but ignored.
    if max_distance is not None:
        google_places = [place for place in google_places if place['distance'] <= max_distance]

    recommendations = sorted(google_places, key=lambda place: place['distance'])[:top_n]

    for place in recommendations:
        description = generate_description_with_rag(place['name'], place['type'])

        # The model is told to answer exactly 'False' when it knows nothing,
        # but it may add quotes, whitespace or a trailing period; normalise
        # before comparing so those variants still trigger the fallback.
        normalized = str(description).strip().strip("'\"`.").lower()
        if normalized == "false":
            try:
                candidate_description = wikipedia.page(place['name'])
                description = candidate_description.summary + " (from Wikipedia)"
            except (wikipedia.exceptions.PageError, wikipedia.exceptions.DisambiguationError):
                description = "정보가 업데이트 중입니다!"

        place['description'] = description
        logger.debug(f"Final description for {place['name']}: {description}")

    return recommendations


@app.route('/recommend', methods=['POST'])
def recommend():
    """Handle ``POST /recommend`` and return place recommendations as JSON.

    The request body must be a JSON object with numeric ``latitude`` and
    ``longitude`` fields. An optional ``user_preferences`` object maps
    preference categories to weights; a built-in default is used when the
    field is absent.

    Returns:
        ``{"results": [...]}`` on success; ``{"error": ...}`` with status 400
        for a missing or invalid body, location or preferences; ``{"error":
        ...}`` with status 500 for any unexpected failure.
    """
    default_preferences = {
        "culturalTourism": 0.2,
        "historicalTourism": 0.5,
        "shopping": 0.3,
        "leisureSports": 0.5,
        "restaurant": 0.1,
        "natureTourism": 0.7
    }

    try:
        logger.debug("Received a new request for recommendation.")

        # silent=True yields None for a wrong content type or unparsable
        # body, so the client gets a 400 below rather than a 500.
        data = request.get_json(silent=True)
        if data is None:
            logger.error("No JSON data received.")
            return jsonify({"error": "No JSON data received."}), 400
        logger.debug(f"Request Data: {data}")

        if not isinstance(data, dict):
            logger.error("Request body is not a JSON object.")
            return jsonify({"error": "Request body must be a JSON object."}), 400

        # Compare against None explicitly: 0 is a valid coordinate and must
        # not be treated as "missing".
        user_latitude = data.get('latitude')
        user_longitude = data.get('longitude')
        if user_latitude is None or user_longitude is None:
            logger.error("User location (latitude/longitude) is missing.")
            return jsonify({"error": "User location is missing."}), 400

        try:
            user_location = (float(user_latitude), float(user_longitude))
        except (TypeError, ValueError):
            logger.error("User location (latitude/longitude) is not numeric.")
            return jsonify({"error": "User location is invalid."}), 400

        # The range test is written so that NaN is rejected as well.
        if not (-90 <= user_location[0] <= 90 and -180 <= user_location[1] <= 180):
            logger.error("User location (latitude/longitude) is out of range.")
            return jsonify({"error": "User location is invalid."}), 400

        user_preferences = data.get('user_preferences', default_preferences)
        if not isinstance(user_preferences, dict):
            logger.error("user_preferences is not a JSON object.")
            return jsonify({"error": "user_preferences must be a JSON object."}), 400

        logger.debug(f"User Location: {user_location}")
        logger.debug(f"User Preferences: {user_preferences}")

        # Build the recommendations for the location the client actually sent.
        recommended_spots = recommend_spots(user_location, user_preferences)
        logger.info(f"Recommended Spots: {recommended_spots}")

        return jsonify({"results": recommended_spots})

    except Exception as e:
        # Top-level boundary: log with traceback and report a 500.
        logger.exception(f"Error occurred during recommendation processing: {str(e)}")
        return jsonify({"error": str(e)}), 500



# Start the Flask server on port 5000 only when this file is run as a script.
if __name__ == "__main__":
    app.run(port=5000, debug=False)
73 changes: 73 additions & 0 deletions crawler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
import requests
# NOTE(review): the name `category` imported here is rebound to a plain list
# further down in this file, so this import looks unintended — confirm before
# removing it.
from sympy.integrals.meijerint_doc import category

# The triple-quoted string below is disabled code kept for reference. It is a
# bare string expression placed after the imports (so not a module docstring)
# and has no runtime effect. The snippet inside fetches one attractions
# listing page, prints the prettified HTML and prints one CSS-selected title.
"""
from bs4 import BeautifulSoup

headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36'}

# 이 이후로 필요한 부분 추출
data = requests.get('https://korean.visitseoul.net/attractions?srchType=&srchOptnCode=&srchCtgry=68&sortOrder=&srchWord=&radioOptionLike=TURSM_AREA_8',headers=headers)
soup = BeautifulSoup(data.text, 'html.parser')

print(soup.prettify())

title = soup.select_one('#li.item:nth-child(1) > a:nth-child(1) > div:nth-child(2) > div:nth-child(1) > span:nth-child(1)')
print(title)
"""

from bs4 import BeautifulSoup

# Browser-like User-Agent header sent with every request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36'
}

# Food category labels; not referenced by the crawl loop below.
category = ["카페&디저트", "주점", "한식", "양식", "중식", "일식", "아시아식", "서양식", "채식", "할랄"]

# Upper bound in seconds for one HTTP request, so a stalled connection cannot
# hang the crawl indefinitely.
REQUEST_TIMEOUT_SECONDS = 10

# Crawl listing pages 1..59 and print the title and description of each item.
for i in range(1, 60):
    try:
        html = requests.get(
            f'https://korean.visitseoul.net/area?curPage={i}&srchType=&srchOptnCode=&srchCtgry=98&sortOrder=&srchWord=&radioOptionLike=TURSM_AREA_8',
            headers=headers,
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
        # Treat HTTP error responses as failures instead of parsing an error
        # page as if it were a listing.
        html.raise_for_status()
    except requests.RequestException as e:
        print(f'Error: {e}')
        break

    soup = BeautifulSoup(html.text, 'html.parser')

    # Every listing entry is an <li class="item"> element.
    items = soup.find_all('li', class_='item')

    for item in items:
        title = item.find('span', class_='title')
        description = item.find('span', class_='small-text text-dot-d')

        # Print only entries that have both a title and a description.
        if title is not None and description is not None:
            print("Title:", title.get_text(strip=True))
            print("Location: 기타")
            print("Description:", description.get_text(strip=True))
            print('-' * 30)

# The triple-quoted string below is disabled code kept for reference; as a
# bare string expression it has no runtime effect. The snippet inside fetches
# a single attractions listing page and prints each item's title and
# description.
"""
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36'
}

html = requests.get(f'https://korean.visitseoul.net/attractions?curPage=2&srchType=&srchOptnCode=&srchCtgry=69&sortOrder=&srchWord=&radioOptionLike=TURSM_AREA_8',headers=headers)
soup = BeautifulSoup(html.text, 'html.parser')

# 모든 `li` 요소 찾기
items = soup.find_all('li', class_='item')

# 정보 추출
for item in items:
title = item.find('span', class_='title')
description = item.find('span', class_='small-text text-dot-d')

# 텍스트가 있으면 출력
if title and description:
print("Title:", title.get_text(strip=True))
print("Description:", description.get_text(strip=True))
print('-' * 30)
"""
Binary file added document_cache.pkl
Binary file not shown.
Loading