-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathanalysis.py
More file actions
138 lines (89 loc) · 3.41 KB
/
analysis.py
File metadata and controls
138 lines (89 loc) · 3.41 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
import pandas as pd
import matplotlib.pyplot as plt
# --------------------------- Level-2 (Analysis using python + pandas + SQL) ------------------------------
import sqlite3
#Connect to the database
conn = sqlite3.connect("expense_data.db")
# Read sql file
with open("sql_queries.sql", "r") as file:
sql_content = file.read()
# Split queries by semicolon
queries = sql_content.strip().split(';')
# 1. Category-wise spending
df_category = pd.read_sql_query(queries[0], conn)
plt.bar(df_category["category"], df_category["total_spent"])
plt.title('Total Spending by Category')
plt.show()
# 2. Monthly Spending Trend
df_monthly = pd.read_sql_query(queries[1], conn)
df_monthly["month"] = pd.to_datetime(df_monthly["month"])
df_monthly["month_name"] = df_monthly["month"].dt.strftime("%b")
plt.plot(df_monthly["month_name"], df_monthly["total_spent"], marker = 'o')
plt.title("Monthly Spending Trend")
plt.show()
# 3. City-wise spending
df_city = pd.read_sql_query(queries[2], conn)
plt.pie(df_city["total_spent"], labels = df_city["city"], autopct = "%1.1f%%" )
plt.title("City-Wise Spending Analysis")
plt.show()
# Payment Method Analysis
df_payment = pd.read_sql_query(queries[3], conn)
plt.bar(df_payment["payment_method"], df_payment["total_spent"])
plt.title("Payment Method Analysis")
plt.show()
# close the connection
conn.close()
"""
# ---------------------------- Level-1 (Analysis using only python and pandas) ----------------------------------
#Load the expense data
df = pd.read_csv('expenses.csv')
#Removing empty rows
df.dropna(inplace = True)
#convert the data column
df['expense_date'] = pd.to_datetime(df['expense_date'])
#Removing duplicates
df.drop_duplicates(inplace = True)
print('Data loaded successfully.....')
print(df.head())
# ----------------------------------- 1. Total Spending Over Time -----------------------------------
# Calculating total spending by category
category_spending = df.groupby('category')['amount'].sum()
print('\n 1. Total Spending by category:')
print(category_spending)
#Plotting total spending by category
category_spending.plot(kind = 'bar', title = 'Total Spending by Category')
plt.xlabel('Category')
plt.ylabel('Total Amount Spent')
plt.tight_layout()
plt.show()
# ----------------------------------- 2. Monthly Spending Trend -----------------------------------
df['month'] = df['expense_date'].dt.to_period('M')
monthly_spending = df.groupby('month')['amount'].sum()
print('\n 2. Monthly Spending Trend:')
print(monthly_spending)
#plotting
monthly_spending.plot(kind = 'line', marker = 'o', title = 'Monthly Spending Trend')
plt.xlabel('Month')
plt.ylabel('Total Amount Spent')
plt.tight_layout()
plt.show()
# ----------------------------------- 3. City-wise Spending Analysis -----------------------------------
city_spending = df.groupby('city')['amount'].sum()
print('\n 3. City-wise Spending Analysis:')
print(city_spending)
#plotting
city_spending.plot(kind = 'pie', autopct = '%1.1f%%', title = 'City-wise Spending Analysis')
plt.ylabel('')
plt.tight_layout()
plt.show()
# ----------------------------------- 4. Payment Method Analysis -----------------------------------
payment_analysis = df.groupby('payment_method')['amount'].sum()
print('\n 4. Payment Method Analysis:')
print(payment_analysis)
#plotting
payment_analysis.plot(kind = 'bar', title = 'Payment Method Analysis')
plt.xlabel('Payment Method')
plt.ylabel('Total Amount Spent')
plt.tight_layout()
plt.show()
"""