#Dependencies
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import csv
# Load the city table and keep only the columns this analysis uses.
city_data = "../HW5/city_data.csv"
city_columns = ["city", "driver_count", "type"]
city_read = pd.read_csv(city_data)
city_pd = pd.DataFrame(data=city_read, columns=city_columns)
# Preview the first rows to confirm the file loaded as expected.
city_pd.head()
<style>
.dataframe thead tr:only-child th {
text-align: right;
}
.dataframe thead th {
text-align: left;
}
.dataframe tbody tr th {
vertical-align: top;
}
</style>
|
city |
driver_count |
type |
| 0 |
Kelseyland |
63 |
Urban |
| 1 |
Nguyenbury |
8 |
Urban |
| 2 |
East Douglas |
12 |
Urban |
| 3 |
West Dawnfurt |
34 |
Urban |
| 4 |
Rodriguezburgh |
52 |
Urban |
# Load the ride table and keep only the columns this analysis uses.
ride_data = "../HW5/ride_data.csv"
ride_columns = ["city", "date", "fare", "ride_id"]
ride_read = pd.read_csv(ride_data)
ride_pd = pd.DataFrame(data=ride_read, columns=ride_columns)
# Preview the first rows to confirm the file loaded as expected.
ride_pd.head()
<style>
.dataframe thead tr:only-child th {
text-align: right;
}
.dataframe thead th {
text-align: left;
}
.dataframe tbody tr th {
vertical-align: top;
}
</style>
|
city |
date |
fare |
ride_id |
| 0 |
Sarabury |
2016-01-16 13:49:27 |
38.35 |
5403689035038 |
| 1 |
South Roy |
2016-01-02 18:42:34 |
17.49 |
4036272335942 |
| 2 |
Wiseborough |
2016-01-21 17:35:29 |
44.18 |
3645042422587 |
| 3 |
Spencertown |
2016-07-31 14:53:22 |
6.87 |
2242596575892 |
| 4 |
Nguyenbury |
2016-07-09 04:42:44 |
6.28 |
1543057793673 |
# Attach each ride to its city's attributes. An inner join keeps only the
# cities that appear in both tables.
merge_city = city_pd.merge(ride_pd, how="inner", on="city")
merge_city.head()
<style>
.dataframe thead tr:only-child th {
text-align: right;
}
.dataframe thead th {
text-align: left;
}
.dataframe tbody tr th {
vertical-align: top;
}
</style>
|
city |
driver_count |
type |
date |
fare |
ride_id |
| 0 |
Kelseyland |
63 |
Urban |
2016-08-19 04:27:52 |
5.51 |
6246006544795 |
| 1 |
Kelseyland |
63 |
Urban |
2016-04-17 06:59:50 |
5.54 |
7466473222333 |
| 2 |
Kelseyland |
63 |
Urban |
2016-05-04 15:06:07 |
30.54 |
2140501382736 |
| 3 |
Kelseyland |
63 |
Urban |
2016-01-25 20:44:56 |
12.08 |
1896987891309 |
| 4 |
Kelseyland |
63 |
Urban |
2016-08-09 18:19:47 |
17.91 |
8784212854829 |
# The pie charts and scatter plot are not building, so inspect the column
# dtypes of the merged table to rule out a non-numeric column as the cause.
merge_city.dtypes
city object
driver_count int64
type object
date object
fare float64
ride_id int64
dtype: object
# Coerce the fare column to a numeric dtype, as a safeguard while debugging
# the pie charts and scatter plot.
numeric_fare = pd.to_numeric(merge_city["fare"])
merge_city["fare"] = numeric_fare

# Group the merged rides by city; the per-city statistics are computed from
# this grouping.
city_group = merge_city.groupby(by=["city"])
# NOTE: head() on a GroupBy shows the first five rows of every city, not five
# rows in total.
city_group.head()
<style>
.dataframe thead tr:only-child th {
text-align: right;
}
.dataframe thead th {
text-align: left;
}
.dataframe tbody tr th {
vertical-align: top;
}
</style>
|
city |
driver_count |
type |
date |
fare |
ride_id |
| 0 |
Kelseyland |
63 |
Urban |
2016-08-19 04:27:52 |
5.51 |
6246006544795 |
| 1 |
Kelseyland |
63 |
Urban |
2016-04-17 06:59:50 |
5.54 |
7466473222333 |
| 2 |
Kelseyland |
63 |
Urban |
2016-05-04 15:06:07 |
30.54 |
2140501382736 |
| 3 |
Kelseyland |
63 |
Urban |
2016-01-25 20:44:56 |
12.08 |
1896987891309 |
| 4 |
Kelseyland |
63 |
Urban |
2016-08-09 18:19:47 |
17.91 |
8784212854829 |
| 28 |
Nguyenbury |
8 |
Urban |
2016-07-09 04:42:44 |
6.28 |
1543057793673 |
| 29 |
Nguyenbury |
8 |
Urban |
2016-11-08 19:22:04 |
19.49 |
1702803950740 |
| 30 |
Nguyenbury |
8 |
Urban |
2016-03-19 13:08:09 |
35.07 |
9198401002936 |
| 31 |
Nguyenbury |
8 |
Urban |
2016-05-12 15:57:15 |
41.63 |
224683791660 |
| 32 |
Nguyenbury |
8 |
Urban |
2016-04-07 06:59:51 |
19.01 |
4648481871830 |
| 54 |
East Douglas |
12 |
Urban |
2016-10-01 19:07:00 |
16.36 |
8450340983211 |
| 55 |
East Douglas |
12 |
Urban |
2016-07-19 07:42:04 |
11.24 |
8566233760392 |
| 56 |
East Douglas |
12 |
Urban |
2016-09-20 02:40:41 |
23.26 |
825335145222 |
| 57 |
East Douglas |
12 |
Urban |
2016-04-02 13:49:14 |
28.17 |
3800595642657 |
| 58 |
East Douglas |
12 |
Urban |
2016-10-19 20:25:16 |
28.18 |
6204409645686 |
| 76 |
West Dawnfurt |
34 |
Urban |
2016-07-24 15:18:57 |
30.80 |
3839329929610 |
| 77 |
West Dawnfurt |
34 |
Urban |
2016-03-06 18:21:13 |
28.26 |
3899054595030 |
| 78 |
West Dawnfurt |
34 |
Urban |
2016-09-18 18:24:17 |
19.67 |
2851656538564 |
| 79 |
West Dawnfurt |
34 |
Urban |
2016-01-03 11:35:45 |
6.69 |
7455207171412 |
| 80 |
West Dawnfurt |
34 |
Urban |
2016-09-25 16:35:54 |
22.28 |
9242784905015 |
| 105 |
Rodriguezburgh |
52 |
Urban |
2016-09-05 05:20:39 |
4.54 |
9650770953139 |
| 106 |
Rodriguezburgh |
52 |
Urban |
2016-11-21 10:41:56 |
26.13 |
6513545702246 |
| 107 |
Rodriguezburgh |
52 |
Urban |
2016-05-22 21:34:07 |
8.83 |
5135321621391 |
| 108 |
Rodriguezburgh |
52 |
Urban |
2016-10-07 11:09:50 |
25.19 |
7709877217148 |
| 109 |
Rodriguezburgh |
52 |
Urban |
2016-06-11 21:01:35 |
27.66 |
6959463827218 |
| 128 |
South Josephville |
4 |
Urban |
2016-06-01 05:15:38 |
28.33 |
7956832876432 |
| 129 |
South Josephville |
4 |
Urban |
2016-10-06 03:53:57 |
28.19 |
2604125036913 |
| 130 |
South Josephville |
4 |
Urban |
2016-03-11 23:05:39 |
29.12 |
7477161326509 |
| 131 |
South Josephville |
4 |
Urban |
2016-03-24 18:38:25 |
13.73 |
1488440031973 |
| 132 |
South Josephville |
4 |
Urban |
2016-08-28 04:52:03 |
38.51 |
4250063766740 |
| ... |
... |
... |
... |
... |
... |
... |
| 2355 |
Stevensport |
6 |
Rural |
2016-02-22 02:45:07 |
19.91 |
808097865942 |
| 2356 |
North Whitney |
10 |
Rural |
2016-04-01 21:21:37 |
51.01 |
612689673941 |
| 2357 |
North Whitney |
10 |
Rural |
2016-04-26 09:35:48 |
42.09 |
9465134041656 |
| 2358 |
North Whitney |
10 |
Rural |
2016-06-24 21:09:09 |
50.03 |
9224879345166 |
| 2359 |
North Whitney |
10 |
Rural |
2016-06-10 18:27:03 |
29.25 |
4071225680519 |
| 2360 |
North Whitney |
10 |
Rural |
2016-02-21 18:20:14 |
42.01 |
3306522110065 |
| 2366 |
East Stephen |
6 |
Rural |
2016-02-16 11:58:06 |
22.43 |
8118042484039 |
| 2367 |
East Stephen |
6 |
Rural |
2016-07-27 09:55:18 |
41.64 |
3154261826545 |
| 2368 |
East Stephen |
6 |
Rural |
2016-07-11 01:21:31 |
41.86 |
1023430862078 |
| 2369 |
East Stephen |
6 |
Rural |
2016-11-19 04:47:01 |
43.84 |
7366535419230 |
| 2370 |
East Stephen |
6 |
Rural |
2016-05-23 12:55:48 |
48.49 |
9985496304508 |
| 2376 |
East Leslie |
9 |
Rural |
2016-04-21 18:44:59 |
19.26 |
5836114186294 |
| 2377 |
East Leslie |
9 |
Rural |
2016-04-13 04:30:56 |
40.47 |
7075058703398 |
| 2378 |
East Leslie |
9 |
Rural |
2016-04-26 02:34:30 |
45.80 |
9402873395510 |
| 2379 |
East Leslie |
9 |
Rural |
2016-04-05 18:53:16 |
44.78 |
6113138249150 |
| 2380 |
East Leslie |
9 |
Rural |
2016-11-13 10:21:10 |
15.71 |
7275986542384 |
| 2387 |
Hernandezshire |
10 |
Rural |
2016-02-20 08:17:32 |
58.95 |
3176534714830 |
| 2388 |
Hernandezshire |
10 |
Rural |
2016-06-26 20:11:50 |
28.78 |
6382848462030 |
| 2389 |
Hernandezshire |
10 |
Rural |
2016-01-24 00:21:35 |
30.32 |
7342649945759 |
| 2390 |
Hernandezshire |
10 |
Rural |
2016-03-05 10:40:16 |
23.35 |
7443355895137 |
| 2391 |
Hernandezshire |
10 |
Rural |
2016-04-11 04:44:50 |
10.41 |
9823290002445 |
| 2396 |
Horneland |
8 |
Rural |
2016-07-19 10:07:33 |
12.63 |
8214498891817 |
| 2397 |
Horneland |
8 |
Rural |
2016-03-22 21:22:20 |
31.53 |
1797785685674 |
| 2398 |
Horneland |
8 |
Rural |
2016-01-26 09:38:17 |
21.73 |
5665544449606 |
| 2399 |
Horneland |
8 |
Rural |
2016-03-25 02:05:42 |
20.04 |
5729327140644 |
| 2400 |
West Kevintown |
5 |
Rural |
2016-11-27 20:12:58 |
12.92 |
6460741616450 |
| 2401 |
West Kevintown |
5 |
Rural |
2016-02-19 01:42:58 |
11.15 |
8622534016726 |
| 2402 |
West Kevintown |
5 |
Rural |
2016-03-11 09:03:43 |
42.13 |
4568909568268 |
| 2403 |
West Kevintown |
5 |
Rural |
2016-06-25 08:04:12 |
24.53 |
8188407925972 |
| 2404 |
West Kevintown |
5 |
Rural |
2016-07-24 13:41:23 |
11.78 |
2001192693573 |
618 rows × 6 columns
# Average fare per city: the mean of the fare column within each city group.
fare_by_city = city_group["fare"]
avg_fare = fare_by_city.mean()
avg_fare.head()
city
Alvarezhaven 23.928710
Alyssaberg 20.609615
Anitamouth 37.315556
Antoniomouth 23.625000
Aprilchester 21.981579
Name: fare, dtype: float64
# Total rides per city: the number of rows in each city group.
rides_by_city = city_group["city"]
total_rides = rides_by_city.count()
total_rides.head()
city
Alvarezhaven 31
Alyssaberg 26
Anitamouth 9
Antoniomouth 22
Aprilchester 19
Name: city, dtype: int64
# Drivers per city. The merge repeats a city's driver_count on every one of
# its ride rows, so the per-city mean collapses those repeats to one value.
# NOTE(review): if a city is listed with more than one driver_count, this
# yields their ride-weighted average -- confirm that is intended.
drivers_by_city = city_group["driver_count"]
total_drivers = drivers_by_city.mean()
total_drivers.head()
city
Alvarezhaven 21
Alyssaberg 67
Anitamouth 16
Antoniomouth 21
Aprilchester 49
Name: driver_count, dtype: int64
# City type for each city. Use .first() so every city gets one plain label
# (e.g. Urban). .unique() returns an array per city, which is why the values
# were displayed in brackets (e.g. [Urban]) and could not be used directly as
# category labels when plotting.
city_type = city_group['type'].first()
city_type.head()
city
Alvarezhaven [Urban]
Alyssaberg [Urban]
Anitamouth [Suburban]
Antoniomouth [Urban]
Aprilchester [Urban]
Name: type, dtype: object
# Per-city summary table: one row per city, one column per statistic.
# All four Series are indexed by city, so they line up row by row.
summary_columns = {
    "AVG Fare/City": avg_fare,
    "Rides/City": total_rides,
    "Drivers/City": total_drivers,
    "City Type": city_type,
}
city_summary = pd.DataFrame(summary_columns)
city_summary.head()
<style>
.dataframe thead tr:only-child th {
text-align: right;
}
.dataframe thead th {
text-align: left;
}
.dataframe tbody tr th {
vertical-align: top;
}
</style>
|
AVG Fare/City |
City Type |
Drivers/City |
Rides/City |
| city |
|
|
|
|
| Alvarezhaven |
23.928710 |
[Urban] |
21 |
31 |
| Alyssaberg |
20.609615 |
[Urban] |
67 |
26 |
| Anitamouth |
37.315556 |
[Suburban] |
16 |
9 |
| Antoniomouth |
23.625000 |
[Urban] |
21 |
22 |
| Aprilchester |
21.981579 |
[Urban] |
49 |
19 |
# Bubble plot of the ride-sharing data: one bubble per city, with total rides
# on the x-axis, average fare on the y-axis, bubble area scaled by the city's
# driver count, and colour chosen by city type.
#
# The per-city Series are plotted directly: np.arange() expects scalar bounds
# (passing a Series raises "cannot convert the series to <class 'float'>"),
# NumPy has no `city_type` attribute, and plt.scatter() needs the data
# itself rather than the strings "x", "y", "z".
x = total_rides
y = avg_fare
# .first() gives one plain label per city, so the labels can be compared
# with == below regardless of how city_type was built.
z = city_group["type"].first()

type_colors = {
    "Urban": "lightcoral",
    "Suburban": "lightskyblue",
    "Rural": "gold",
}

# Draw one scatter layer per city type so each type gets its own colour and
# its own legend entry.
for kind in z.unique():
    in_kind = z == kind  # boolean mask indexed by city, aligned with x and y
    plt.scatter(x[in_kind], y[in_kind],
                s=total_drivers[in_kind] * 5,  # scale up so small fleets stay visible
                marker="o",
                color=type_colors.get(kind, "grey"),  # grey for any unexpected type
                edgecolors="black",
                alpha=0.75,
                label=kind)

plt.xlabel("Total Number of Rides (Per City)")
plt.ylabel("Average Fare ($)")
plt.title("Rides vs. Average Fare by City Type")
plt.legend(title="City Type")
plt.grid(True)
plt.show()
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-22-9aed9aa03671> in <module>()
----> 1 x = np.arange(total_rides)
2 y = np.arange(avg_fare)
3 z = np.city_type
4
5 plt.scatter("x", "y","z", marker="o", color='red')
~/anaconda3/envs/PythonData/lib/python3.6/site-packages/pandas/core/series.py in wrapper(self)
95 return converter(self.iloc[0])
96 raise TypeError("cannot convert the series to "
---> 97 "{0}".format(str(converter)))
98
99 return wrapper
TypeError: cannot convert the series to <class 'float'>