Skip to content

Latest commit

 

History

History
1069 lines (960 loc) · 20.1 KB

File metadata and controls

1069 lines (960 loc) · 20.1 KB
#Dependencies
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import csv
# Load the city table from CSV and keep the city, driver_count and type columns.
city_data = "../HW5/city_data.csv"
city_read = pd.read_csv(city_data)
city_pd = pd.DataFrame(
    data=city_read,
    columns=["city", "driver_count", "type"],
)
# Preview the first five rows.
city_pd.head(n=5)
<style> .dataframe thead tr:only-child th { text-align: right; }
.dataframe thead th {
    text-align: left;
}

.dataframe tbody tr th {
    vertical-align: top;
}
</style>
city driver_count type
0 Kelseyland 63 Urban
1 Nguyenbury 8 Urban
2 East Douglas 12 Urban
3 West Dawnfurt 34 Urban
4 Rodriguezburgh 52 Urban
# Load the ride table from CSV and keep the city, date, fare and ride_id columns.
ride_data = "../HW5/ride_data.csv"
ride_read = pd.read_csv(ride_data)
ride_pd = pd.DataFrame(
    data=ride_read,
    columns=["city", "date", "fare", "ride_id"],
)
# Preview the first five rows.
ride_pd.head(n=5)
<style> .dataframe thead tr:only-child th { text-align: right; }
.dataframe thead th {
    text-align: left;
}

.dataframe tbody tr th {
    vertical-align: top;
}
</style>
city date fare ride_id
0 Sarabury 2016-01-16 13:49:27 38.35 5403689035038
1 South Roy 2016-01-02 18:42:34 17.49 4036272335942
2 Wiseborough 2016-01-21 17:35:29 44.18 3645042422587
3 Spencertown 2016-07-31 14:53:22 6.87 2242596575892
4 Nguyenbury 2016-07-09 04:42:44 6.28 1543057793673
# Inner-join the city table with the ride table on the shared "city" column,
# so every ride row also carries its city's driver_count and type.
merge_city = pd.merge(left=city_pd, right=ride_pd, how='inner', on="city")
# Preview the first five merged rows.
merge_city.head(n=5)
<style> .dataframe thead tr:only-child th { text-align: right; }
.dataframe thead th {
    text-align: left;
}

.dataframe tbody tr th {
    vertical-align: top;
}
</style>
city driver_count type date fare ride_id
0 Kelseyland 63 Urban 2016-08-19 04:27:52 5.51 6246006544795
1 Kelseyland 63 Urban 2016-04-17 06:59:50 5.54 7466473222333
2 Kelseyland 63 Urban 2016-05-04 15:06:07 30.54 2140501382736
3 Kelseyland 63 Urban 2016-01-25 20:44:56 12.08 1896987891309
4 Kelseyland 63 Urban 2016-08-09 18:19:47 17.91 8784212854829
# Troubleshooting the pie charts and scatter plot: inspect the dtype of every
# column in the merged table.
merge_city.dtypes
city             object
driver_count      int64
type             object
date             object
fare            float64
ride_id           int64
dtype: object
# Coerce the fare column to a numeric dtype, as an attempted fix for the pie
# chart and scatter plot problems.
merge_city["fare"] = pd.to_numeric(merge_city["fare"])
# Group the merged rides by city; the per-city figures (average fare, ride
# count, driver count, city type) are all derived from this grouping.
city_group = merge_city.groupby(by=["city"])
# On a groupby, head() returns the first five rows of every city, not five rows total.
city_group.head(n=5)
<style> .dataframe thead tr:only-child th { text-align: right; }
.dataframe thead th {
    text-align: left;
}

.dataframe tbody tr th {
    vertical-align: top;
}
</style>
city driver_count type date fare ride_id
0 Kelseyland 63 Urban 2016-08-19 04:27:52 5.51 6246006544795
1 Kelseyland 63 Urban 2016-04-17 06:59:50 5.54 7466473222333
2 Kelseyland 63 Urban 2016-05-04 15:06:07 30.54 2140501382736
3 Kelseyland 63 Urban 2016-01-25 20:44:56 12.08 1896987891309
4 Kelseyland 63 Urban 2016-08-09 18:19:47 17.91 8784212854829
28 Nguyenbury 8 Urban 2016-07-09 04:42:44 6.28 1543057793673
29 Nguyenbury 8 Urban 2016-11-08 19:22:04 19.49 1702803950740
30 Nguyenbury 8 Urban 2016-03-19 13:08:09 35.07 9198401002936
31 Nguyenbury 8 Urban 2016-05-12 15:57:15 41.63 224683791660
32 Nguyenbury 8 Urban 2016-04-07 06:59:51 19.01 4648481871830
54 East Douglas 12 Urban 2016-10-01 19:07:00 16.36 8450340983211
55 East Douglas 12 Urban 2016-07-19 07:42:04 11.24 8566233760392
56 East Douglas 12 Urban 2016-09-20 02:40:41 23.26 825335145222
57 East Douglas 12 Urban 2016-04-02 13:49:14 28.17 3800595642657
58 East Douglas 12 Urban 2016-10-19 20:25:16 28.18 6204409645686
76 West Dawnfurt 34 Urban 2016-07-24 15:18:57 30.80 3839329929610
77 West Dawnfurt 34 Urban 2016-03-06 18:21:13 28.26 3899054595030
78 West Dawnfurt 34 Urban 2016-09-18 18:24:17 19.67 2851656538564
79 West Dawnfurt 34 Urban 2016-01-03 11:35:45 6.69 7455207171412
80 West Dawnfurt 34 Urban 2016-09-25 16:35:54 22.28 9242784905015
105 Rodriguezburgh 52 Urban 2016-09-05 05:20:39 4.54 9650770953139
106 Rodriguezburgh 52 Urban 2016-11-21 10:41:56 26.13 6513545702246
107 Rodriguezburgh 52 Urban 2016-05-22 21:34:07 8.83 5135321621391
108 Rodriguezburgh 52 Urban 2016-10-07 11:09:50 25.19 7709877217148
109 Rodriguezburgh 52 Urban 2016-06-11 21:01:35 27.66 6959463827218
128 South Josephville 4 Urban 2016-06-01 05:15:38 28.33 7956832876432
129 South Josephville 4 Urban 2016-10-06 03:53:57 28.19 2604125036913
130 South Josephville 4 Urban 2016-03-11 23:05:39 29.12 7477161326509
131 South Josephville 4 Urban 2016-03-24 18:38:25 13.73 1488440031973
132 South Josephville 4 Urban 2016-08-28 04:52:03 38.51 4250063766740
... ... ... ... ... ... ...
2355 Stevensport 6 Rural 2016-02-22 02:45:07 19.91 808097865942
2356 North Whitney 10 Rural 2016-04-01 21:21:37 51.01 612689673941
2357 North Whitney 10 Rural 2016-04-26 09:35:48 42.09 9465134041656
2358 North Whitney 10 Rural 2016-06-24 21:09:09 50.03 9224879345166
2359 North Whitney 10 Rural 2016-06-10 18:27:03 29.25 4071225680519
2360 North Whitney 10 Rural 2016-02-21 18:20:14 42.01 3306522110065
2366 East Stephen 6 Rural 2016-02-16 11:58:06 22.43 8118042484039
2367 East Stephen 6 Rural 2016-07-27 09:55:18 41.64 3154261826545
2368 East Stephen 6 Rural 2016-07-11 01:21:31 41.86 1023430862078
2369 East Stephen 6 Rural 2016-11-19 04:47:01 43.84 7366535419230
2370 East Stephen 6 Rural 2016-05-23 12:55:48 48.49 9985496304508
2376 East Leslie 9 Rural 2016-04-21 18:44:59 19.26 5836114186294
2377 East Leslie 9 Rural 2016-04-13 04:30:56 40.47 7075058703398
2378 East Leslie 9 Rural 2016-04-26 02:34:30 45.80 9402873395510
2379 East Leslie 9 Rural 2016-04-05 18:53:16 44.78 6113138249150
2380 East Leslie 9 Rural 2016-11-13 10:21:10 15.71 7275986542384
2387 Hernandezshire 10 Rural 2016-02-20 08:17:32 58.95 3176534714830
2388 Hernandezshire 10 Rural 2016-06-26 20:11:50 28.78 6382848462030
2389 Hernandezshire 10 Rural 2016-01-24 00:21:35 30.32 7342649945759
2390 Hernandezshire 10 Rural 2016-03-05 10:40:16 23.35 7443355895137
2391 Hernandezshire 10 Rural 2016-04-11 04:44:50 10.41 9823290002445
2396 Horneland 8 Rural 2016-07-19 10:07:33 12.63 8214498891817
2397 Horneland 8 Rural 2016-03-22 21:22:20 31.53 1797785685674
2398 Horneland 8 Rural 2016-01-26 09:38:17 21.73 5665544449606
2399 Horneland 8 Rural 2016-03-25 02:05:42 20.04 5729327140644
2400 West Kevintown 5 Rural 2016-11-27 20:12:58 12.92 6460741616450
2401 West Kevintown 5 Rural 2016-02-19 01:42:58 11.15 8622534016726
2402 West Kevintown 5 Rural 2016-03-11 09:03:43 42.13 4568909568268
2403 West Kevintown 5 Rural 2016-06-25 08:04:12 24.53 8188407925972
2404 West Kevintown 5 Rural 2016-07-24 13:41:23 11.78 2001192693573

618 rows × 6 columns

# Average fare per city: mean of the fare column within each city group.
avg_fare = city_group["fare"].agg("mean")
avg_fare.head(n=5)
city
Alvarezhaven    23.928710
Alyssaberg      20.609615
Anitamouth      37.315556
Antoniomouth    23.625000
Aprilchester    21.981579
Name: fare, dtype: float64
# Total rides per city: number of merged ride rows in each city group.
total_rides = city_group["city"].agg("count")
total_rides.head(n=5)
city
Alvarezhaven    31
Alyssaberg      26
Anitamouth       9
Antoniomouth    22
Aprilchester    19
Name: city, dtype: int64
# Drivers per city: mean of driver_count within each city group.
# NOTE(review): presumably driver_count is constant within a city, so the mean
# just recovers that value; confirm against city_data.csv.
total_drivers = city_group["driver_count"].agg("mean")
total_drivers.head(n=5)
city
Alvarezhaven    21
Alyssaberg      67
Anitamouth      16
Antoniomouth    21
Aprilchester    49
Name: driver_count, dtype: int64
# City type per city, as a plain string.
# unique() returned a one-element array for every city, which is why the values
# were displayed in brackets (e.g. [Urban]) and could not be used directly as a
# label or colour key. first() takes the first type value of each city group
# instead, yielding a scalar string per city.
# NOTE(review): assumes each city has a single type; if a city could carry more
# than one, only the first would be kept; confirm against city_data.csv.
city_type = city_group['type'].first()
city_type.head()
city
Alvarezhaven       [Urban]
Alyssaberg         [Urban]
Anitamouth      [Suburban]
Antoniomouth       [Urban]
Aprilchester       [Urban]
Name: type, dtype: object
# Assemble the per-city summary table: one row per city (the shared index of
# the four Series), one column per computed figure.
city_summary = pd.DataFrame(
    data={
        "AVG Fare/City": avg_fare,
        "Rides/City": total_rides,
        "Drivers/City": total_drivers,
        "City Type": city_type,
    }
)
# Preview the first five cities.
city_summary.head(n=5)
<style> .dataframe thead tr:only-child th { text-align: right; }
.dataframe thead th {
    text-align: left;
}

.dataframe tbody tr th {
    vertical-align: top;
}
</style>
AVG Fare/City City Type Drivers/City Rides/City
city
Alvarezhaven 23.928710 [Urban] 21 31
Alyssaberg 20.609615 [Urban] 67 26
Anitamouth 37.315556 [Suburban] 16 9
Antoniomouth 23.625000 [Urban] 21 22
Aprilchester 21.981579 [Urban] 49 19
# Bubble plot of the per-city summary: rides per city on x, average fare on y,
# marker area scaled by driver count, one colour per city type.
# The earlier attempt failed because np.arange() needs scalar bounds (passing a
# Series raises "cannot convert the series to <class 'float'>"), numpy has no
# city_type attribute, and plt.scatter() was handed the strings "x", "y", "z"
# instead of the data (its third positional argument is the marker size).
x = total_rides
y = avg_fare
# One plain label per city, read straight from the grouping so it is a string
# even if city_type holds one-element arrays.
z = city_group["type"].first()

# Draw each city type as its own series so it gets its own colour and legend
# entry. x, y, z and total_drivers all come from city_group, so they share the
# same city index and the boolean mask lines up across them.
for kind in z.unique():
    in_kind = z == kind
    plt.scatter(x[in_kind], y[in_kind],
                s=total_drivers[in_kind] * 5,  # multiplier only enlarges the bubbles for legibility
                marker="o", alpha=0.6, edgecolors="black", label=kind)

plt.xlabel("Total Number of Rides (Per City)")
plt.ylabel("Average Fare")
plt.title("Rides vs. Average Fare by City Type")
plt.legend(title="City Type")
plt.show()
---------------------------------------------------------------------------

TypeError                                 Traceback (most recent call last)

<ipython-input-22-9aed9aa03671> in <module>()
----> 1 x = np.arange(total_rides)
      2 y = np.arange(avg_fare)
      3 z = np.city_type
      4 
      5 plt.scatter("x", "y","z", marker="o", color='red')


~/anaconda3/envs/PythonData/lib/python3.6/site-packages/pandas/core/series.py in wrapper(self)
     95             return converter(self.iloc[0])
     96         raise TypeError("cannot convert the series to "
---> 97                         "{0}".format(str(converter)))
     98 
     99     return wrapper


TypeError: cannot convert the series to <class 'float'>