---
title: "Ford Go Bike"
output:
  html_notebook: default
  pdf_document: default
---
In this notebook I download and unzip the [Ford Go Bike](https://www.fordgobike.com/) [data](https://www.fordgobike.com/system-data).
```{r}
library(tidyverse)
library(tictoc)
library(ggmap)
library(skimr)
library(lubridate)
library(forcats)
```
Create a directory named `data` as a subdirectory of your working directory. Or use a Project and delete the previous code chunk. Download the files into the `data` directory. The first file is not zipped; the remaining files are zipped.
```{r}
# Download every trip-data file into ./data.
# The 2017 file is a plain CSV; the monthly files (2018-01 onward) are zip
# archives. From 2019-05 the archives sit in the "baywheels-data" bucket and
# carry "baywheels" in their file name.
fordgobike_months <- c(sprintf("2018%02d", 1:12), sprintf("2019%02d", 1:4))
baywheels_months <- sprintf("2019%02d", 5:7)
trip_urls <- c(
  "https://s3.amazonaws.com/fordgobike-data/2017-fordgobike-tripdata.csv",
  paste0(
    "https://s3.amazonaws.com/fordgobike-data/", fordgobike_months,
    "-fordgobike-tripdata.csv.zip"
  ),
  paste0(
    "https://s3.amazonaws.com/baywheels-data/", baywheels_months,
    "-baywheels-tripdata.csv.zip"
  )
)

# Make sure the destination directory exists before downloading into it.
if (!dir.exists("./data")) {
  dir.create("./data")
}

# `URL` is kept as the loop variable so that, as before, it holds the last
# URL once the chunk has run. Each file keeps its remote name under ./data.
for (URL in trip_urls) {
  download.file(
    URL,
    destfile = file.path("./data", basename(URL)),
    method = "curl"
  )
}
```
Unzip downloaded files.
```{r}
# Extract every monthly archive into ./data.
# 2018-01 .. 2019-04 are "fordgobike" archives, 2019-05 .. 2019-07 "baywheels".
archive_stems <- c(
  paste0(c(sprintf("2018%02d", 1:12), sprintf("2019%02d", 1:4)), "-fordgobike"),
  paste0(sprintf("2019%02d", 5:7), "-baywheels")
)
for (stem in archive_stems) {
  unzip(paste0("./data/", stem, "-tripdata.csv.zip"), exdir = "./data")
}
```
Clean up data directory.
```{r}
# Delete the zip archives that were extracted above, skipping any that are
# already gone.
zip_stems <- c(
  paste0(c(sprintf("2018%02d", 1:12), sprintf("2019%02d", 1:4)), "-fordgobike"),
  paste0(sprintf("2019%02d", 5:7), "-baywheels")
)
for (fn in paste0("./data/", zip_stems, "-tripdata.csv.zip")) {
  if (file.exists(fn)) {
    # print() echoes the result the same way the top-level calls did.
    print(file.remove(fn))
  }
}
```
Rename the BayWheels files to fordgobike
```{r}
# Give the three Bay Wheels CSVs the "fordgobike" file name used by the
# earlier months.
for (month_stamp in c("201905", "201906", "201907")) {
  print(file.rename(
    paste0("./data/", month_stamp, "-baywheels-tripdata.csv"),
    paste0("./data/", month_stamp, "-fordgobike-tripdata.csv")
  ))
}
```
Read the .csv files
```{r message=FALSE}
# Read one trip-data CSV from ./data. `stamp` is the file-name prefix:
# "2017" for the yearly file, "YYYYMM" for a monthly file.
read_trips <- function(stamp) {
  read_csv(file = paste0("./data/", stamp, "-fordgobike-tripdata.csv"))
}

fordgobike2017 <- read_trips("2017")

# 2018, one table per month
fordgobike201801 <- read_trips("201801")
fordgobike201802 <- read_trips("201802")
fordgobike201803 <- read_trips("201803")
fordgobike201804 <- read_trips("201804")
fordgobike201805 <- read_trips("201805")
fordgobike201806 <- read_trips("201806")
fordgobike201807 <- read_trips("201807")
fordgobike201808 <- read_trips("201808")
fordgobike201809 <- read_trips("201809")
fordgobike201810 <- read_trips("201810")
fordgobike201811 <- read_trips("201811")
fordgobike201812 <- read_trips("201812")

# 2019, January through July
fordgobike201901 <- read_trips("201901")
fordgobike201902 <- read_trips("201902")
fordgobike201903 <- read_trips("201903")
fordgobike201904 <- read_trips("201904")
fordgobike201905 <- read_trips("201905")
fordgobike201906 <- read_trips("201906")
fordgobike201907 <- read_trips("201907")
```
Check the head() and tail() of the data.frames that are loaded.
```{r}
# First rows of every loaded table. Each call stays a top-level expression
# so the notebook renders it.
fordgobike2017 %>% head()
fordgobike201801 %>% head()
fordgobike201802 %>% head()
fordgobike201803 %>% head()
fordgobike201804 %>% head()
fordgobike201805 %>% head()
fordgobike201806 %>% head()
fordgobike201807 %>% head()
fordgobike201808 %>% head()
fordgobike201809 %>% head()
fordgobike201810 %>% head()
fordgobike201811 %>% head()
fordgobike201812 %>% head()
fordgobike201901 %>% head()
fordgobike201902 %>% head()
fordgobike201903 %>% head()
fordgobike201904 %>% head()
fordgobike201905 %>% head()
fordgobike201906 %>% head()
fordgobike201907 %>% head()
```
```{r}
# Last rows of every loaded table. Each call stays a top-level expression
# so the notebook renders it.
fordgobike2017 %>% tail()
fordgobike201801 %>% tail()
fordgobike201802 %>% tail()
fordgobike201803 %>% tail()
fordgobike201804 %>% tail()
fordgobike201805 %>% tail()
fordgobike201806 %>% tail()
fordgobike201807 %>% tail()
fordgobike201808 %>% tail()
fordgobike201809 %>% tail()
fordgobike201810 %>% tail()
fordgobike201811 %>% tail()
fordgobike201812 %>% tail()
fordgobike201901 %>% tail()
fordgobike201902 %>% tail()
fordgobike201903 %>% tail()
fordgobike201904 %>% tail()
fordgobike201905 %>% tail()
fordgobike201906 %>% tail()
fordgobike201907 %>% tail()
```
Year 2017
```{r}
# Size of the 2017 table: dimensions first, then the row count as a tibble.
fordgobike2017 %>% dim()
count(fordgobike2017)
```
Year 2018
```{r}
# Number of trips in each 2018 monthly table.
count(fordgobike201801)
count(fordgobike201802)
count(fordgobike201803)
count(fordgobike201804)
count(fordgobike201805)
count(fordgobike201806)
count(fordgobike201807)
count(fordgobike201808)
count(fordgobike201809)
count(fordgobike201810)
count(fordgobike201811)
count(fordgobike201812)
```
Year 2019
```{r}
# Number of trips in each 2019 monthly table (January through July).
count(fordgobike201901)
count(fordgobike201902)
count(fordgobike201903)
count(fordgobike201904)
count(fordgobike201905)
count(fordgobike201906)
count(fordgobike201907)
```
The `start_station_id` and `end_station_id` columns are converted to integer.
```{r}
# Inspect the column types of a few consecutive months.
glimpse(fordgobike201804)
glimpse(fordgobike201805)
glimpse(fordgobike201806)
glimpse(fordgobike201807)

# Coerce both station id columns of a monthly table to integer.
# Presumably this aligns the column types across months so the tables can be
# stacked with bind_rows() below -- confirm against the glimpse() output.
station_ids_as_integer <- function(trips) {
  trips %>%
    mutate(
      start_station_id = as.integer(start_station_id),
      end_station_id = as.integer(end_station_id)
    )
}

# For each month from June to December: show start_station_id before the
# conversion, convert, and show it again.
fordgobike201806 %>% select(start_station_id)
fordgobike201806 <- station_ids_as_integer(fordgobike201806)
fordgobike201806 %>% select(start_station_id)

fordgobike201807 %>% select(start_station_id)
fordgobike201807 <- station_ids_as_integer(fordgobike201807)
fordgobike201807 %>% select(start_station_id)

# Check that year/month/day can be derived from start_time.
fordgobike201807 %>%
  select(start_time) %>%
  mutate(
    year = year(start_time),
    month = month(start_time),
    day = day(start_time)
  )

fordgobike201808 %>% select(start_station_id)
fordgobike201808 <- station_ids_as_integer(fordgobike201808)
fordgobike201808 %>% select(start_station_id)

fordgobike201809 %>% select(start_station_id)
fordgobike201809 <- station_ids_as_integer(fordgobike201809)
fordgobike201809 %>% select(start_station_id)

fordgobike201810 %>% select(start_station_id)
fordgobike201810 <- station_ids_as_integer(fordgobike201810)
fordgobike201810 %>% select(start_station_id)

fordgobike201811 %>% select(start_station_id)
fordgobike201811 <- station_ids_as_integer(fordgobike201811)
fordgobike201811 %>% select(start_station_id)

fordgobike201812 %>% select(start_station_id)
fordgobike201812 <- station_ids_as_integer(fordgobike201812)
fordgobike201812 %>% select(start_station_id)

# Stack the twelve monthly tables into one table for 2018.
fordgobike2018 <- bind_rows(
  fordgobike201801, fordgobike201802, fordgobike201803, fordgobike201804,
  fordgobike201805, fordgobike201806, fordgobike201807, fordgobike201808,
  fordgobike201809, fordgobike201810, fordgobike201811, fordgobike201812
)
glimpse(fordgobike2018)

# Spot-check the July rows of the combined table. month() yields a number,
# so it is compared with a number rather than the string '7'.
fordgobike2018 %>%
  mutate(
    year = year(start_time),
    month = month(start_time),
    day = day(start_time)
  ) %>%
  select(month) %>%
  filter(month == 7)
```
```{r}
# Inspect the column types of three of the 2019 months.
glimpse(fordgobike201902)
glimpse(fordgobike201903)
glimpse(fordgobike201906)

# February: show start_station_id, convert both station id columns to
# integer, show it again.
select(fordgobike201902, start_station_id)
fordgobike201902 <- mutate(
  fordgobike201902,
  start_station_id = as.integer(start_station_id),
  end_station_id = as.integer(end_station_id)
)
select(fordgobike201902, start_station_id)

# March: same treatment.
select(fordgobike201903, start_station_id)
fordgobike201903 <- mutate(
  fordgobike201903,
  start_station_id = as.integer(start_station_id),
  end_station_id = as.integer(end_station_id)
)
select(fordgobike201903, start_station_id)

# Stack January through July into one table for 2019.
fordgobike2019 <- bind_rows(
  fordgobike201901,
  fordgobike201902,
  fordgobike201903,
  fordgobike201904,
  fordgobike201905,
  fordgobike201906,
  fordgobike201907
)
glimpse(fordgobike2019)
```
```{r}
# Preview the start stations of a yearly table: keep the id, name and
# coordinates, sort by station id, drop duplicate rows, and return the head.
first_start_stations <- function(trips) {
  station_cols <- select(
    trips,
    start_station_id, start_station_name,
    start_station_latitude, start_station_longitude
  )
  head(distinct(arrange(station_cols, start_station_id)))
}

first_start_stations(fordgobike2017)
first_start_stations(fordgobike2018)
first_start_stations(fordgobike2019)
```
```{r}
# Sizes of the three yearly tables before they are stacked.
fordgobike2017 %>% dim()
count(fordgobike2017)
fordgobike2018 %>% dim()
count(fordgobike2018)
fordgobike2019 %>% dim()
count(fordgobike2019)

# One table holding every trip; its size is re-checked after each new column.
fordgobike <- bind_rows(fordgobike2017, fordgobike2018, fordgobike2019)
fordgobike %>% dim()
count(fordgobike)

# NOTE(review): age is measured against a fixed 2019 for every trip, not the
# year the trip took place -- confirm this is intended.
fordgobike <- mutate(fordgobike, age = 2019 - member_birth_year)
count(fordgobike)
fordgobike %>% dim()

# Calendar parts of the trip start time.
select(fordgobike, start_time)
fordgobike <- mutate(
  fordgobike,
  year = year(start_time),
  month = month(start_time),
  day = day(start_time)
)
count(fordgobike)
fordgobike %>% dim()

# Abbreviated weekday label of the trip start time.
fordgobike <- mutate(
  fordgobike,
  week_day = wday(start_time, label = TRUE, abbr = TRUE)
)
count(fordgobike)
fordgobike %>% dim()
```
```{r}
# Current date and current date-time (lubridate), shown in the notebook.
lubridate::today()
lubridate::now()
```
Age
```{r warning=FALSE}
# Number of trips per rider age, followed by two whole-table summaries.
count(group_by(fordgobike, age))
summary(fordgobike)
skim(fordgobike)

# Age histograms: all ages, then restricted to <= 80, <= 100 and > 100.
ggplot(fordgobike, aes(x = age)) +
  geom_histogram()
ggplot(filter(fordgobike, age <= 80), aes(x = age)) +
  geom_histogram()
ggplot(filter(fordgobike, age <= 100), aes(x = age)) +
  geom_histogram()
ggplot(filter(fordgobike, age > 100), aes(x = age)) +
  geom_histogram()
```
```{r}
# Number of trips per gender and age.
fordgobike %>%
  group_by(member_gender, age) %>%
  count()

# Age histogram split by gender. The original mapped gender to `class`,
# which is not a ggplot2 aesthetic, so no split was drawn; `fill` colours
# the bars by gender.
fordgobike %>%
  ggplot(aes(x = age, fill = member_gender)) +
  geom_histogram()

# Density version. The per-gender histograms are overlaid (not stacked) and
# made translucent so that the individual densities stay readable.
fordgobike %>%
  ggplot(aes(x = age, fill = member_gender)) +
  geom_histogram(aes(y = ..density..), position = "identity", alpha = 0.5)

# Overall age histogram for riders aged 80 or younger.
fordgobike %>%
  filter(age <= 80) %>%
  ggplot(aes(x = age)) +
  geom_histogram()
```
```{r}
# Age histograms (counts, then densities) per gender, before cleaning the
# gender column.
fordgobike %>%
  filter(age <= 80) %>%
  ggplot(aes(x = age, color = member_gender)) +
  geom_histogram(position = "identity") +
  facet_grid(member_gender ~ .)
fordgobike %>%
  filter(age <= 80) %>%
  ggplot(aes(x = age, color = member_gender)) +
  geom_histogram(aes(y = ..density..), position = "identity") +
  facet_grid(member_gender ~ .)

# Gender as recorded, converted to a factor and tabulated.
fordgobike %>% select(member_gender)
fordgobike <- fordgobike %>% mutate(member_gender = as.factor(member_gender))
fct_count(fordgobike$member_gender)

# Normalise the labels on the character values rather than on the factor:
#   "?"            -> NA
#   "M", "F", "O"  -> "Male", "Female", "Other"
# Working on characters avoids casting "?" to a factor that may not have that
# level, and avoids the unknown-level warnings (and the literal "NA" level)
# that collapsing levels which are not present would produce.
gender_labels <- as.character(fordgobike$member_gender)
gender_labels[gender_labels %in% "?"] <- NA_character_
short_codes <- c(M = "Male", F = "Female", O = "Other")
is_short_code <- gender_labels %in% names(short_codes)
gender_labels[is_short_code] <- unname(short_codes[gender_labels[is_short_code]])
fordgobike$member_gender <- factor(gender_labels)
fct_count(fordgobike$member_gender)

# The same two plots after cleaning.
fordgobike %>%
  filter(age <= 80) %>%
  ggplot(aes(x = age, color = member_gender)) +
  geom_histogram(position = "identity") +
  facet_grid(member_gender ~ .)
fordgobike %>%
  filter(age <= 80) %>%
  ggplot(aes(x = age, color = member_gender)) +
  geom_histogram(aes(y = ..density..), position = "identity") +
  facet_grid(member_gender ~ .)
```
Year and day of week.
```{r}
# Calendar columns before and after storing year and month as integers.
fordgobike %>% select(year, month, day)
fordgobike <- fordgobike %>%
  mutate(
    year = as.integer(year),
    month = as.integer(month)
  )
fordgobike %>% select(year, month, day)

# Spot-check July 2018. The columns are integers, so they are compared with
# integer literals instead of the strings '2018' and '7'.
fordgobike %>%
  select(year, month, day) %>%
  filter(year == 2018L, month == 7L)

# Trips per year, then per month and per day of month with one panel per year.
fordgobike %>%
  ggplot(aes(x = year)) +
  geom_bar()
fordgobike %>%
  ggplot(aes(x = month)) +
  geom_bar() +
  facet_grid(year ~ .)
fordgobike %>%
  ggplot(aes(x = day)) +
  geom_bar() +
  facet_grid(year ~ .)
```
```{r}
# Scatter plots of start-station coordinates per year, restricted to a
# bounding box around the service area.
# The longitude bound was written as `< 120`, which every western-hemisphere
# longitude satisfies; it is now `< -120` so that it actually filters.
# NOTE(review): confirm -120 is the intended western cut-off.

# 2017
fordgobike_restricted <- fordgobike2017 %>%
  filter(start_station_latitude < 38 & start_station_longitude < -120)
fordgobike_subset_2017 <- fordgobike_restricted %>%
  select(start_station_longitude, start_station_latitude, member_gender)
fordgobike_subset_2017 %>%
  ggplot(aes(x = start_station_longitude, y = start_station_latitude)) +
  geom_point()

# 2018
fordgobike_restricted <- fordgobike2018 %>%
  filter(start_station_latitude < 38 & start_station_longitude < -120)
fordgobike_subset_2018 <- fordgobike_restricted %>%
  select(start_station_longitude, start_station_latitude, member_gender)
fordgobike_subset_2018 %>%
  ggplot(aes(x = start_station_longitude, y = start_station_latitude)) +
  geom_point()

# 2019 additionally requires latitude > 37.
fordgobike_restricted <- fordgobike2019 %>%
  filter(
    start_station_latitude > 37 &
      start_station_latitude < 38 &
      start_station_longitude < -120
  )
fordgobike_subset_2019 <- fordgobike_restricted %>%
  select(start_station_longitude, start_station_latitude, member_gender)
fordgobike_subset_2019 %>%
  ggplot(aes(x = start_station_longitude, y = start_station_latitude)) +
  geom_point()
```
```{r}
library(biganalytics)
# Run in parallel. doParallel is used here because it also works on Windows;
# the doMC backend relies on forking and does not.
library(doParallel)
registerDoParallel(cores = 8)
head(fordgobike2018)

# Cluster on the two coordinate columns only. Selecting them by name (rather
# than dropping column 3) keeps member_gender -- and the `clust` column added
# at the end of this chunk -- out of the matrix when the chunk is re-run.
fordgobike_subset2 <- as.matrix(
  fordgobike_subset_2018[c("start_station_longitude", "start_station_latitude")]
)

# Seed the random number generator. The original `set.seed <- 123454321`
# only created a variable called `set.seed` and seeded nothing.
# NOTE(review): with a parallel backend registered, the seed alone may not
# make the nstart runs fully reproducible -- verify.
set.seed(123454321)

# k-means with 3 centres and 8 random starts, timed with tic()/toc().
tic()
cl <- bigkmeans(fordgobike_subset2, 3, nstart = 8)
toc()
head(cl$cluster)
cl$centers

# Start stations coloured by assigned cluster, then keep the assignment.
fordgobike_subset_2018 %>%
  ggplot(aes(
    x = start_station_longitude,
    y = start_station_latitude,
    color = cl$cluster
  )) +
  geom_point()
fordgobike_subset_2018 <- fordgobike_subset_2018 %>%
  mutate(clust = cl$cluster)
```
```{r}
# Reference point used below: City of Oakland, c(-122.2711, 37.8044),
# given as (longitude, latitude).
# Assigning a new point to its nearest cluster centre follows
# https://stackoverflow.com/questions/20621250/simple-approach-to-assigning-clusters-for-new-data-after-k-means-clustering
cl[["centers"]]
# Find the cluster centre nearest to a point (Euclidean distance).
#
# x:       numeric vector of coordinates, in the same order as the columns
#          of `centers`.
# centers: matrix with one row per cluster centre. Defaults to the centres
#          of the global fit `cl`, which is what the function always used.
#
# Returns the row index of the nearest centre; if `centers` has row names,
# the index carries the matching name.
closest.cluster <- function(x, centers = cl$centers) {
  # Guard against silent recycling when the point and the centres have a
  # different number of coordinates.
  stopifnot(length(x) == ncol(centers))
  cluster.dist <- apply(centers, 1, function(y) sqrt(sum((x - y)^2)))
  which.min(cluster.dist)[1]
}
# Cluster whose centre is nearest to Oakland, given as (longitude, latitude).
oakland_lon_lat <- c(-122.2711, 37.8044)
oak <- closest.cluster(oakland_lon_lat)
oak

# Keep the 2018 start stations assigned to that cluster and plot them.
oakland <- filter(fordgobike_subset_2018, clust == oak)
ggplot(
  oakland,
  aes(x = start_station_longitude, y = start_station_latitude)
) +
  geom_point()
```
```{r}
# Base-R k-means on the same coordinates, for comparison with bigkmeans.
# The coordinate columns are selected by name: once the `clust` column has
# been added to fordgobike_subset_2018, dropping column 3 would leave `clust`
# in the input and the earlier cluster labels would be clustered as well.
coords_2018 <- fordgobike_subset_2018[
  c("start_station_longitude", "start_station_latitude")
]

# kmeans() starts from randomly chosen centres; seed it for repeatable output.
set.seed(123454321)
tic()
cl.km <- kmeans(coords_2018, 3)
toc()
cl.km$centers

# City of Oakland: c(-122.2711, 37.8044)
fordgobike_subset_2018 %>%
  ggplot(aes(
    x = start_station_longitude,
    y = start_station_latitude,
    color = cl.km$cluster
  )) +
  geom_point()
```
Gender of users
```{r}
# Total ride duration (seconds) per gender.
# The durations are summed first and drawn as one bar per gender; the
# original stacked one rectangle per trip to reach the same bar heights.
# as.numeric() keeps the sum in double precision.
fordgobike %>%
  group_by(member_gender) %>%
  summarise(duration_sec = sum(as.numeric(duration_sec), na.rm = TRUE)) %>%
  ggplot(aes(x = member_gender, y = duration_sec)) +
  geom_col() +
  ggtitle("Bay Area")
```
Duration of rides in the Bay Area
```{r}
# Distribution of ride duration: histogram on the density scale with a
# density curve on top. The y mapping is set once in ggplot(), so the layers
# do not repeat it.

# Raw seconds, x axis limited to 0-10000.
fordgobike %>%
  ggplot(aes(x = duration_sec, y = ..density..)) +
  scale_x_continuous(limits = c(0, 10000)) +
  geom_histogram() +
  geom_density()

# Log scale. `x =` now names the aesthetic; it used to sit inside log().
fordgobike %>%
  ggplot(aes(x = log(duration_sec), y = ..density..)) +
  geom_histogram() +
  geom_density()

# The same two plots with one panel per gender.
fordgobike %>%
  ggplot(aes(x = duration_sec, y = ..density..)) +
  scale_x_continuous(limits = c(0, 10000)) +
  geom_histogram() +
  geom_density() +
  facet_grid(member_gender ~ .)
fordgobike %>%
  ggplot(aes(x = log(duration_sec), y = ..density..)) +
  geom_histogram() +
  geom_density() +
  facet_grid(member_gender ~ .)
```
Duration by City
```{r}
# Ride-duration distributions for the 2018 table.
# NOTE(review): the section is titled "Duration by City", but both pairs of
# plots below use all of fordgobike2018 and are identical -- presumably each
# pair was meant to use a per-city subset. Confirm and substitute the subsets.

# First pair: raw seconds (x axis limited to 0-10000), then log scale.
# `x =` now names the aesthetic; it used to sit inside log().
fordgobike2018 %>%
  ggplot(aes(x = duration_sec, y = ..density..)) +
  scale_x_continuous(limits = c(0, 10000)) +
  geom_histogram() +
  geom_density()
fordgobike2018 %>%
  ggplot(aes(x = log(duration_sec), y = ..density..)) +
  geom_histogram() +
  geom_density()

# Second pair: same plots as the first.
fordgobike2018 %>%
  ggplot(aes(x = duration_sec, y = ..density..)) +
  scale_x_continuous(limits = c(0, 10000)) +
  geom_histogram() +
  geom_density()
fordgobike2018 %>%
  ggplot(aes(x = log(duration_sec), y = ..density..)) +
  geom_histogram() +
  geom_density()
```