Using Databases using R.
See the RSQLite website.
library(tidyverse)
library(dplyr)
library(dbplyr)
library(RSQLite)
library(DBI)
library(beepr)
library(nycflights13)
con <- dbConnect(RSQLite::SQLite(), ":memory:")
dbListTables(con)
character(0)
dbWriteTable(con, "mtcars", mtcars)
dbListTables(con)
[1] "mtcars"
dbListFields(con, "mtcars")
[1] "mpg" "cyl" "disp" "hp" "drat" "wt" "qsec" "vs" "am" "gear" "carb"
dbReadTable(con, "mtcars")
res <- dbSendQuery(con, "SELECT * FROM mtcars WHERE cyl = 4")
dbFetch(res)
dbClearResult(res)
dbDisconnect(con)
con2 <- dbConnect(RSQLite::SQLite(), ":memory:")
dbListTables(con2)
character(0)
dbWriteTable(con2, "iris", iris)
dbListTables(con2)
[1] "iris"
dbListFields(con2, "iris")
[1] "Sepal.Length" "Sepal.Width" "Petal.Length" "Petal.Width" "Species"
dbReadTable(con2, "iris")
res <- dbSendQuery(con2, "SELECT * FROM iris")
dbFetch(res)
dbClearResult(res)
dbDisconnect(con2)
con3 <- dbConnect(RSQLite::SQLite(), ":memory:")
dbListTables(con3)
character(0)
copy_to(con3, nycflights13::flights, "flights",
temporary = FALSE,
indexes = list(
c("year", "month", "day"),
"carrier",
"tailnum",
"dest"
)
)
dbListTables(con3)
[1] "flights" "sqlite_stat1" "sqlite_stat4"
dbListFields(con3, "flights")
[1] "year" "month" "day" "dep_time"
[5] "sched_dep_time" "dep_delay" "arr_time" "sched_arr_time"
[9] "arr_delay" "carrier" "flight" "tailnum"
[13] "origin" "dest" "air_time" "distance"
[17] "hour" "minute" "time_hour"
dbReadTable(con3, "flights")
res <- dbSendQuery(con3, "SELECT * FROM flights WHERE day=1")
dbFetch(res)
dbClearResult(res)
dbDisconnect(con3)
con4 <- dbConnect(RSQLite::SQLite(), ":memory:")
dbListTables(con4)
character(0)
copy_to(con4, nycflights13::flights, "flights",
temporary = FALSE,
indexes = list(
c("year", "month", "day"),
"carrier",
"tailnum",
"dest"
)
)
flights_db <- tbl(con4, "flights")
flights_db
flights_db %>% select(year:day, dep_delay, arr_delay)
flights_db %>% filter(dep_delay > 240)
flights_db %>%
group_by(dest) %>%
summarise(delay = mean(dep_time))
tailnum_delay_db <- flights_db %>%
group_by(tailnum) %>%
summarise(
delay = mean(arr_delay),
n = n()
) %>%
arrange(desc(delay)) %>%
filter(n > 100)
tailnum_delay_db
tailnum_delay_db %>% show_query()
<SQL>
SELECT *
FROM (SELECT *
FROM (SELECT `tailnum`, AVG(`arr_delay`) AS `delay`, COUNT() AS `n`
FROM `flights`
GROUP BY `tailnum`)
ORDER BY `delay` DESC)
WHERE (`n` > 100.0)
dbDisconnect(con4)
nycflights13 in a sqlite db saved to your harddrive in the data directory Write the nycflights to a db.
my_db_file <- "data/nycflights13_new.sqlite"
my_db <- src_sqlite(my_db_file, create = TRUE)
con <- dbConnect(RSQLite::SQLite(), "data/nycflights13_new.sqlite")
dbListTables(con)
character(0)
dbWriteTable(con, "flights", nycflights13::flights)
dbWriteTable(con, "planes", nycflights13::planes)
dbWriteTable(con, "airlines", nycflights13::airlines)
dbWriteTable(con, "airports", nycflights13::airports)
dbWriteTable(con, "weather", nycflights13::weather)
dbListTables(con)
[1] "airlines" "airports" "flights" "planes" "weather"
dbListFields(con, "flights")
[1] "year" "month" "day" "dep_time"
[5] "sched_dep_time" "dep_delay" "arr_time" "sched_arr_time"
[9] "arr_delay" "carrier" "flight" "tailnum"
[13] "origin" "dest" "air_time" "distance"
[17] "hour" "minute" "time_hour"
dbReadTable(con, "flights")
res <- dbSendQuery(con, "SELECT * FROM flights WHERE origin = 'JFK'")
dbFetch(res)
dbClearResult(res)
dbDisconnect(con)
my_db_file <- "data/mtcars.sqlite"
my_db <- src_sqlite(my_db_file, create = TRUE)
con <- dbConnect(RSQLite::SQLite(), "data/mtcars.sqlite")
dbListTables(con)
character(0)
dbWriteTable(con, "mtcars", mtcars)
dbListTables(con)
[1] "mtcars"
dbListFields(con, "mtcars")
[1] "mpg" "cyl" "disp" "hp" "drat" "wt" "qsec" "vs" "am" "gear" "carb"
dbReadTable(con, "mtcars")
res <- dbSendQuery(con, "SELECT * FROM mtcars WHERE cyl = 4")
dbFetch(res)
dbClearResult(res)
dbDisconnect(con)
my_db_file <- "data/iris.sqlite"
my_db <- src_sqlite(my_db_file, create = TRUE)
con <- dbConnect(RSQLite::SQLite(), "data/iris.sqlite")
dbListTables(con)
character(0)
dbWriteTable(con, "iris", iris)
dbListTables(con)
[1] "iris"
dbListFields(con, "iris")
[1] "Sepal.Length" "Sepal.Width" "Petal.Length" "Petal.Width" "Species"
dbReadTable(con, "iris")
res <- dbSendQuery(con, " SELECT * FROM iris WHERE Species = 'setosa' ")
dbFetch(res)
dbClearResult(res)
dbDisconnect(con)
library(gapminder)
my_db_file <- "data/gapminder.sqlite"
my_db <- src_sqlite(my_db_file, create = TRUE)
con <- dbConnect(RSQLite::SQLite(), "data/gapminder.sqlite")
dbListTables(con)
character(0)
dbWriteTable(con, "gapminder", gapminder)
dbListTables(con)
[1] "gapminder"
dbListFields(con, "gapminder")
[1] "country" "continent" "year" "lifeExp" "pop" "gdpPercap"
dbReadTable(con, "gapminder")
res <- dbSendQuery(con, " SELECT * FROM gapminder WHERE Country = 'United States' ")
dbFetch(res)
dbClearResult(res)
dbDisconnect(con)
my_db_file <- "data/all_4.sqlite"
my_db <- src_sqlite(my_db_file, create = TRUE)
con <- dbConnect(RSQLite::SQLite(), "data/all_4.sqlite")
dbListTables(con)
character(0)
dbWriteTable(con, "mtcars", mtcars)
dbWriteTable(con, "iris", iris)
dbWriteTable(con, "gapmider", gapminder)
dbWriteTable(con, "flights", flights)
dbListTables(con)
[1] "flights" "gapmider" "iris" "mtcars"
dbListFields(con, "iris")
[1] "Sepal.Length" "Sepal.Width" "Petal.Length" "Petal.Width" "Species"
dbListFields(con, "flights")
[1] "year" "month" "day" "dep_time"
[5] "sched_dep_time" "dep_delay" "arr_time" "sched_arr_time"
[9] "arr_delay" "carrier" "flight" "tailnum"
[13] "origin" "dest" "air_time" "distance"
[17] "hour" "minute" "time_hour"
dbDisconnect(con)