# The pacman package has the p_load() function that checks whether a package
# is installed and, if it is not, installs it before loading it.
# The DataExplorer package has the create_report() function that uses AutoEDA
# to produce a report about all variables.
# https://cran.r-project.org/web/packages/DataExplorer/vignettes/dataexplorer-intro.html#slicing-dicing
library(pacman)
p_load(nycflights13, tidyverse, visdat, naniar, DataExplorer)

# Lookup tables ----
# Read each table straight from the nycflights13 namespace so the script can
# be re-run in the same session: a bare `airlines` would resolve to the
# already-renamed global copy on the second run and rename() would fail.
# `name` exists in both airlines and airports, so each gets a distinct name
# before the tables are joined together.
airlines <- nycflights13::airlines %>%
  rename(name_airlines = name)
planes <- nycflights13::planes %>%
  rename(year_manufacture = year)
airports <- nycflights13::airports %>%
  rename(name_airport = name)

# Joined analysis table ----
# One row per flight, enriched with carrier, aircraft and origin-airport
# attributes. Manufacturer spelling variants are merged into a single level.
final_nycflights13 <- flights %>%
  left_join(airlines, by = "carrier") %>%
  left_join(planes, by = "tailnum") %>%
  left_join(airports, by = c("origin" = "faa")) %>%
  select(-speed) %>%
  mutate(
    manufacturer = fct_collapse(
      manufacturer,
      AIRBUS = c("AIRBUS INDUSTRIE", "AIRBUS"),
      "MCDONNELL DOUGLAS" = c(
        "MCDONNELL DOUGLAS AIRCRAFT CO",
        "MCDONNELL DOUGLAS CORPORATION",
        "MCDONNELL DOUGLAS"
      )
    )
  ) %>%
  select(-dst, -tzone, -tz)

# Temporal columns as factors ----
temporal_features <- c("month", "day", "hour", "minute")

# update_columns() comes from the DataExplorer package.
final_nycflights13 <- update_columns(
  final_nycflights13,
  temporal_features,
  as.factor
)

# Inspect the converted columns; reuse `temporal_features` so this check
# cannot drift from the list of columns that was actually converted.
final_nycflights13 %>%
  select(all_of(temporal_features)) %>%
  str()

# AutoEDA report ----
config <- configure_report(
  add_plot_str = TRUE,
  add_plot_qq = FALSE,
  add_plot_prcomp = FALSE,
  add_plot_boxplot = TRUE,
  add_plot_scatterplot = TRUE,
  global_ggtheme = quote(theme_minimal(base_size = 14))
)

# `arr_delay` is passed as the response variable `y` for the report.
create_report(final_nycflights13, y = "arr_delay", config = config)