# Exploratory data analysis of the nycflights13 tables with DataExplorer.
#
# The pacman package has the p_load() function, which checks whether a
# package is installed and installs it if it is not.
# The DataExplorer package has the create_report() function, which uses
# AutoEDA to produce a report about all variables.
# https://cran.r-project.org/web/packages/DataExplorer/vignettes/dataexplorer-intro.html#slicing-dicing

# Setup ----

# p_load() can only install the *other* packages; pacman itself has to be
# present before library(pacman) can succeed, so bootstrap it first.
if (!requireNamespace("pacman", quietly = TRUE)) {
  install.packages("pacman")
}
library(pacman)
p_load(nycflights13, tidyverse, visdat, naniar, DataExplorer)

# Raw tables ----

# Print each source table for a first look.
flights
airlines
planes
airports
weather

# Show the structure of all five tables in a single diagram.
data_list <- list(airlines, airports, flights, planes, weather)
plot_str(data_list)

# Merge the tables into one flight-level data set ----

# `name` exists in both airlines and airports; rename it in each table so
# the two columns stay distinguishable after the joins.
airlines <- airlines %>%
  rename(name_airlines = name)
head(airlines)

merge_airlines <- left_join(flights, airlines, by = "carrier")
head(merge_airlines)

# `year` exists in both flights and planes; rename the planes column so it
# does not clash with the flights column in the join.
planes <- planes %>%
  rename(year_manufacture = year)
head(planes)

merge_planes <- left_join(merge_airlines, planes, by = "tailnum")
head(merge_planes)

airports <- airports %>%
  rename(name_airport = name)
head(airports)

# Airport attributes are attached for the origin airport only.
merge_airports_origin <- left_join(
  merge_planes,
  airports,
  by = c("origin" = "faa")
)
head(merge_airports_origin)

final_nycflights13 <- merge_airports_origin

# Overview and missing values ----

final_nycflights13 %>% introduce()
final_nycflights13 %>% plot_intro()
final_nycflights13 %>% plot_missing()

# Drop `speed`, then re-check the overview and missing-value profile.
# NOTE(review): presumably dropped because plot_missing() flags it as
# mostly missing -- confirm against the plot above.
final_nycflights13 <- final_nycflights13 %>%
  select(-speed)

final_nycflights13 %>% introduce()
final_nycflights13 %>% plot_intro()
final_nycflights13 %>% plot_missing()

# Discrete features ----

final_nycflights13 %>% plot_bar()

# Merge the spelling variants of the same manufacturer into one level.
final_nycflights13 <- final_nycflights13 %>%
  mutate(
    manufacturer = fct_collapse(
      manufacturer,
      AIRBUS = c("AIRBUS INDUSTRIE", "AIRBUS"),
      "MCDONNELL DOUGLAS" = c(
        "MCDONNELL DOUGLAS AIRCRAFT CO",
        "MCDONNELL DOUGLAS CORPORATION",
        "MCDONNELL DOUGLAS"
      )
    )
  )

final_nycflights13 %>%
  select(manufacturer) %>%
  plot_bar()

final_nycflights13 %>% introduce()

# Drop the airport time-zone columns before the remaining plots.
final_nycflights13 <- final_nycflights13 %>%
  select(-dst, -tzone, -tz)

final_nycflights13 %>% introduce()
final_nycflights13 %>% plot_bar()

# Conditional on arr_delay: bars are weighted by arr_delay rather than by
# row counts.
final_nycflights13 %>% plot_bar(with = "arr_delay")

# Continuous features ----

final_nycflights13 %>% plot_histogram()

# Correlation ----

# Remove incomplete rows once and reuse the result for every heatmap.
complete_flights <- na.omit(final_nycflights13)

# All features, skipping discrete ones with more than 5 categories.
plot_correlation(complete_flights, maxcat = 5L)
# Continuous features only.
plot_correlation(complete_flights, type = "c")
# Discrete features only.
plot_correlation(complete_flights, type = "d")

# Automated report ----

# Switch off the sections that are not wanted in the report and apply a
# common ggplot2 theme to every remaining plot.
config <- configure_report(
  add_plot_str = FALSE,
  add_plot_qq = FALSE,
  add_plot_prcomp = FALSE,
  add_plot_boxplot = FALSE,
  add_plot_scatterplot = FALSE,
  global_ggtheme = quote(theme_minimal(base_size = 14))
)

create_report(final_nycflights13, y = "arr_delay", config = config)