Some examples from Chapter 5
With the map functions there is no need for the MARGIN option to specify rows (1) or columns (2).
# Attach pacman, then let p_load() install (if missing) and attach the
# packages used in the examples below.
library(pacman)
# The package is spelled "purrr" (three r's); "purr" does not exist on CRAN,
# so p_load() could not install or load it.
p_load(tidyverse, purrr, stringr, Lahman)
package ‘purr’ is not available (for R version 3.4.4)there is no package called ‘purr’Failed to install/load:
purr
# Column means of Teams columns 15 to 40 with base apply().
# MARGIN = 2 tells apply() to work column by column.
Teams %>%
  select(15:40) %>%
  apply(MARGIN = 2, FUN = mean, na.rm = TRUE)
R AB H X2B X3B HR BB SO
683.6117444 5154.5474957 1347.9136442 229.1409326 46.5067358 103.9675302 475.2294402 751.0319555
SB CS HBP SF RA ER ERA CG
111.0223908 47.6912264 45.3644214 45.0472674 683.6107081 574.1388601 3.8231744 48.9806563
SHO SV IPouts HA HRA BBA SOA E
9.6856649 24.2162349 4032.8117444 1347.6863558 103.9675302 475.4248705 750.5229706 184.1388601
DP FP
133.5395509 0.9658021
# Same column means with purrr: map() visits each column and returns a list,
# which unlist() flattens into a named numeric vector.
Teams %>%
  select(15:40) %>%
  map(mean, na.rm = TRUE) %>%
  unlist()
R AB H X2B X3B HR BB SO
683.6117444 5154.5474957 1347.9136442 229.1409326 46.5067358 103.9675302 475.2294402 751.0319555
SB CS HBP SF RA ER ERA CG
111.0223908 47.6912264 45.3644214 45.0472674 683.6107081 574.1388601 3.8231744 48.9806563
SHO SV IPouts HA HRA BBA SOA E
9.6856649 24.2162349 4032.8117444 1347.6863558 103.9675302 475.4248705 750.5229706 184.1388601
DP FP
133.5395509 0.9658021
# map_df() computes the same per-column means but binds them into a
# data frame instead of returning a list.
Teams %>%
  select(15:40) %>%
  map_df(mean, na.rm = TRUE)
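Note that lapply() returns a list. The map() function also returns a list, whereas map_df() returns a data frame (tibble) and sapply() simplifies its result to a vector or matrix when it can.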
# One row per (teamID, name) pair of the ANA franchise, recording the first
# and last yearID that appear in each group, ordered by the starting year.
angles <- Teams %>%
  filter(franchID == "ANA") %>%
  group_by(teamID, name) %>%
  summarize(began = first(yearID), ended = last(yearID)) %>%
  arrange(began)
`summarise()` regrouping output by 'teamID' (override with `.groups` argument)
# Show the summary table.
angles

# Keep just the team names. The summary is still grouped by teamID, and
# select() always keeps grouping columns, so the grouping is removed first.
angles_names <- angles %>%
  ungroup(teamID) %>%
  select(name)

# Still a tibble (one column), not a character vector.
class(angles_names)
[1] "tbl_df" "tbl" "data.frame"
# Character count of each team name, one cell at a time.
# Each [i, 1] lookup on the tibble keeps the column label, which is why the
# recorded output shows "name" above every count.
nchar(angles_names[1,1])
name
18
nchar(angles_names[2,1])
name
17
nchar(angles_names[3,1])
name
14
nchar(angles_names[4,1])
name
29
# lapply() applies nchar() to each column of the one-column tibble and
# returns a list with one element (the vector of name lengths).
x <- lapply(angles_names, FUN = nchar)
class(x)
[1] "list"
x
$name
[1] 18 17 14 29
# sapply() performs the same computation but simplifies the result;
# here the recorded class is "matrix" (one column named "name").
y <- sapply(angles_names, FUN = nchar)
class(y)
[1] "matrix"
y
name
[1,] 18
[2,] 17
[3,] 14
[4,] 29
# purrr counterpart of the lapply() call: map() also returns a list.
z <- angles_names %>% map(., str_length)
class(z)
[1] "list"
z
$name
[1] 18 17 14 29
# map_df() yields the same lengths as a tibble rather than a list.
z <- angles_names %>% map_df(., str_length)
class(z)
[1] "tbl_df" "tbl" "data.frame"
z
#' Best five seasons (by wins) for one or more team names
#'
#' @param x A data frame of team seasons containing the columns `teamID`,
#'   `yearID`, `W`, `L` and `name`.
#' @param teamnames A character vector holding one or more team names to keep.
#' @return The rows of `x` whose `name` is in `teamnames`, reduced to the
#'   columns `teamID`, `yearID`, `W`, `L` and `name`, sorted by `W` in
#'   descending order and limited to the first five rows.
top5 <- function(x, teamnames) {
  x %>%
    # `%in%` rather than `==`: with several names, `==` recycles `teamnames`
    # along the column, compares row i against an arbitrary name, and warns
    # about mismatched lengths. `%in%` tests set membership and gives the
    # same answer as `==` when a single name is supplied.
    filter(name %in% teamnames) %>%
    select(teamID, yearID, W, L, name) %>%
    arrange(desc(W)) %>%
    head(n = 5)
}
# Run top5() once per team name. lapply() over a tibble iterates over its
# columns, which would hand top5() every name at once; iterating over the
# `name` column itself passes one name per call.
angles_list <- lapply(angles_names$name, FUN = top5, x = Teams)
longer object length is not a multiple of shorter object length
# Inspect what lapply() returned: first its class, then its contents.
class(angles_list)
[1] "list"
angles_list
$name
# purrr version of the per-team lookup. pull() extracts the `name` column as
# a character vector so that map() calls top5() once per team name; mapping
# over the tibble itself would pass the whole column in a single call.
angles_list <- angles_names %>%
  pull(name) %>%
  map(top5, x = Teams)
longer object length is not a multiple of shorter object length
# Inspect what map() returned: first its class, then its contents.
class(angles_list)
[1] "list"
angles_list
$name
NA
library(mdsr)
Loading required package: lattice
Loading required package: ggformula
Loading required package: ggstance
Attaching package: ‘ggstance’
The following objects are masked from ‘package:ggplot2’:
geom_errorbarh, GeomErrorbarh
New to ggformula? Try the tutorials:
learnr::run_tutorial("introduction", package = "ggformula")
learnr::run_tutorial("refining", package = "ggformula")
Loading required package: mosaicData
Loading required package: Matrix
Attaching package: ‘Matrix’
The following objects are masked from ‘package:tidyr’:
expand, pack, unpack
The 'mosaic' package masks several functions from core packages in order to add
additional features. The original behavior of these functions should not be affected by this.
Note: If you use the Matrix package, be sure to load it BEFORE loading mosaic.
In accordance with CRAN policy, the 'mdsr' package
no longer attaches
the 'tidyverse' package automatically.
You may need to 'library(tidyverse)' in order to
use certain functions.
# Print the raw bird-banding data.
OrdwayBirds

# Look only at the date-related columns; glimpse() lists each column with
# its type and leading values.
OrdwayBirds %>%
  select(Timestamp, Year, Month, Day) %>%
  glimpse()
Observations: 15,829
Variables: 4
$ Timestamp [3m[38;5;246m<chr>[39m[23m "4/14/2010 13:20:56", "", "5/13/2010 16:00:30", "5/13/2010 16:02:15", "5/13/2010 16:…
$ Year [3m[38;5;246m<chr>[39m[23m "1972", "", "1972", "1972", "1972", "1972", "1972", "1972", "1972", "1972", "1972", …
$ Month [3m[38;5;246m<chr>[39m[23m "7", "", "7", "7", "7", "7", "7", "7", "7", "7", "7", "7", "7", "7", "7", "7", "7", …
$ Day [3m[38;5;246m<chr>[39m[23m "16", "", "16", "16", "16", "16", "16", "16", "16", "16", "17", "18", "18", "18", "1…
Convert to numbers
# Year, Month and Day are stored as character strings; parse them into
# numbers before inspecting the columns. Only the displayed copy is
# converted; OrdwayBirds itself is left unchanged.
OrdwayBirds %>%
  select(Timestamp, Year, Month, Day) %>%
  mutate(
    Year = parse_number(Year),
    Month = parse_number(Month),
    Day = parse_number(Day)
  ) %>%
  glimpse()
Observations: 15,829
Variables: 4
$ Timestamp [3m[38;5;246m<chr>[39m[23m "4/14/2010 13:20:56", "", "5/13/2010 16:00:30", "5/13/2010 16:02:15", "5/13/2010 16:…
$ Year [3m[38;5;246m<chr>[39m[23m "1972", "", "1972", "1972", "1972", "1972", "1972", "1972", "1972", "1972", "1972", …
$ Month [3m[38;5;246m<chr>[39m[23m "7", "", "7", "7", "7", "7", "7", "7", "7", "7", "7", "7", "7", "7", "7", "7", "7", …
$ Day [3m[38;5;246m<chr>[39m[23m "16", "", "16", "16", "16", "16", "16", "16", "16", "16", "17", "18", "18", "18", "1…
convert Timestamp
library(lubridate)
Attaching package: ‘lubridate’
The following object is masked from ‘package:base’:
date
# Parse the month/day/year timestamp strings into date-times (`When`) and
# keep the date columns plus the person who entered each record.
# glimpse() prints a summary and passes the data through, so WhenAndWho
# still receives the selected data frame.
WhenAndWho <- OrdwayBirds %>%
  mutate(When = mdy_hms(Timestamp)) %>%
  select(Timestamp, Year, Month, Day, When, DataEntryPerson) %>%
  glimpse()
Observations: 15,829
Variables: 6
$ Timestamp [3m[38;5;246m<chr>[39m[23m "4/14/2010 13:20:56", "", "5/13/2010 16:00:30", "5/13/2010 16:02:15", "5/13/20…
$ Year [3m[38;5;246m<chr>[39m[23m "1972", "", "1972", "1972", "1972", "1972", "1972", "1972", "1972", "1972", "1…
$ Month [3m[38;5;246m<chr>[39m[23m "7", "", "7", "7", "7", "7", "7", "7", "7", "7", "7", "7", "7", "7", "7", "7",…
$ Day [3m[38;5;246m<chr>[39m[23m "16", "", "16", "16", "16", "16", "16", "16", "16", "16", "17", "18", "18", "1…
$ When [3m[38;5;246m<dttm>[39m[23m 2010-04-14 13:20:56, NA, 2010-05-13 16:00:30, 2010-05-13 16:02:15, 2010-05-13…
$ DataEntryPerson [3m[38;5;246m<chr>[39m[23m "Jerald Dosch", "Caitlin Baker", "Caitlin Baker", "Caitlin Baker", "Caitlin Ba…
# When each person entered data: one translucent, jittered point per record.
ggplot(WhenAndWho, aes(x = When, y = DataEntryPerson)) +
  geom_point(alpha = 0.1, position = "jitter")

# Per person: the first and last `When` values in row order, and the span
# between them expressed in days.
WhenAndWho %>%
  group_by(DataEntryPerson) %>%
  summarize(start = first(When), finish = last(When)) %>%
  mutate(duration = interval(start, finish) / ddays(1))
`summarise()` ungrouping output (override with `.groups` argument)
# Base R route: as.Date() drops the time of day from now(), so "now" and
# "today" fall on the same calendar date and their difference is 0 days.
# NOTE(review): as.Date() on a date-time converts using UTC by default, so
# late in the local day it may not agree with today() - confirm.
now()
[1] "2020-09-29 12:36:35 PDT"
as.Date(now())
[1] "2020-09-29"
today()
[1] "2020-09-29"
as.Date(today())
[1] "2020-09-29"
as.Date(now()) - as.Date(today())
Time difference of 0 days
# Same comparison using lubridate's as_date() in place of as.Date().
now()
[1] "2020-09-29 12:36:35 PDT"
as_date(now())
[1] "2020-09-29"
today()
[1] "2020-09-29"
as_date(today())
[1] "2020-09-29"
as_date(now()) - as_date(today())
Time difference of 0 days
How many days have you woken up in the morning? Change the date.
What is wrong with this?
# No format is supplied, so as.Date() does not read the string as
# month/day/year; the recorded difference (737678 days) is far too large.
as.Date(today()) - as.Date("01/01/1970")
Time difference of 737678 days
Note the use of the date format.
# Supplying the "%m/%d/%Y" format makes as.Date() read the string as
# 1 January 1970, which gives the expected 18534 days.
as.Date(today()) - as.Date("01/01/1970", "%m/%d/%Y")
Time difference of 18534 days
In the lubridate package there is the mdy() function.
# lubridate route: mdy() parses the month/day/year string directly, so no
# format string is needed. The difference matches the base R result.
as_date(today())
[1] "2020-09-29"
as_date(mdy("01/01/1970"))
[1] "1970-01-01"
as_date(today()) - as_date(mdy("01/01/1970"))
Time difference of 18534 days