This is some of the code from Chapter 16. I was able to get the PageRank code to work.
library(pacman)
p_load(tidyverse)
library(mdsr)
Loading required package: lattice
Loading required package: ggformula
Loading required package: ggstance
Attaching package: ‘ggstance’
The following object is masked from ‘package:coefplot’:
position_dodgev
The following objects are masked from ‘package:ggplot2’:
geom_errorbarh, GeomErrorbarh
New to ggformula? Try the tutorials:
learnr::run_tutorial("introduction", package = "ggformula")
learnr::run_tutorial("refining", package = "ggformula")
Loading required package: mosaicData
The 'mosaic' package masks several functions from core packages in order to add
additional features. The original behavior of these functions should not be affected by this.
Note: If you use the Matrix package, be sure to load it BEFORE loading mosaic.
In accordance with CRAN policy, the 'mdsr' package
no longer attaches
the 'tidyverse' package automatically.
You may need to 'library(tidyverse)' in order to
use certain functions.
library(igraph)
Attaching package: ‘igraph’
The following object is masked from ‘package:srvyr’:
groups
The following object is masked from ‘package:rlang’:
is_named
The following object is masked from ‘package:tigris’:
blocks
The following objects are masked from ‘package:dplyr’:
as_data_frame, groups, union
The following objects are masked from ‘package:purrr’:
compose, simplify
The following object is masked from ‘package:tidyr’:
crossing
The following object is masked from ‘package:tibble’:
as_data_frame
The following objects are masked from ‘package:stats’:
decompose, spectrum
The following object is masked from ‘package:base’:
union
n <- 100
p_star <- log(n)/n
plot_er <- function(n, p, ...) {
g <- erdos.renyi.game(n, p)
plot(g, main = paste("p =", round(p, 4)), vertex.frame.color = "white", vertex.size = 3, vertex.label = NA, ...)
}
plot_er(n, p = 0.8 * p_star)
plot_er(n, p = 1.2 * p_star)
n <- 1000
p_star <- log(n)/n
ps <- rep(seq(from = 0, to = 2 * p_star, by = 0.001), each = 100)
er_connected <- function(n, p, ...) {
c(n = n, p = p, connected = is.connected(erdos.renyi.game(n, p)))
}
sims <- as.data.frame(t(sapply(ps, er_connected, n = n)))
ggplot(data = sims, aes(x = p, y = connected)) +
geom_vline(xintercept = p_star, color = "darkgray") +
geom_text(x = p_star, y = 0.9, label = "Threshold value", hjust="right") + labs(x = "Probability of edge existing",
y = "Probability that random graph is
connected") + geom_count() +
geom_smooth()
g1 <- erdos.renyi.game(n, p = log(n)/n)
g2 <- barabasi.game(n, m = 3, directed = FALSE)
summary(g1)
IGRAPH 27edb8d U--- 1000 3422 -- Erdos renyi (gnp) graph
+ attr: name (g/c), type (g/c), loops (g/l), p (g/n)
summary(g2)
IGRAPH d6d543d U--- 1000 2994 -- Barabasi graph
+ attr: name (g/c), power (g/n), m (g/n), zero.appeal (g/n), algorithm (g/c)
d <- data.frame(type = rep(c("Erdos-Renyi", "Barabasi-Albert"), each = n), degree = c(degree(g1), degree(g2)))
ggplot(data = d, aes(x = degree, color = type)) +
geom_density(size = 2) +
scale_x_continuous(limits = c(0, 25))
To get the example to run I downloaded the data from the kaggle website for the March Machine Learning Mania 2015. You need to have an account and your needs to log in to download the data. Note that the data file that is needed to run the example has a different name than the name in the book.
library(mdsr)
teams <- readr::read_csv("data/all/teams.csv")
Parsed with column specification:
cols(
team_id = [32mcol_double()[39m,
team_name = [31mcol_character()[39m
)
games <- readr::read_csv("data/all/regular_season_compact_results.csv") %>%
filter(season == 1996)
Parsed with column specification:
cols(
season = [32mcol_double()[39m,
daynum = [32mcol_double()[39m,
wteam = [32mcol_double()[39m,
wscore = [32mcol_double()[39m,
lteam = [32mcol_double()[39m,
lscore = [32mcol_double()[39m,
wloc = [31mcol_character()[39m,
numot = [32mcol_double()[39m
)
dim(games)
[1] 4122 8
E <- games %>%
mutate(score_ratio = wscore/lscore) %>%
select(lteam, wteam, score_ratio)
V <- teams %>%
filter(team_id %in% unique(c(E$lteam, E$wteam)))
library(igraph)
g <- graph_from_data_frame(E, directed = TRUE, vertices = V)
summary(g)
IGRAPH a4d5398 DN-- 305 4122 --
+ attr: name (v/c), team_name (v/c), score_ratio (e/n)
g <- set_vertex_attr(g, "pagerank", value = page_rank(g)$vector)
as_data_frame(g, what = "vertices") %>%
arrange(desc(pagerank)) %>%
head(20)
wins <- E %>%
group_by(wteam) %>%
summarise(N = n())
losses <- E %>%
group_by(lteam) %>%
summarise(N = n())
wins %>% full_join(losses, by = c("wteam" = "lteam")) %>%
left_join(teams, by = c("wteam" = "team_id")) %>%
rename(wins = N.x, losses = N.y) %>%
mutate(win_pct = wins / (wins + losses)) %>%
arrange(desc(win_pct)) %>%
head(20)
E %>% filter(wteam == 1269 & lteam == 1246)
E %>% filter(lteam %in% c(1203, 1269) & wteam %in% c(1203, 1269))
A_10 <- c("Massachusetts", "Temple", "G Washington", "Rhode Island", "St Bonaventure", "St Joseph's PA", "Virginia Tech", "Xavier", "Dayton", "Duquesne", "La Salle", "Fordhham")
a10 <- V(g)[ team_name %in% A_10 ]
a <- induced_subgraph(g, vids = a10)
a <- set_vertex_attr(a, "pagerank", value = page_rank(a)$vector)
summary(a)
IGRAPH 405e439 DN-- 11 90 --
+ attr: name (v/c), team_name (v/c), pagerank (v/n), score_ratio (e/n)
library(ggnetwork)
library(intergraph)
a_df <- ggnetwork(a)
Loading required package: sna
Loading required package: statnet.common
Attaching package: ‘statnet.common’
The following object is masked from ‘package:base’:
order
Loading required package: network
network: Classes for Relational Data
Version 1.15 created on 2019-04-01.
copyright (c) 2005, Carter T. Butts, University of California-Irvine
Mark S. Handcock, University of California -- Los Angeles
David R. Hunter, Penn State University
Martina Morris, University of Washington
Skye Bender-deMoll, University of Washington
For citation information, type citation("network").
Type help("network-package") to get started.
Attaching package: ‘network’
The following objects are masked from ‘package:igraph’:
%c%, %s%, add.edges, add.vertices, delete.edges, delete.vertices, get.edge.attribute,
get.edges, get.vertex.attribute, is.bipartite, is.directed, list.edge.attributes,
list.vertex.attributes, set.edge.attribute, set.vertex.attribute
sna: Tools for Social Network Analysis
Version 2.4 created on 2016-07-23.
copyright (c) 2005, Carter T. Butts, University of California-Irvine
For citation information, type citation("sna").
Type help(package="sna") to get started.
Attaching package: ‘sna’
The following objects are masked from ‘package:igraph’:
betweenness, bonpow, closeness, components, degree, dyad.census, evcent, hierarchy,
is.connected, neighborhood, triad.census
duplicated edges detected
ggplot(a_df, aes(x, y, xend = xend, yend = yend)) +
geom_edges(aes(alpha = score_ratio), color = "lightgray", arrow = arrow(length = unit(0.2, "cm")), curvature = 0.2) +
geom_nodes(aes(size = pagerank, color = pagerank), alpha = 0.6) + geom_nodetext(aes(label = team_name)) +
scale_alpha_continuous(range = c(0.4, 1)) +
scale_size_continuous(range = c(1, 10)) +
guides(color = guide_legend("PageRank"), size=guide_legend("PageRank")) +
theme_blank()
P <- t(as_adjacency_matrix(a, sparse = FALSE, attr = "score_ratio"))
P <- scale(P, center = FALSE, scale = colSums(P))
round(P, 2)
1173 1182 1203 1247 1269 1348 1382 1386 1396 1439 1462
1173 0.00 0.09 0.00 0.10 0 0.14 0.11 0.00 0.00 0.00 0.16
1182 0.11 0.00 0.00 0.11 0 0.00 0.00 0.00 0.00 0.00 0.00
1203 0.14 0.12 0.00 0.10 1 0.14 0.11 0.17 0.33 0.27 0.16
1247 0.00 0.09 0.25 0.00 0 0.00 0.12 0.00 0.00 0.00 0.00
1269 0.14 0.09 0.26 0.12 0 0.14 0.12 0.16 0.41 0.25 0.15
1348 0.00 0.10 0.00 0.11 0 0.00 0.13 0.16 0.26 0.21 0.18
1382 0.13 0.08 0.00 0.00 0 0.14 0.00 0.00 0.00 0.00 0.00
1386 0.13 0.09 0.24 0.10 0 0.14 0.10 0.00 0.00 0.00 0.00
1396 0.14 0.15 0.00 0.13 0 0.15 0.10 0.16 0.00 0.27 0.19
1439 0.11 0.10 0.25 0.12 0 0.14 0.11 0.17 0.00 0.00 0.15
1462 0.11 0.09 0.00 0.11 0 0.00 0.12 0.18 0.00 0.00 0.00
attr(,"scaled:scale")
1173 1182 1203 1247 1269 1348 1382 1386 1396 1439
9.580291 12.193997 4.385632 10.705995 1.131579 7.620903 10.466131 6.573151 4.110908 5.112315
1462
6.894009
v0 <- rep(1, vcount(a)) / vcount(a)
v0
[1] 0.09090909 0.09090909 0.09090909 0.09090909 0.09090909 0.09090909 0.09090909 0.09090909 0.09090909
[10] 0.09090909 0.09090909
v <- v0
for (i in 1:20) {
v <- P %*% v
}
as.vector(v)
[1] 0.02620462 0.01073061 0.28880610 0.07414360 0.18416968 0.07731754 0.01477331 0.09291498 0.08133820
[10] 0.11927412 0.03032723
page_rank(a)$vect
1173 1182 1203 1247 1269 1348 1382 1386 1396
0.03683789 0.02134844 0.25160534 0.07006017 0.18793707 0.07779895 0.02541142 0.08807760 0.09042641
1439 1462
0.11095533 0.03954137
page_rank(a, damping = 1)$vec
1173 1182 1203 1247 1269 1348 1382 1386 1396
0.023832141 0.008035839 0.288239781 0.073484127 0.204591471 0.072857810 0.011052515 0.091039461 0.084180455
1439 1462
0.115619832 0.027066568
w <- v0
d <- 0.85
for (i in 1:20) {
w <- d * P %*% w + (1 - d) * v0
}
as.vector(w)
[1] 0.04033680 0.02382045 0.25181394 0.07127669 0.16836965 0.08332106 0.02933482 0.08934652 0.08765190
[10] 0.11231744 0.04241073
page_rank(a, damping = 0.85)$vector
1173 1182 1203 1247 1269 1348 1382 1386 1396
0.03683789 0.02134844 0.25160534 0.07006017 0.18793707 0.07779895 0.02541142 0.08807760 0.09042641
1439 1462
0.11095533 0.03954137