This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.

Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Ctrl+Alt+I.

When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Ctrl+Shift+K to preview the HTML file).

Example: Filtering spam SMS messages

Step 1: Download the data

# location of the SMS spam dataset (CSV)
URL <- "http://cox.csueastbay.edu/~esuess/classes/Statistics_6620/Presentations/ml6/sms_spam.csv"
# fetch the file into the working directory using the "curl" download method
download.file(
  url = URL,
  destfile = "./sms_spam.csv",
  method = "curl"
)
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed

  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
100  469k  100  469k    0     0  1243k      0 --:--:-- --:--:-- --:--:-- 1245k

Step 2: Exploring and preparing the data

# load the downloaded CSV, keeping text columns as character vectors
# instead of converting them to factors
sms_raw <- read.csv(
  file = "sms_spam.csv",
  stringsAsFactors = FALSE
)
# show the column types and a preview of the loaded data frame
str(sms_raw)
'data.frame':   5559 obs. of  2 variables:
 $ type: chr  "ham" "ham" "ham" "spam" ...
 $ text: chr  "Hope you are having a good week. Just checking in" "K..give back my thanks." "Am also doing in cbe only. But have to pay." "complimentary 4 STAR Ibiza Holiday or £10,000 cash needs your URGENT collection. 09066364349 NOW from Landline "| __truncated__ ...
# the type column holds the class label; store it as a factor
sms_raw[["type"]] <- factor(sms_raw[["type"]])
# inspect the resulting factor (levels and integer codes)
str(sms_raw[["type"]])
 Factor w/ 2 levels "ham","spam": 1 1 1 2 2 1 1 1 2 1 ...
table(sms_raw$type)

 ham spam 
4812  747 
# load the text mining (tm) package and wrap every message text in a
# volatile corpus, one document per message
library(tm)
sms_corpus <- VCorpus(
  VectorSource(sms_raw[["text"]])
)
# print a summary of the corpus
print(sms_corpus)
<<VCorpus>>
Metadata:  corpus specific: 0, document level (indexed): 0
Content:  documents: 5559
inspect(sms_corpus[1:2])
<<VCorpus>>
Metadata:  corpus specific: 0, document level (indexed): 0
Content:  documents: 2

[[1]]
<<PlainTextDocument>>
Metadata:  7
Content:  chars: 49

[[2]]
<<PlainTextDocument>>
Metadata:  7
Content:  chars: 23
as.character(sms_corpus[[1]])
[1] "Hope you are having a good week. Just checking in"
lapply(sms_corpus[1:2], as.character)
$`1`
[1] "Hope you are having a good week. Just checking in"

$`2`
[1] "K..give back my thanks."
# clean up the corpus using tm_map()
sms_corpus_clean <- tm_map(sms_corpus, content_transformer(tolower))
# show the difference between sms_corpus and corpus_clean
as.character(sms_corpus[[1]])
[1] "Hope you are having a good week. Just checking in"
as.character(sms_corpus_clean[[1]])
[1] "hope you are having a good week. just checking in"
# The three steps below run in this order on the lower-cased corpus, so stop
# words are removed while punctuation is still present in the text.
# NOTE(review): the later comparison of sms_dtm, sms_dtm2 and sms_dtm3 suggests
# the step order affects the resulting terms; keep this order unless that
# comparison is re-checked.
sms_corpus_clean <- tm_map(sms_corpus_clean, removeNumbers) # remove numbers
sms_corpus_clean <- tm_map(sms_corpus_clean, removeWords, stopwords()) # remove stop words (stopwords() called with its defaults)
sms_corpus_clean <- tm_map(sms_corpus_clean, removePunctuation) # remove punctuation
# tip: create a custom function to replace (rather than remove) punctuation
removePunctuation("hello...world")
[1] "helloworld"
# swap each run of punctuation characters for a single space
replacePunctuation <- function(x) {
  gsub(pattern = "[[:punct:]]+", replacement = " ", x = x)
}
replacePunctuation("hello...world")
[1] "hello world"
# illustration of word stemming
library(SnowballC)
wordStem(c("learn", "learned", "learning", "learns"))
[1] "learn" "learn" "learn" "learn"
sms_corpus_clean <- tm_map(sms_corpus_clean, stemDocument)
sms_corpus_clean <- tm_map(sms_corpus_clean, stripWhitespace) # eliminate unneeded whitespace
# examine the final clean corpus
lapply(sms_corpus[1:3], as.character)
$`1`
[1] "Hope you are having a good week. Just checking in"

$`2`
[1] "K..give back my thanks."

$`3`
[1] "Am also doing in cbe only. But have to pay."
lapply(sms_corpus_clean[1:3], as.character)
$`1`
[1] "hope good week just check"

$`2`
[1] "kgive back thank"

$`3`
[1] "also cbe pay"
# build the document-term matrix from the already-cleaned corpus
sms_dtm <- DocumentTermMatrix(sms_corpus_clean)

# alternative: let DocumentTermMatrix() preprocess the raw corpus itself,
# using its built-in stop word option (the option order is kept as written)
dtm_control_builtin <- list(
  tolower = TRUE,
  removeNumbers = TRUE,
  stopwords = TRUE,
  removePunctuation = TRUE,
  stemming = TRUE
)
sms_dtm2 <- DocumentTermMatrix(sms_corpus, control = dtm_control_builtin)

# alternative: same options, but stop words are removed by a custom function
# that calls removeWords() with stopwords(), as the manual cleaning did
dtm_control_custom <- list(
  tolower = TRUE,
  removeNumbers = TRUE,
  stopwords = function(x) removeWords(x, stopwords()),
  removePunctuation = TRUE,
  stemming = TRUE
)
sms_dtm3 <- DocumentTermMatrix(sms_corpus, control = dtm_control_custom)
# compare the result
sms_dtm
<<DocumentTermMatrix (documents: 5559, terms: 6559)>>
Non-/sparse entries: 42147/36419334
Sparsity           : 100%
Maximal term length: 40
Weighting          : term frequency (tf)
sms_dtm2
<<DocumentTermMatrix (documents: 5559, terms: 6961)>>
Non-/sparse entries: 43221/38652978
Sparsity           : 100%
Maximal term length: 40
Weighting          : term frequency (tf)
sms_dtm3
<<DocumentTermMatrix (documents: 5559, terms: 6559)>>
Non-/sparse entries: 42147/36419334
Sparsity           : 100%
Maximal term length: 40
Weighting          : term frequency (tf)
# creating training and test datasets: the first 75% of the messages (in file
# order) are used for training and the remaining rows for testing
# the DTM rows and the raw labels must line up one-to-one
stopifnot(nrow(sms_dtm) == nrow(sms_raw))
n_total <- nrow(sms_dtm)
# derive the split point from the data instead of hard-coding row numbers;
# for the 5559-message dataset this gives 4169 training rows
n_train <- floor(0.75 * n_total)
train_idx <- seq_len(n_train)
# setdiff() stays correct even if the test set would be empty
test_idx <- setdiff(seq_len(n_total), train_idx)
sms_dtm_train <- sms_dtm[train_idx, ]
sms_dtm_test  <- sms_dtm[test_idx, ]
# also save the labels
sms_train_labels <- sms_raw$type[train_idx]
sms_test_labels  <- sms_raw$type[test_idx]
# check that the proportion of spam is similar
prop.table(table(sms_train_labels))
sms_train_labels
      ham      spam 
0.8647158 0.1352842 
prop.table(table(sms_test_labels))
sms_test_labels
      ham      spam 
0.8683453 0.1316547 
# word cloud visualization
library(wordcloud)
wordcloud(sms_corpus_clean, min.freq = 50, random.order = FALSE)

# split the full raw SMS data frame (not only the training rows) into spam
# and ham messages
spam <- sms_raw[which(sms_raw$type == "spam"), ]
ham <- sms_raw[which(sms_raw$type == "ham"), ]
# word cloud of the spam texts, limited to at most 40 words
wordcloud(words = spam$text, max.words = 40, scale = c(3, 0.5))
transformation drops documents
transformation drops documents

wordcloud(ham$text, max.words = 40, scale = c(3, 0.5))
transformation drops documents
transformation drops documents

sms_dtm_freq_train <- removeSparseTerms(sms_dtm_train, 0.999)
sms_dtm_freq_train
<<DocumentTermMatrix (documents: 4169, terms: 1104)>>
Non-/sparse entries: 24827/4577749
Sparsity           : 99%
Maximal term length: 19
Weighting          : term frequency (tf)
# indicator features for frequent words
findFreqTerms(sms_dtm_train, 5)
   [1] "£wk"                 "€˜m"                 "€˜s"                 "abiola"              "abl"                
   [6] "abt"                 "accept"              "access"              "account"             "across"             
  [11] "act"                 "activ"               "actual"              "add"                 "address"            
  [16] "admir"               "adult"               "advanc"              "aft"                 "afternoon"          
  [21] "age"                 "ago"                 "aha"                 "ahead"               "aight"              
  [26] "aint"                "air"                 "aiyo"                "alex"                "almost"             
  [31] "alon"                "alreadi"             "alright"             "also"                "alway"              
  [36] "angri"               "announc"             "anoth"               "answer"              "anymor"             
  [41] "anyon"               "anyth"               "anytim"              "anyway"              "apart"              
  [46] "app"                 "appli"               "appreci"             "arcad"               "ard"                
  [51] "area"                "argu"                "argument"            "armand"              "around"             
  [56] "arrang"              "arriv"               "asap"                "ask"                 "askd"               
  [61] "attempt"             "auction"             "avail"               "ave"                 "avoid"              
  [66] "await"               "awak"                "award"               "away"                "awesom"             
  [71] "babe"                "babi"                "back"                "bad"                 "bag"                
  [76] "bank"                "bare"                "basic"               "bath"                "batteri"            
  [81] "bcoz"                "bday"                "beauti"              "becom"               "bed"                
  [86] "bedroom"             "beer"                "begin"               "believ"              "best"               
  [91] "better"              "bid"                 "big"                 "bill"                "bird"               
  [96] "birthday"            "bit"                 "black"               "blank"               "bless"              
 [101] "blue"                "bluetooth"           "bold"                "bonus"               "boo"                
 [106] "book"                "boost"               "bore"                "boss"                "bother"             
 [111] "bout"                "box"                 "boy"                 "boytoy"              "break"              
 [116] "breath"              "bring"               "brother"             "bslvyl"              "btnationalr"        
 [121] "buck"                "bus"                 "busi"                "buy"                 "cabin"              
 [126] "call"                "caller"              "callertun"           "camcord"             "came"               
 [131] "camera"              "campus"              "can"                 "cancel"              "cancer"             
 [136] "cant"                "car"                 "card"                "care"                "carlo"              
 [141] "case"                "cash"                "cashbal"             "catch"               "caus"               
 [146] "celebr"              "cell"                "centr"               "chanc"               "chang"              
 [151] "charg"               "chat"                "cheap"               "cheaper"             "check"              
 [156] "cheer"               "chennai"             "chikku"              "childish"            "children"           
 [161] "choic"               "choos"               "christma"            "claim"               "class"              
 [166] "clean"               "clear"               "close"               "club"                "code"               
 [171] "coffe"               "cold"                "colleagu"            "collect"             "colleg"             
 [176] "colour"              "come"                "comin"               "comp"                "compani"            
 [181] "competit"            "complet"             "complimentari"       "comput"              "condit"             
 [186] "confirm"             "congrat"             "congratul"           "connect"             "contact"            
 [191] "content"             "contract"            "cook"                "cool"                "copi"               
 [196] "correct"             "cos"                 "cost"                "cost£pm"             "costa"              
 [201] "coupl"               "cours"               "cover"               "coz"                 "crave"              
 [206] "crazi"               "creat"               "credit"              "cri"                 "cross"              
 [211] "cuddl"               "cum"                 "cup"                 "current"             "custcar"            
 [216] "custom"              "cut"                 "cute"                "cuz"                 "dad"                
 [221] "daddi"               "darl"                "darlin"              "darren"              "dat"                
 [226] "date"                "day"                 "dead"                "deal"                "dear"               
 [231] "decid"               "decim"               "decis"               "deep"                "definit"            
 [236] "del"                 "deliv"               "deliveri"            "den"                 "depend"             
 [241] "detail"              "didnt"               "die"                 "diet"                "differ"             
 [246] "difficult"           "digit"               "din"                 "dinner"              "direct"             
 [251] "dis"                 "discount"            "discuss"             "disturb"             "dnt"                
 [256] "doc"                 "doctor"              "doesnt"              "dog"                 "doin"               
 [261] "don"                 "done"                "dont"                "door"                "doubl"              
 [266] "download"            "draw"                "dream"               "drink"               "drive"              
 [271] "drop"                "drug"                "dude"                "due"                 "dun"                
 [276] "dunno"               "dvd"                 "earli"               "earlier"             "earth"              
 [281] "easi"                "eat"                 "eatin"               "egg"                 "either"             
 [286] "els"                 "email"               "embarass"            "end"                 "energi"             
 [291] "england"             "enjoy"               "enough"              "enter"               "entitl"             
 [296] "entri"               "envelop"             "etc"                 "euro"                "eve"                
 [301] "even"                "ever"                "everi"               "everybodi"           "everyon"            
 [306] "everyth"             "exact"               "exam"                "excel"               "excit"              
 [311] "excus"               "expect"              "experi"              "expir"               "extra"              
 [316] "eye"                 "face"                "facebook"            "fact"                "fall"               
 [321] "famili"              "fanci"               "fantasi"             "fantast"             "far"                
 [326] "fast"                "fat"                 "father"              "fault"               "feb"                
 [331] "feel"                "felt"                "fetch"               "fight"               "figur"              
 [336] "file"                "fill"                "film"                "final"               "find"               
 [341] "fine"                "finger"              "finish"              "first"               "fix"                
 [346] "flag"                "flat"                "flight"              "flower"              "follow"             
 [351] "fone"                "food"                "forev"               "forget"              "forgot"             
 [356] "forward"             "found"               "freak"               "free"                "freemsg"            
 [361] "freephon"            "fren"                "fri"                 "friday"              "friend"             
 [366] "friendship"          "frm"                 "frnd"                "frnds"               "full"               
 [371] "fullonsmscom"        "fun"                 "funni"               "futur"               "gal"                
 [376] "game"                "gap"                 "gas"                 "gave"                "gay"                
 [381] "gentl"               "get"                 "gettin"              "gift"                "girl"               
 [386] "girlfrnd"            "give"                "glad"                "god"                 "goe"                
 [391] "goin"                "gone"                "gonna"               "good"                "goodmorn"           
 [396] "goodnight"           "got"                 "goto"                "gotta"               "great"              
 [401] "grin"                "guarante"            "gud"                 "guess"               "guy"                
 [406] "gym"                 "haf"                 "haha"                "hai"                 "hair"               
 [411] "half"                "hand"                "handset"             "hang"                "happen"             
 [416] "happi"               "hard"                "hate"                "hav"                 "havent"             
 [421] "head"                "hear"                "heard"               "heart"               "heavi"              
 [426] "hee"                 "hell"                "hello"               "help"                "hey"                
 [431] "hgsuiteland"         "hit"                 "hiya"                "hmm"                 "hmmm"               
 [436] "hmv"                 "hol"                 "hold"                "holder"              "holiday"            
 [441] "home"                "hook"                "hop"                 "hope"                "horni"              
 [446] "hospit"              "hot"                 "hotel"               "hour"                "hous"               
 [451] "how"                 "howev"               "howz"                "hrs"                 "httpwwwurawinnercom"
 [456] "hug"                 "huh"                 "hungri"              "hurri"               "hurt"               
 [461] "ice"                 "idea"                "identifi"            "ignor"               "ill"                
 [466] "immedi"              "import"              "inc"                 "includ"              "india"              
 [471] "info"                "inform"              "insid"               "instead"             "interest"           
 [476] "invit"               "ipod"                "irrit"               "ish"                 "island"             
 [481] "issu"                "ive"                 "izzit"               "januari"             "jay"                
 [486] "job"                 "john"                "join"                "joke"                "joy"                
 [491] "jst"                 "jus"                 "just"                "juz"                 "kate"               
 [496] "keep"                "kept"                "kick"                "kid"                 "kill"               
 [501] "kind"                "kinda"               "king"                "kiss"                "knew"               
 [506] "know"                "knw"                 "ladi"                "land"                "landlin"            
 [511] "laptop"              "lar"                 "last"                "late"                "later"              
 [516] "latest"              "laugh"               "lazi"                "ldn"                 "lead"               
 [521] "learn"               "least"               "leav"                "lect"                "left"               
 [526] "leh"                 "lei"                 "less"                "lesson"              "let"                
 [531] "letter"              "liao"                "librari"             "lie"                 "life"               
 [536] "lift"                "light"               "like"                "line"                "link"               
 [541] "list"                "listen"              "littl"               "live"                "lmao"               
 [546] "load"                "loan"                "local"               "locat"               "log"                
 [551] "lol"                 "london"              "long"                "longer"              "look"               
 [556] "lookin"              "lor"                 "lose"                "lost"                "lot"                
 [561] "lovabl"              "love"                "lover"               "loyalti"             "ltd"                
 [566] "luck"                "lucki"               "lunch"               "luv"                 "mad"                
 [571] "made"                "mah"                 "mail"                "make"                "malaria"            
 [576] "man"                 "mani"                "march"               "mark"                "marri"              
 [581] "match"               "mate"                "matter"              "maxim"               "maxmin"             
 [586] "may"                 "mayb"                "meal"                "mean"                "meant"              
 [591] "med"                 "medic"               "meet"                "meetin"              "meh"                
 [596] "member"              "men"                 "merri"               "messag"              "met"                
 [601] "mid"                 "midnight"            "might"               "min"                 "mind"               
 [606] "mine"                "minut"               "miracl"              "miss"                "mistak"             
 [611] "moan"                "mob"                 "mobil"               "mobileupd"           "mode"               
 [616] "mom"                 "moment"              "mon"                 "monday"              "money"              
 [621] "month"               "morn"                "mother"              "motorola"            "move"               
 [626] "movi"                "mrng"                "mrt"                 "mrw"                 "msg"                
 [631] "msgs"                "mths"                "much"                "mum"                 "murder"             
 [636] "music"               "must"                "muz"                 "nah"                 "nake"               
 [641] "name"                "nation"              "natur"               "naughti"             "near"               
 [646] "need"                "net"                 "network"             "neva"                "never"              
 [651] "new"                 "news"                "next"                "nice"                "nigeria"            
 [656] "night"               "nite"                "nobodi"              "noe"                 "nokia"              
 [661] "noon"                "nope"                "normal"              "normpton"            "noth"               
 [666] "notic"               "now"                 "num"                 "number"              "nyt"                
 [671] "obvious"             "offer"               "offic"               "offici"              "okay"               
 [676] "oki"                 "old"                 "omg"                 "one"                 "onlin"              
 [681] "onto"                "oop"                 "open"                "oper"                "opinion"            
 [686] "opt"                 "optout"              "orang"               "orchard"             "order"              
 [691] "oredi"               "oso"                 "other"               "otherwis"            "outsid"             
 [696] "pack"                "page"                "paid"                "pain"                "paper"              
 [701] "parent"              "park"                "part"                "parti"               "partner"            
 [706] "pass"                "passion"             "password"            "past"                "pay"                
 [711] "peopl"               "per"                 "person"              "pete"                "phone"              
 [716] "photo"               "pic"                 "pick"                "pictur"              "pin"                
 [721] "piss"                "pix"                 "pizza"               "place"               "plan"               
 [726] "play"                "player"              "pleas"               "pleasur"             "plenti"             
 [731] "pls"                 "plus"                "plz"                 "pmin"                "pmsg"               
 [736] "pobox"               "point"               "poli"                "polic"               "poor"               
 [741] "pop"                 "possess"             "possibl"             "post"                "pound"              
 [746] "power"               "ppm"                 "pray"                "present"             "press"              
 [751] "pretti"              "previous"            "price"               "princess"            "privat"             
 [756] "prize"               "prob"                "probabl"             "problem"             "project"            
 [761] "promis"              "pub"                 "put"                 "qualiti"             "question"           
 [766] "quick"               "quit"                "quiz"                "quot"                "rain"               
 [771] "random"              "rang"                "rate"                "rather"              "rcvd"               
 [776] "reach"               "read"                "readi"               "real"                "reali"              
 [781] "realli"              "reason"              "receipt"             "receiv"              "recent"             
 [786] "record"              "refer"               "regard"              "regist"              "relat"              
 [791] "relax"               "remain"              "rememb"              "remind"              "remov"              
 [796] "rent"                "rental"              "repli"               "repres"              "request"            
 [801] "respond"             "respons"             "rest"                "result"              "return"             
 [806] "reveal"              "review"              "reward"              "right"               "ring"               
 [811] "rington"             "rite"                "road"                "rock"                "role"               
 [816] "room"                "roommat"             "rose"                "round"               "rowwjhl"            
 [821] "rpli"                "rreveal"             "run"                 "rush"                "sad"                
 [826] "sae"                 "safe"                "said"                "sale"                "sat"                
 [831] "saturday"            "savamob"             "save"                "saw"                 "say"                
 [836] "sch"                 "school"              "scream"              "sea"                 "search"             
 [841] "sec"                 "second"              "secret"              "see"                 "seem"               
 [846] "seen"                "select"              "self"                "sell"                "semest"             
 [851] "send"                "sens"                "sent"                "serious"             "servic"             
 [856] "set"                 "settl"               "sex"                 "sexi"                "shall"              
 [861] "share"               "shd"                 "ship"                "shirt"               "shop"               
 [866] "short"               "show"                "shower"              "sick"                "side"               
 [871] "sigh"                "sight"               "sign"                "silent"              "simpl"              
 [876] "sinc"                "singl"               "sipix"               "sir"                 "sis"                
 [881] "sister"              "sit"                 "situat"              "skxh"                "skype"              
 [886] "slave"               "sleep"               "slept"               "slow"                "slowli"             
 [891] "small"               "smile"               "smoke"               "sms"                 "smth"               
 [896] "snow"                "sofa"                "sol"                 "somebodi"            "someon"             
 [901] "someth"              "sometim"             "somewher"            "song"                "soni"               
 [906] "sonyericsson"        "soon"                "sorri"               "sort"                "sound"              
 [911] "south"               "space"               "speak"               "special"             "specialcal"         
 [916] "spend"               "spent"               "spoke"               "spree"               "stand"              
 [921] "start"               "statement"           "station"             "stay"                "std"                
 [926] "step"                "still"               "stockport"           "stone"               "stop"               
 [931] "store"               "stori"               "street"              "student"             "studi"              
 [936] "stuff"               "stupid"              "style"               "sub"                 "subscrib"           
 [941] "success"             "suck"                "suit"                "summer"              "sun"                
 [946] "sunday"              "sunshin"             "sup"                 "support"             "suppos"             
 [951] "sure"                "surf"                "surpris"             "sweet"               "swing"              
 [956] "system"              "take"                "talk"                "tampa"               "tariff"             
 [961] "tcs"                 "tea"                 "teach"               "tear"                "teas"               
 [966] "tel"                 "tell"                "ten"                 "tenerif"             "term"               
 [971] "test"                "text"                "thank"               "thanx"               "that"               
 [976] "thing"               "think"               "thinkin"             "thk"                 "tho"                
 [981] "though"              "thought"             "throw"               "thru"                "tht"                
 [986] "thur"                "tick"                "ticket"              "til"                 "till"               
 [991] "time"                "tire"                "titl"                "tmr"                 "toclaim"            
 [996] "today"               "togeth"              "told"                "tomo"                "tomorrow"           
 [ reached getOption("max.print") -- omitted 139 entries ]
# save frequently-appearing terms to a character vector
sms_freq_words <- findFreqTerms(sms_dtm_train, 5)
str(sms_freq_words)
 chr [1:1139] "£wk" "€˜m" "€˜s" "abiola" "abl" "abt" "accept" "access" "account" "across" "act" "activ" "actual" "add" ...
# create DTMs with only the frequent terms
sms_dtm_freq_train <- sms_dtm_train[ , sms_freq_words]
sms_dtm_freq_test <- sms_dtm_test[ , sms_freq_words]
# convert counts to "Yes"/"No" presence indicators
#
# x: numeric vector (or matrix) of term counts
# returns: character values with the same shape as x, "Yes" where the count
#   is greater than zero and "No" otherwise (NA counts stay NA)
convert_counts <- function(x) {
  # return the result directly; ending the function on an assignment would
  # return the value invisibly
  ifelse(x > 0, "Yes", "No")
}
# recode every term column of the train/test matrices with convert_counts()
sms_train <- apply(X = sms_dtm_freq_train, MARGIN = 2, FUN = convert_counts)
sms_test <- apply(X = sms_dtm_freq_test, MARGIN = 2, FUN = convert_counts)

Step 3: Training a model on the data

library(e1071)
# fit a naive Bayes classifier on the Yes/No term indicators and their labels
sms_classifier <- naiveBayes(x = sms_train, y = sms_train_labels)

Step 4: Evaluating model performance

# classify the test messages and preview the first few predictions
sms_test_pred <- predict(object = sms_classifier, newdata = sms_test)
head(sms_test_pred)
[1] ham  ham  ham  ham  spam ham 
Levels: ham spam
library(gmodels)
# cross-tabulate predictions against the true labels, showing only the
# counts and the column proportions
CrossTable(
  x = sms_test_pred, y = sms_test_labels,
  prop.chisq = FALSE, prop.t = FALSE, prop.r = FALSE,
  dnn = c("predicted", "actual")
)

 
   Cell Contents
|-------------------------|
|                       N |
|           N / Col Total |
|-------------------------|

 
Total Observations in Table:  1390 

 
             | actual 
   predicted |       ham |      spam | Row Total | 
-------------|-----------|-----------|-----------|
         ham |      1201 |        30 |      1231 | 
             |     0.995 |     0.164 |           | 
-------------|-----------|-----------|-----------|
        spam |         6 |       153 |       159 | 
             |     0.005 |     0.836 |           | 
-------------|-----------|-----------|-----------|
Column Total |      1207 |       183 |      1390 | 
             |     0.868 |     0.132 |           | 
-------------|-----------|-----------|-----------|

 

Step 5: Improving model performance

# refit the classifier with laplace = 1 and evaluate it on the same test set
sms_classifier2 <- naiveBayes(x = sms_train, y = sms_train_labels, laplace = 1)
sms_test_pred2 <- predict(object = sms_classifier2, newdata = sms_test)
CrossTable(
  x = sms_test_pred2, y = sms_test_labels,
  prop.chisq = FALSE, prop.t = FALSE, prop.r = FALSE,
  dnn = c("predicted", "actual")
)

 
   Cell Contents
|-------------------------|
|                       N |
|           N / Col Total |
|-------------------------|

 
Total Observations in Table:  1390 

 
             | actual 
   predicted |       ham |      spam | Row Total | 
-------------|-----------|-----------|-----------|
         ham |      1202 |        28 |      1230 | 
             |     0.996 |     0.153 |           | 
-------------|-----------|-----------|-----------|
        spam |         5 |       155 |       160 | 
             |     0.004 |     0.847 |           | 
-------------|-----------|-----------|-----------|
Column Total |      1207 |       183 |      1390 | 
             |     0.868 |     0.132 |           | 
-------------|-----------|-----------|-----------|

 
LS0tCnRpdGxlOiAnQ2hhcHRlciA0OiBDbGFzc2lmaWNhdGlvbiB1c2luZyBOYWl2ZSBCYXllcycKb3V0cHV0OgogIHBkZl9kb2N1bWVudDogZGVmYXVsdAogIHdvcmRfZG9jdW1lbnQ6IGRlZmF1bHQKICBodG1sX25vdGVib29rOiBkZWZhdWx0Ci0tLQoKVGhpcyBpcyBhbiBbUiBNYXJrZG93bl0oaHR0cDovL3JtYXJrZG93bi5yc3R1ZGlvLmNvbSkgTm90ZWJvb2suIFdoZW4geW91IGV4ZWN1dGUgY29kZSB3aXRoaW4gdGhlIG5vdGVib29rLCB0aGUgcmVzdWx0cyBhcHBlYXIgYmVuZWF0aCB0aGUgY29kZS4gCgpUcnkgZXhlY3V0aW5nIHRoaXMgY2h1bmsgYnkgY2xpY2tpbmcgdGhlICpSdW4qIGJ1dHRvbiB3aXRoaW4gdGhlIGNodW5rIG9yIGJ5IHBsYWNpbmcgeW91ciBjdXJzb3IgaW5zaWRlIGl0IGFuZCBwcmVzc2luZyAqQ3RybCtTaGlmdCtFbnRlciouIAoKQWRkIGEgbmV3IGNodW5rIGJ5IGNsaWNraW5nIHRoZSAqSW5zZXJ0IENodW5rKiBidXR0b24gb24gdGhlIHRvb2xiYXIgb3IgYnkgcHJlc3NpbmcgKkN0cmwrQWx0K0kqLgoKV2hlbiB5b3Ugc2F2ZSB0aGUgbm90ZWJvb2ssIGFuIEhUTUwgZmlsZSBjb250YWluaW5nIHRoZSBjb2RlIGFuZCBvdXRwdXQgd2lsbCBiZSBzYXZlZCBhbG9uZ3NpZGUgaXQgKGNsaWNrIHRoZSAqUHJldmlldyogYnV0dG9uIG9yIHByZXNzICpDdHJsK1NoaWZ0K0sqIHRvIHByZXZpZXcgdGhlIEhUTUwgZmlsZSkuCgojICoqRXhhbXBsZTogRmlsdGVyaW5nIHNwYW0gU01TIG1lc3NhZ2VzKioKCiMjIFN0ZXAgMTogRG93bmxvYWQgdGhlIGRhdGEKCmBgYHtyfQpVUkwgPC0gImh0dHA6Ly9jb3guY3N1ZWFzdGJheS5lZHUvfmVzdWVzcy9jbGFzc2VzL1N0YXRpc3RpY3NfNjYyMC9QcmVzZW50YXRpb25zL21sNi9zbXNfc3BhbS5jc3YiCmRvd25sb2FkLmZpbGUoVVJMLCBkZXN0ZmlsZSA9ICIuL3Ntc19zcGFtLmNzdiIsIG1ldGhvZD0iY3VybCIpCmBgYAoKCiMjIFN0ZXAgMjogRXhwbG9yaW5nIGFuZCBwcmVwYXJpbmcgdGhlIGRhdGEgLS0tLSAKCmBgYHtyfQojIHJlYWQgdGhlIHNtcyBkYXRhIGludG8gdGhlIHNtcyBkYXRhIGZyYW1lCnNtc19yYXcgPC0gcmVhZC5jc3YoInNtc19zcGFtLmNzdiIsIHN0cmluZ3NBc0ZhY3RvcnMgPSBGQUxTRSkKCiMgZXhhbWluZSB0aGUgc3RydWN0dXJlIG9mIHRoZSBzbXMgZGF0YQpzdHIoc21zX3JhdykKCmBgYApgYGB7cn0KIyBjb252ZXJ0IHNwYW0vaGFtIHRvIGZhY3Rvci4Kc21zX3JhdyR0eXBlIDwtIGZhY3RvcihzbXNfcmF3JHR5cGUpCgojIGV4YW1pbmUgdGhlIHR5cGUgdmFyaWFibGUgbW9yZSBjYXJlZnVsbHkKc3RyKHNtc19yYXckdHlwZSkKdGFibGUoc21zX3JhdyR0eXBlKQoKYGBgCmBgYHtyfQojIGJ1aWxkIGEgY29ycHVzIHVzaW5nIHRoZSB0ZXh0IG1pbmluZyAodG0pIHBhY2thZ2UKbGlicmFyeSh0bSkKc21zX2NvcnB1cyA8LSBWQ29ycHVzKFZlY3RvclNvdXJjZShzbXNfcmF3JHRleHQpKQoKIyBleGFtaW5lIHRoZSBzbXMgY29ycHVzCnByaW50KHNtc19j
b3JwdXMpCmluc3BlY3Qoc21zX2NvcnB1c1sxOjJdKQpgYGAKYGBge3J9CmFzLmNoYXJhY3RlcihzbXNfY29ycHVzW1sxXV0pCmxhcHBseShzbXNfY29ycHVzWzE6Ml0sIGFzLmNoYXJhY3RlcikKYGBgCmBgYHtyfQojIGNsZWFuIHVwIHRoZSBjb3JwdXMgdXNpbmcgdG1fbWFwKCkKc21zX2NvcnB1c19jbGVhbiA8LSB0bV9tYXAoc21zX2NvcnB1cywgY29udGVudF90cmFuc2Zvcm1lcih0b2xvd2VyKSkKCiMgc2hvdyB0aGUgZGlmZmVyZW5jZSBiZXR3ZWVuIHNtc19jb3JwdXMgYW5kIGNvcnB1c19jbGVhbgphcy5jaGFyYWN0ZXIoc21zX2NvcnB1c1tbMV1dKQphcy5jaGFyYWN0ZXIoc21zX2NvcnB1c19jbGVhbltbMV1dKQpgYGAKYGBge3J9CnNtc19jb3JwdXNfY2xlYW4gPC0gdG1fbWFwKHNtc19jb3JwdXNfY2xlYW4sIHJlbW92ZU51bWJlcnMpICMgcmVtb3ZlIG51bWJlcnMKc21zX2NvcnB1c19jbGVhbiA8LSB0bV9tYXAoc21zX2NvcnB1c19jbGVhbiwgcmVtb3ZlV29yZHMsIHN0b3B3b3JkcygpKSAjIHJlbW92ZSBzdG9wIHdvcmRzCnNtc19jb3JwdXNfY2xlYW4gPC0gdG1fbWFwKHNtc19jb3JwdXNfY2xlYW4sIHJlbW92ZVB1bmN0dWF0aW9uKSAjIHJlbW92ZSBwdW5jdHVhdGlvbgpgYGAKYGBge3J9CiMgdGlwOiBjcmVhdGUgYSBjdXN0b20gZnVuY3Rpb24gdG8gcmVwbGFjZSAocmF0aGVyIHRoYW4gcmVtb3ZlKSBwdW5jdHVhdGlvbgpyZW1vdmVQdW5jdHVhdGlvbigiaGVsbG8uLi53b3JsZCIpCnJlcGxhY2VQdW5jdHVhdGlvbiA8LSBmdW5jdGlvbih4KSB7IGdzdWIoIltbOnB1bmN0Ol1dKyIsICIgIiwgeCkgfQpyZXBsYWNlUHVuY3R1YXRpb24oImhlbGxvLi4ud29ybGQiKQpgYGAKYGBge3J9CiMgaWxsdXN0cmF0aW9uIG9mIHdvcmQgc3RlbW1pbmcKbGlicmFyeShTbm93YmFsbEMpCndvcmRTdGVtKGMoImxlYXJuIiwgImxlYXJuZWQiLCAibGVhcm5pbmciLCAibGVhcm5zIikpCgpzbXNfY29ycHVzX2NsZWFuIDwtIHRtX21hcChzbXNfY29ycHVzX2NsZWFuLCBzdGVtRG9jdW1lbnQpCgpzbXNfY29ycHVzX2NsZWFuIDwtIHRtX21hcChzbXNfY29ycHVzX2NsZWFuLCBzdHJpcFdoaXRlc3BhY2UpICMgZWxpbWluYXRlIHVubmVlZGVkIHdoaXRlc3BhY2UKCiMgZXhhbWluZSB0aGUgZmluYWwgY2xlYW4gY29ycHVzCmxhcHBseShzbXNfY29ycHVzWzE6M10sIGFzLmNoYXJhY3RlcikKbGFwcGx5KHNtc19jb3JwdXNfY2xlYW5bMTozXSwgYXMuY2hhcmFjdGVyKQpgYGAKYGBge3J9CiMgY3JlYXRlIGEgZG9jdW1lbnQtdGVybSBzcGFyc2UgbWF0cml4CnNtc19kdG0gPC0gRG9jdW1lbnRUZXJtTWF0cml4KHNtc19jb3JwdXNfY2xlYW4pCgojIGFsdGVybmF0aXZlIHNvbHV0aW9uOiBjcmVhdGUgYSBkb2N1bWVudC10ZXJtIHNwYXJzZSBtYXRyaXggZGlyZWN0bHkgZnJvbSB0aGUgU01TIGNvcnB1cwpzbXNfZHRtMiA8LSBEb2N1bWVudFRlcm1NYXRyaXgoc21zX2NvcnB1cywgY29udHJvbCA9IGxpc3QoCiAgdG9sb3dlciA9IFRSVUUsCiAgcmVt
b3ZlTnVtYmVycyA9IFRSVUUsCiAgc3RvcHdvcmRzID0gVFJVRSwKICByZW1vdmVQdW5jdHVhdGlvbiA9IFRSVUUsCiAgc3RlbW1pbmcgPSBUUlVFCikpCgojIGFsdGVybmF0aXZlIHNvbHV0aW9uOiB1c2luZyBjdXN0b20gc3RvcCB3b3JkcyBmdW5jdGlvbiBlbnN1cmVzIGlkZW50aWNhbCByZXN1bHQKc21zX2R0bTMgPC0gRG9jdW1lbnRUZXJtTWF0cml4KHNtc19jb3JwdXMsIGNvbnRyb2wgPSBsaXN0KAogIHRvbG93ZXIgPSBUUlVFLAogIHJlbW92ZU51bWJlcnMgPSBUUlVFLAogIHN0b3B3b3JkcyA9IGZ1bmN0aW9uKHgpIHsgcmVtb3ZlV29yZHMoeCwgc3RvcHdvcmRzKCkpIH0sCiAgcmVtb3ZlUHVuY3R1YXRpb24gPSBUUlVFLAogIHN0ZW1taW5nID0gVFJVRQopKQoKIyBjb21wYXJlIHRoZSByZXN1bHQKc21zX2R0bQpzbXNfZHRtMgpzbXNfZHRtMwpgYGAKYGBge3J9CiMgY3JlYXRpbmcgdHJhaW5pbmcgYW5kIHRlc3QgZGF0YXNldHMKc21zX2R0bV90cmFpbiA8LSBzbXNfZHRtWzE6NDE2OSwgXQpzbXNfZHRtX3Rlc3QgIDwtIHNtc19kdG1bNDE3MDo1NTU5LCBdCgojIGFsc28gc2F2ZSB0aGUgbGFiZWxzCnNtc190cmFpbl9sYWJlbHMgPC0gc21zX3Jhd1sxOjQxNjksIF0kdHlwZQpzbXNfdGVzdF9sYWJlbHMgIDwtIHNtc19yYXdbNDE3MDo1NTU5LCBdJHR5cGUKCiMgY2hlY2sgdGhhdCB0aGUgcHJvcG9ydGlvbiBvZiBzcGFtIGlzIHNpbWlsYXIKcHJvcC50YWJsZSh0YWJsZShzbXNfdHJhaW5fbGFiZWxzKSkKcHJvcC50YWJsZSh0YWJsZShzbXNfdGVzdF9sYWJlbHMpKQpgYGAKYGBge3J9CiMgd29yZCBjbG91ZCB2aXN1YWxpemF0aW9uCmxpYnJhcnkod29yZGNsb3VkKQp3b3JkY2xvdWQoc21zX2NvcnB1c19jbGVhbiwgbWluLmZyZXEgPSA1MCwgcmFuZG9tLm9yZGVyID0gRkFMU0UpCmBgYApgYGB7cn0KIyBzdWJzZXQgdGhlIHRyYWluaW5nIGRhdGEgaW50byBzcGFtIGFuZCBoYW0gZ3JvdXBzCnNwYW0gPC0gc3Vic2V0KHNtc19yYXcsIHR5cGUgPT0gInNwYW0iKQpoYW0gIDwtIHN1YnNldChzbXNfcmF3LCB0eXBlID09ICJoYW0iKQoKd29yZGNsb3VkKHNwYW0kdGV4dCwgbWF4LndvcmRzID0gNDAsIHNjYWxlID0gYygzLCAwLjUpKQp3b3JkY2xvdWQoaGFtJHRleHQsIG1heC53b3JkcyA9IDQwLCBzY2FsZSA9IGMoMywgMC41KSkKYGBgCmBgYHtyfQpzbXNfZHRtX2ZyZXFfdHJhaW4gPC0gcmVtb3ZlU3BhcnNlVGVybXMoc21zX2R0bV90cmFpbiwgMC45OTkpCnNtc19kdG1fZnJlcV90cmFpbgoKIyBpbmRpY2F0b3IgZmVhdHVyZXMgZm9yIGZyZXF1ZW50IHdvcmRzCmZpbmRGcmVxVGVybXMoc21zX2R0bV90cmFpbiwgNSkKCiMgc2F2ZSBmcmVxdWVudGx5LWFwcGVhcmluZyB0ZXJtcyB0byBhIGNoYXJhY3RlciB2ZWN0b3IKc21zX2ZyZXFfd29yZHMgPC0gZmluZEZyZXFUZXJtcyhzbXNfZHRtX3RyYWluLCA1KQpzdHIoc21zX2ZyZXFfd29yZHMpCgojIGNyZWF0ZSBEVE1zIHdpdGggb25seSB0aGUgZnJlcXVlbnQgdGVybXMKc21zX2R0
bV9mcmVxX3RyYWluIDwtIHNtc19kdG1fdHJhaW5bICwgc21zX2ZyZXFfd29yZHNdCnNtc19kdG1fZnJlcV90ZXN0IDwtIHNtc19kdG1fdGVzdFsgLCBzbXNfZnJlcV93b3Jkc10KCiMgY29udmVydCBjb3VudHMgdG8gYSBmYWN0b3IKY29udmVydF9jb3VudHMgPC0gZnVuY3Rpb24oeCkgewogIHggPC0gaWZlbHNlKHggPiAwLCAiWWVzIiwgIk5vIikKfQoKIyBhcHBseSgpIGNvbnZlcnRfY291bnRzKCkgdG8gY29sdW1ucyBvZiB0cmFpbi90ZXN0IGRhdGEKc21zX3RyYWluIDwtIGFwcGx5KHNtc19kdG1fZnJlcV90cmFpbiwgTUFSR0lOID0gMiwgY29udmVydF9jb3VudHMpCnNtc190ZXN0ICA8LSBhcHBseShzbXNfZHRtX2ZyZXFfdGVzdCwgTUFSR0lOID0gMiwgY29udmVydF9jb3VudHMpCmBgYAoKIyMgU3RlcCAzOiBUcmFpbmluZyBhIG1vZGVsIG9uIHRoZSBkYXRhIC0tLS0KCmBgYHtyfQpsaWJyYXJ5KGUxMDcxKQpzbXNfY2xhc3NpZmllciA8LSBuYWl2ZUJheWVzKHNtc190cmFpbiwgc21zX3RyYWluX2xhYmVscykKCmBgYAoKIyMgU3RlcCA0OiBFdmFsdWF0aW5nIG1vZGVsIHBlcmZvcm1hbmNlIC0tLS0KCmBgYHtyfQpzbXNfdGVzdF9wcmVkIDwtIHByZWRpY3Qoc21zX2NsYXNzaWZpZXIsIHNtc190ZXN0KQoKaGVhZChzbXNfdGVzdF9wcmVkKQoKbGlicmFyeShnbW9kZWxzKQpDcm9zc1RhYmxlKHNtc190ZXN0X3ByZWQsIHNtc190ZXN0X2xhYmVscywKICAgICAgICAgICBwcm9wLmNoaXNxID0gRkFMU0UsIHByb3AudCA9IEZBTFNFLCBwcm9wLnIgPSBGQUxTRSwKICAgICAgICAgICBkbm4gPSBjKCdwcmVkaWN0ZWQnLCAnYWN0dWFsJykpCmBgYAoKIyMgU3RlcCA1OiBJbXByb3ZpbmcgbW9kZWwgcGVyZm9ybWFuY2UgLS0tLQoKYGBge3J9CnNtc19jbGFzc2lmaWVyMiA8LSBuYWl2ZUJheWVzKHNtc190cmFpbiwgc21zX3RyYWluX2xhYmVscywgbGFwbGFjZSA9IDEpCnNtc190ZXN0X3ByZWQyIDwtIHByZWRpY3Qoc21zX2NsYXNzaWZpZXIyLCBzbXNfdGVzdCkKQ3Jvc3NUYWJsZShzbXNfdGVzdF9wcmVkMiwgc21zX3Rlc3RfbGFiZWxzLAogICAgICAgICAgIHByb3AuY2hpc3EgPSBGQUxTRSwgcHJvcC50ID0gRkFMU0UsIHByb3AuciA9IEZBTFNFLAogICAgICAgICAgIGRubiA9IGMoJ3ByZWRpY3RlZCcsICdhY3R1YWwnKSkKYGBgCgoKCg==