# Load every package this notebook calls, so it runs in a fresh R session.
# Previously only Boruta and mlbench were loaded, and the first `%>%` /
# mutate() call failed unless the other packages happened to be attached.
library(pacman)
p_load(
  Boruta,       # Boruta(), TentativeRoughFix(), attStats(), ...
  mlbench,      # provides the Ozone data set
  dplyr,        # %>%, mutate(), select()
  tidyr,        # drop_na()
  janitor,      # clean_names(), get_dupes()
  naniar,       # vis_miss(), gg_miss_var()
  DataExplorer  # create_report()
)

# Bring the Ozone data frame into the workspace and preview it.
data(Ozone)

head(Ozone)

Note that the target variable is V4 (renamed to v4 once `clean_names()` is applied below), the daily maximum one-hour-average ozone reading.

# Convert the first three columns to integer in one step.
# NOTE(review): V1-V3 are presumably factor-coded calendar fields, in which
# case as.integer() yields the level codes -- confirm against ?mlbench::Ozone.
Ozone <- mutate(Ozone, across(c(V1, V2, V3), as.integer))

head(Ozone)

# Standardise the column names (V1 -> v1, ...) into a separate copy so the
# original Ozone data frame stays available.
Ozone2 <- clean_names(Ozone)

head(Ozone2)

It is always a good idea to check for duplicate records/examples/rows in your dataset.

# No columns are named, so get_dupes() compares rows across all columns.
get_dupes(Ozone2)
No variable names specified - using all columns.

No duplicate combinations found of: v1, v2, v3, v4, v5, v6, v7, v8, v9, ... and 4 other variables

Start by investigating the missing values and completeness of the features in the data. Note that several variables contain missing values; v9 is dropped below, and the remaining incomplete rows are then removed.

# --- Missingness before any cleaning ----
Ozone2 %>% vis_miss()

Ozone2 %>% gg_miss_var()

Ozone2 %>% gg_miss_var(show_pct = TRUE)

# Write an exploratory report for target v4 as report_Ozone.html in the
# current working directory.
create_report(Ozone2, y = "v4", output_file = "report_Ozone.html", output_dir = getwd())

# --- Drop column v9, then re-check missingness ----
# NOTE(review): v9 is presumably removed because of its share of missing
# values -- confirm against the plots above.
Ozone2 <- select(Ozone2, -v9)

Ozone2 %>% vis_miss()

Ozone2 %>% gg_miss_var()

Ozone2 %>% gg_miss_var(show_pct = TRUE)

# --- Keep only complete rows, then confirm nothing is missing ----
Ozone2 <- Ozone2 %>% drop_na()

Ozone2 %>% vis_miss()

Ozone2 %>% gg_miss_var()

Ozone2 %>% gg_miss_var(show_pct = TRUE)

# Boruta relies on random forests and randomly permuted shadow attributes, so
# fix the RNG seed to make the confirmed/tentative/rejected sets reproducible.
set.seed(652)
# Full run: v4 is the target and every other column of Ozone2 is a candidate.
# doTrace = 2 prints per-run progress; ntree = 500 sets the forest size.
Boruta.Ozone <- Boruta(v4 ~ ., data = Ozone2, doTrace = 2, ntree = 500)
 1. run of importance source...
 2. run of importance source...
 3. run of importance source...
 4. run of importance source...
 5. run of importance source...
 6. run of importance source...
 7. run of importance source...
 8. run of importance source...
 9. run of importance source...
 10. run of importance source...
 11. run of importance source...
After 11 iterations, +0.48 secs: 
 confirmed 8 attributes: v1, v10, v11, v12, v13 and 3 more;
 still have 3 attributes left.

 12. run of importance source...
 13. run of importance source...
 14. run of importance source...
 15. run of importance source...
After 15 iterations, +0.65 secs: 
 rejected 1 attribute: v3;
 still have 2 attributes left.

 16. run of importance source...
 17. run of importance source...
 18. run of importance source...
 19. run of importance source...
 20. run of importance source...
 21. run of importance source...
 22. run of importance source...
 23. run of importance source...
 24. run of importance source...
 25. run of importance source...
 26. run of importance source...
 27. run of importance source...
 28. run of importance source...
 29. run of importance source...
 30. run of importance source...
 31. run of importance source...
 32. run of importance source...
 33. run of importance source...
 34. run of importance source...
 35. run of importance source...
 36. run of importance source...
 37. run of importance source...
 38. run of importance source...
 39. run of importance source...
 40. run of importance source...
 41. run of importance source...
 42. run of importance source...
 43. run of importance source...
 44. run of importance source...
 45. run of importance source...
 46. run of importance source...
 47. run of importance source...
 48. run of importance source...
 49. run of importance source...
 50. run of importance source...
 51. run of importance source...
 52. run of importance source...
 53. run of importance source...
 54. run of importance source...
 55. run of importance source...
 56. run of importance source...
 57. run of importance source...
 58. run of importance source...
 59. run of importance source...
 60. run of importance source...
 61. run of importance source...
 62. run of importance source...
 63. run of importance source...
 64. run of importance source...
 65. run of importance source...
 66. run of importance source...
 67. run of importance source...
 68. run of importance source...
 69. run of importance source...
 70. run of importance source...
 71. run of importance source...
 72. run of importance source...
 73. run of importance source...
 74. run of importance source...
 75. run of importance source...
 76. run of importance source...
 77. run of importance source...
 78. run of importance source...
 79. run of importance source...
 80. run of importance source...
 81. run of importance source...
 82. run of importance source...
 83. run of importance source...
 84. run of importance source...
 85. run of importance source...
 86. run of importance source...
 87. run of importance source...
 88. run of importance source...
 89. run of importance source...
 90. run of importance source...
 91. run of importance source...
 92. run of importance source...
 93. run of importance source...
 94. run of importance source...
 95. run of importance source...
 96. run of importance source...
 97. run of importance source...
 98. run of importance source...
 99. run of importance source...
# Print the summary of the full run: confirmed, rejected and tentative attributes.
Boruta.Ozone
Boruta performed 99 iterations in 4.12301 secs.
 8 attributes confirmed important: v1, v10, v11, v12,
v13 and 3 more;
 1 attributes confirmed unimportant: v3;
 2 tentative attributes left: v2, v6;
# Plot the attribute importances from the full run.
plot(Boruta.Ozone)

# Deliberately short run (maxRuns = 12), which can leave attributes tentative.
# Seeded so the result is reproducible across sessions.
set.seed(652)
Boruta.Short <- Boruta(v4 ~ ., data = Ozone2, maxRuns = 12)
Boruta.Short
Boruta performed 11 iterations in 0.4473038 secs.
 8 attributes confirmed important: v1, v10, v11, v12,
v13 and 3 more;
 1 attributes confirmed unimportant: v3;
 2 tentative attributes left: v2, v6;
# Plot the attribute importances from the short run.
plot(Boruta.Short)

# Resolve the attributes still marked tentative into a final decision.
# The fixed result is only printed here; Boruta.Short itself is not modified.
TentativeRoughFix(Boruta.Short)
Boruta performed 11 iterations in 0.4473038 secs.
Tentatives roughfixed over the last 11 iterations.
 8 attributes confirmed important: v1, v10, v11, v12,
v13 and 3 more;
 3 attributes confirmed unimportant: v2, v3, v6;
# Build a model formula for v4 using only the attributes confirmed by the full run.
getConfirmedFormula(Boruta.Ozone)
v4 ~ v1 + v5 + v7 + v8 + v10 + v11 + v12 + v13
<environment: 0x55daa7bd5b58>
# Per-attribute summary of the full run (see ?Boruta::attStats for the columns).
attStats(Boruta.Ozone)
# Plot how attribute importance evolved over the runs of the full analysis.
plotImpHistory(Boruta.Ozone)

LS0tCnRpdGxlOiAiU3RhdCA2NTI6IEZpbmFsIgphdXRob3I6ICJQcm9mLiBFcmljIEEuIFN1ZXNzIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgoKCmBgYHtyfQpsaWJyYXJ5KHBhY21hbikKcF9sb2FkKEJvcnV0YSwgbWxiZW5jaCkKYGBgCgpgYGB7cn0KZGF0YShPem9uZSkKCmhlYWQoT3pvbmUpCmBgYAoKTm90ZSB0aGF0IHRoZSB0YXJnZXQgdmFyaWFibGUgaXMgVjQgPSBEYWlseSBtYXhpbXVtIG9uZS1ob3VyLWF2ZXJhZ2Ugb3pvbmUgcmVhZGluZwoKYGBge3J9Ck96b25lIDwtIE96b25lICU+JSBtdXRhdGUoCiAgVjEgPSBhcy5pbnRlZ2VyKFYxKSwKICBWMiA9IGFzLmludGVnZXIoVjIpLAogIFYzID0gYXMuaW50ZWdlcihWMykKKQoKaGVhZChPem9uZSkKYGBgCgpgYGB7cn0KT3pvbmUyIDwtIE96b25lICU+JSBjbGVhbl9uYW1lcygpCgpoZWFkKE96b25lMikKYGBgCgpJdCBpcyBhbHdheXMgYSBnb29kIGlkZWEgdG8gY2hlY2sgZm9yIGR1cGxpY2F0ZSByZWNvcmRzL2V4YW1wbGVzL3Jvd3MgaW4geW91ciBkYXRhc2V0LgoKYGBge3J9CmdldF9kdXBlcyhPem9uZTIpCmBgYAoKU3RhcnQgYnkgaW52ZXN0aWdhdGluZyB0aGUgbWlzc2luZyB2YWx1ZXMgYW5kIGNvbXBsZXRlbmVzcyBvZiB0aGUgZmVhdHVyZXMgaW4gdGhlIGRhdGEuICBOb3RlIHRoYXQgdGhlICphZ2UqIHZhcmlhYmxlIGNvbnRhaW5zIHNvbWUgbWlzc2luZyB2YWx1ZXMuCgpgYGB7cn0KdmlzX21pc3MoT3pvbmUyKQpnZ19taXNzX3ZhcihPem9uZTIpCmdnX21pc3NfdmFyKE96b25lMiwgc2hvd19wY3QgPSBUUlVFKQpgYGAKCmBgYHtyIGV2YWwgPSBGQUxTRX0KY3JlYXRlX3JlcG9ydChPem9uZTIsIHkgPSAidjQiLCBvdXRwdXRfZmlsZSA9ICJyZXBvcnRfT3pvbmUuaHRtbCIsIG91dHB1dF9kaXIgPSBnZXR3ZCgpKQpgYGAKCmBgYHtyfQpPem9uZTIgPC0gT3pvbmUyICU+JSBzZWxlY3QoLXY5KQoKdmlzX21pc3MoT3pvbmUyKQpnZ19taXNzX3ZhcihPem9uZTIpCmdnX21pc3NfdmFyKE96b25lMiwgc2hvd19wY3QgPSBUUlVFKQpgYGAKCmBgYHtyfQpPem9uZTIgPC0gZHJvcF9uYShPem9uZTIpCgp2aXNfbWlzcyhPem9uZTIpCmdnX21pc3NfdmFyKE96b25lMikKZ2dfbWlzc192YXIoT3pvbmUyLCBzaG93X3BjdCA9IFRSVUUpCmBgYAoKCmBgYHtyfQpCb3J1dGEuT3pvbmUgPC0gQm9ydXRhKHY0IH4gLiwgZGF0YSA9IE96b25lMiwgZG9UcmFjZSA9IDIsIG50cmVlID0gNTAwKQpgYGAKCmBgYHtyfQpCb3J1dGEuT3pvbmUKYGBgCgpgYGB7cn0KcGxvdChCb3J1dGEuT3pvbmUpCmBgYAoKCmBgYHtyfQpCb3J1dGEuU2hvcnQgPC0gQm9ydXRhKHY0IH4gLiwgZGF0YSA9IE96b25lMiwgbWF4UnVucyA9IDEyKQpgYGAKCmBgYHtyfQpCb3J1dGEuU2hvcnQgCmBgYAoKYGBge3J9CnBsb3QoQm9ydXRhLlNob3J0KQpgYGAKCmBgYHtyfQpUZW50YXRpdmVSb3VnaEZpeChCb3J1dGEuU2hvcnQpCmBgYAoKYGBge3J9CmdldENvbmZpcm1lZEZvcm11bGEoQm9ydXRhLk96b25lKQpg
YGAKCmBgYHtyfQphdHRTdGF0cyhCb3J1dGEuT3pvbmUpCmBgYAoKYGBge3J9CnBsb3RJbXBIaXN0b3J5KEJvcnV0YS5Pem9uZSkKYGBgCgoK