-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmpg.R
57 lines (39 loc) · 3.01 KB
/
mpg.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
library(tidyverse)
library(datasets)
# Rename the mpg columns to more descriptive names (f_e = fuel efficiency).
# The dataset's help page describes what each original column means.
?mpg

# Mapping written as new_name = "old_name". Renaming by name rather than by
# position fails loudly if a column is missing, instead of silently attaching
# the wrong label to a column when the column order differs.
col_name_map <- c(
  car = "manufacturer",
  model = "model",
  engine_displacement = "displ",
  year = "year",
  cylinders = "cyl",
  transmission = "trans",
  drv = "drv",
  f_e_city = "cty",
  f_e_highway = "hwy",
  fuel_type = "fl",
  car_type = "class"
)
col_new_names <- names(col_name_map)

# Always start from the packaged dataset so that re-running the script in the
# same session does not try to rename an already-renamed global copy.
mpg <- rename(ggplot2::mpg, all_of(col_name_map))

# View() opens a data viewer; it is only useful, and only reliably available,
# when the script is run interactively.
if (interactive()) {
  View(mpg)
}
# Relationship between engine displacement and highway fuel efficiency
ggplot(data = mpg) +
  geom_point(mapping = aes(x = engine_displacement, y = f_e_highway))
help("geom_point")
# Aesthetics
# Add a third dimension to the scatter plot by mapping another mpg variable to
# an aesthetic such as color, size, alpha (transparency) or shape.
# NOTE: alpha is an ordered aesthetic, so it suits continuous/ordered values;
# ggplot2 warns when it is mapped to a discrete variable such as fuel_type.
# The shape aesthetic can accommodate at most 6 levels by default.

# Fixed appearance (not mapped to data) is set outside aes().
ggplot(data = mpg) +
  geom_point(
    mapping = aes(x = engine_displacement, y = f_e_highway),
    color = "brown",
    stroke = 2
  )

# Color by car type.
ggplot(data = mpg) +
  geom_point(
    mapping = aes(x = engine_displacement, y = f_e_highway, color = car_type)
  )

# Color by fuel type.
ggplot(data = mpg) +
  geom_point(
    mapping = aes(x = engine_displacement, y = f_e_highway, color = fuel_type)
  )

# Color by car type, transparency by fuel type.
ggplot(data = mpg) +
  geom_point(
    mapping = aes(
      x = engine_displacement,
      y = f_e_highway,
      color = car_type,
      alpha = fuel_type
    )
  )

# Color by car type, point shape by fuel type.
ggplot(data = mpg) +
  geom_point(
    mapping = aes(
      x = engine_displacement,
      y = f_e_highway,
      color = car_type,
      shape = fuel_type
    )
  )
# Facets work best with categorical variables: they create one subplot per
# category of the faceting variable.

# One panel per car type, laid out on two rows.
ggplot(data = mpg) +
  geom_point(
    mapping = aes(x = engine_displacement, y = f_e_highway, color = fuel_type)
  ) +
  facet_wrap(vars(car_type), nrow = 2)

# One panel per transmission.
ggplot(data = mpg) +
  geom_point(
    mapping = aes(x = engine_displacement, y = f_e_highway, color = fuel_type)
  ) +
  facet_wrap(vars(transmission))

# One panel per drive train.
ggplot(data = mpg) +
  geom_point(
    mapping = aes(x = engine_displacement, y = f_e_highway, color = fuel_type)
  ) +
  facet_wrap(vars(drv))
# Combine two categorical variables with facet_grid(): one variable defines
# the rows of the panel grid, the other defines the columns.

# Rows = drive train, columns = number of cylinders.
ggplot(data = mpg) +
  geom_point(
    mapping = aes(x = engine_displacement, y = f_e_highway, color = fuel_type)
  ) +
  facet_grid(rows = vars(drv), cols = vars(cylinders))

# Rows only: one row of panels per number of cylinders.
ggplot(data = mpg) +
  geom_point(
    mapping = aes(x = engine_displacement, y = f_e_highway, color = car_type)
  ) +
  facet_grid(rows = vars(cylinders))
# Smooth geom: geom_smooth()
# The linetype aesthetic only copes with a small number of levels.
# The group aesthetic splits the data into groups but does not add a legend.

# Single smoothed line for highway fuel efficiency.
ggplot(data = mpg) +
  geom_smooth(mapping = aes(x = engine_displacement, y = f_e_highway))

# One smoothed line per drive train, distinguished by line type.
ggplot(data = mpg) +
  geom_smooth(
    mapping = aes(x = engine_displacement, y = f_e_city, linetype = drv)
  )

# One smoothed line per drive train, grouped only (no legend).
ggplot(data = mpg) +
  geom_smooth(
    mapping = aes(x = engine_displacement, y = f_e_city, group = drv)
  )

# One smoothed line per drive train, colored, with the legend switched off.
ggplot(data = mpg) +
  geom_smooth(
    mapping = aes(x = engine_displacement, y = f_e_city, color = drv),
    show.legend = FALSE
  )
# Using multiple geoms in one plot
# Mappings given to ggplot() are shared by every layer; mappings given to a
# single geom apply to that layer only.

# Shared x/y; color applies to the points only, so there is one smooth line.
ggplot(
  data = mpg,
  mapping = aes(x = engine_displacement, y = f_e_city)
) +
  geom_point(mapping = aes(color = drv)) +
  geom_smooth()

# Color is shared, so both the points and the smooth lines are split by drv;
# se = FALSE is passed to geom_smooth().
ggplot(
  data = mpg,
  mapping = aes(x = engine_displacement, y = f_e_city, color = drv)
) +
  geom_point() +
  geom_smooth(se = FALSE)
# Overplotting: when points in a scatter plot overlap each other, use
# position = "jitter" to spread them out.
ggplot(data = mpg) +
  geom_point(
    mapping = aes(x = engine_displacement, y = f_e_highway),
    position = "jitter"
  )

# Box plot: visualise the quartiles, the median, and the lower and upper
# extremes of highway fuel efficiency for each drive train.
ggplot(data = mpg) +
  geom_boxplot(mapping = aes(x = drv, y = f_e_highway))