-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdescriptive_statistics.R
58 lines (45 loc) · 1.57 KB
/
descriptive_statistics.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
library(moments)
# Load the two datasets used throughout this script.
# read.csv() already defaults to header = TRUE and sep = ",", so only the
# file path has to be supplied.
# NOTE(review): both paths are absolute and machine-specific; adjust them
# when running the script elsewhere.
movies <- read.csv(
  file = "/home/emu/R Projects/Exploratory Data Analysis with R/Movies.csv"
)
genres <- read.csv(
  file = "/home/emu/R Projects/Exploratory Data Analysis with R/Genres.csv"
)

# Peek at the first rows of each data frame to check the import.
head(movies)
head(genres)
# Univariate analysis of a qualitative (categorical) variable:
# frequency counts per category. (Location, spread and shape are covered
# below for a quantitative variable.)
# Number of movies per Rating category.
table(movies$Rating)
# Number of rows in `genres` per Genre category.
table(genres$Genre)
## Analyze the location of a quantitative variable (Runtime)
mean(movies$Runtime)
median(movies$Runtime)
# Mode: which.max() on a frequency table yields the *position* of the most
# frequent value inside the table (the value itself is only the element
# name). Convert that name back to a number so the modal runtime itself is
# reported. Ties resolve to the first entry of the table.
as.numeric(names(which.max(table(movies$Runtime))))
## Analyze the spread of a quantitative variable (Runtime)
# Extract the column once; every statistic below describes the same vector.
runtime <- movies$Runtime
min(runtime)
max(runtime)
range(runtime)                   # c(min, max)
diff(range(runtime))             # width of the range (max - min)
quantile(runtime)                # default probs: 0%, 25%, 50%, 75%, 100%
quantile(runtime, probs = 0.35)  # a single arbitrary quantile (35%)
IQR(runtime)                     # interquartile range
var(runtime)                     # variance
sd(runtime)                      # standard deviation
## Analyze the shape of a quantitative variable (Runtime)
# skewness() and kurtosis() come from the `moments` package; kurtosis() is
# Pearson's measure (not excess kurtosis), so a normal distribution scores 3.
moments::skewness(movies$Runtime)
moments::kurtosis(movies$Runtime)
# Kernel density estimate of the distribution. The argument expression is
# kept as-is because the default plot title is derived from the call.
plot(density(movies$Runtime))
# Five-number summary plus the mean.
summary(movies$Runtime)
# Bivariate statistics for two qualitative variables:
# contingency table of counts, with Genre as rows and Rating as columns.
table(genres$Genre, genres$Rating)
# Bivariate statistics for two quantitative variables
# Box.Office is the common second variable in every comparison below.
box_office <- movies$Box.Office

## Covariance
cov(x = movies$Runtime, y = box_office)
cov(x = movies$Critic.Score, y = box_office)

## Correlation coefficients
# Original author's note: Runtime is strongly correlated with Box.Office.
# NOTE(review): confirm against the printed coefficients.
cor(x = movies$Runtime, y = box_office)
cor(x = movies$Critic.Score, y = box_office)
# Bivariate statistics for one qualitative and one quantitative variable:
# mean of the quantitative variable within each category.
tapply(X = movies$Box.Office, INDEX = movies$Rating, FUN = mean)
tapply(X = genres$Box.Office, INDEX = genres$Genre, FUN = mean)

# Whole-dataset summary: per-column summary of every variable in `movies`.
summary(movies)