-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSVA Analysis.R
66 lines (48 loc) · 1.99 KB
/
SVA Analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
library(plyr)
library(psych)
####### ---------------------------------
####### Data Prep
####### ---------------------------------

# Load the raw ratings data (needs network access). Text columns are kept as
# character vectors rather than being converted to factors.
sva <- read.csv(
  "http://samswift.org/data/SVA-data-2012-03-21.csv",
  stringsAsFactors = FALSE
)

# Set empty strings to NA so blank cells are treated as missing.
sva[sva == ""] <- NA

# Remove unnecessary columns. drop = FALSE keeps the result a data frame even
# if only a single column were to remain.
unused_cols <- c("V1", "V2", "V3", "V4", "V5", "V7", "V10")
sva <- sva[, !names(sva) %in% unused_cols, drop = FALSE]
rm(unused_cols)

# Rename the unnamed (V*) columns and the columns whose names start with "X".
# NOTE(review): the X prefix is presumably what read.csv adds to headers that
# begin with a digit - confirm against the CSV header.
sva <- rename(
  sva,
  c(
    "V6" = "IP",
    "V8" = "StartTime",
    "V9" = "EndTime",
    "X186k_miles" = "186k_miles",
    "X190_2.3" = "model_190_2.3",
    "X1991" = "year_1991"
  )
)

# Add a simple ID for each record. seq_len() gives an empty sequence for an
# empty table, whereas 1:nrow(sva) would yield c(1, 0) and make this fail.
sva$ID <- factor(seq_len(nrow(sva)))
# Mean rating of each item, taken over columns 4 to 27 of the response data.
# vapply() guarantees a plain numeric vector whatever the input looks like.
# NOTE(review): mean() is called without na.rm, so any item with a missing
# response gets an NA mean - confirm that this is intended.
item_means <- vapply(sva[4:27], mean, numeric(1))

# One row per item: the mean in `mean`, the item's column name in `code`
# (also used as the row name).
ratings <- data.frame(
  mean = unname(item_means),
  code = names(item_means),
  row.names = names(item_means),
  stringsAsFactors = FALSE
)

# Combine summer_tires and winter_tires into seasonal_tires by averaging their
# two means. `[[` fails loudly if either item is missing. The variable name
# (including its spelling) is kept because the cleanup at the end of the
# script removes it by name.
M_seasonsal_tires <- mean(
  c(item_means[["summer_tires"]], item_means[["winter_tires"]])
)

# Append the combined item as a list so each value keeps its own type.
# c(number, "text") would coerce the number to a string and silently turn the
# whole `mean` column into character.
ratings[nrow(ratings) + 1, ] <- list(M_seasonsal_tires, "seasonal_tires")

rm(item_means)
# Load the text mapping table (needs network access).
sva.tmap <- read.csv(
  "http://samswift.org/data/SVA-text-mapping-2012-04-03.csv",
  stringsAsFactors = FALSE
)
sva.tmap$X <- NULL

# Collapse the cells from column 4 onwards into one comma-separated `words`
# string per row. Blank and NA cells are skipped outright; stripping ",," from
# the pasted string afterwards would fuse two words separated by a blank cell
# ("a,,b" -> "ab"), could leave a stray comma, and would keep NA cells as the
# literal word "NA". Rows with no words at all get NA.
word_cells <- as.matrix(sva.tmap[4:ncol(sva.tmap)])
sva.tmap$words <- vapply(
  seq_len(nrow(word_cells)),
  function(i) {
    cells <- word_cells[i, ]
    cells <- cells[!is.na(cells) & nzchar(cells)]
    if (length(cells) == 0) NA_character_ else paste(cells, collapse = ",")
  },
  character(1)
)
rm(word_cells)

# Merge mapped values and ratings, keeping unmatched rows from both sides.
# This replaces the raw response data held in `sva` with the merged table.
# NOTE(review): only the first four mapping columns are carried over, so
# `words` survives the merge only if it is one of them - confirm against the
# CSV header.
sva <- merge(sva.tmap[1:4], ratings, all = TRUE)
# Create the word-value data frame: one row per word, carrying the code it
# maps to and that code's mean rating.

# Keep only the rated codes. The condition must be a comparison: writing
# `rated = 1` passes an unused named argument and filters nothing.
# NOTE(review): assumes the merged table has a `rated` column in which 1 marks
# a rated code - confirm against the mapping CSV.
sva.rated <- subset(sva, rated == 1)

# Split every comma-separated word list in one pass. An NA entry yields a
# single NA word (dropped below); an empty string yields no words at all.
word_lists <- strsplit(as.character(sva.rated$words), ",")
words_per_code <- vapply(word_lists, length, integer(1))

# Build the table in one step instead of growing it with rbind() in a loop.
# `mean` may arrive as character, so it is converted explicitly.
wordValueMap <- data.frame(
  word = as.character(unlist(word_lists, use.names = FALSE)),
  code = rep(sva.rated$code, words_per_code),
  rating = as.numeric(as.character(rep(sva.rated$mean, words_per_code))),
  stringsAsFactors = FALSE
)

# Drop placeholder rows that carry no actual word.
has_word <- !is.na(wordValueMap$word) & nzchar(wordValueMap$word)
wordValueMap <- wordValueMap[has_word, ]

# Index the map by word. This fails if the same word is listed more than once,
# because row names must be unique.
row.names(wordValueMap) <- wordValueMap$word

# Clean up temporaries, including the seasonal-tires mean computed earlier in
# the script.
rm(word_lists, words_per_code, has_word, M_seasonsal_tires)