")}return r}function n(t,w,r){var x=h(t,r);var v=a(t);var y,s;if(v){y=d(v,x)}else{return}var q=c(t);if(q.length){s=document.createElement("pre");s.innerHTML=y.value;y.value=k(q,c(s),x)}y.value=i(y.value,w,r);var u=t.className;if(!u.match("(\\s|^)(language-)?"+v+"(\\s|$)")){u=u?(u+" "+v):v}if(/MSIE [678]/.test(navigator.userAgent)&&t.tagName=="CODE"&&t.parentNode.tagName=="PRE"){s=t.parentNode;var p=document.createElement("div");p.innerHTML="
"+y.value+"
hljs.initHighlightingOnLoad(); </script>
Weight Lifting Class ("classe") Prediction
Yu Fang, 06/2014
- Initialization
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
- Read data from csv files
# Load the raw training and testing sets from CSV.
# NOTE: use the full argument name `header` -- the original `head=TRUE` is not
# a real read.csv argument and only worked via R's partial argument matching.
# (read.csv already returns a data.frame; the wrapper is redundant but harmless.)
training=data.frame(read.csv(file="/Volumes/WATERMELON/Study/practicalMachinLearning/pml-training.csv",header=TRUE,sep=","))
testing=data.frame(read.csv(file="/Volumes/WATERMELON/Study/practicalMachinLearning/pml-testing.csv",header=TRUE,sep=","))
- Data cleaning and preprocessing
# Treat empty strings as missing, then keep only the columns with no missing
# values at all. This leaves 60 columns in each data set.
training[training == ""] <- NA
keepTrainCols <- colSums(is.na(training)) == 0
training <- training[, keepTrainCols]
testing[testing == ""] <- NA
keepTestCols <- colSums(is.na(testing)) == 0
testing <- testing[, keepTestCols]
# Detect near-zero-variance covariates with caret's nearZeroVar.
nzvMetrics <- nearZeroVar(training, saveMetrics = TRUE)
# "new_window" is flagged nzv = TRUE: the training set has 19216 "no" vs 406
# "yes", while the testing set is all "no". Keep only new_window == "no" rows.
useTrain <- training[which(training$new_window == "no"), ]
# Drop identifiers and timestamps that carry no predictive signal, plus
# "new_window" from the step above. The same logical mask is reused for the
# testing set (columns are in the same order in both frames).
dropCols <- names(useTrain) %in% c("X", "cvtd_timestamp", "raw_timestamp_part_1", "raw_timestamp_part_2", "new_window")
useTrain <- useTrain[!dropCols]
useTest <- testing[!dropCols]
# There may be between-subject variance, so dummy-code "user_name" in both sets
# (the training formula targets "classe", the testing one "problem_id").
trainDummies <- dummyVars(classe ~ user_name, data = useTrain)
trainDummyCols <- predict(trainDummies, newdata = useTrain)
useTrain <- cbind(trainDummyCols, useTrain)
testDummies <- dummyVars(problem_id ~ user_name, data = useTest)
testDummyCols <- predict(testDummies, newdata = useTest)
useTest <- cbind(testDummyCols, useTest)
# The original factor column is now redundant; drop it from both sets.
dropUser <- names(useTrain) %in% c("user_name")
useTrain <- useTrain[!dropUser]
useTest <- useTest[!dropUser]
# Center and scale the predictors, then apply PCA keeping enough components to
# explain 95% of the variance. Column 60 is the outcome "classe" and is
# excluded from the predictor matrix.
preProc <- preProcess(useTrain[, -60], method = c("center", "scale", "pca"), thresh = 0.95)
trainPC <- predict(preProc, useTrain[, -60])
# Print the preprocessing summary -- 25 principal components are retained.
preProc
##
## Call:
## preProcess.default(x = useTrain[, -60], method = c("center",
## "scale", "pca"), thresh = 0.95)
##
## Created from 19216 samples and 59 variables
## Pre-processing: centered, scaled, principal component signal extraction
##
## PCA needed 25 components to capture 95 percent of the variance
- Fit the model and make predictions.
#Use 10-fold cross validation.
# NOTE: arguments are now fully named -- the original call
# trainControl("cv",10,savePred=T) relied on positional matching, on partial
# matching of `savePredictions`, and on `T` (which is reassignable in R)
# instead of the keyword TRUE.
tc=trainControl(method="cv",number=10,savePredictions=TRUE)
#This problem is to classify the quality of a weight-lifting execution from
#many measured parameters, so a tree-based classifier is more natural than a
#linear regression model, and high accuracy is desired. A random forest is
#therefore fitted on the principal components, with the outcome taken from
#useTrain$classe.
rfModelFit=train(useTrain$classe ~ .,method="rf",data=trainPC,trControl=tc)
## Loading required package: randomForest
## randomForest 4.6-7
## Type rfNews() to see new features/changes/bug fixes.
#The fitted model results summary
rfModelFit
## Random Forest
##
## 19216 samples
## 24 predictors
## 5 classes: 'A', 'B', 'C', 'D', 'E'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
##
## Summary of sample sizes: 17296, 17293, 17295, 17296, 17293, 17294, ...
##
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa Accuracy SD Kappa SD
## 2 1 1 0.002 0.002
## 10 1 1 0.003 0.003
## 20 1 1 0.004 0.005
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 2.
- Predict the testing data
# Apply the same centering/scaling/PCA transform to the testing predictors
# (column 60 is "problem_id", not a predictor), then classify with the fitted
# random forest.
testPC <- predict(preProc, useTest[, -60])
pred <- predict(rfModelFit, testPC)
# Show the predicted classes for the 20 testing cases.
pred
## [1] B A C A A E D B A A B C B A E E A B B B
## Levels: A B C D E