-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathrunkeeper.R
101 lines (77 loc) · 2.97 KB
/
runkeeper.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
source('run.R')
source('google.R')
library(rjson)
library(XML)
library(utils)
library(RCurl)
library(plyr)
snooze <- function(sleepSec) if (sleepSec>0) Sys.sleep(sleepSec)
findRoutes <- function(location="Nashville, TN",sleep=1,maxRoutes=NA){
runRoot <- 'http://runkeeper.com'
runURL <- 'http://runkeeper.com/search/routes'
adply(location,.margins=1,.fun=function(loc){
latlong <- gGeoCode(loc)
search_form <-
list(
activityType='RUN',
location=loc,
lat=latlong[1],
lon=latlong[2]
)
x = htmlParse(postForm(runURL,.params=search_form),asText=TRUE)
link_nodes <- getNodeSet(x,'//div[@class="resultListItem clearfix"]',fun=xmlAttrs)
if (length(link_nodes) == 0) return()
links <- unlist(lapply(link_nodes,function(i) i['link']))
titles <- unlist(getNodeSet(x,'//div[@class="resultListItem clearfix"]/div/h4[@class="resultTitle"]',fun=xmlValue))
foundMaxRoutes <- function(){
if (!is.na(maxRoutes)){
maxRoutes <<- maxRoutes - length(link_nodes)
return(ifelse(maxRoutes <= 0,TRUE,FALSE))
} else {
return(FALSE)
}
}
build_answer <- function(){
data.frame(
location=loc,
name=titles,
link=paste(runRoot,links,sep=''),
stringsAsFactors=FALSE
)
}
if (foundMaxRoutes()) return(build_answer())
next_link_node <- getNodeSet(x,'//a[@class="nextLink"]',fun=xmlAttrs)
if (length(next_link_node) == 0) return()
next_rel_link <- next_link_node[[1]]['href']
snooze(sleep)
while (isTRUE(nzchar(next_rel_link))) {
next_link <- paste(runRoot,next_rel_link,sep='')
x = htmlParse(getURL(next_link),asText=TRUE)
link_nodes <- getNodeSet(x,'//div[@class="resultListItem clearfix"]',fun=xmlAttrs)
if (length(link_nodes) == 0) return(build_answer())
links <- c(links,unlist(lapply(link_nodes,function(i) i['link'])))
titles <- c(titles,unlist(getNodeSet(x,'//div[@class="resultListItem clearfix"]/div/h4[@class="resultTitle"]',fun=xmlValue)))
if (foundMaxRoutes()) return(build_answer())
next_link_node <- getNodeSet(x,'//a[@class="nextLink"]',fun=xmlAttrs)
if (length(next_link_node) == 0) return(build_answer())
next_rel_link <- next_link_node[[1]]['href']
}
build_answer()
})
}
getRoute <- function(link=''){
if (!is.data.frame(link)){
link <- data.frame(location='Unknown',name='Unknown',link=link)
}
route <- function(piece,...){
x <- htmlParse(getURL(piece$link),asText=TRUE)
x <- getNodeSet(x,'//div[@class="mainColumnPadding"]/script',fun=xmlValue)[[1]]
payload <- try(strsplit(x,split="\n")[[1]][2])
if (inherits(payload,'try-error'))
return(data.frame(lat=x,lon=NULL,stringsAsFactors=FALSE))
x <- sub('^\\s*var\\s+routePoints\\s+=\\s+','',payload)
x <- sub(';$','',x)
ldply(fromJSON(x),function(i)data.frame(lat=i$latitude,lon=i$longitude,stringsAsFactors=FALSE) )
}
ddply( link, .(location,name),.fun=route)
}