-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathjob.R
113 lines (85 loc) · 3.17 KB
/
job.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
# Packages needed
library(tabulizer)
library(tidyverse)
library(lubridate)
library(jsonlite)
# Location of the source PDF that the price tables are extracted from.
pdf <- "https://www.ppac.gov.in/WriteReadData/userfiles/file/PP_9_a_DailyPriceMSHSD_Metro.pdf"

# Paths of the CSV files holding the stored petrol and diesel price histories.
petrol_file <- "./data/Petrol.csv"
diesel_file <- "./data/Diesel.csv"
# Load a stored price history from a CSV file.
#
# csv_file: path of a comma-separated, UTF-8 encoded file with a `Date` column.
# Returns the data frame read from the file, with `Date` parsed by
# parse_date_time() (formats tried: day-month-year, year-day-month,
# year-month-day).
#
# NOTE(review): this name is the same as readr's read_csv(); since the script
# attaches tidyverse, this definition presumably takes precedence -- confirm
# that no caller expects the readr version.
read_csv <- function(csv_file) {
  date_orders <- c("dmy", "ydm", "ymd")
  history <- read.csv(csv_file, sep = ",", fileEncoding = "utf-8")
  history$Date <- parse_date_time(history$Date, orders = date_orders)
  history
}
# Clean one freshly extracted price table so it can be appended to a stored
# price history.
#
# fuel_today: data frame with exactly five columns, in the order
#   Date, Delhi, Mumbai, Chennai, Kolkata (column names are overwritten).
# Returns the data frame with all whitespace removed from every cell, the
#   five standard column names applied, the four city columns converted to
#   numeric, and `Date` parsed by parse_date_time() (dmy, ydm, ymd).
# Stops with an error when the table does not have five columns.
#
# The earlier type.convert / factor-to-character / str_trim steps were
# dropped: the whitespace-stripping step rebuilt every column from the
# original `fuel_today`, so their results were always discarded.
modify_df <- function(fuel_today) {
  city_cols <- c("Delhi", "Mumbai", "Chennai", "Kolkata")
  expected_cols <- length(city_cols) + 1L

  # Fail early with a clear message instead of a mis-named or short table.
  if (ncol(fuel_today) != expected_cols) {
    stop(
      "Expected a table with ", expected_cols, " columns but got ",
      ncol(fuel_today), ".",
      call. = FALSE
    )
  }

  PD <- fuel_today %>%
    # Strip every whitespace character; this also turns each column into
    # character, whatever type it arrived as.
    mutate(across(everything(), ~ gsub("\\s+", "", .x))) %>%
    `colnames<-`(c("Date", city_cols)) %>%
    mutate(across(all_of(city_cols), as.numeric))

  PD$Date <- parse_date_time(PD$Date, orders = c("dmy", "ydm", "ymd"))
  PD
}
# Build the refreshed price history for one fuel.
#
# item: "petrol" or "diesel".
# pdf:  path or URL of the PDF handed to extract_tables().
# Returns the stored history (read from `petrol_file` or `diesel_file`) with
#   the rows extracted from the PDF appended, exact duplicate rows removed,
#   and rows ordered from the newest date to the oldest.
# Stops with an error for any other `item`. The check runs before the PDF is
#   touched, so a bad argument fails immediately instead of returning the
#   message text as if it were data.
fuel_update <- function(item, pdf) {
  if (!(is.character(item) && length(item) == 1L &&
        item %in% c("petrol", "diesel"))) {
    stop("Invalid Item. Please use 'petrol' or 'diesel'.", call. = FALSE)
  }

  # Two fixed regions on page 1: the first is used as the petrol table, the
  # second as the diesel table. `pages` repeats the page once per region.
  # NOTE(review): coordinates are presumably (top, left, bottom, right) in
  # points, per the tabulizer documentation -- confirm if the PDF layout
  # changes.
  tables <- extract_tables(
    pdf,
    output = "matrix",
    pages = c(1, 1),
    area = list(
      c(164.246, 72.158, 244.977, 292.080),
      c(164.246, 310.175, 244.977, 537.058)
    ),
    guess = FALSE
  )

  if (item == "petrol") {
    table_index <- 1L
    history_file <- petrol_file
  } else {
    table_index <- 2L
    history_file <- diesel_file
  }

  latest <- modify_df(as.data.frame(tables[[table_index]]))
  history <- read_csv(history_file)

  # Append the scraped rows, then drop rows that are exact duplicates.
  combined <- rbind(history, latest) %>%
    distinct()

  # Newest first: reverse of the ascending date order.
  # NOTE(review): `format` is probably ignored when `Date` is already a
  # date-time (as parse_date_time presumably returns); kept unchanged so the
  # ordering stays exactly as before -- confirm before removing.
  combined[rev(order(as.Date(combined$Date, format = "%d-%B-%y"))), ]
}
# Write one fuel's price history to ./data/<Item>.<format>.
#
# item:   "petrol" or "diesel"; title-cased to form the file name
#         (e.g. ./data/Petrol.csv).
# format: "csv" (written with write.csv, unquoted, no row names) or
#         "json" (written with write_json).
# param:  the data to write.
# Returns NULL invisibly; called for its side effect of writing the file.
# Stops with an error for an unknown `item` or `format`. An unknown format
#   used to be only printed, which let the job finish "successfully" without
#   writing anything.
write_output <- function(item, format, param) {
  if (!(is.character(item) && length(item) == 1L &&
        item %in% c("petrol", "diesel"))) {
    stop("Invalid item. Please use 'petrol' or 'diesel'.", call. = FALSE)
  }
  if (!(is.character(format) && length(format) == 1L &&
        format %in% c("csv", "json"))) {
    stop("Invalid format. Use 'csv' or 'json' for output.", call. = FALSE)
  }

  # The validated format doubles as the file extension.
  fname <- paste0("./data/", str_to_title(item), ".", format)

  if (format == "csv") {
    write.csv(param, fname, quote = FALSE, row.names = FALSE)
  } else {
    write_json(param, path = fname)
  }

  invisible(NULL)
}
# Build the refreshed petrol and diesel price histories from the source PDF.
pet <- fuel_update("petrol", pdf)
des <- fuel_update("diesel", pdf)

# Save each history as CSV and then JSON: petrol first, diesel second.
invisible(lapply(
  c("csv", "json"),
  function(out_format) write_output("petrol", out_format, pet)
))
invisible(lapply(
  c("csv", "json"),
  function(out_format) write_output("diesel", out_format, des)
))