-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 7768530
Showing
6 changed files
with
488,783 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
.vscode/ | ||
out.txt | ||
__pycache__/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
import datetime | ||
|
||
# strptime/strftime pattern for timestamps: date, time and fractional
# seconds (%f), e.g. "2012-01-01 23:32:38.000".
FORMAT = "%Y-%m-%d %H:%M:%S.%f"
|
||
def parsedate(x):
    """Return *x* as a ``datetime.datetime``.

    A value that already is a ``datetime.datetime`` is handed back
    unchanged; anything else is parsed with ``strptime`` using ``FORMAT``.
    """
    if isinstance(x, datetime.datetime):
        return x
    return datetime.datetime.strptime(x, FORMAT)
|
||
def tsdiff(x, y):
    """Return ``x - y`` expressed in seconds.

    Both arguments are normalised through ``parsedate`` first, so each may
    be a ``datetime.datetime`` or a string in ``FORMAT``.
    """
    left = parsedate(x)
    right = parsedate(y)
    delta = left - right
    return delta.total_seconds()
|
||
def tsadd(x, seconds):
    """Shift timestamp *x* by *seconds* and return it as a ``FORMAT`` string.

    *x* is normalised through ``parsedate``; *seconds* may be negative to
    move backwards in time.
    """
    offset = datetime.timedelta(seconds=seconds)
    shifted = parsedate(x) + offset
    return shifted.strftime(FORMAT)
|
||
if __name__ == "__main__":
    # Ad-hoc driver: calls both helpers 23000 times with one pre-parsed
    # datetime and one raw string (presumably a quick timing/smoke run).
    testd2 = "2012-12-01 03:33:38.000"
    testd1 = datetime.datetime.strptime("2012-01-01 23:32:38.000", FORMAT)

    for _ in range(23000):
        a = tsdiff(testd1, testd2)
        b = tsadd(testd1, -122)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
DATUM;IME_PRAZNIKA;DAN_V_TEDNU;DELA_PROST_DAN;DAN;MESEC;LETO | ||
1.01.2012;novo leto;nedelja;da;1;1;2012 | ||
2.01.2012;novo leto;ponedeljek;da;2;1;2012 | ||
8.02.2012;Prešernov dan, slovenski kulturni praznik;sreda;da;8;2;2012 | ||
8.04.2012;velika noč;nedelja;da;8;4;2012 | ||
9.04.2012;velikonočni ponedeljek;ponedeljek;da;9;4;2012 | ||
27.04.2012;dan boja proti okupatorju;petek;da;27;4;2012 | ||
1.05.2012;praznik dela;torek;da;1;5;2012 | ||
2.05.2012;praznik dela;sreda;da;2;5;2012 | ||
27.05.2012;binkoštna nedelja;nedelja;da;27;5;2012 | ||
8.06.2012;dan Primoža Trubarja;petek;ne;8;6;2012 | ||
25.06.2012;dan državnosti;ponedeljek;da;25;6;2012 | ||
15.08.2012;Marijino vnebovzetje;sreda;da;15;8;2012 | ||
17.08.2012;združitev prekmurskih Slovencev z matičnim narodom;petek;ne;17;8;2012 | ||
15.09.2012;vrnitev Primorske k matični domovini;sobota;ne;15;9;2012 | ||
31.10.2012;dan reformacije;sreda;da;31;10;2012 | ||
1.11.2012;dan spomina na mrtve;četrtek;da;1;11;2012 | ||
23.11.2012;dan Rudolfa Maistra;petek;ne;23;11;2012 | ||
25.12.2012;božič;torek;da;25;12;2012 | ||
26.12.2012;dan samostojnosti in enotnosti;sreda;da;26;12;2012 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,127 @@ | ||
from calendar import weekday | ||
from datetime import datetime, time | ||
import numpy as np | ||
import pandas as pd | ||
import linear | ||
import lpputils | ||
|
||
def read_data(filename, sep='\t'):
    """
    Loads a delimited text file into a pandas DataFrame.

    filename is passed to pandas.read_csv unchanged; sep is the field
    separator (a tab unless overridden).
    """
    frame = pd.read_csv(filename, sep=sep)
    return frame
|
||
def add_day_of_week(data):
    """
    Adds a one-hot encoding of the departure weekday to the data.

    Seven float columns ('Monday' .. 'Sunday') are appended; for every row
    the column matching the weekday of 'Departure time' is 1 and the other
    six are 0. The input frame is not modified; a new frame is returned.
    """
    weekday_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday',
                     'Friday', 'Saturday', 'Sunday']

    # Monday == 0 ... Sunday == 6, matching the column order above.
    weekdays = pd.to_datetime(data['Departure time']).dt.weekday.to_numpy()

    one_hot = np.zeros((len(weekdays), len(weekday_names)))
    one_hot[np.arange(len(weekdays)), weekdays] = 1

    # Reuse the caller's index: concat(axis=1) aligns on index labels, so a
    # fresh 0..n-1 index would produce extra NaN rows whenever `data` has
    # been filtered, sorted or otherwise carries a non-default index.
    days = pd.DataFrame(one_hot, columns=weekday_names, index=data.index)
    return pd.concat([data, days], axis=1)
|
||
def add_holiday_info(data):
    """
    Adds information if a day is holiday to the data.

    Reads 'prazniki.csv' (';'-separated) and flags every row whose
    'Departure time' falls on a date listed in its 'DATUM' column. The
    result is stored in a new 'Holiday' column (1 = listed, 0 = not listed)
    and the same, mutated frame is returned.
    """
    holidays = read_data('prazniki.csv', ';')
    # A set gives constant-time membership tests instead of scanning a list
    # once per row.
    holiday_dates = set(holidays['DATUM'])

    def is_holiday(departure):
        moment = datetime.strptime(departure, '%Y-%m-%d %H:%M:%S.%f')
        # DATUM uses an unpadded day and a zero-padded month ("1.01.2012").
        # Build that key by hand: strftime's '%-d' is a platform-specific
        # extension and is not available everywhere (e.g. Windows).
        key = f"{moment.day}.{moment.month:02d}.{moment.year}"
        return 1 if key in holiday_dates else 0

    data['Holiday'] = data['Departure time'].apply(is_holiday)
    return data
|
||
def add_duration(data):
    """
    Stores the trip length in a new 'Duration' column.

    Each value is lpputils.tsdiff(arrival, departure) for that row's
    'Arrival time' and 'Departure time'. The frame is modified in place
    and also returned.
    """
    def trip_length(row):
        return lpputils.tsdiff(row['Arrival time'], row['Departure time'])

    data['Duration'] = data.apply(trip_length, axis=1)
    return data
|
||
def add_departure_info(data):
    """
    Adds structured departure time to the data.

    Splits 'Departure time' into the integer columns 'DP hour', 'DP min',
    'DP day' and 'DP month'. The frame is modified in place and returned.
    """
    # Parse the column once and reuse it for all four derived features.
    departures = pd.to_datetime(data['Departure time'])
    data['DP hour'] = departures.dt.hour
    data['DP min'] = departures.dt.minute
    data['DP day'] = departures.dt.day
    data['DP month'] = departures.dt.month
    return data
|
||
def pre_process_data(data, train=True):
    """
    Turns a raw frame into model features.

    Adds the departure-time, weekday and holiday features, removes the
    route/vehicle/driver columns and, when train is True, adds 'Duration'
    and removes 'Arrival time'. 'Departure time' is split off last.

    Returns a (features, departures) tuple where departures is the removed
    'Departure time' column.
    """
    # Feature builders, applied in this order; each returns the frame.
    for enrich in (add_departure_info, add_day_of_week, add_holiday_info):
        data = enrich(data)

    # not really needed since they are the same everywhere
    constant_columns = ['Route description', 'Route Direction',
                        'First station', 'Last station', 'Route']
    identifier_columns = ['Registration', 'Driver ID']
    data = data.drop(columns=constant_columns + identifier_columns)

    # NOTE(review): with train=False an 'Arrival time' column, if the input
    # has one, stays in the returned features - confirm this is intended.
    if train:
        data = add_duration(data)
        data = data.drop(columns='Arrival time')

    departures = data['Departure time']
    features = data.drop(columns='Departure time')
    return features, departures
|
||
def train_lr(data, lamb=1.0, label='Duration'):
    """
    Fits linear.LinearLearner on the given frame.

    The label column is the target, every other column is a feature and
    lamb is forwarded to the learner as lambda_. Returns whatever the
    learner yields when called with (features, targets).
    """
    features = data.drop(columns=label).to_numpy()
    targets = data[label].to_numpy()

    learner = linear.LinearLearner(lambda_=lamb)
    return learner(features, targets)
|
||
def predict_lr(model, data):
    """
    Applies the model to every row of pre-processed data.

    model is called once per row with that row as a NumPy array; the
    results are written to a new 'Duration' column of data, which is
    modified in place and returned.
    """
    # Snapshot the features before the prediction column is attached.
    feature_rows = data.to_numpy()
    data['Duration'] = [model(features) for features in feature_rows]
    return data
|
||
def post_process(data, departures):
    """
    Turns predicted durations back into arrival timestamps.

    Re-attaches departures as 'Departure time' and fills 'Arrival time'
    with lpputils.tsadd(departure, duration) for each row. The frame is
    modified in place and returned.
    """
    data['Departure time'] = departures

    def arrival(row):
        return lpputils.tsadd(row['Departure time'], row['Duration'])

    data['Arrival time'] = data.apply(arrival, axis=1)
    return data
|
||
def create_output(data, departures, filename='out.txt'):
    """
    Creates the output file.

    Post-processes data (see post_process) and writes the resulting
    'Arrival time' values to filename, one per line, without index or
    header.
    """
    data = post_process(data, departures)
    # A single column never emits a field separator, so the default one is
    # fine. The previous sep='\n' duplicated the line terminator, which is
    # an invalid CSV dialect and may be rejected by the csv writer.
    data['Arrival time'].to_csv(filename, index=False, header=False)
|
||
if __name__ == '__main__':
    # Load both data sets before any processing starts.
    raw_train, raw_test = read_data('train_pred.csv'), read_data('test_pred.csv')

    # The test set is processed with train=False, so no 'Duration' is added.
    train_data, departures_train = pre_process_data(raw_train)
    test_data, departures_test = pre_process_data(raw_test, train=False)

    # Fit on the training features, predict the test rows, write the result.
    model = train_lr(train_data)
    create_output(predict_lr(model, test_data), departures_test)
|
Oops, something went wrong.