Skip to content

Commit

Permalink
init commit
Browse files Browse the repository at this point in the history
  • Loading branch information
dPreininger committed May 3, 2022
0 parents commit 7768530
Show file tree
Hide file tree
Showing 6 changed files with 488,783 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
.vscode/
out.txt
__pycache__/
26 changes: 26 additions & 0 deletions lpputils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import datetime

# Timestamp layout shared by every helper in this module.
FORMAT = "%Y-%m-%d %H:%M:%S.%f"


def parsedate(x):
    """Return *x* as a ``datetime.datetime``.

    A value that already is a ``datetime.datetime`` is handed back
    unchanged (same object); anything else is parsed with ``FORMAT``.

    Raises whatever ``datetime.datetime.strptime`` raises when *x* does
    not match ``FORMAT``.
    """
    if isinstance(x, datetime.datetime):
        return x
    return datetime.datetime.strptime(x, FORMAT)

def tsdiff(x, y):
    """Return ``x - y`` in seconds as a float.

    Both arguments go through ``parsedate``, so each may be either a
    timestamp string or a ``datetime.datetime``. The result is negative
    when *x* is earlier than *y*.
    """
    minuend = parsedate(x)
    subtrahend = parsedate(y)
    elapsed = minuend - subtrahend
    return elapsed.total_seconds()

def tsadd(x, seconds):
    """Shift timestamp *x* by *seconds* and return it formatted with ``FORMAT``.

    *x* goes through ``parsedate`` (string or ``datetime.datetime``);
    *seconds* may be negative to move backwards in time. The result is
    always a string, regardless of the type of *x*.
    """
    offset = datetime.timedelta(seconds=seconds)
    return (parsedate(x) + offset).strftime(FORMAT)

if __name__ == "__main__":
    # Ad-hoc exercise of the helpers: one argument is a pre-parsed
    # datetime, the other a raw string, so both parsedate paths are hit.
    testd2 = "2012-12-01 03:33:38.000"
    testd1 = datetime.datetime.strptime("2012-01-01 23:32:38.000", FORMAT)

    # Results are intentionally discarded; the loop only repeats the calls.
    for _ in range(23000):
        a = tsdiff(testd1, testd2)
        b = tsadd(testd1, -122)
20 changes: 20 additions & 0 deletions prazniki.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
DATUM;IME_PRAZNIKA;DAN_V_TEDNU;DELA_PROST_DAN;DAN;MESEC;LETO
1.01.2012;novo leto;nedelja;da;1;1;2012
2.01.2012;novo leto;ponedeljek;da;2;1;2012
8.02.2012;Prešernov dan, slovenski kulturni praznik;sreda;da;8;2;2012
8.04.2012;velika noč;nedelja;da;8;4;2012
9.04.2012;velikonočni ponedeljek;ponedeljek;da;9;4;2012
27.04.2012;dan boja proti okupatorju;petek;da;27;4;2012
1.05.2012;praznik dela;torek;da;1;5;2012
2.05.2012;praznik dela;sreda;da;2;5;2012
27.05.2012;binkoštna nedelja;nedelja;da;27;5;2012
8.06.2012;dan Primoža Trubarja;petek;ne;8;6;2012
25.06.2012;dan državnosti;ponedeljek;da;25;6;2012
15.08.2012;Marijino vnebovzetje;sreda;da;15;8;2012
17.08.2012;združitev prekmurskih Slovencev z matičnim narodom;petek;ne;17;8;2012
15.09.2012;vrnitev Primorske k matični domovini;sobota;ne;15;9;2012
31.10.2012;dan reformacije;sreda;da;31;10;2012
1.11.2012;dan spomina na mrtve;četrtek;da;1;11;2012
23.11.2012;dan Rudolfa Maistra;petek;ne;23;11;2012
25.12.2012;božič;torek;da;25;12;2012
26.12.2012;dan samostojnosti in enotnosti;sreda;da;26;12;2012
127 changes: 127 additions & 0 deletions predtekmovanje.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
from calendar import weekday
from datetime import datetime, time
import numpy as np
import pandas as pd
import linear
import lpputils

def read_data(filename, sep='\t'):
    """
    Loads a delimited text file into a pandas DataFrame.

    filename: anything ``pd.read_csv`` accepts as its source.
    sep: field separator, tab by default.
    """
    frame = pd.read_csv(filename, sep=sep)
    return frame

def add_day_of_week(data):
    """
    Adds a one-hot encoding of the departure weekday to the data.

    Seven float columns ('Monday' .. 'Sunday') are appended; for each row
    the column matching the weekday of 'Departure time' is 1 and the rest
    are 0. A new DataFrame is returned; the input is not modified.
    """
    day_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
    weekdays = pd.to_datetime(data['Departure time']).dt.weekday.to_numpy()

    one_hot = np.zeros((len(weekdays), 7))
    one_hot[np.arange(len(weekdays)), weekdays] = 1

    # Reuse the caller's index: concat(axis=1) aligns on index labels, so a
    # fresh RangeIndex would misalign (and add NaN rows) for any frame that
    # is not indexed 0..n-1, e.g. after filtering.
    days = pd.DataFrame(one_hot, columns=day_names, index=data.index)
    return pd.concat([data, days], axis=1)

def add_holiday_info(data):
    """
    Adds a 0/1 'Holiday' column telling whether the departure date is a holiday.

    Holiday dates are read from the 'DATUM' column of 'prazniki.csv'
    (semicolon separated, written as day.month.year with an unpadded day
    and a zero-padded month, e.g. '1.01.2012'). Every row of that file
    counts as a holiday; no other column of it is consulted.

    'Departure time' values must be strings in the
    '%Y-%m-%d %H:%M:%S.%f' layout. The column is added to ``data`` in
    place and ``data`` is returned.
    """
    holidays = read_data('prazniki.csv', ';')
    # A set gives constant-time membership tests for every row.
    holiday_dates = set(holidays['DATUM'])

    def is_holiday(departure):
        moment = datetime.strptime(departure, '%Y-%m-%d %H:%M:%S.%f')
        # Build the key by hand: the '%-d' strftime code (unpadded day) is
        # a platform extension and is not available everywhere.
        key = '{}.{:02d}.{:04d}'.format(moment.day, moment.month, moment.year)
        return 1 if key in holiday_dates else 0

    data['Holiday'] = data['Departure time'].apply(is_holiday)
    return data

def add_duration(data):
    """
    Adds the duration of the trip to the data.

    'Duration' is ``lpputils.tsdiff(arrival, departure)`` for each row,
    i.e. the seconds between 'Departure time' and 'Arrival time'. The
    column is added to ``data`` in place and ``data`` is returned.
    """
    # Iterate the two columns directly rather than DataFrame.apply(axis=1):
    # it avoids building a Series per row and also works for an empty
    # frame, where a row-wise apply hands back a DataFrame that cannot be
    # assigned to a single column.
    data['Duration'] = [
        lpputils.tsdiff(arrival, departure)
        for arrival, departure in zip(data['Arrival time'], data['Departure time'])
    ]
    return data

def add_departure_info(data):
    """
    Adds structured departure time to the data.

    Four integer columns are derived from 'Departure time': 'DP hour',
    'DP min', 'DP day' (day of month) and 'DP month'. The columns are
    added to ``data`` in place and ``data`` is returned.
    """
    # Parse the column once instead of once per derived field.
    departures = pd.to_datetime(data['Departure time'])
    data['DP hour'] = departures.dt.hour
    data['DP min'] = departures.dt.minute
    data['DP day'] = departures.dt.day
    data['DP month'] = departures.dt.month
    return data

def pre_process_data(data, train=True):
    """
    Pre-processes the data.

    Adds the departure-time, weekday and holiday features, removes the
    columns that are not used as features and, when ``train`` is true,
    adds the 'Duration' target computed from 'Arrival time'.

    Returns a tuple ``(features, departures)`` where ``departures`` is the
    original 'Departure time' column, which is removed from ``features``.
    """
    data = add_departure_info(data)
    data = add_day_of_week(data)
    data = add_holiday_info(data)

    data = data.drop(columns=[
        # not really needed since they are the same everywhere
        'Route description',
        'Route Direction',
        'First station',
        'Last station',
        'Route',
        # identifiers that are not used as features
        'Registration',
        'Driver ID',
    ])

    if train:
        data = add_duration(data)
    # 'Arrival time' is never a feature. Drop it in both modes so that the
    # prediction frame has the same columns as the training frame; it may
    # be absent altogether when train is False.
    data = data.drop(columns=['Arrival time'], errors='ignore')

    departures = data['Departure time']
    data = data.drop(columns=['Departure time'])

    return data, departures

def train_lr(data, lamb=1.0, label='Duration'):
    """
    Fits a linear regression model on the pre-processed data.

    data: DataFrame whose ``label`` column is the target and whose
        remaining columns are the features.
    lamb: value passed to ``linear.LinearLearner`` as ``lambda_``.
    label: name of the target column.

    Returns whatever calling the learner on (X, y) returns; presumably a
    callable model, since predict_lr calls it once per row.
    """
    features = data.drop(columns=label).to_numpy()
    target = data[label].to_numpy()

    learner = linear.LinearLearner(lambda_=lamb)
    model = learner(features, target)
    return model

def predict_lr(model, data):
    """
    Predicts the trip duration for every row of the pre-processed data.

    ``model`` is called once per row with that row as a NumPy array. The
    predictions are stored in a new 'Duration' column of ``data`` (in
    place) and ``data`` is returned.
    """
    # Snapshot the feature matrix before the 'Duration' column is added.
    feature_matrix = data.to_numpy()
    data['Duration'] = [model(features) for features in feature_matrix]
    return data

def post_process(data, departures):
    """
    Post-processes the data.

    Re-attaches ``departures`` as 'Departure time' and derives
    'Arrival time' as ``lpputils.tsadd(departure, duration)`` for each
    row. Both columns are added to ``data`` in place and ``data`` is
    returned.
    """
    data['Departure time'] = departures
    # Iterate the two columns directly rather than DataFrame.apply(axis=1):
    # it avoids building a Series per row and also works for an empty
    # frame, where a row-wise apply hands back a DataFrame that cannot be
    # assigned to a single column.
    data['Arrival time'] = [
        lpputils.tsadd(departure, duration)
        for departure, duration in zip(data['Departure time'], data['Duration'])
    ]
    return data

def create_output(data, departures, filename='out.txt'):
    """
    Creates the output file.

    Runs post_process and writes the resulting 'Arrival time' column to
    ``filename``, one value per line, without index or header.
    """
    data = post_process(data, departures)
    # A single column is written, so no field separator is ever emitted;
    # the former sep='\n' passed a line terminator as the delimiter, which
    # had no effect on the output and may be rejected by the csv backend.
    data['Arrival time'].to_csv(filename, index=False, header=False)

if __name__ == '__main__':
    # Load and pre-process both data sets; only the training set gets a
    # 'Duration' target.
    train_data, departures_train = pre_process_data(read_data('train_pred.csv'))
    test_data, departures_test = pre_process_data(read_data('test_pred.csv'), train=False)

    # Fit on the training features, predict the test durations and write
    # the resulting arrival times to the default output file.
    model = train_lr(train_data)
    pred = predict_lr(model, test_data)
    create_output(pred, departures_test)

Loading

0 comments on commit 7768530

Please sign in to comment.