Skip to content

Commit

Permalink
ported code
Browse files Browse the repository at this point in the history
  • Loading branch information
dPreininger committed May 3, 2022
1 parent 7768530 commit ae9f2b3
Showing 1 changed file with 77 additions and 23 deletions.
100 changes: 77 additions & 23 deletions predtekmovanje.py → tekmovanje.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,12 @@
from datetime import datetime, time
import numpy as np
import pandas as pd
import linear
import xgboost as xgb
import lpputils

route_dict = {}
direction_dict = {}

def read_data(filename, sep='\t'):
"""
Reads the data from the given file.
Expand Down Expand Up @@ -39,7 +42,7 @@ def add_duration(data):
data['Duration'] = data.apply(lambda x: lpputils.tsdiff(x['Arrival time'], x['Departure time']), axis=1)
return data

def add_departure_info(data):
def add_structured_time(data):
"""
Adds structured departure time to the data.
"""
Expand All @@ -49,21 +52,56 @@ def add_departure_info(data):
data['DP month'] = pd.to_datetime(data['Departure time']).dt.month
return data

def get_direction_from_row(row):
direction = row['Route description']
route = row['Route']
if route in direction_dict:
if direction == direction_dict[route]:
return 0
else:
return 1

direction_dict[route] = direction
return 0

def add_direction(data):
"""
Adds information about in which direction the bus is driving to the data.
"""
data['Direction'] = data.apply(get_direction_from_row, axis=1)
return data

def split_by_route(data):
"""
Creates seperate dataset for each route.
"""
datasets = {}
for route in get_routes(data):
if route not in datasets:
datasets[route] = []
datasets[route].append(data.copy()[(data['Route'] == route) & (data['Direction'] == 0)])
datasets[route].append(data.copy()[(data['Route'] == route) & (data['Direction'] == 1)])
return datasets

def get_routes(data):
return list(data['Route'].unique())

def pre_process_data(data, train=True):
"""
Pre-processes the data.
"""

data = add_departure_info(data)
data = add_day_of_week(data)
data = add_holiday_info(data)
# data = add_route(data)
data = add_direction(data)
data = add_structured_time(data)

# not really needed since they are the same everywhere
data = data.drop('Route description', axis=1)
data = data.drop('Route Direction', axis=1)
data = data.drop('First station' , axis=1)
data = data.drop('Last station', axis=1)
data = data.drop('Route', axis=1)

data = data.drop('Registration', axis=1)
data = data.drop('Driver ID', axis=1)
Expand All @@ -75,28 +113,44 @@ def pre_process_data(data, train=True):
departures = data['Departure time']
data = data.drop('Departure time', axis=1)

# print(data)
datasets = None
if train:
datasets = split_by_route(data)
for set in datasets.values():
set[0].drop(['Route', 'Direction'], axis=1, inplace=True)
set[1].drop(['Route', 'Direction'], axis=1, inplace=True)
data = data.drop('Route', axis=1)

return data, departures
return data, departures, datasets

def train_lr(data, lamb=1.0, label='Duration'):
def train_model(data, label='Duration'):
"""
Trains the linear regression model.
Trains the model.
"""
X = data.drop(label, axis=1).to_numpy()
y = data[label].to_numpy()
models = {}
for route, dataset in data.items():
X0 = dataset[0].drop(label, axis=1).to_numpy()
y0 = dataset[0][label].to_numpy()

X1 = dataset[1].drop(label, axis=1).to_numpy()
y1 = dataset[1][label].to_numpy()

lr = linear.LinearLearner(lambda_=lamb)
return lr(X,y)
model0 = xgb.XGBRegressor(eval_metric='mae', verbosity=0, n_threads=4)
model1 = xgb.XGBRegressor(eval_metric='mae', verbosity=0, n_threads=4)
model0.fit(X0, y0)
model1.fit(X1, y1)
models[route] = {0: model0, 1: model1}
return models

def predict_lr(model, data):
def predict(models, data: pd.DataFrame):
"""
Predicts the arrival time for the given data. Data should be pre-processed.
"""
rows = data.to_numpy()
results = []
for row in rows:
results.append(model(row))
for _, row in data.iterrows():
model = models[row['Route']][row['Direction']]
pred_data = [row.drop(['Route', 'Direction']).to_numpy()]
results.append(model.predict(pred_data))

data['Duration'] = results
return data
Expand All @@ -106,7 +160,7 @@ def post_process(data, departures):
Post-processes the data.
"""
data['Departure time'] = departures
data['Arrival time'] = data.apply(lambda x: lpputils.tsadd(x['Departure time'], x['Duration']), axis=1)
data['Arrival time'] = data.apply(lambda x: lpputils.tsadd(x['Departure time'], int(x['Duration'][0])), axis=1)
return data

def create_output(data, departures, filename='out.txt'):
Expand All @@ -117,11 +171,11 @@ def create_output(data, departures, filename='out.txt'):
data['Arrival time'].to_csv(filename, sep='\n', index=False, header=False)

if __name__ == '__main__':
train_data = read_data('train_pred.csv')
test_data = read_data('test_pred.csv')
train_data, departures_train = pre_process_data(train_data)
test_data, departures_test = pre_process_data(test_data, train=False)
model = train_lr(train_data)
pred = predict_lr(model, test_data)
train_data = read_data('train.csv')
test_data = read_data('test.csv')
train_data, departures_train, train_datasets = pre_process_data(train_data)
test_data, departures_test, _ = pre_process_data(test_data, train=False)
models = train_model(train_datasets)
pred = predict(models, test_data)
create_output(pred, departures_test)

0 comments on commit ae9f2b3

Please sign in to comment.