# Import packages

In [27]:
import pandas as pd
import numpy as np
from pypfopt.efficient_frontier import EfficientFrontier
from pypfopt import risk_models
from pypfopt.risk_models import CovarianceShrinkage
from pypfopt import expected_returns
from datetime import datetime
from pandas.tseries.offsets import BDay

In [28]:
import time
import pickle

# 1. Read Input Data

In [29]:
df_price = pd.read_csv("Data/1-sp500_adj_price.csv",index_col=0)

  mask |= (ar1 == a)


In [30]:
df_price.shape

(6438964, 3)

In [31]:
df_price.head()

Unnamed: 0,datadate,tic,adj_price
1,19900102,ADCT,4.074244
2,19900103,ADCT,4.0469
3,19900104,ADCT,3.964869
4,19900105,ADCT,3.992212
5,19900108,ADCT,3.937525


In [32]:
selected_stock = pd.read_csv("Data/2-portfolio_data/stocks_selected_total_user8.csv")

In [33]:
selected_stock.shape

(12932, 3)

In [34]:
selected_stock.head()

Unnamed: 0,tic,predicted_return,trade_date
0,EOG,0.033723,19950601
1,EQT,0.037745,19950601
2,HES,0.05145,19950601
3,NFX,0.030283,19950601
4,OKE,0.04102,19950601


# 2. Get trade date

In [35]:
print("Number of unique stocks selected: ", len(selected_stock.tic.unique()))

Number of unique stocks selected:  982


In [36]:
all_date=df_price.datadate.unique()

In [37]:
len(all_date)

7155

In [38]:
trade_date=selected_stock.trade_date.unique()

In [39]:
trade_date

array([19950601, 19950901, 19951201, 19960301, 19960603, 19960903,
       19961202, 19970303, 19970602, 19970902, 19971201, 19980302,
       19980601, 19980901, 19981201, 19990301, 19990601, 19990901,
       19991201, 20000301, 20000601, 20000901, 20001201, 20010301,
       20010601, 20010904, 20011203, 20020301, 20020603, 20020903,
       20021202, 20030303, 20030602, 20030902, 20031201, 20040301,
       20040601, 20040901, 20041201, 20050301, 20050601, 20050901,
       20051201, 20060301, 20060601, 20060901, 20061201, 20070301,
       20070601, 20070904, 20071203, 20080303, 20080602, 20080902,
       20081201, 20090302, 20090601, 20090901, 20091201, 20100301,
       20100601, 20100901, 20101201, 20110301, 20110601, 20110901,
       20111201, 20120301, 20120601, 20120904, 20121203, 20130301,
       20130603, 20130903, 20131202, 20140303, 20140602, 20140902,
       20141201, 20150302, 20150601, 20150901, 20151201, 20160301,
       20160601, 20160901, 20161201, 20170301, 20170601])

In [40]:
print("Number of trade dates", len(trade_date))

Number of trade dates 89


# 3. Get daily 1 year return table in each 89 trade period

In [41]:
selected_stock.head()

Unnamed: 0,tic,predicted_return,trade_date
0,EOG,0.033723,19950601
1,EQT,0.037745,19950601
2,HES,0.05145,19950601
3,NFX,0.030283,19950601
4,OKE,0.04102,19950601


In [414]:
# took about 90 minutes to run
start = time.time()
all_return_table={}
#all_predicted_return={}
all_stocks_info = {}
#for i in range(0,1):
for i in range(len(trade_date)):
    #match trading date
    index = selected_stock.trade_date==trade_date[i]
    print(trade_date[i])
    #get the corresponding trade period's selected stocks' name
    stocks_name=selected_stock.tic[selected_stock.trade_date==trade_date[i]].values
    temp_info = selected_stock[selected_stock.trade_date==trade_date[i]]
    temp_info = temp_info.reset_index()
    del temp_info['index']
    all_stocks_info[trade_date[i]] = temp_info
    #get the corresponding trade period's selected stocks' predicted return
    asset_expected_return=selected_stock[index].predicted_return.values
    
    #get current trade date and calculate trade date last year, it has to be a business date
    last_year_tradedate=int((trade_date[i]-round(trade_date[i]/10000)*10000)+round(trade_date[i]/10000-1)*10000)
    convert_to_yyyymmdd=datetime.strptime(str(last_year_tradedate), '%Y%m%d').strftime('%Y-%m-%d')
    #determine the business date
    #print(convert_to_yyyymmdd)
    ts = pd.Timestamp(convert_to_yyyymmdd) 
    bd = pd.tseries.offsets.BusinessDay(n =1) 
    new_timestamp = ts - bd 
    lastY_tradedate = int(new_timestamp.date().strftime('%Y%m%d'))
    get_date_index=(all_date<trade_date[i]) & (all_date>lastY_tradedate)
    get_date=all_date[get_date_index]
    #get adjusted price table
    return_table=pd.DataFrame()
    for m in range(len(stocks_name)):
        #get stocks's name
        index_tic=(df_price.tic==stocks_name[m])
        #get this stock's all historicall price from sp500_price
        sp500_temp=df_price[index_tic]
        merge_left_data_table = pd.DataFrame(get_date)
        merge_left_data_table.columns = ['datadate']
        temp_price=merge_left_data_table.merge(sp500_temp, on=['datadate'], how='left')
        temp_price = temp_price.dropna()
        temp_price['daily_return']=temp_price.adj_price.pct_change()

        return_table=return_table.append(temp_price,ignore_index=True)
    all_return_table[trade_date[i]] = return_table
end = time.time()
print("Time consuming: ", (end-start)/60, " minutes")
    
    

19950601
19950901
19951201
19960301
19960603
19960903
19961202
19970303
19970602
19970902
19971201
19980302
19980601
19980901
19981201
19990301
19990601
19990901
19991201
20000301
20000601
20000901
20001201
20010301
20010601
20010904
20011203
20020301
20020603
20020903
20021202
20030303
20030602
20030902
20031201
20040301
20040601
20040901
20041201
20050301
20050601
20050901
20051201
20060301
20060601
20060901
20061201
20070301
20070601
20070904
20071203
20080303
20080602
20080902
20081201
20090302
20090601
20090901
20091201
20100301
20100601
20100901
20101201
20110301
20110601
20110901
20111201
20120301
20120601
20120904
20121203
20130301
20130603
20130903
20131202
20140303
20140602
20140902
20141201
20150302
20150601
20150901
20151201
20160301
20160601
20160901
20161201
20170301
20170601
Time consuming:  92.59127250512441  minutes


## Save to pickle

In [419]:
#with open('Data/all_return_table.pickle', 'wb') as handle: 
#    pickle.dump(all_return_table, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [420]:
#with open('Data/all_stocks_info.pickle', 'wb') as handle:
#    pickle.dump(all_stocks_info, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [42]:
#with open('Data/all_return_table.pickle', 'rb') as handle:
#    all_return_table = pickle.load(handle)

#with open('Data/all_stocks_info.pickle', 'rb') as handle:
#    all_stocks_info = pickle.load(handle)


# 4. Potfolio Optimization using pypfopt

In [44]:
# took under 5 minutes to run

stocks_weight_table = pd.DataFrame([])

for i in range(len(trade_date)):
    # get selected stocks information
    p1_alldata=(all_stocks_info[trade_date[i]])
    # sort it by tic
    p1_alldata=p1_alldata.sort_values('tic')
    p1_alldata = p1_alldata.reset_index()
    del p1_alldata['index']
    
    
    # get selected stocks tic
    p1_stock = p1_alldata.tic
    
    # get predicted return from selected stocks
    p1_predicted_return=p1_alldata.pivot_table(index = 'trade_date',columns = 'tic', values = 'predicted_return')
    # use the predicted returns as the Expected returns to feed into the portfolio object
    mu = p1_predicted_return.T.values

    # get the 1-year historical return
    p1_return_table=all_return_table[trade_date[i]]
    p1_return_table_pivot=p1_return_table.pivot_table(index = 'datadate',columns = 'tic', values = 'daily_return')
    # use the 1-year historical return table to calculate covariance matrix between selected stocks
    S = risk_models.sample_cov(p1_return_table_pivot)
    del S.index.name 
    
    # mean variance
    ef_mean = EfficientFrontier(mu, S,weight_bounds=(0, 0.05))
    raw_weights_mean = ef_mean.max_sharpe()
    cleaned_weights_mean = ef_mean.clean_weights()
    #print(raw_weights_mean)
    #ef.portfolio_performance(verbose=True)

    # minimum variance
    ef_min = EfficientFrontier([0]*len(p1_stock), S,weight_bounds=(0, 0.05))
    raw_weights_min = ef_min.max_sharpe()
    cleaned_weights_min = ef_min.clean_weights()
    #print(cleaned_weights_min)
    
    p1_alldata['mean_weight'] = cleaned_weights_mean.values()
    p1_alldata['min_weight'] = cleaned_weights_min.values()
    
    #ef.portfolio_performance(verbose=True)

    
    stocks_weight_table = stocks_weight_table.append(pd.DataFrame(p1_alldata), ignore_index=True)
    print(trade_date[i], ": Done")


  sigma = np.sqrt(np.dot(weights, np.dot(cov_matrix, weights.T)))


19950601 : Done
19950901 : Done
19951201 : Done
19960301 : Done
19960603 : Done
19960903 : Done
19961202 : Done
19970303 : Done
19970602 : Done
19970902 : Done
19971201 : Done
19980302 : Done
19980601 : Done
19980901 : Done
19981201 : Done
19990301 : Done
19990601 : Done
19990901 : Done
19991201 : Done
20000301 : Done
20000601 : Done
20000901 : Done
20001201 : Done
20010301 : Done
20010601 : Done
20010904 : Done
20011203 : Done
20020301 : Done
20020603 : Done
20020903 : Done
20021202 : Done
20030303 : Done
20030602 : Done
20030902 : Done
20031201 : Done
20040301 : Done
20040601 : Done
20040901 : Done
20041201 : Done
20050301 : Done
20050601 : Done
20050901 : Done
20051201 : Done
20060301 : Done


  clean_weights[np.abs(clean_weights) < cutoff] = 0


20060601 : Done
20060901 : Done
20061201 : Done
20070301 : Done
20070601 : Done
20070904 : Done
20071203 : Done
20080303 : Done
20080602 : Done
20080902 : Done
20081201 : Done
20090302 : Done
20090601 : Done
20090901 : Done
20091201 : Done
20100301 : Done
20100601 : Done
20100901 : Done
20101201 : Done
20110301 : Done
20110601 : Done
20110901 : Done
20111201 : Done
20120301 : Done
20120601 : Done
20120904 : Done
20121203 : Done
20130301 : Done
20130603 : Done
20130903 : Done
20131202 : Done
20140303 : Done
20140602 : Done
20140902 : Done
20141201 : Done
20150302 : Done
20150601 : Done
20150901 : Done
20151201 : Done
20160301 : Done
20160601 : Done
20160901 : Done
20161201 : Done
20170301 : Done
20170601 : Done


In [45]:
stocks_weight_table.head(20)


Unnamed: 0,tic,predicted_return,trade_date,mean_weight,min_weight
0,ACV.1,0.024449,19950601,0.0,0.0
1,AES,0.096917,19950601,0.0,0.0
2,AHM.1,0.044516,19950601,0.012,0.00522
3,AMH.1,0.105036,19950601,0.0,0.0
4,AMT.1,0.085373,19950601,0.0,0.0
5,AOS,0.061494,19950601,0.0,0.0
6,APCC.,0.160571,19950601,0.00872,0.02036
7,APH,0.080985,19950601,0.01136,0.0
8,ARG,0.059334,19950601,0.0,0.0
9,ATI.1,0.170435,19950601,0.00926,0.0


In [46]:
stocks_weight_table.shape

(12932, 5)

## save to excel or csv

In [47]:
stocks_weight_table.to_excel('Data/stocks_weight_table.xlsx','Sheet1')
