{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Import packages"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "from pypfopt.efficient_frontier import EfficientFrontier\n",
    "from pypfopt import risk_models\n",
    "from pypfopt.risk_models import CovarianceShrinkage\n",
    "from pypfopt import expected_returns\n",
    "from datetime import datetime\n",
    "from pandas.tseries.offsets import BDay"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "import time\n",
    "import pickle"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 1. Read Input Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/ubuntu/anaconda3/lib/python3.6/site-packages/numpy/lib/arraysetops.py:568: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison\n",
      "  mask |= (ar1 == a)\n"
     ]
    }
   ],
   "source": [
    "df_price = pd.read_csv(\"Data/1-sp500_adj_price.csv\",index_col=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(6438964, 3)"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_price.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>datadate</th>\n",
       "      <th>tic</th>\n",
       "      <th>adj_price</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>19900102</td>\n",
       "      <td>ADCT</td>\n",
       "      <td>4.074244</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>19900103</td>\n",
       "      <td>ADCT</td>\n",
       "      <td>4.046900</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>19900104</td>\n",
       "      <td>ADCT</td>\n",
       "      <td>3.964869</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>19900105</td>\n",
       "      <td>ADCT</td>\n",
       "      <td>3.992212</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>19900108</td>\n",
       "      <td>ADCT</td>\n",
       "      <td>3.937525</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   datadate   tic  adj_price\n",
       "1  19900102  ADCT   4.074244\n",
       "2  19900103  ADCT   4.046900\n",
       "3  19900104  ADCT   3.964869\n",
       "4  19900105  ADCT   3.992212\n",
       "5  19900108  ADCT   3.937525"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_price.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "selected_stock = pd.read_csv(\"Data/2-portfolio_data/stocks_selected_total_user8.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(12932, 3)"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "selected_stock.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>tic</th>\n",
       "      <th>predicted_return</th>\n",
       "      <th>trade_date</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>EOG</td>\n",
       "      <td>0.033723</td>\n",
       "      <td>19950601</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>EQT</td>\n",
       "      <td>0.037745</td>\n",
       "      <td>19950601</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>HES</td>\n",
       "      <td>0.051450</td>\n",
       "      <td>19950601</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>NFX</td>\n",
       "      <td>0.030283</td>\n",
       "      <td>19950601</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>OKE</td>\n",
       "      <td>0.041020</td>\n",
       "      <td>19950601</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   tic  predicted_return  trade_date\n",
       "0  EOG          0.033723    19950601\n",
       "1  EQT          0.037745    19950601\n",
       "2  HES          0.051450    19950601\n",
       "3  NFX          0.030283    19950601\n",
       "4  OKE          0.041020    19950601"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "selected_stock.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 2. Get trade date"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of unique stocks selected:  982\n"
     ]
    }
   ],
   "source": [
    "print(\"Number of unique stocks selected: \", len(selected_stock.tic.unique()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [],
   "source": [
    "all_date=df_price.datadate.unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "7155"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(all_date)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [],
   "source": [
    "trade_date=selected_stock.trade_date.unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([19950601, 19950901, 19951201, 19960301, 19960603, 19960903,\n",
       "       19961202, 19970303, 19970602, 19970902, 19971201, 19980302,\n",
       "       19980601, 19980901, 19981201, 19990301, 19990601, 19990901,\n",
       "       19991201, 20000301, 20000601, 20000901, 20001201, 20010301,\n",
       "       20010601, 20010904, 20011203, 20020301, 20020603, 20020903,\n",
       "       20021202, 20030303, 20030602, 20030902, 20031201, 20040301,\n",
       "       20040601, 20040901, 20041201, 20050301, 20050601, 20050901,\n",
       "       20051201, 20060301, 20060601, 20060901, 20061201, 20070301,\n",
       "       20070601, 20070904, 20071203, 20080303, 20080602, 20080902,\n",
       "       20081201, 20090302, 20090601, 20090901, 20091201, 20100301,\n",
       "       20100601, 20100901, 20101201, 20110301, 20110601, 20110901,\n",
       "       20111201, 20120301, 20120601, 20120904, 20121203, 20130301,\n",
       "       20130603, 20130903, 20131202, 20140303, 20140602, 20140902,\n",
       "       20141201, 20150302, 20150601, 20150901, 20151201, 20160301,\n",
       "       20160601, 20160901, 20161201, 20170301, 20170601])"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "trade_date"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of trade dates 89\n"
     ]
    }
   ],
   "source": [
    "print(\"Number of trade dates\", len(trade_date))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 3. Get daily 1 year return table in each 89 trade period"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>tic</th>\n",
       "      <th>predicted_return</th>\n",
       "      <th>trade_date</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>EOG</td>\n",
       "      <td>0.033723</td>\n",
       "      <td>19950601</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>EQT</td>\n",
       "      <td>0.037745</td>\n",
       "      <td>19950601</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>HES</td>\n",
       "      <td>0.051450</td>\n",
       "      <td>19950601</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>NFX</td>\n",
       "      <td>0.030283</td>\n",
       "      <td>19950601</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>OKE</td>\n",
       "      <td>0.041020</td>\n",
       "      <td>19950601</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   tic  predicted_return  trade_date\n",
       "0  EOG          0.033723    19950601\n",
       "1  EQT          0.037745    19950601\n",
       "2  HES          0.051450    19950601\n",
       "3  NFX          0.030283    19950601\n",
       "4  OKE          0.041020    19950601"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "selected_stock.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 414,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "19950601\n",
      "19950901\n",
      "19951201\n",
      "19960301\n",
      "19960603\n",
      "19960903\n",
      "19961202\n",
      "19970303\n",
      "19970602\n",
      "19970902\n",
      "19971201\n",
      "19980302\n",
      "19980601\n",
      "19980901\n",
      "19981201\n",
      "19990301\n",
      "19990601\n",
      "19990901\n",
      "19991201\n",
      "20000301\n",
      "20000601\n",
      "20000901\n",
      "20001201\n",
      "20010301\n",
      "20010601\n",
      "20010904\n",
      "20011203\n",
      "20020301\n",
      "20020603\n",
      "20020903\n",
      "20021202\n",
      "20030303\n",
      "20030602\n",
      "20030902\n",
      "20031201\n",
      "20040301\n",
      "20040601\n",
      "20040901\n",
      "20041201\n",
      "20050301\n",
      "20050601\n",
      "20050901\n",
      "20051201\n",
      "20060301\n",
      "20060601\n",
      "20060901\n",
      "20061201\n",
      "20070301\n",
      "20070601\n",
      "20070904\n",
      "20071203\n",
      "20080303\n",
      "20080602\n",
      "20080902\n",
      "20081201\n",
      "20090302\n",
      "20090601\n",
      "20090901\n",
      "20091201\n",
      "20100301\n",
      "20100601\n",
      "20100901\n",
      "20101201\n",
      "20110301\n",
      "20110601\n",
      "20110901\n",
      "20111201\n",
      "20120301\n",
      "20120601\n",
      "20120904\n",
      "20121203\n",
      "20130301\n",
      "20130603\n",
      "20130903\n",
      "20131202\n",
      "20140303\n",
      "20140602\n",
      "20140902\n",
      "20141201\n",
      "20150302\n",
      "20150601\n",
      "20150901\n",
      "20151201\n",
      "20160301\n",
      "20160601\n",
      "20160901\n",
      "20161201\n",
      "20170301\n",
      "20170601\n",
      "Time consuming:  92.59127250512441  minutes\n"
     ]
    }
   ],
   "source": [
    "# took about 90 minutes to run\n",
    "start = time.time()\n",
    "all_return_table={}\n",
    "#all_predicted_return={}\n",
    "all_stocks_info = {}\n",
    "#for i in range(0,1):\n",
    "for i in range(len(trade_date)):\n",
    "    #match trading date\n",
    "    index = selected_stock.trade_date==trade_date[i]\n",
    "    print(trade_date[i])\n",
    "    #get the corresponding trade period's selected stocks' name\n",
    "    stocks_name=selected_stock.tic[selected_stock.trade_date==trade_date[i]].values\n",
    "    temp_info = selected_stock[selected_stock.trade_date==trade_date[i]]\n",
    "    temp_info = temp_info.reset_index()\n",
    "    del temp_info['index']\n",
    "    all_stocks_info[trade_date[i]] = temp_info\n",
    "    #get the corresponding trade period's selected stocks' predicted return\n",
    "    asset_expected_return=selected_stock[index].predicted_return.values\n",
    "    \n",
    "    #get current trade date and calculate trade date last year, it has to be a business date\n",
    "    last_year_tradedate=int((trade_date[i]-round(trade_date[i]/10000)*10000)+round(trade_date[i]/10000-1)*10000)\n",
    "    convert_to_yyyymmdd=datetime.strptime(str(last_year_tradedate), '%Y%m%d').strftime('%Y-%m-%d')\n",
    "    #determine the business date\n",
    "    #print(convert_to_yyyymmdd)\n",
    "    ts = pd.Timestamp(convert_to_yyyymmdd) \n",
    "    bd = pd.tseries.offsets.BusinessDay(n =1) \n",
    "    new_timestamp = ts - bd \n",
    "    lastY_tradedate = int(new_timestamp.date().strftime('%Y%m%d'))\n",
    "    get_date_index=(all_date<trade_date[i]) & (all_date>lastY_tradedate)\n",
    "    get_date=all_date[get_date_index]\n",
    "    #get adjusted price table\n",
    "    return_table=pd.DataFrame()\n",
    "    for m in range(len(stocks_name)):\n",
    "        #get stocks's name\n",
    "        index_tic=(df_price.tic==stocks_name[m])\n",
    "        #get this stock's all historicall price from sp500_price\n",
    "        sp500_temp=df_price[index_tic]\n",
    "        merge_left_data_table = pd.DataFrame(get_date)\n",
    "        merge_left_data_table.columns = ['datadate']\n",
    "        temp_price=merge_left_data_table.merge(sp500_temp, on=['datadate'], how='left')\n",
    "        temp_price = temp_price.dropna()\n",
    "        temp_price['daily_return']=temp_price.adj_price.pct_change()\n",
    "\n",
    "        return_table=return_table.append(temp_price,ignore_index=True)\n",
    "    all_return_table[trade_date[i]] = return_table\n",
    "end = time.time()\n",
    "print(\"Time consuming: \", (end-start)/60, \" minutes\")\n",
    "    \n",
    "    "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Save to pickle"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 419,
   "metadata": {},
   "outputs": [],
   "source": [
    "#with open('Data/all_return_table.pickle', 'wb') as handle: \n",
    "#    pickle.dump(all_return_table, handle, protocol=pickle.HIGHEST_PROTOCOL)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 420,
   "metadata": {},
   "outputs": [],
   "source": [
    "#with open('Data/all_stocks_info.pickle', 'wb') as handle:\n",
    "#    pickle.dump(all_stocks_info, handle, protocol=pickle.HIGHEST_PROTOCOL)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [],
   "source": [
    "#with open('Data/all_return_table.pickle', 'rb') as handle:\n",
    "#    all_return_table = pickle.load(handle)\n",
    "\n",
    "#with open('Data/all_stocks_info.pickle', 'rb') as handle:\n",
    "#    all_stocks_info = pickle.load(handle)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 4. Potfolio Optimization using pypfopt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/ubuntu/anaconda3/lib/python3.6/site-packages/pypfopt/objective_functions.py:61: RuntimeWarning: invalid value encountered in sqrt\n",
      "  sigma = np.sqrt(np.dot(weights, np.dot(cov_matrix, weights.T)))\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "19950601 : Done\n",
      "19950901 : Done\n",
      "19951201 : Done\n",
      "19960301 : Done\n",
      "19960603 : Done\n",
      "19960903 : Done\n",
      "19961202 : Done\n",
      "19970303 : Done\n",
      "19970602 : Done\n",
      "19970902 : Done\n",
      "19971201 : Done\n",
      "19980302 : Done\n",
      "19980601 : Done\n",
      "19980901 : Done\n",
      "19981201 : Done\n",
      "19990301 : Done\n",
      "19990601 : Done\n",
      "19990901 : Done\n",
      "19991201 : Done\n",
      "20000301 : Done\n",
      "20000601 : Done\n",
      "20000901 : Done\n",
      "20001201 : Done\n",
      "20010301 : Done\n",
      "20010601 : Done\n",
      "20010904 : Done\n",
      "20011203 : Done\n",
      "20020301 : Done\n",
      "20020603 : Done\n",
      "20020903 : Done\n",
      "20021202 : Done\n",
      "20030303 : Done\n",
      "20030602 : Done\n",
      "20030902 : Done\n",
      "20031201 : Done\n",
      "20040301 : Done\n",
      "20040601 : Done\n",
      "20040901 : Done\n",
      "20041201 : Done\n",
      "20050301 : Done\n",
      "20050601 : Done\n",
      "20050901 : Done\n",
      "20051201 : Done\n",
      "20060301 : Done\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/ubuntu/anaconda3/lib/python3.6/site-packages/pypfopt/base_optimizer.py:56: RuntimeWarning: invalid value encountered in less\n",
      "  clean_weights[np.abs(clean_weights) < cutoff] = 0\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "20060601 : Done\n",
      "20060901 : Done\n",
      "20061201 : Done\n",
      "20070301 : Done\n",
      "20070601 : Done\n",
      "20070904 : Done\n",
      "20071203 : Done\n",
      "20080303 : Done\n",
      "20080602 : Done\n",
      "20080902 : Done\n",
      "20081201 : Done\n",
      "20090302 : Done\n",
      "20090601 : Done\n",
      "20090901 : Done\n",
      "20091201 : Done\n",
      "20100301 : Done\n",
      "20100601 : Done\n",
      "20100901 : Done\n",
      "20101201 : Done\n",
      "20110301 : Done\n",
      "20110601 : Done\n",
      "20110901 : Done\n",
      "20111201 : Done\n",
      "20120301 : Done\n",
      "20120601 : Done\n",
      "20120904 : Done\n",
      "20121203 : Done\n",
      "20130301 : Done\n",
      "20130603 : Done\n",
      "20130903 : Done\n",
      "20131202 : Done\n",
      "20140303 : Done\n",
      "20140602 : Done\n",
      "20140902 : Done\n",
      "20141201 : Done\n",
      "20150302 : Done\n",
      "20150601 : Done\n",
      "20150901 : Done\n",
      "20151201 : Done\n",
      "20160301 : Done\n",
      "20160601 : Done\n",
      "20160901 : Done\n",
      "20161201 : Done\n",
      "20170301 : Done\n",
      "20170601 : Done\n"
     ]
    }
   ],
   "source": [
    "# took under 5 minutes to run\n",
    "\n",
    "stocks_weight_table = pd.DataFrame([])\n",
    "\n",
    "for i in range(len(trade_date)):\n",
    "    # get selected stocks information\n",
    "    p1_alldata=(all_stocks_info[trade_date[i]])\n",
    "    # sort it by tic\n",
    "    p1_alldata=p1_alldata.sort_values('tic')\n",
    "    p1_alldata = p1_alldata.reset_index()\n",
    "    del p1_alldata['index']\n",
    "    \n",
    "    \n",
    "    # get selected stocks tic\n",
    "    p1_stock = p1_alldata.tic\n",
    "    \n",
    "    # get predicted return from selected stocks\n",
    "    p1_predicted_return=p1_alldata.pivot_table(index = 'trade_date',columns = 'tic', values = 'predicted_return')\n",
    "    # use the predicted returns as the Expected returns to feed into the portfolio object\n",
    "    mu = p1_predicted_return.T.values\n",
    "\n",
    "    # get the 1-year historical return\n",
    "    p1_return_table=all_return_table[trade_date[i]]\n",
    "    p1_return_table_pivot=p1_return_table.pivot_table(index = 'datadate',columns = 'tic', values = 'daily_return')\n",
    "    # use the 1-year historical return table to calculate covariance matrix between selected stocks\n",
    "    S = risk_models.sample_cov(p1_return_table_pivot)\n",
    "    del S.index.name \n",
    "    \n",
    "    # mean variance\n",
    "    ef_mean = EfficientFrontier(mu, S,weight_bounds=(0, 0.05))\n",
    "    raw_weights_mean = ef_mean.max_sharpe()\n",
    "    cleaned_weights_mean = ef_mean.clean_weights()\n",
    "    #print(raw_weights_mean)\n",
    "    #ef.portfolio_performance(verbose=True)\n",
    "\n",
    "    # minimum variance\n",
    "    ef_min = EfficientFrontier([0]*len(p1_stock), S,weight_bounds=(0, 0.05))\n",
    "    raw_weights_min = ef_min.max_sharpe()\n",
    "    cleaned_weights_min = ef_min.clean_weights()\n",
    "    #print(cleaned_weights_min)\n",
    "    \n",
    "    p1_alldata['mean_weight'] = cleaned_weights_mean.values()\n",
    "    p1_alldata['min_weight'] = cleaned_weights_min.values()\n",
    "    \n",
    "    #ef.portfolio_performance(verbose=True)\n",
    "\n",
    "    \n",
    "    stocks_weight_table = stocks_weight_table.append(pd.DataFrame(p1_alldata), ignore_index=True)\n",
    "    print(trade_date[i], \": Done\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>tic</th>\n",
       "      <th>predicted_return</th>\n",
       "      <th>trade_date</th>\n",
       "      <th>mean_weight</th>\n",
       "      <th>min_weight</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>ACV.1</td>\n",
       "      <td>0.024449</td>\n",
       "      <td>19950601</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.00000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>AES</td>\n",
       "      <td>0.096917</td>\n",
       "      <td>19950601</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.00000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>AHM.1</td>\n",
       "      <td>0.044516</td>\n",
       "      <td>19950601</td>\n",
       "      <td>0.01200</td>\n",
       "      <td>0.00522</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>AMH.1</td>\n",
       "      <td>0.105036</td>\n",
       "      <td>19950601</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.00000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>AMT.1</td>\n",
       "      <td>0.085373</td>\n",
       "      <td>19950601</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.00000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>AOS</td>\n",
       "      <td>0.061494</td>\n",
       "      <td>19950601</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.00000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>APCC.</td>\n",
       "      <td>0.160571</td>\n",
       "      <td>19950601</td>\n",
       "      <td>0.00872</td>\n",
       "      <td>0.02036</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>APH</td>\n",
       "      <td>0.080985</td>\n",
       "      <td>19950601</td>\n",
       "      <td>0.01136</td>\n",
       "      <td>0.00000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>ARG</td>\n",
       "      <td>0.059334</td>\n",
       "      <td>19950601</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.00000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>ATI.1</td>\n",
       "      <td>0.170435</td>\n",
       "      <td>19950601</td>\n",
       "      <td>0.00926</td>\n",
       "      <td>0.00000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>AVATQ</td>\n",
       "      <td>0.051080</td>\n",
       "      <td>19950601</td>\n",
       "      <td>0.00020</td>\n",
       "      <td>0.00000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>BAY.3</td>\n",
       "      <td>0.088882</td>\n",
       "      <td>19950601</td>\n",
       "      <td>0.00975</td>\n",
       "      <td>0.00797</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>BBBY</td>\n",
       "      <td>0.108766</td>\n",
       "      <td>19950601</td>\n",
       "      <td>0.00374</td>\n",
       "      <td>0.05000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>BBY</td>\n",
       "      <td>0.184360</td>\n",
       "      <td>19950601</td>\n",
       "      <td>0.01155</td>\n",
       "      <td>0.05000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>BEV</td>\n",
       "      <td>0.064850</td>\n",
       "      <td>19950601</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.00000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>BF.B</td>\n",
       "      <td>0.036150</td>\n",
       "      <td>19950601</td>\n",
       "      <td>0.02235</td>\n",
       "      <td>0.00000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>BGEN</td>\n",
       "      <td>0.057776</td>\n",
       "      <td>19950601</td>\n",
       "      <td>0.01425</td>\n",
       "      <td>0.05000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>BGG</td>\n",
       "      <td>0.066459</td>\n",
       "      <td>19950601</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.05000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>BIIB</td>\n",
       "      <td>0.081258</td>\n",
       "      <td>19950601</td>\n",
       "      <td>0.01236</td>\n",
       "      <td>0.00000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>BLL</td>\n",
       "      <td>0.039363</td>\n",
       "      <td>19950601</td>\n",
       "      <td>0.03366</td>\n",
       "      <td>0.00000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      tic  predicted_return  trade_date  mean_weight  min_weight\n",
       "0   ACV.1          0.024449    19950601      0.00000     0.00000\n",
       "1     AES          0.096917    19950601      0.00000     0.00000\n",
       "2   AHM.1          0.044516    19950601      0.01200     0.00522\n",
       "3   AMH.1          0.105036    19950601      0.00000     0.00000\n",
       "4   AMT.1          0.085373    19950601      0.00000     0.00000\n",
       "5     AOS          0.061494    19950601      0.00000     0.00000\n",
       "6   APCC.          0.160571    19950601      0.00872     0.02036\n",
       "7     APH          0.080985    19950601      0.01136     0.00000\n",
       "8     ARG          0.059334    19950601      0.00000     0.00000\n",
       "9   ATI.1          0.170435    19950601      0.00926     0.00000\n",
       "10  AVATQ          0.051080    19950601      0.00020     0.00000\n",
       "11  BAY.3          0.088882    19950601      0.00975     0.00797\n",
       "12   BBBY          0.108766    19950601      0.00374     0.05000\n",
       "13    BBY          0.184360    19950601      0.01155     0.05000\n",
       "14    BEV          0.064850    19950601      0.00000     0.00000\n",
       "15   BF.B          0.036150    19950601      0.02235     0.00000\n",
       "16   BGEN          0.057776    19950601      0.01425     0.05000\n",
       "17    BGG          0.066459    19950601      0.00000     0.05000\n",
       "18   BIIB          0.081258    19950601      0.01236     0.00000\n",
       "19    BLL          0.039363    19950601      0.03366     0.00000"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "stocks_weight_table.head(20)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(12932, 5)"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "stocks_weight_table.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## save to excel or csv"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [],
   "source": [
    "stocks_weight_table.to_excel('Data/stocks_weight_table.xlsx','Sheet1')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}