-
Notifications
You must be signed in to change notification settings - Fork 328
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Yangguang
authored and
Yangguang
committed
Jul 9, 2018
1 parent
57ef708
commit 6b292ee
Showing
6 changed files
with
295 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
# -*- coding: utf-8 -*- | ||
|
||
import os | ||
from datetime import datetime | ||
|
||
import scrapy | ||
from scrapy import Request | ||
from scrapy import signals | ||
import pandas as pd | ||
|
||
from fooltrader.api.quote import parse_shfe_data, parse_shfe_day_data | ||
from fooltrader.contract.files_contract import get_exchange_cache_dir, get_exchange_cache_path | ||
from fooltrader.utils.utils import to_timestamp | ||
|
||
|
||
class FutureCffexSpider(scrapy.Spider):
    """Spider that downloads CFFEX (China Financial Futures Exchange) files.

    Driven by the optional spider argument ``dataType``
    (``scrapy crawl future_cffex_spider -a dataType=...``):

    * ``None`` / ``'dayk'`` -- daily k-data CSV files, one per trading day
    * ``'inventory'``       -- daily position CSV files per product code

    Downloads are cached on disk; dates whose cache file already exists
    are skipped.
    """
    name = "future_cffex_spider"

    custom_settings = {
        # 'DOWNLOAD_DELAY': 2,
        # 'CONCURRENT_REQUESTS_PER_DOMAIN': 8,
    }

    def __init__(self, name=None, **kwargs):
        super().__init__(name, **kwargs)
        self.trading_dates = None

    def start_requests(self):
        """Yield one Request per missing cache file.

        NOTE: Scrapy only sets ``self.dataType`` when the argument is passed
        on the command line, so use ``getattr`` to avoid an AttributeError
        when it is omitted (the original ``self.dataType`` access crashed).
        """
        data_type = getattr(self, 'dataType', None)
        if data_type is None or data_type == 'dayk':
            # CFFEX opened 2006-06-30; weekdays only (dayofweek 0-4).
            date_range = pd.date_range(start='2006-06-30', end=pd.Timestamp.today())
            date_range = date_range[date_range.dayofweek < 5]
            for the_date in date_range:
                the_path = get_exchange_cache_path(
                    security_type='future', exchange='cffex',
                    data_type='day_kdata', the_date=to_timestamp(the_date)) + ".csv"
                if not os.path.exists(the_path):
                    yield Request(
                        url="http://www.cffex.com.cn/sj/hqsj/rtj/"
                            + the_date.strftime("%Y%m/%d/%Y%m%d") + "_1.csv",
                        callback=self.download_cffex_history_data_file,
                        meta={'filename': the_path})
        elif data_type == 'inventory':
            date_range = pd.date_range(start='2006-06-30', end=pd.Timestamp.today())
            date_range = date_range[date_range.dayofweek < 5]
            # Product codes with per-product position files.
            products = ['IF', 'IC', 'IH', 'T', 'TF']
            for the_date in date_range:
                for product in products:
                    the_path = get_exchange_cache_path(
                        security_type='future', exchange='cffex',
                        data_type='inventory',
                        the_date=to_timestamp(the_date)) + product + ".csv"
                    if not os.path.exists(the_path):
                        yield Request(
                            url="http://www.cffex.com.cn/sj/ccpm/"
                                + the_date.strftime("%Y%m/%d/") + product + "_1.csv",
                            callback=self.download_cffex_history_data_file,
                            meta={'filename': the_path})

    def download_cffex_history_data_file(self, response):
        """Persist the response body when the server returned a real data file.

        The exchange answers error pages with an HTML content type, so only
        'application/zip' and 'text/csv' responses are written to disk;
        anything else is logged and dropped.
        """
        content_type_header = response.headers.get('content-type', None)
        the_path = response.meta['filename']

        # Guard against a missing header: decode(None) would crash.
        content_type = content_type_header.decode("utf-8") if content_type_header else ''
        if content_type in ('application/zip', 'text/csv'):
            with open(the_path, "wb") as f:
                f.write(response.body)
        else:
            self.logger.error(
                "get cffex year data failed:the_path={} url={} content type={} ".format(
                    the_path,
                    response.url,
                    content_type_header))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
# -*- coding: utf-8 -*- | ||
|
||
import os | ||
from datetime import datetime | ||
import pandas as pd | ||
|
||
import scrapy | ||
from scrapy import Request | ||
from scrapy import signals | ||
|
||
from fooltrader.api.quote import parse_shfe_data, parse_shfe_day_data | ||
from fooltrader.contract.files_contract import get_exchange_cache_dir, get_exchange_cache_path | ||
from fooltrader.utils.utils import to_timestamp | ||
|
||
|
||
class FutureCzceSpider(scrapy.Spider):
    """Spider that downloads CZCE (Zhengzhou Commodity Exchange) files.

    Driven by the optional spider argument ``dataType``:

    * ``None``        -- current-year daily k-data .xls files
    * ``'historyk'``  -- yearly history .zip archives from the history page
    * ``'inventory'`` -- daily holding/position .xls files (~450 weeks back)

    Downloads are cached on disk; dates whose cache file already exists
    are skipped.
    """
    name = "future_czce_spider"

    custom_settings = {
        # 'DOWNLOAD_DELAY': 2,
        # 'CONCURRENT_REQUESTS_PER_DOMAIN': 8,
    }

    def __init__(self, name=None, **kwargs):
        super().__init__(name, **kwargs)
        self.trading_dates = None

    def start_requests(self):
        """Yield one Request per missing cache file.

        NOTE: Scrapy only sets ``self.dataType`` when the argument is passed
        on the command line, so use ``getattr`` to avoid an AttributeError
        when it is omitted (the original ``self.dataType`` access crashed).
        """
        data_type = getattr(self, 'dataType', None)
        if data_type is None:
            today = pd.Timestamp.today()
            # From Jan 1st of the current year (dayofyear-1 days back) to today.
            for date in pd.date_range(start=today.date() - pd.Timedelta(days=today.dayofyear - 1),
                                      end=today):
                the_path = get_exchange_cache_path(
                    security_type='future', exchange='czce',
                    the_date=to_timestamp(date), data_type='day_kdata') + '.xls'
                if date.dayofweek < 5 and not os.path.exists(the_path):
                    yield Request(
                        url="http://www.czce.com.cn/portal/DFSStaticFiles/Future/"
                            + date.strftime("%Y/%Y%m%d") + "/FutureDataDaily.xls",
                        callback=self.download_czce_kline_data,
                        meta={'filename': the_path})
        elif data_type == 'historyk':
            yield Request(
                url="http://www.czce.com.cn/portal/jysj/qhjysj/lshqxz/A09112017index_1.htm",
                callback=self.download_czce_history_data)
        elif data_type == 'inventory':
            today = pd.Timestamp.today()
            for date in pd.date_range(start=today.date() - pd.Timedelta(weeks=450), end=today):
                the_path = get_exchange_cache_path(
                    security_type='future', exchange='czce',
                    the_date=to_timestamp(date), data_type='inventory') + '.xls'
                if date.dayofweek < 5 and not os.path.exists(the_path):
                    yield Request(
                        url="http://www.czce.com.cn/portal/DFSStaticFiles/Future/"
                            + date.strftime("%Y/%Y%m%d") + "/FutureDataHolding.xls",
                        callback=self.download_czce_kline_data,
                        meta={'filename': the_path})

    def download_czce_kline_data(self, response):
        """Save a daily k-data/holding file when the content type is a data file."""
        content_type_header = response.headers.get('content-type', None)
        the_path = response.meta['filename']

        # Guard against a missing header: decode(None) would crash.
        content_type = content_type_header.decode("utf-8") if content_type_header else ''
        if content_type in ('application/zip', 'text/csv',
                            'application/x-zip-compressed', 'application/excel'):
            with open(the_path, "wb") as f:
                f.write(response.body)
        else:
            self.logger.error(
                "get czce year data failed:the_path={} url={} content type={} ".format(
                    the_path,
                    response.url,
                    content_type_header))

    def download_czce_history_data(self, response):
        """Follow every zip link on the history index page."""
        the_dir = get_exchange_cache_dir(security_type='future', exchange='czce')
        for filepath in response.xpath('//a[contains(@href,"zip")]').xpath('@href').extract():
            # Prefix the filename with its parent directory (usually the year)
            # so files from different years don't collide; the generic
            # "exchange" directory adds no information and is dropped.
            parent = filepath.split("/")[-2]
            prefix = "" if parent == "exchange" else parent
            yield Request(url="http://www.czce.com.cn/" + filepath,
                          meta={'filename': os.path.join(the_dir, prefix + filepath.split("/")[-1])},
                          callback=self.download_czce_history_data_file)

    def download_czce_history_data_file(self, response):
        """Save a yearly history archive when the content type is a data file."""
        content_type_header = response.headers.get('content-type', None)
        the_path = response.meta['filename']

        # Guard against a missing header: decode(None) would crash.
        content_type = content_type_header.decode("utf-8") if content_type_header else ''
        if content_type in ('application/zip', 'text/csv', 'application/x-zip-compressed'):
            with open(the_path, "wb") as f:
                f.write(response.body)
        else:
            # Fixed copy-paste bug: the original message said "shfe" in the
            # CZCE spider, which made log triage misleading.
            self.logger.error(
                "get czce history data failed:the_path={} url={} content type={} ".format(
                    the_path,
                    response.url,
                    content_type_header))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,120 @@ | ||
# -*- coding: utf-8 -*- | ||
|
||
import os | ||
from datetime import datetime | ||
import pandas as pd | ||
|
||
import scrapy | ||
from scrapy import Request,FormRequest | ||
from scrapy import signals | ||
|
||
from fooltrader.api.quote import parse_shfe_data, parse_shfe_day_data | ||
from fooltrader.contract.files_contract import get_exchange_cache_dir, get_exchange_cache_path | ||
from fooltrader.utils.utils import to_timestamp | ||
|
||
|
||
class FutureDceSpider(scrapy.Spider):
    """Spider that downloads DCE (Dalian Commodity Exchange) files.

    Driven by the optional spider argument ``dataType``:

    * ``None`` / anything else -- current-year daily k-data .xls exports
    * ``'historyk'``           -- history archives linked from the history page
    * ``'inventory'``          -- daily member position exports (~520 weeks back)

    Downloads are cached on disk; dates whose cache file already exists
    are skipped.
    """
    name = "future_dce_spider"

    custom_settings = {
        # 'DOWNLOAD_DELAY': 2,
        # 'CONCURRENT_REQUESTS_PER_DOMAIN': 8,
    }

    def __init__(self, name=None, **kwargs):
        super().__init__(name, **kwargs)

    def start_requests(self):
        """Dispatch to the request builder selected by the ``dataType`` argument.

        NOTE: Scrapy only sets ``self.dataType`` when the argument is passed
        on the command line, so use ``getattr`` to avoid an AttributeError
        when it is omitted (the original ``self.dataType`` access crashed).
        """
        data_type = getattr(self, 'dataType', None)
        if data_type == 'historyk':
            return self.request_history_kdata()
        if data_type == 'inventory':
            return self.request_inventory_data()
        # Default (None or unrecognized value): current-year day k-data.
        return self.request_currentyear_kdata()

    def request_inventory_data(self):
        """Build batch-export POST requests for daily member position data."""
        today = pd.Timestamp.today()
        requests = []
        for date in pd.date_range(start=today.date() - pd.Timedelta(weeks=520), end=today):
            the_path = get_exchange_cache_path(
                security_type='future', exchange='dce',
                the_date=to_timestamp(date), data_type="day_inventory") + '.zip'
            if date.dayofweek < 5 and not os.path.exists(the_path):
                requests.append(FormRequest(
                    url="http://www.dce.com.cn/publicweb/quotesdata/exportMemberDealPosiQuotesBatchData.html",
                    formdata={
                        'batchExportFlag': 'batch',
                        'contract.contract_id': 'all',
                        'contract.variety_id': 'a',
                        'year': str(date.year),
                        # The DCE form expects zero-based months -- TODO confirm
                        # against the site; kept as in the original.
                        'month': str(date.month - 1),
                        'day': str(date.day),
                        'memberDealPosiQuotes.trade_type': '0',
                        'memberDealPosiQuotes.variety': 'all'
                    },
                    callback=self.download_dce_kline_data,
                    meta={'filename': the_path}))
        return requests

    def request_currentyear_kdata(self):
        """Build export POST requests for this year's daily k-data."""
        today = pd.Timestamp.today()
        requests = []
        # From Jan 1st of the current year (dayofyear-1 days back) to today.
        for date in pd.date_range(start=today.date() - pd.Timedelta(days=today.dayofyear - 1),
                                  end=today):
            the_path = get_exchange_cache_path(
                security_type='future', exchange='dce',
                the_date=to_timestamp(date), data_type="day_kdata") + '.xls'
            if date.dayofweek < 5 and not os.path.exists(the_path):
                requests.append(FormRequest(
                    url="http://www.dce.com.cn/publicweb/quotesdata/exportDayQuotesChData.html",
                    formdata={
                        'year': str(date.year),
                        # Zero-based month, matching the site's form -- TODO confirm.
                        'month': str(date.month - 1),
                        'day': str(date.day),
                        'dayQuotes.trade_type': '0',
                        'dayQuotes.variety': 'all',
                        'exportType': 'excel'
                    },
                    callback=self.download_dce_kline_data,
                    meta={'filename': the_path}))
        return requests

    def request_history_kdata(self):
        """Build the single request for the history-data index page."""
        return [Request(url="http://www.dce.com.cn/dalianshangpin/xqsj/lssj/index.html",
                        callback=self.download_dce_history_data)]

    def download_dce_history_data(self, response):
        """Follow every archive link (carried in input@rel) on the history page."""
        the_dir = get_exchange_cache_dir(security_type='future', exchange='dce')
        for filepath in response.css('input').xpath('@rel').extract():
            yield Request(url="http://www.dce.com.cn/" + filepath,
                          meta={'filename': os.path.join(the_dir, filepath.split("/")[-1])},
                          callback=self.download_dce_history_data_file)

    def download_dce_kline_data(self, response):
        """Save an exported k-data/position file when the content type is a data file."""
        content_type_header = response.headers.get('content-type', None)
        if content_type_header is None:
            # Fallback lookup kept from the original implementation.
            content_type_header = response.headers.get('Content-Type', None)
        the_path = response.meta['filename']

        # Guard against a missing header: decode(None) would crash.
        content_type = content_type_header.decode("utf-8") if content_type_header else ''
        if content_type in ('application/zip', 'text/csv',
                            'application/octet-stream;charset=utf-8'):
            with open(the_path, "wb") as f:
                f.write(response.body)
        else:
            self.logger.error(
                "get dce year kline data failed:the_path={} url={} content type={} ".format(
                    the_path,
                    response.url,
                    content_type_header))

    def download_dce_history_data_file(self, response):
        """Save a history archive when the content type is a data file."""
        content_type_header = response.headers.get('content-type', None)
        the_path = response.meta['filename']

        # Guard against a missing header: decode(None) would crash.
        content_type = content_type_header.decode("utf-8") if content_type_header else ''
        if content_type in ('application/zip', 'text/csv'):
            with open(the_path, "wb") as f:
                f.write(response.body)
        else:
            # Fixed copy-paste bug: the original message said "shfe" in the
            # DCE spider, which made log triage misleading.
            self.logger.error(
                "get dce history data failed:the_path={} url={} content type={} ".format(
                    the_path,
                    response.url,
                    content_type_header))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
# Automatically created by: scrapy startproject | ||
# | ||
# For more information about the [deploy] section see: | ||
# https://scrapyd.readthedocs.org/en/latest/deploy.html | ||
|
||
[settings] | ||
default = fooltrader.settings | ||
|
||
[deploy] | ||
#url = http://localhost:6800/ | ||
project = fooltrader |