135 changes: 74 additions & 61 deletions finam/export.py
@@ -257,8 +257,10 @@ class Exporter(object):
     EMPTY_RESULT_NOT_TICKS = '<DATE>;<TIME>;<OPEN>;<HIGH>;<LOW>;<CLOSE>;<VOL>'
     EMPTY_RESULT_TICKS = '<TICKER>;<PER>;<DATE>;<TIME>;<LAST>;<VOL>'
 
-    ERROR_TOO_MUCH_WANTED = (u'Вы запросили данные за слишком '
-                             u'большой временной период')
+    ERROR_TOO_MUCH_WANTED = [(u'вы запросили данные за слишком '
+                              u'большой временной период'),
+                             u'максимальная глубина данных',
+                             u'делайте несколько запросов']
 
     ERROR_THROTTLING = 'Forbidden: Access is denied'
     ERROR_ALREADY_IN_PROGRESS = u'Система уже обрабатывает Ваш запрос'
@@ -286,7 +288,7 @@ def _postprocess(self, data, timeframe):
         return data
 
     def _sanity_check(self, data):
-        if self.ERROR_TOO_MUCH_WANTED in data:
+        if any(error in data.lower() for error in self.ERROR_TOO_MUCH_WANTED):
             raise FinamTooLongTimeframeError
 
         if self.ERROR_THROTTLING in data:
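
The check above is worth unpacking: the single exact-match constant becomes a list of lowercase fragments, because Finam apparently words this refusal in several ways (roughly: "you requested data for too large a time period", "maximum data depth", "make several requests"), and the response is lowercased before matching so capitalisation differences do not matter. A minimal illustration of the new test, where looks_too_long is a made-up wrapper around the same expression:

ERROR_TOO_MUCH_WANTED = [u'вы запросили данные за слишком большой временной период',
                         u'максимальная глубина данных',
                         u'делайте несколько запросов']

def looks_too_long(data):
    # Lowercase first: the live server capitalises the first word in one variant.
    return any(error in data.lower() for error in ERROR_TOO_MUCH_WANTED)

assert looks_too_long(u'Вы запросили данные за слишком большой временной период')
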
@@ -310,7 +312,8 @@ def download(self,
                  timeframe=Timeframe.DAILY,
                  delay=1,
                  max_in_progress_retries=10,
-                 fill_empty=False):
+                 fill_empty=False,
+                 max_interval_divider=10):
         items = self._meta.lookup(id_=id_, market=market)
         # i.e. for markets 91, 519, 2
         # id duplicates are feasible, looks like corrupt data on finam
@@ -323,64 +326,74 @@ def download(self,
         if end_date is None:
             end_date = datetime.date.today()
 
-        df = None
-        chunks = split_interval(start_date, end_date, timeframe.value)
-        counter = 0
-        for chunk_start_date, chunk_end_date in chunks:
-            counter += 1
-            logger.info('Processing chunk %d of %d', counter, len(chunks))
-            if counter > 1:
-                logger.info('Sleeping for {} second(s)'.format(delay))
-                time.sleep(delay)
-
-            params = {
-                'p': timeframe.value,
-                'em': id_,
-                'market': market.value,
-                'df': chunk_start_date.day,
-                'mf': chunk_start_date.month - 1,
-                'yf': chunk_start_date.year,
-                'dt': chunk_end_date.day,
-                'mt': chunk_end_date.month - 1,
-                'yt': chunk_end_date.year,
-                'cn': code,
-                'code': code,
-                # I would guess this param denotes 'data format'
-                # that differs for ticks only
-                'datf': 6 if timeframe == Timeframe.TICKS.value else 5,
-                'fsp': 1 if fill_empty else 0
-            }
-
-            url = self._build_url(params)
-            # deliberately not using pd.read_csv's ability to fetch
-            # urls to fully control what's happening
-            retries = 0
-            while True:
-                data = self._fetcher(url)
-                data = self._postprocess(data, timeframe)
-                try:
-                    self._sanity_check(data)
-                except FinamAlreadyInProgressError:
-                    if retries <= max_in_progress_retries:
-                        retries += 1
-                        logger.info('Finam work is in progress, sleeping'
-                                    ' for {} second(s) before retry #{}'
-                                    .format(delay, retries))
-                        time.sleep(delay)
-                        continue
-                    else:
-                        raise
-                break
-
-            try:
-                chunk_df = pd.read_csv(StringIO(data), sep=';')
-                chunk_df.sort_index(inplace=True)
-            except ParserError as e:
-                raise FinamParsingError(e)
-
-            if df is None:
-                df = chunk_df
-            else:
-                df = df.append(chunk_df)
+        interval_divider = 1
+        while True:
+            df = None
+            chunks = split_interval(start_date, end_date, timeframe.value, interval_divider)
+            counter = 0
+            try:
+                for chunk_start_date, chunk_end_date in chunks:
+                    counter += 1
+                    logger.info('Processing chunk %d of %d', counter, len(chunks))
+                    if counter > 1:
+                        logger.info('Sleeping for {} second(s)'.format(delay))
+                        time.sleep(delay)
+
+                    params = {
+                        'p': timeframe.value,
+                        'em': id_,
+                        'market': market.value,
+                        'df': chunk_start_date.day,
+                        'mf': chunk_start_date.month - 1,
+                        'yf': chunk_start_date.year,
+                        'dt': chunk_end_date.day,
+                        'mt': chunk_end_date.month - 1,
+                        'yt': chunk_end_date.year,
+                        'cn': code,
+                        'code': code,
+                        # I would guess this param denotes 'data format'
+                        # that differs for ticks only
+                        'datf': 6 if timeframe == Timeframe.TICKS.value else 5,
+                        'fsp': 1 if fill_empty else 0
+                    }
+
+                    url = self._build_url(params)
+                    # deliberately not using pd.read_csv's ability to fetch
+                    # urls to fully control what's happening
+                    retries = 0
+                    while True:
+                        data = self._fetcher(url)
+                        data = self._postprocess(data, timeframe)
+                        try:
+                            self._sanity_check(data)
+                        except FinamAlreadyInProgressError:
+                            if retries <= max_in_progress_retries:
+                                retries += 1
+                                logger.info('Finam work is in progress, sleeping'
+                                            ' for {} second(s) before retry #{}'
+                                            .format(delay, retries))
+                                time.sleep(delay)
+                                continue
+                            else:
+                                raise
+                        break
+
+                    try:
+                        chunk_df = pd.read_csv(StringIO(data), sep=';')
+                        chunk_df.sort_index(inplace=True)
+                    except ParserError as e:
+                        raise FinamParsingError(e)
+
+                    if df is None:
+                        df = chunk_df
+                    else:
+                        df = df.append(chunk_df)
+                return df
+            except FinamTooLongTimeframeError:
+                if interval_divider < max_interval_divider:
+                    interval_divider = interval_divider * 2
+                    time.sleep(delay)
+                else:
+                    raise
 
         return df
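
The restructuring of download() reads better as control flow than as a diff: the existing chunked fetch is wrapped in an outer while True with a try around the whole loop; on FinamTooLongTimeframeError the partial result is discarded, interval_divider doubles (1, 2, 4, ...), and the fetch restarts with chunks half as long, until a pass succeeds or the divider reaches max_interval_divider. A condensed, self-contained sketch of that strategy (simplified names and a stub fetcher, not the library's exact code):

import datetime
import time

class TooLongTimeframe(Exception):
    """Stand-in for finam.exception.FinamTooLongTimeframeError."""

def fetch_chunk(start, end):
    # Stub for one Finam request; assumed to raise TooLongTimeframe
    # when the server refuses the requested window.
    raise NotImplementedError

def download_with_backoff(start, end, max_days, max_interval_divider=10, delay=1):
    interval_divider = 1
    while True:
        step = datetime.timedelta(days=max_days // interval_divider)
        try:
            frames, cursor = [], start
            while cursor <= end:
                chunk_end = min(cursor + step, end)
                frames.append(fetch_chunk(cursor, chunk_end))
                cursor = chunk_end + datetime.timedelta(days=1)
            return frames          # one clean pass over all chunks
        except TooLongTimeframe:
            if interval_divider >= max_interval_divider:
                raise              # chunks are already as small as allowed
            interval_divider *= 2  # halve the window and start the pass over
            time.sleep(delay)

One consequence worth noting: a failure on a late chunk throws away all previously fetched chunks and refetches them with the smaller window, which is simple and correct at the cost of some repeated requests.
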
3 changes: 2 additions & 1 deletion finam/interval.py
@@ -17,12 +17,13 @@
 }
 
 
-def split_interval(start_date, end_date, timeframe):
+def split_interval(start_date, end_date, timeframe, interval_divider=1):
     if end_date < start_date:
         raise ValueError('start_date must be >= end_date, but got {} and {}'
                          .format(start_date, end_date))
     delta_days = (end_date - start_date).days + 1
     max_days = _MAX_DAYS_PER_TIMEFRAME[timeframe]
+    max_days = max_days // interval_divider
     chunks_count, remainder = divmod(delta_days, max_days)
     if remainder != 0:
         chunks_count += 1
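
The arithmetic is easiest to see on a concrete range. Assuming, for illustration, a 365-day cap for the timeframe (the real caps live in _MAX_DAYS_PER_TIMEFRAME) and interval_divider == 2:

import datetime

delta_days = (datetime.date(2021, 1, 2) - datetime.date(2020, 1, 1)).days + 1  # 368
max_days = 365 // 2                                     # divider halves each chunk: 182
chunks_count, remainder = divmod(delta_days, max_days)  # (2, 4)
if remainder != 0:
    chunks_count += 1                                   # 3 chunks of at most 182 days
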
2 changes: 1 addition & 1 deletion setup.py
@@ -2,7 +2,7 @@
 # coding: utf8
 from setuptools import setup
 
-VERSION = '5.1.1'
+VERSION = '5.1.2'
 
 long_description = open('README.md').read()
 
29 changes: 29 additions & 0 deletions tests/test_FinamTooLongTimeframeError.py
@@ -0,0 +1,29 @@
+import unittest
+from finam.export import fetch_url, Exporter, Timeframe
+from finam.exception import FinamTooLongTimeframeError
+from finam.const import Market
+from datetime import date
+from nose.tools import assert_raises
+
+class FinamTooLongTimeframeErrorTestCase(unittest.TestCase):
+    def test_tooDeep(self):
+        url = "https://export.finam.ru/export9.out?market=5&em=83&token=03AGdBq24TszZ-dPqHt33nEErdKlymyopEkaclaoED4Nm5NP078LpwSMcZApE1B-iL18KsheOiMdb3sVACEi7Ou91RxlZn7yQmfLoS3FgoCx7rSfIHjf-2ab4Ye5xASsdLFzND7PiF7Fuuv3qx0NswSb9V_dL3Q8sH304ZT8CjCg5XZFrVKwIVIErqi63hFli_xsOgUXFHiukCDeG6KtPG9znQzRMDxgDaApBsjGApFh4CAX5RFi40HqXSdNEJ92uMQHXaIrZT75--AzX-PBXFDtM4DFpHGzZKvMGAGc5IlM2WBZZiIIlHvFzCRwRxG2d5CtfWQpfYKN6g3GnH5w2NGf9ApwGqvmwN2AZrjsMYLKyuJwEmI-7O2HS7xj4MuJkvlcdPxhvgH9Om2mhJ-OR-_7CWSuY3H8xvWGwFlf-wDepX7TpxmKHvwucx_yoydfTy61DTwCf_P0Mf&code=EURUSD&apply=0&df=15&mf=7&yf=2019&from=15.08.2019&dt=15&mt=7&yt=2021&to=15.08.2021&p=7&f=EURUSD_190815_210815&e=.txt&cn=EURUSD&dtf=1&tmf=1&MSOR=1&mstime=on&mstimever=1&sep=1&sep2=1&datf=1&at=1"
+        data = fetch_url(url)
+
+        export = Exporter()
+
+        with assert_raises(FinamTooLongTimeframeError):
+            export._sanity_check(data)
+
+    def test_if_divide_then_get_data_when_load_tooLongPeriod(self):
+        date_from = date(2020, 1, 1)
+        date_till = date(2021, 1, 2)
+        export = Exporter()
+        export.download(83, Market(5), date_from, date_till, Timeframe.MINUTES15)
+
+    def test_get_exception_when_load_tooLongPeriod(self):
+        date_from = date(2020, 1, 1)
+        date_till = date(2021, 2, 2)
+        export = Exporter()
+        with assert_raises(FinamTooLongTimeframeError):
+            export.download(83, Market(5), date_from, date_till, Timeframe.MINUTES15, max_interval_divider=1)
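
For callers nothing changes unless they want the old fail-fast behaviour: splitting retries are on by default, and max_interval_divider=1 disables them, which is exactly what the last test exercises. A usage sketch assembled from the test's own imports and instrument ids (83 on Market(5) is EURUSD per the URL above):

from datetime import date
from finam.export import Exporter, Timeframe
from finam.const import Market

exporter = Exporter()
# A year of 15-minute EURUSD data; if Finam refuses the full window, the
# request is retried with smaller chunks instead of raising immediately.
df = exporter.download(83, Market(5), date(2020, 1, 1), date(2021, 1, 2),
                       timeframe=Timeframe.MINUTES15)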