77# third party
88import pandas as pd
99import requests
10+ from bs4 import BeautifulSoup
1011from requests .adapters import HTTPAdapter
1112from requests .exceptions import ConnectionError , RetryError , SSLError
1213from requests .packages .urllib3 .util .retry import Retry
1718
1819
1920DEFAULT_TIMEOUT = 5
20-
21+ DEFAULT_SESSION_URL = "https://finance.yahoo.com"
2122CRUMB_FAILURE = (
2223 "Failed to obtain crumb. Ability to retrieve data will be significantly limited."
2324)
@@ -1366,8 +1367,8 @@ def initialize_session(session=None, **kwargs):
13661367 return session
13671368
13681369
1369- def setup_session (session : requests .Session ):
1370- url = "https://finance.yahoo.com"
1370+ def setup_session (session : requests .Session , url : str = None ):
1371+ url = url or DEFAULT_SESSION_URL
13711372 try :
13721373 response = session .get (url , allow_redirects = True )
13731374 except SSLError :
@@ -1380,10 +1381,39 @@ def setup_session(session: requests.Session):
13801381 except SSLError :
13811382 counter += 1
13821383
1383- if not isinstance (session , FuturesSession ):
1384- return session
1384+ if isinstance (session , FuturesSession ):
1385+ response = response .result ()
1386+
1387+ # check for and handle consent page
1388+ if response .url .find ("consent" ) >= 0 :
1389+ logger .debug (f'Redirected to consent page: "{ response .url } "' )
1390+
1391+ soup = BeautifulSoup (response .content , "html.parser" )
1392+
1393+ params = {}
1394+ for param in ["csrfToken" , "sessionId" ]:
1395+ try :
1396+ params [param ] = soup .find ("input" , attrs = {"name" : param })["value" ]
1397+ except Exception as exc :
1398+ logger .critical (
1399+ f'Failed to find or extract "{ param } " from response. Exception={ exc } '
1400+ )
1401+ return session
1402+
1403+ logger .debug (f"params: { params } " )
1404+
1405+ response = session .post (
1406+ "https://consent.yahoo.com/v2/collectConsent" ,
1407+ data = {
1408+ "agree" : ["agree" , "agree" ],
1409+ "consentUUID" : "default" ,
1410+ "sessionId" : params ["sessionId" ],
1411+ "csrfToken" : params ["csrfToken" ],
1412+ "originalDoneUrl" : url ,
1413+ "namespace" : "yahoo" ,
1414+ },
1415+ )
13851416
1386- _ = response .result ()
13871417 return session
13881418
13891419
0 commit comments