diff --git a/app.py b/app.py
index 07b7609..8a356c5 100644
--- a/app.py
+++ b/app.py
@@ -23,6 +23,8 @@ def eval(cmd, input=None):
         return wordOfTheDay.eval()
     elif cmd['service'] == 'MBTA':
         return MBTA.eval(cmd['args'])
+    elif cmd['service'] == 'H': ##Crimson headlines
+        return headlines.eval()
     else:
         return "ERROR 42: service not recognized"
 
diff --git a/data.py b/data.py
index e338a06..afb70a1 100755
--- a/data.py
+++ b/data.py
@@ -143,6 +143,7 @@
 {'service': 'S', 'args':{'endpoint': 'route', 'routeid': '4007650' , 'label': 'Allston Campus Express Shuttle Route'}, 'tags':['ALLSTON', 'CAMPUS', 'EXPRESS', 'SHUTTLE', 'ROUTE']},
 {'service': 'W', 'args':{}, 'tags':['WEATHER']},
 {'service': 'D', 'args':{}, 'tags':['WORDOFTHEDAY']},
+{'service': 'H', 'args':{}, 'tags':['HEADLINES']},
 {'service': 'MBTA', 'args': {'pg': ['green/lake']}, 'tags': ['MBTA', 'SUBWAY', 'T', 'SCHEDULE', 'LINE', 'GREEN', 'COLLEGE', 'BOSTON']},
 {'service': 'MBTA', 'args': {'pg': ['green/sougr']}, 'tags': ['MBTA', 'SUBWAY', 'T', 'SCHEDULE', 'LINE', 'ST', 'STREET', 'SOUTH', 'GREEN']},
 {'service': 'MBTA', 'args': {'pg': ['green/chill']}, 'tags': ['MBTA', 'SUBWAY', 'T', 'SCHEDULE', 'LINE', 'HILL', 'AVE', 'GREEN', 'CHESTNUT']},
diff --git a/services/__init__.py b/services/__init__.py
index 5bbee48..60aa9cd 100644
--- a/services/__init__.py
+++ b/services/__init__.py
@@ -3,3 +3,4 @@
 from weather import weather
 from wordOfTheDay import wordOfTheDay
 from MBTA import MBTA
+from headlines import headlines
diff --git a/services/headlines/__init__.py b/services/headlines/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/services/headlines/headlines.py b/services/headlines/headlines.py
new file mode 100644
index 0000000..6c74b03
--- /dev/null
+++ b/services/headlines/headlines.py
@@ -0,0 +1,42 @@
+import urllib2, urllib
+import re
+from bs4 import BeautifulSoup
+
+#####################################
+##   Crimson Headlines Function   ##
+#####################################
+
+def getHeadlines():
+    url = 'https://www.thecrimson.com/'
+    hdr = {'User-Agent': 'Chrome'}
+    req = urllib2.Request(url,headers=hdr)
+    website = urllib2.urlopen(req)
+    soup = BeautifulSoup(website.read(), 'html.parser')
+    msg = "The 5 most read Crimson articles:\n"
+
+    try:
+        headlines = soup.find("div", {"id": "most-read-box"})
+        for li in headlines.find_all('li'):
+            link = li.find('a')
+            msg += link.get_text()+ " " + "https://www.thecrimson.com" + link['href']+ " "
+
+
+    except Exception, e:
+        print str(e)
+        return "Could not find headline data."
+
+    return msg
+
+############################
+##       Top-Level       ##
+############################
+
+def makeSpecial():
+    s = 'This will get the top Crimson headlines.'
+    return s
+
+## return proper format
+special = makeSpecial()
+
+def eval():
+    return getHeadlines()