Binary file modified .DS_Store
Binary file not shown.
7 changes: 7 additions & 0 deletions .gitignore
@@ -11,3 +11,10 @@ service-account-credentials.json
# File Types
*.env
*.zip

# Python bytecode
__pycache__/
*.pyc

# macOS
.DS_Store
25 changes: 22 additions & 3 deletions docker-compose.yml
@@ -3,7 +3,26 @@ services:
image: cornellappdev/transit-dev:${IMAGE_TAG}
env_file: .env
volumes:
- ./service-account-credentials.json:/app/service-account-credentials.json:ro
- .:/usr/src/app
- /usr/src/app/node_modules

ghopper:
image: cornellappdev/transit-ghopper:03_01_25
ports:
- "8080:3000"

- "8988:8988"

map:
image: cornellappdev/transit-map
ports:
- "8989:8989"

ghopper-walking:
image: cornellappdev/transit-ghopper-walking:latest
ports:
- "8987:8987"

live-tracking:
image: cornellappdev/transit-python:03_01_25
env_file: python.envrc
ports:
- "5000:5000"
1 change: 1 addition & 0 deletions package-lock.json

Some generated files are not rendered by default.

5 changes: 5 additions & 0 deletions python.envrc
@@ -0,0 +1,5 @@
TOKEN=TOKEN
TWITTER_KEY=TWITTER_KEY
TWITTER_KEY_SECRET=TWITTER_KEY_SECRET
TWITTER_TOKEN=TWITTER_TOKEN
TWITTER_TOKEN_SECRET=TWITTER_TOKEN_SECRET
Binary file modified src/.DS_Store
Binary file not shown.
11 changes: 11 additions & 0 deletions src/controllers/EcosystemController.js
@@ -25,4 +25,15 @@ router.get("/printers", async (req, res) => {
}
});

// Fetch all restaurants
router.get("/restaurants", async (req, res) => {
try {
const restaurants = await EcosystemUtils.fetchAllRestaurants();
res.status(200).json({ success: true, data: restaurants });
} catch (error) {
console.error("Error fetching restaurants:", error.message);
res.status(500).json({ error: "Failed to fetch restaurants" });
}
});

export default router;
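
The handler mirrors the existing /printers route: EcosystemUtils.fetchAllRestaurants() (not shown in this diff) supplies the rows, wrapped in a {success, data} envelope. A client-side sketch; the localhost:3000 port is an assumption, and /api/v1 mounting and field names are taken from swagger.json and models.py elsewhere in this PR:

import requests

resp = requests.get("http://localhost:3000/api/v1/restaurants", timeout=5)
resp.raise_for_status()
payload = resp.json()  # {"success": True, "data": [...]}

for restaurant in payload["data"]:
    # Field names follow the restaurants table in src/data/db/models.py.
    print(restaurant["name"], "-", restaurant["category"], "-", restaurant["address"])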
Binary file added src/data/.DS_Store
Binary file not shown.
Binary file added src/data/db/__pycache__/database.cpython-312.pyc
Binary file not shown.
Binary file added src/data/db/__pycache__/models.cpython-312.pyc
Binary file not shown.
13 changes: 13 additions & 0 deletions src/data/db/database.py
@@ -32,5 +32,18 @@ def insert_printer(location, description, latitude, longitude):
VALUES (?, ?, ?, ?)
''', (location, description, latitude, longitude))

conn.commit()
conn.close()

def insert_restaurant(name, category, address, latitude, longitude, image_url, web_url):
"""Insert a restaurant into the database."""
conn = get_db_connection()
cursor = conn.cursor()

cursor.execute('''
INSERT OR IGNORE INTO restaurants (name, category, address, latitude, longitude, image_url, web_url)
VALUES (?, ?, ?, ?, ?, ?, ?)
''', (name, category, address, latitude, longitude, image_url, web_url))

conn.commit()
conn.close()
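
A minimal usage sketch for the new helper, with sample values taken from the Swagger example below (the image URL is a placeholder); it assumes the same path setup that populate_db.py arranges. create_tables() must run first so the restaurants table exists:

from data.db.models import create_tables
from data.db.database import insert_restaurant

create_tables()  # idempotent: CREATE TABLE IF NOT EXISTS
insert_restaurant(
    name="Alley Cat Cafe",
    category="Coffee",
    address="112 N Cayuga St.",
    latitude=42.4407309,
    longitude=-76.4950526,
    image_url="https://example.com/alley-cat.jpg",  # placeholder
    web_url="https://www.alleycatithaca.com",
)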
13 changes: 13 additions & 0 deletions src/data/db/models.py
@@ -30,6 +30,19 @@ def create_tables():
)
''')

cursor.execute('''
CREATE TABLE IF NOT EXISTS restaurants (
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT UNIQUE,
category TEXT,
address TEXT,
latitude REAL,
longitude REAL,
image_url TEXT,
web_url TEXT
)
''')

conn.commit()
conn.close()
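
name TEXT UNIQUE is the natural key here; combined with the INSERT OR IGNORE in database.py it makes repeated scraper runs idempotent. A self-contained demonstration:

import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("""CREATE TABLE restaurants (
    id INTEGER PRIMARY KEY AUTOINCREMENT, name TEXT UNIQUE, category TEXT)""")
conn.execute("INSERT OR IGNORE INTO restaurants (name, category) VALUES (?, ?)",
             ("Alley Cat Cafe", "Coffee"))
conn.execute("INSERT OR IGNORE INTO restaurants (name, category) VALUES (?, ?)",
             ("Alley Cat Cafe", "Cafe"))  # silently skipped: duplicate name
print(conn.execute("SELECT COUNT(*) FROM restaurants").fetchone()[0])  # -> 1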

Binary file not shown.
Binary file not shown.
Binary file not shown.
35 changes: 22 additions & 13 deletions src/data/scrapers/libraries.py
@@ -1,26 +1,32 @@
import requests
from playwright.sync_api import sync_playwright
from bs4 import BeautifulSoup

# URL of the CU Print directory page
URL = "https://www.cornell.edu/about/maps/directory/?notes=Library&caption=%20Libraries"
URL = "https://www.cornell.edu/about/maps/directory/?layer=Library&sublayer="

def scrape_libraries():
# Send a GET request to fetch the HTML content
response = requests.get(URL)
soup = BeautifulSoup(response.text, 'html.parser')
with sync_playwright() as p:
browser = p.chromium.launch(headless=True)
page = browser.new_page()
page.goto(URL, wait_until="networkidle")

# Locate the table
# Get the rendered HTML after JS loads
content = page.content()
browser.close()

soup = BeautifulSoup(content, 'html.parser')
table = soup.find("table", {"id": "directoryTable"})
rows = table.find("tbody").find_all("tr")
if not table:
print("Could not find the table.")
return []

# Extract data
rows = table.find("tbody").find_all("tr")
data = []
for row in rows:
cols = row.find_all("td")
if len(cols) < 3:
continue
location_name = cols[0].text.strip().split('\n\n\n')[0]

location_name = cols[0].text.strip()
address = cols[1].text.strip()
coordinates_string = cols[2].text.strip()
coordinates = [float(x) for x in coordinates_string.split(', ')]
@@ -30,5 +36,8 @@ def scrape_libraries():
"Address": address,
"Coordinates": coordinates
})

return data

return data

if __name__ == "__main__":
scrape_libraries()
58 changes: 27 additions & 31 deletions src/data/scrapers/printers.py
@@ -1,37 +1,33 @@
import requests
from bs4 import BeautifulSoup

# URL of the CU Print directory page
URL = "https://www.cornell.edu/about/maps/directory/?layer=CUPrint&caption=%20CU%20Print%20Printers" # Replace with the actual URL
from playwright.sync_api import sync_playwright

def scrape_printers():
# Send a GET request to fetch the HTML content
response = requests.get(URL)
soup = BeautifulSoup(response.text, 'html.parser')
with sync_playwright() as p:
browser = p.chromium.launch(headless=True)
page = browser.new_page()
page.goto("https://www.cornell.edu/about/maps/directory/?layer=CUPrint")

# Wait for the dynamic table to load
page.wait_for_selector("table#directoryTable")

rows = page.query_selector_all("table#directoryTable > tbody > tr")
data = []

# Locate the table
table = soup.find("table", {"id": "directoryTable"})
rows = table.find("tbody").find_all("tr")
for row in rows:
cols = row.query_selector_all("td")
if len(cols) < 3:
continue
location = cols[0].inner_text().strip()
description = cols[1].inner_text().strip()
coordinates = [float(x.strip()) for x in cols[2].inner_text().split(",")]

# Extract data
data = []
for row in rows:
cols = row.find_all("td")
if len(cols) < 3: # Ensure row has enough columns
continue

location_name = cols[0].text.strip()
description = cols[1].text.strip()

# Extract coordinates from the hyperlink <a> tag inside <td>
coordinates_link = cols[2].find("a")
coordinates_string = coordinates_link.text.strip() if coordinates_link else ""
coordinates = [float(x) for x in coordinates_string.split(', ')]
data.append({
"Location": location,
"Description": description,
"Coordinates": coordinates
})

browser.close()
return data

data.append({
"Location": location_name,
"Description": description,
"Coordinates": coordinates
})
return data
if __name__ == "__main__":
scrape_printers()
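
Both rewritten scrapers drive headless Chromium, so Playwright's browser binary must be installed once (python -m playwright install chromium) on top of pip install playwright. A smoke-test sketch, run with src/ on the path the way populate_db.py arranges it:

from data.scrapers.printers import scrape_printers

rows = scrape_printers()
print(f"scraped {len(rows)} printers")
print(rows[0])  # {'Location': ..., 'Description': ..., 'Coordinates': [lat, lon]}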
70 changes: 70 additions & 0 deletions src/data/scrapers/restaurants.py
@@ -0,0 +1,70 @@
import requests
import pprint
from playwright.sync_api import sync_playwright

def scrape_restaurants():
with sync_playwright() as p:
browser = p.chromium.launch(headless=True)
context = browser.new_context()
page = context.new_page()
page.goto("https://www.visitithaca.com")
page.wait_for_timeout(1000)

# Get a fresh token from token endpoint
token_response = context.request.get("https://www.visitithaca.com/plugins/core/get_simple_token/")

token = token_response.text()

# Build your API request URL with the new token
api_url = (
"https://www.visitithaca.com/includes/rest_v2/plugins_listings_listings/find/"
"?json=%7B%22filter%22%3A%7B%22%24and%22%3A%5B%7B%22filter_tags%22%3A%7B%22%24in%22%3A%5B"
"%22site_primary_subcatid_307%22%2C%22site_primary_subcatid_308%22%2C%22site_primary_subcatid_309%22%2C"
"%22site_primary_subcatid_311%22%2C%22site_primary_subcatid_312%22%2C%22site_primary_subcatid_504%22%2C"
"%22site_primary_subcatid_505%22%2C%22site_primary_subcatid_506%22%2C%22site_primary_subcatid_508%22%2C"
"%22site_primary_subcatid_509%22%2C%22site_primary_subcatid_510%22%2C%22site_primary_subcatid_511%22%2C"
"%22site_primary_subcatid_512%22%2C%22site_primary_subcatid_513%22%2C%22site_primary_subcatid_514%22%2C"
"%22site_primary_subcatid_516%22%2C%22site_primary_subcatid_520%22%2C%22site_primary_subcatid_532%22%2C"
"%22site_primary_subcatid_536%22%5D%7D%7D%2C%7B%22regionid%22%3A%7B%22%24in%22%3A%5B8%5D%7D%7D%5D%7D%2C"
"%22options%22%3A%7B%22limit%22%3A100%2C%22skip%22%3A0%2C%22count%22%3Atrue%2C%22castDocs%22%3Afalse%2C"
"%22fields%22%3A%7B%22recid%22%3A1%2C%22title%22%3A1%2C%22primary_category%22%3A1%2C%22address1%22%3A1%2C"
"%22city%22%3A1%2C%22url%22%3A1%2C%22isDTN%22%3A1%2C%22latitude%22%3A1%2C%22longitude%22%3A1%2C"
"%22primary_image_url%22%3A1%2C%22qualityScore%22%3A1%2C%22rankOrder%22%3A1%2C%22weburl%22%3A1%2C"
"%22dtn.rank%22%3A1%2C%22yelp.rating%22%3A1%2C%22yelp.url%22%3A1%2C%22yelp.review_count%22%3A1%2C"
"%22yelp.price%22%3A1%2C%22booking_price_avg%22%3A1%2C%22booking_price_total%22%3A1%2C%22booking_full%22%3A1%7D%2C"
"%22hooks%22%3A%5B%5D%2C%22sort%22%3A%7B%22rankorder%22%3A1%2C%22sortcompany%22%3A1%7D%7D%7D"
f"&token={token}"
)

# Make the API request
api_response = context.request.get(api_url)

# Parse JSON data
json_body = api_response.json()

# Extract the restaurant data
restaurants_data = json_body.get("docs", {}).get("docs", [])

data = []
for item in restaurants_data:
name = item.get("title")
category = item.get("primary_category", {}).get("subcatname")
address = item.get("address1")
coordinates = [item.get("latitude"), item.get("longitude")]
image_url = item.get("primary_image_url")
web_url = item.get("weburl")

data.append({
"Name": name,
"Category": category,
"Address": address,
"Coordinates": coordinates,
"Image URL": image_url,
"Web URL": web_url,
})

browser.close()
return data

if __name__ == "__main__":
scrape_restaurants()
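
The long json= query string above is an ordinary JSON filter document, percent-encoded. A sketch of building an equivalent URL programmatically rather than hand-encoding it; the subcategory list is trimmed for brevity, and the IDs and regionid 8 are copied verbatim from the request above:

import json
from urllib.parse import quote

query = {
    "filter": {"$and": [
        {"filter_tags": {"$in": ["site_primary_subcatid_307",
                                 "site_primary_subcatid_308"]}},  # trimmed list
        {"regionid": {"$in": [8]}},
    ]},
    "options": {"limit": 100, "skip": 0, "count": True, "castDocs": False},
}
api_url = (
    "https://www.visitithaca.com/includes/rest_v2/plugins_listings_listings/find/"
    f"?json={quote(json.dumps(query, separators=(',', ':')))}"
)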
8 changes: 7 additions & 1 deletion src/data/scripts/populate_db.py
@@ -3,7 +3,8 @@
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")))
from data.scrapers.libraries import scrape_libraries
from data.scrapers.printers import scrape_printers
from data.db.database import insert_library, insert_printer
from data.scrapers.restaurants import scrape_restaurants
from data.db.database import insert_library, insert_printer, insert_restaurant
from data.db.models import create_tables

def populate_db():
@@ -19,6 +20,11 @@ def populate_db():
printers = scrape_printers()
for printer in printers:
insert_printer(printer['Location'], printer['Description'], printer['Coordinates'][0], printer['Coordinates'][1])

# Insert restaurants
restaurants = scrape_restaurants()
for restaurant in restaurants:
insert_restaurant(restaurant['Name'], restaurant['Category'], restaurant['Address'], restaurant['Coordinates'][0], restaurant['Coordinates'][1], restaurant['Image URL'], restaurant['Web URL'])

if __name__ == "__main__":
populate_db()
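
After a run, the result can be sanity-checked against the SQLite file committed in this PR; a sketch, assuming it is executed from the repository root:

import sqlite3

conn = sqlite3.connect("src/data/transit.db")  # path as committed in this PR
count = conn.execute("SELECT COUNT(*) FROM restaurants").fetchone()[0]
print(f"{count} restaurants stored")
conn.close()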
Binary file modified src/data/transit.db
Binary file not shown.
24 changes: 16 additions & 8 deletions src/swagger.json
@@ -49,10 +49,7 @@
],
"responses": {
"200": {
"description": "{\"success\": true, \"data\": [\"id\": 1, \"location\": \"Africana Studies and Research Center\", \"address\": \"310 Triphammer Rd, Ithaca, NY 14850\", \"latitude\": 42.4574, \"longitude\": -76.4823]}",
"schema": {
"$ref": "#/components/schemas/BusStop"
}
"description": "{\"success\": true, \"data\": [\"id\": 1, \"location\": \"Africana Studies and Research Center\", \"address\": \"310 Triphammer Rd, Ithaca, NY 14850\", \"latitude\": 42.4574, \"longitude\": -76.4823]}"
}
}
}
@@ -66,10 +63,21 @@
],
"responses": {
"200": {
"description": "{\"success\": true, \"data\": [{\"id\": 1, \"location\": \"Akwe:kon\", \"description\": \"Color - Room 115\", \"latitude\": 42.4563, \"longitude\": -76.4806}]}",
"schema": {
"$ref": "#/components/schemas/BusStop"
}
"description": "{\"success\": true, \"data\": [{\"id\": 1, \"location\": \"Akwe:kon\", \"description\": \"Color - Room 115\", \"latitude\": 42.4563, \"longitude\": -76.4806}]}"
}
}
}
},
"/api/v1/restaurants": {
"get": {
"summary": "Returns a list of all restaurants in Ithaca.",
"description": "A list of all restaurants.",
"produces": [
"application/json"
],
"responses": {
"200": {
"description": "{\"success\": true, \"data\": [{\"id\": 1, \"name\": \"Alley Cat Cafe\", \"category\": \"Coffee\", \"address\": \"112 N Cayuga St.\", \"latitude\": 42.4407309, \"longitude\": -76.4950526, \"image_url\": \"https://assets.simpleviewinc.com/simpleview/image/upload/crm/ithacany/Alley-Cat-Logo_1872F7A2-5056-A36A-09631186ACCA298F-1872f6fe5056a36_1872f7f6-5056-a36a-098fa97e4ad0cd49.jpg\", \"web_url\": \"https://www.alleycatithaca.com\"}]}"
}
}
}