From fcf567909966edbdf74e5a46f4e35735ad66d628 Mon Sep 17 00:00:00 2001 From: Lauren Ah-Hot Date: Wed, 19 Mar 2025 18:04:20 -0400 Subject: [PATCH 1/6] Wrote scraper function for restaurants --- docker-compose.yml | 25 +++++++++++++++++--- src/data/db/database.py | 13 +++++++++++ src/data/db/models.py | 13 +++++++++++ src/data/scrapers/restaurants.py | 40 ++++++++++++++++++++++++++++++++ src/data/scripts/populate_db.py | 8 ++++++- 5 files changed, 95 insertions(+), 4 deletions(-) create mode 100644 src/data/scrapers/restaurants.py diff --git a/docker-compose.yml b/docker-compose.yml index efb2be08..0263584f 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -3,7 +3,26 @@ services: image: cornellappdev/transit-dev:${IMAGE_TAG} env_file: .env volumes: - - ./service-account-credentials.json:/app/service-account-credentials.json:ro + - .:/usr/src/app + - /usr/src/app/node_modules + + ghopper: + image: cornellappdev/transit-ghopper:03_01_25 ports: - - "8080:3000" - + - "8988:8988" + + map: + image: cornellappdev/transit-map + ports: + - "8989:8989" + + ghopper-walking: + image: cornellappdev/transit-ghopper-walking:latest + ports: + - "8987:8987" + + live-tracking: + image: cornellappdev/transit-python:03_01_25 + env_file: python.envrc + ports: + - "5000:5000" diff --git a/src/data/db/database.py b/src/data/db/database.py index 8ef6331f..608300a0 100644 --- a/src/data/db/database.py +++ b/src/data/db/database.py @@ -32,5 +32,18 @@ def insert_printer(location, description, latitude, longitude): VALUES (?, ?, ?, ?) ''', (location, description, latitude, longitude)) + conn.commit() + conn.close() + +def insert_restaurant(name, category, address, latitude, longitude, image_url, web_url): + """Insert a restaurant into the database.""" + conn = get_db_connection() + cursor = conn.cursor() + + cursor.execute(''' + INSERT OR IGNORE INTO restaurants (name, category, address, latitude, longitude, image_url, web_url) + VALUES (?, ?, ?, ?, ?, ?, ?) + ''', (name, category, address, latitude, longitude, image_url, web_url)) + conn.commit() conn.close() \ No newline at end of file diff --git a/src/data/db/models.py b/src/data/db/models.py index d35f9269..69567993 100644 --- a/src/data/db/models.py +++ b/src/data/db/models.py @@ -30,6 +30,19 @@ def create_tables(): ) ''') + cursor.execute(''' + CREATE TABLE IF NOT EXISTS restaurants ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + name TEXT UNIQUE, + category TEXT, + address TEXT, + latitude REAL, + longitude REAL, + image_url TEXT, + web_url TEXT + ) + ''') + conn.commit() conn.close() diff --git a/src/data/scrapers/restaurants.py b/src/data/scrapers/restaurants.py new file mode 100644 index 00000000..61c7073a --- /dev/null +++ b/src/data/scrapers/restaurants.py @@ -0,0 +1,40 @@ +import requests +import pprint + +# URL of the Ithaca restaurant directory API +URL = "https://www.visitithaca.com/includes/rest_v2/plugins_listings_listings/find/?json=%7B%22filter%22%3A%7B%22%24and%22%3A%5B%7B%22filter_tags%22%3A%7B%22%24in%22%3A%5B%22site_primary_subcatid_307%22%2C%22site_primary_subcatid_308%22%2C%22site_primary_subcatid_309%22%2C%22site_primary_subcatid_311%22%2C%22site_primary_subcatid_312%22%2C%22site_primary_subcatid_504%22%2C%22site_primary_subcatid_505%22%2C%22site_primary_subcatid_506%22%2C%22site_primary_subcatid_508%22%2C%22site_primary_subcatid_509%22%2C%22site_primary_subcatid_510%22%2C%22site_primary_subcatid_511%22%2C%22site_primary_subcatid_512%22%2C%22site_primary_subcatid_513%22%2C%22site_primary_subcatid_514%22%2C%22site_primary_subcatid_516%22%2C%22site_primary_subcatid_520%22%2C%22site_primary_subcatid_532%22%2C%22site_primary_subcatid_536%22%5D%7D%7D%2C%7B%22regionid%22%3A%7B%22%24in%22%3A%5B8%5D%7D%7D%5D%7D%2C%22options%22%3A%7B%22limit%22%3A100%2C%22skip%22%3A0%2C%22count%22%3Atrue%2C%22castDocs%22%3Afalse%2C%22fields%22%3A%7B%22recid%22%3A1%2C%22title%22%3A1%2C%22primary_category%22%3A1%2C%22address1%22%3A1%2C%22city%22%3A1%2C%22url%22%3A1%2C%22isDTN%22%3A1%2C%22latitude%22%3A1%2C%22longitude%22%3A1%2C%22primary_image_url%22%3A1%2C%22qualityScore%22%3A1%2C%22rankOrder%22%3A1%2C%22weburl%22%3A1%2C%22dtn.rank%22%3A1%2C%22yelp.rating%22%3A1%2C%22yelp.url%22%3A1%2C%22yelp.review_count%22%3A1%2C%22yelp.price%22%3A1%2C%22booking_price_avg%22%3A1%2C%22booking_price_total%22%3A1%2C%22booking_full%22%3A1%7D%2C%22hooks%22%3A%5B%5D%2C%22sort%22%3A%7B%22rankorder%22%3A1%2C%22sortcompany%22%3A1%7D%7D%7D&token=6520d36b63a2aa0c7d91ebb18aa5d2c6" + +def scrape_restaurants(): + # Send a GET request to fetch the API data + response = requests.get(URL) + print(response.status_code) + print(response.text) + restaurants_data = response.json().get("docs").get("docs") + pprint.pprint(restaurants_data, compact=True) + + # Extract data + data = [] + for item in restaurants_data: + name = item.get("title") + category = item.get("primary_category").get("subcatname") + address = item.get("address1") + coordinates = [] + coordinates.append(item.get("latitude")) + coordinates.append(item.get("longitude")) + image_url = item.get("primary_image_url") + web_url = item.get("weburl") + + data.append({ + "Name": name, + "Category": category, + "Address": address, + "Coordinates": coordinates, + "Image URL": image_url, + "Web URL": web_url, + }) + + print(data) + return data + +if __name__ == "__main__": + scrape_restaurants() \ No newline at end of file diff --git a/src/data/scripts/populate_db.py b/src/data/scripts/populate_db.py index fa6a23f4..59a4f7d2 100644 --- a/src/data/scripts/populate_db.py +++ b/src/data/scripts/populate_db.py @@ -3,7 +3,8 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))) from data.scrapers.libraries import scrape_libraries from data.scrapers.printers import scrape_printers -from data.db.database import insert_library, insert_printer +from data.scrapers.restaurants import scrape_restaurants +from data.db.database import insert_library, insert_printer, insert_restaurant from data.db.models import create_tables def populate_db(): @@ -19,6 +20,11 @@ def populate_db(): printers = scrape_printers() for printer in printers: insert_printer(printer['Location'], printer['Description'], printer['Coordinates'][0], printer['Coordinates'][1]) + + # Insert restaurants + restaurants = scrape_restaurants() + for restaurant in restaurants: + insert_restaurant(restaurant['Name'], restaurant['Category'], restaurant['Address'], restaurant['Coordinates'][0], printer['Coordinates'][1], restaurant['Image URL'], restaurant['Web URL']) if __name__ == "__main__": populate_db() \ No newline at end of file From a5ad127e265b667b22497e5576250c97b9acd477 Mon Sep 17 00:00:00 2001 From: Lauren Ah-Hot Date: Sat, 22 Mar 2025 00:45:14 -0400 Subject: [PATCH 2/6] Adding restaurants to ecosystem --- .DS_Store | Bin 6148 -> 6148 bytes package-lock.json | 1 + python.envrc | 5 + src/.DS_Store | Bin 10244 -> 10244 bytes src/controllers/EcosystemController.js | 11 ++ src/data/.DS_Store | Bin 0 -> 6148 bytes .../db/__pycache__/database.cpython-312.pyc | Bin 0 -> 2471 bytes .../db/__pycache__/models.cpython-312.pyc | Bin 0 -> 1912 bytes .../__pycache__/libraries.cpython-312.pyc | Bin 0 -> 1680 bytes .../__pycache__/printers.cpython-312.pyc | Bin 0 -> 1734 bytes .../__pycache__/restaurants.cpython-312.pyc | Bin 0 -> 3925 bytes src/data/scrapers/restaurants.py | 96 ++++++++++++------ src/data/scripts/populate_db.py | 2 +- src/data/transit.db | Bin 24576 -> 49152 bytes src/utils/EcosystemUtils.js | 29 +++++- 15 files changed, 109 insertions(+), 35 deletions(-) create mode 100644 python.envrc create mode 100644 src/data/.DS_Store create mode 100644 src/data/db/__pycache__/database.cpython-312.pyc create mode 100644 src/data/db/__pycache__/models.cpython-312.pyc create mode 100644 src/data/scrapers/__pycache__/libraries.cpython-312.pyc create mode 100644 src/data/scrapers/__pycache__/printers.cpython-312.pyc create mode 100644 src/data/scrapers/__pycache__/restaurants.cpython-312.pyc diff --git a/.DS_Store b/.DS_Store index 5b928bb719a93f6dc27edc3b82270d14a3109785..c4187498caaa6fa1164c037552c35bd0acfac614 100644 GIT binary patch literal 6148 zcmeHKO>fgM7=FDLnr;IX1kx@@k+^Q9i~*XsblrBC1RPcb2S6ohvn?!*t29|vRh4>% z|G;11$}i!+aDwNxJ&~mBiV*T!IgkDNef;snj%y+koow)ss6j*y5@U50k3SgKb6T<$ z<6*K;Z3L9i7WGNea4Fh4tO8bne@y}2ySHh89owgK%J1J1d}DZ;G{tU?jw0FjBk3ca zIeSRWpURLUTBUoHxRhZMOXjll6ZU*w5nA=?uM%@WY5p z(Vt7vRYa$BLgP~0k5XKXp3@QS(;>a2E*%!5S58aO>lnF&ZUOZ$a?WKu<8m)br!P63 zZeh%x(%Qqg0(>naqg#_cLT^cNv1ek{@-3{FTDXFzR4enQP=b|d4{XjO@cQYVx~Mto-v;a8G26D36a3h>Tx+leFV-GGf~>- z+pi#YY22)|xS3PhMER5P$gJCM{wNB^X{+^JtW>JkuGgHJv*x_-p2ZD|CIuwdeA-SU`pm}U78%9wJy>JBsPv) nXjBwD%yFzTd=zgWDMOvh8DQJsLL)pd`$s^@U>{b2zpB75tMC;% delta 132 zcmZoMXfc=|#>B)qu~2NHo+2a5#(>?7tSk%+41ALXSY#%@VQb(pvNY3CFflNiT*7Y8 z2o~lk3ogpb$7 diff --git a/package-lock.json b/package-lock.json index 826744f3..605701ae 100644 --- a/package-lock.json +++ b/package-lock.json @@ -3242,6 +3242,7 @@ "resolved": "https://registry.npmjs.org/nodemon/-/nodemon-3.1.9.tgz", "integrity": "sha512-hdr1oIb2p6ZSxu3PB2JWWYS7ZQ0qvaZsc3hK8DR8f02kRzc8rjYmxAIvdz+aYC+8F2IjNaB7HMcSDg8nQpJxyg==", "dev": true, + "license": "MIT", "dependencies": { "chokidar": "^3.5.2", "debug": "^4", diff --git a/python.envrc b/python.envrc new file mode 100644 index 00000000..f426cffc --- /dev/null +++ b/python.envrc @@ -0,0 +1,5 @@ +TOKEN=TOKEN +TWITTER_KEY=TWITTER_KEY +TWITTER_KEY_SECRET=TWITTER_KEY_SECRET +TWITTER_TOKEN=TWITTER_TOKEN +TWITTER_TOKEN_SECRET=TWITTER_TOKEN_SECRET diff --git a/src/.DS_Store b/src/.DS_Store index a39c96c47949260ec6c8cd9be8d9394560f28cf6..13a4eeddaf063289b536ebcca45011205674e3e7 100644 GIT binary patch delta 395 zcmZn(XbG6$&nUPtU^hRb;A9>FDH+e4{N$vZ{3Hej1_2;;1Y*7aV8FltQZV_BK!b>h zrMZrRk*PtgjzYDik%5kaiLu$_0zu=+`~n*$TMCFflm!>%<>cq319dY3F$+TqLn1>7 zLn4~#>yb@IR)^P?dbn|D)^IRnGUNfRN@b{E&|}DAC`Pr8Q5MZQ9)@Cua-hS~fiRV! z2%?7}pCJ!TUpJ~g4F*<*QlM`#8FDbJ_=&7^@)^NCoME+jq0lVu&Fl)lST>sqGcyAK D6fA6f delta 167 zcmZn(XbG6$&&ahgU^hP_*JK63&9Zz9DGd1x$qd;HsSHI7xw_lX!=0JRz# c7@`}qIag#B_r``ZjGNgNez9ymE5^(W0F`MgtpET3 diff --git a/src/controllers/EcosystemController.js b/src/controllers/EcosystemController.js index b302eb29..4dc6ed8e 100644 --- a/src/controllers/EcosystemController.js +++ b/src/controllers/EcosystemController.js @@ -25,4 +25,15 @@ router.get("/printers", async (req, res) => { } }); +// Fetch all restaurants +router.get("/restaurants", async (req, res) => { + try { + const restaurants = await EcosystemUtils.fetchAllRestaurants(); + res.status(200).json({ success: true, data: restaurants }); + } catch (error) { + console.error("Error fetching restaurants:", error.message); + res.status(500).json({ error: "Failed to fetch restaurants" }); + } +}); + export default router; diff --git a/src/data/.DS_Store b/src/data/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..443fc85bcd1a16b5e456ed69c38cc567673255b2 GIT binary patch literal 6148 zcmeH~&u`N(6vv;tLfuVB6=KpZNRhZ!rHlcZxP&qeTo(ifKqXlU1TwoM>3XOt%C&lf9z5@Kl@w{J zD5KYONE7-%Gx!ocB@O5uye8F{_cQ9w&MT31L^JdZKln{EXw|X*QXUd)FRnRmGW_6s90c zCrEkmEK{kTkMvBXrKyeVfb7cdxVN)d?Csy{2fKHM%YLxf-yQaYz1w$}%dXtMar6G+ z_X4Eli6>L*C^NB-jY+0F+YIPKUn+a}|G3mT+=vRX7m6C3Jjct^1iIG7% z)ibE%w zEF&AbL=@^O*J>XKQeJ|Y#Lr3PQ&2W7|TD6 z`SE9QqefhZFa6Xn8v0~P;FFsIpG<4|WU@#t>E>~D`Ev)2 z8xavA)?@lnCy5baYB z;vjip?dHbkoAn*-y}y-c?CL=R#I)t{rSV61_GESW_R{SiCjM`LsS*wqOgaIk*355b zzywSGg%^ZfgP7F@c*6X>9A=2fSt?G%du7SC4ao&=A$>x;BE+d62o6Us@|mK(3<$^f_Ft5i1S9!zH1dMlHuV zrhylY$-*g9L#O0C&qNC{Olx{!O5mjpMj}oDJ3`o?liq+%S{Ea4^I4a8btk#_PmFSI zP2CvSyuS6tFWRr!9c^4d>W*|Grc&KRe2ZAbahY7z={!@AN&Jf`ddc-IYOy$LTDVwb z>Wz`*py>`GRAD>vSMl6{XW>-z2+L%Ydqi|SsYYjt2{U^TsxUVFIN cx9ROBh8C4WIU>Dts3xV}!`Hbu62kZU4`O&7e*gdg literal 0 HcmV?d00001 diff --git a/src/data/db/__pycache__/models.cpython-312.pyc b/src/data/db/__pycache__/models.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8e1a0893ca69f5df68df07bf36e5fe64e40efc1f GIT binary patch literal 1912 zcmcgs&2Jk;6rZ)%-nAVErOGAcG7^W>QtA~@WDbbn#9MHaY+7d{wJvBi-krGXu6NDs z7-}n(9H~{XIG9_hgw!5E`BS(N35CrC@p0g05l$5cX4W6Xm0ggi7-{G2d++z=eaxHp zZ91IFvL!wM`904<>;i=$2A#ca3Ac0X;m3Ta$ndNYEx=q z38NDPFar_!M?~}-0@P9yp@z&T3g!dU(l64t#7wNMtP_kV_8~@Cs%o~(=Nc=lbge#2Fdw5DYF60Ui&1?)PiVxvi%Db7B{m9b`Ap+R$K z87Ywbj~(_H-V7wub6srGKq{;m*Yo-ZLELN--y?y9?_sk^F%x+8x=n+a>3BZQL;{i9 z5I$9XK5ig)<}xOij0+lXEy7Y)mmBih>IOZ$u}AW{E0H0W{OSkAQN34vX?8! zyo+5+@rkKeh$6+RD0R=mj-PF`g0vaFZ)5Ph2_FbDcfH594)`T?p|{#y{VI2ydaYA9 zNiXzncW;0GQRnK9V)9Ts(E31Ll>Y#7EYV*CvD8dailb5tbrHl8hZ_eQ54KK3`S9Ao zwTGK$%o7M?!>A+MHH=J(@QE5kJU_PEt6#r;{OXnC)LR|-Bstex?k@LMx+`DYd%E~^|2TQ2BmFKe c3k&^{0K~b^+TTT&eh?QPZug@=ycl}_3rCTpVE_OC literal 0 HcmV?d00001 diff --git a/src/data/scrapers/__pycache__/libraries.cpython-312.pyc b/src/data/scrapers/__pycache__/libraries.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3781a20b60e24a7dc72decc6d6f662d9503f6137 GIT binary patch literal 1680 zcmbVMO-vg{6rTO@4_+IC6JVRvDYOLY6xbF;X-Y#QimQT(qS7>~iV~sq&SKVCdzYDU z`O#W(r5;R+1W2s_DH0r0ap|$g9x7E;FSwR!Ym!Ql+6#wDw5W&X)LG-jIdIBIGw;p& z`R1FaH}gv*5&~>|9{e@^mjJ*Y?#3H@Lpgq#DO&&n$eCcqmAuJi`5B%AP?!;*_yw2| zm$`)R&`r3N;ArGsUCk3?E^pqp^7hJy^Mu%VL{SzO7l$+p<#f{=(qUdvQ&yfRS=Gi0 zG?1nA2d$6A9~T?E+5sDop~2yX$xdCrV? z0nflB5xvcRBd!35i%=g9$OJvTDIvGtKq_z!mywztPlG&UWiXm?yu+Uv=krd@M7-Rl zgnrLk5YnI^IzkfM`w4)8&+(lZA2^NolcFT+`-hFyx(jf>{VKbm@7}3qh6{ey+?Gr{zp;5szCEtcNr{4K)59t8^hmIz0Z) zDgRA=Eh>V7R0ug@rpxm;r4w+1js#mD2p%d2)_xX&Bi?4X9GivEB1lAUtw{4EYYy2e z!aAZpqNYrp@&=@>r(OMV-R2`H3ocWhAU5Hl+qG`ff)ybY3LUx)d05}_1KkOM2B$1d zb$g!%#vnvG##9=&ECh|5%3eq!OoK>Y$m^J3Dx`Hng*zWjAs3X2L|-CQoHKHe2Hka5 zHBBm*dXD-qL59tQZ5o97=1fZ^)UVpMo`WZl*tT+5ry^!=7y0UN$Xdiygep;~z;r5V zmWgS^Y!IBysac%{)CS9#)?_*B9+yi~Zi!0Im~oilc0V(oYRVl}J*=2&9_dQb((+k7 zN3b$}@?R)pww=^JQwUOX*dU5Q=2cA{Y-|Tps&=1UK*2~;T$&2jkZRZQ>i-D)G=}VD z+IC`|H9aGGmGN-jKfb`qUj%Z z(`ET)DN>7dZY*r}Zw+h?RAas6SZ^iPR|?ePJ$pSnpX}b)Cyxi<6~bN&zX(}@lM literal 0 HcmV?d00001 diff --git a/src/data/scrapers/__pycache__/printers.cpython-312.pyc b/src/data/scrapers/__pycache__/printers.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8a76130d5079888837b05be5af987ed3bb1c9c4e GIT binary patch literal 1734 zcmbVM-A^M`6un=VJH)J|9lqu7h**mxL zk!eyBABvIyiDn_$1RgQ`6F&H0)Q8p@Tko!kiN5fL5+B?r@01SZffv2$J?GqW&-wLt z&)jpr4~K(*j!*nQX1?VC_}y8!gKZioFH++Xkb%r-V9wF3#^l&JmH{$1$I1MsV2)p9 zlAa?cVJ69t@Y{-*$Lc~}yJzIhwT~7tHmy-XSYBQplnkURnl`A&c|lAYc`W2a(-LGA zDH1l&s_>R3t}1AJa`t_s>bO@DO{^OFc+b$Qle1JbyslP|B{f+#l#fb|&XW~N{t>{8 zyP0(l<2tCu^uVX%uo+hX*g>vVV#4M$E=bwpR{3M*}W8 zUAEoLZ0W(@faNyr^EGGv)z1fV%9!e`TgKa<~w*gV{?h)~+SDx#%gd z2_-fIB=!NVcLCxnZj24xm~D0=!h=Gn0Bt_o>F&3*?X&%MNRB+_+zbY6{J;a7zejNy zIty?aB%?EHa1rOUK~qGQf`|u;X-y%lDwDRedw##+^Wd~0uM!p`I^`qh?jq-c9XU7h z$gv0`60pYhcPBaGzhg+E(+vcs6iY&CqcsSeGz=uGx`-7k86tk9Jj^Q=wg{I|FyUrD zxPu&E!eeCx6MjL}W#V__yr^k}(-fU}EKJd;GBp(w&w^%%n0Q6gRCGBRb|5X&&@F}V z7VT){X}}?4*&@6wVv%svCcI>5771%jlJmNlQwWsLF^x(srt_MrFOiP(9*2aUASUvRP6u}>n&P{vWemq%? zbQhnFR`0A2v^~)V(Qon!tI;W>(g7AeZJg#tNi?MH8fIurw*f=qwAwvnJT3>O?r^io`g$VuQzTZa{8}P)33k@H390Q;+cOeXUKU?xs3$6bh^NbQ*q2O4D$p; Vegz52!KW}q?u5V2MA0DK{1;E5o)Z87 literal 0 HcmV?d00001 diff --git a/src/data/scrapers/__pycache__/restaurants.cpython-312.pyc b/src/data/scrapers/__pycache__/restaurants.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e1afe133b39669fa542eeb9c5796e88091c8ec20 GIT binary patch literal 3925 zcma)9Pi))P8Gn>0iLxb2mL1D~k~*%dw$>WclB3y8=e9|VWLb+0SW^@YFc38PEK#ON zmZU7plh7S@P_P1-PfoB6>9$LX^pKsF<4(KmVoe>Sy=+5(UDlh{U?`B&K2oG&S+OKSLS}*)Rm7(27CgloUa~V`c6_j^_bH(tT_Z=EIU6K0I^T4rDr7BcD(2sE~S3MO^ zK6rxMTk+MfH*Y!XlKtLLgJ|cYx;mMC2l|$^K{d1zv)vD4JIzQHr=tk}n{T1IY z(XTyKKdj;Zv|sUe?FTA>{HYV{GZp5A5e6jmz^TNG<=L*h8qA{VP$jsIe*7DRs>7Ax z7iuuwm_o-Of&+XGbuEOCu`t|4j2wdqxrk%rVOO5@R3pb=MqEradJH1cMT}O4^6~!s zS3;F=Wuy|RL@T3W^jF<_QRuVZ(FoW))vx1n0=PINcwm#l$E^{CK0xy*g$%0Xq0q;O zf=?Okct1uZYGL&H4cMGa4)dan4P$pEXPTOEfn`gj(&DycNTy`wc!6IO)PnSH*qyr< zPC&4lT+B*}!3wI5*(^3WLn>%8=1g@9D=dG`kI+f#k|M}O5gV+I4U^kWyY@I)GE7Oy z8r=@NDJdfRPTo+JwZzJ$L^{1G$tKpJlUTmknMtJ2^NQ%qWG)?saVDQNj$kF_IW(Py zm19oRr2?;)Iit8C@TMek%S$U3D18~);}EL@5pNDcq*4PB5X6abWR}hkL}UgczB>>x zXtkN8L95NAmIiqln2$_qU_MgI1HBB)SZZKC(gWABJTTH_+e>CWv10!L*=&OAI4h}& zs^D1LzZ(^^iHZuwtv=*2ey3{b4N{oQsiZD|ABS7^%&E6 zW$TB!=pH{#s3p8{)PiU#ix#*?yNqQGW~~FRfBvY24j6AsxWpZ*kmJ&p!Xkv;_%>8^ z3#?n`nSeQdyVp;z)KpErH>0k6vnY23Kux=(Ik46WxZ`IYro?Hgl_&?1nH_-y8os9PSFE@Y^4GAn!m6UOBWVcd$7pzM~JjJnF* z+7f@KLJNLEU$-q>c4$Zf7dzY|!;|t{v^e|WkKW%6e1tcw&O%tXez+1w z$mwuH77N5L^F>9-kzvKIkYeH;lQ7mm17$*IRZ}I=5-*wDrmAzMRKRM{Bmo_NQiNNN zc;VV4w5>&3*BkNknuZmT_yk!s@Is7G8VPLZYRSMl2?%P(18rL$vD3!Ew$WhAV@RBJ z@sa?qNfw()$m(>ZLCZe3p^9Y^wQgL#2;4LcPUKAU)g(d6v@r+c3^qFd4Hu5U#^9~ z;HkOhc%nX@XpFyB3&Qwx;@;ZbwfhqfV~v^f&6(Bu%<7|wKf+-BfSG&}8LiQe$6u+f zx5nc8t9z?=t~SQ3QG8~9XK&}=Z2IA)Mtr3if2$sU>(Tp-__f;iTNBg!xxL(--Nr<+ zcC|G#dvEpb>gPXb%&-r~|1kOc$w%L5q^=xHU#-2@nmm1vxyu~Poo`HLYS&!FoyNrL zwX07;(cACcvM{aFue4@fZp|iJXI_QQf5L&Ok^iDVIMNHSmEcV#(17>+COlEQJYYZ+(H7{sr^?p91P_WT8N z(SG3szS|qrF1w{{F`47OKu^Ii(o@hpW4>2V#iESg)-S-&dT%rqplN$4ih7J9k5Tv^ ji2a6sn;LJw>p^s=w)+?F)ZgfdztWRXEP7gh6VU$yj&B}V literal 0 HcmV?d00001 diff --git a/src/data/scrapers/restaurants.py b/src/data/scrapers/restaurants.py index 61c7073a..d3307302 100644 --- a/src/data/scrapers/restaurants.py +++ b/src/data/scrapers/restaurants.py @@ -1,40 +1,70 @@ import requests import pprint - -# URL of the Ithaca restaurant directory API -URL = "https://www.visitithaca.com/includes/rest_v2/plugins_listings_listings/find/?json=%7B%22filter%22%3A%7B%22%24and%22%3A%5B%7B%22filter_tags%22%3A%7B%22%24in%22%3A%5B%22site_primary_subcatid_307%22%2C%22site_primary_subcatid_308%22%2C%22site_primary_subcatid_309%22%2C%22site_primary_subcatid_311%22%2C%22site_primary_subcatid_312%22%2C%22site_primary_subcatid_504%22%2C%22site_primary_subcatid_505%22%2C%22site_primary_subcatid_506%22%2C%22site_primary_subcatid_508%22%2C%22site_primary_subcatid_509%22%2C%22site_primary_subcatid_510%22%2C%22site_primary_subcatid_511%22%2C%22site_primary_subcatid_512%22%2C%22site_primary_subcatid_513%22%2C%22site_primary_subcatid_514%22%2C%22site_primary_subcatid_516%22%2C%22site_primary_subcatid_520%22%2C%22site_primary_subcatid_532%22%2C%22site_primary_subcatid_536%22%5D%7D%7D%2C%7B%22regionid%22%3A%7B%22%24in%22%3A%5B8%5D%7D%7D%5D%7D%2C%22options%22%3A%7B%22limit%22%3A100%2C%22skip%22%3A0%2C%22count%22%3Atrue%2C%22castDocs%22%3Afalse%2C%22fields%22%3A%7B%22recid%22%3A1%2C%22title%22%3A1%2C%22primary_category%22%3A1%2C%22address1%22%3A1%2C%22city%22%3A1%2C%22url%22%3A1%2C%22isDTN%22%3A1%2C%22latitude%22%3A1%2C%22longitude%22%3A1%2C%22primary_image_url%22%3A1%2C%22qualityScore%22%3A1%2C%22rankOrder%22%3A1%2C%22weburl%22%3A1%2C%22dtn.rank%22%3A1%2C%22yelp.rating%22%3A1%2C%22yelp.url%22%3A1%2C%22yelp.review_count%22%3A1%2C%22yelp.price%22%3A1%2C%22booking_price_avg%22%3A1%2C%22booking_price_total%22%3A1%2C%22booking_full%22%3A1%7D%2C%22hooks%22%3A%5B%5D%2C%22sort%22%3A%7B%22rankorder%22%3A1%2C%22sortcompany%22%3A1%7D%7D%7D&token=6520d36b63a2aa0c7d91ebb18aa5d2c6" +from playwright.sync_api import sync_playwright def scrape_restaurants(): - # Send a GET request to fetch the API data - response = requests.get(URL) - print(response.status_code) - print(response.text) - restaurants_data = response.json().get("docs").get("docs") - pprint.pprint(restaurants_data, compact=True) - - # Extract data - data = [] - for item in restaurants_data: - name = item.get("title") - category = item.get("primary_category").get("subcatname") - address = item.get("address1") - coordinates = [] - coordinates.append(item.get("latitude")) - coordinates.append(item.get("longitude")) - image_url = item.get("primary_image_url") - web_url = item.get("weburl") - - data.append({ - "Name": name, - "Category": category, - "Address": address, - "Coordinates": coordinates, - "Image URL": image_url, - "Web URL": web_url, - }) - - print(data) - return data + with sync_playwright() as p: + browser = p.chromium.launch(headless=True) + context = browser.new_context() + page = context.new_page() + page.goto("https://www.visitithaca.com") + page.wait_for_timeout(1000) + + # Get a fresh token from token endpoint + token_response = context.request.get("https://www.visitithaca.com/plugins/core/get_simple_token/") + + token = token_response.text() + + # Build your API request URL with the new token + api_url = ( + "https://www.visitithaca.com/includes/rest_v2/plugins_listings_listings/find/" + "?json=%7B%22filter%22%3A%7B%22%24and%22%3A%5B%7B%22filter_tags%22%3A%7B%22%24in%22%3A%5B" + "%22site_primary_subcatid_307%22%2C%22site_primary_subcatid_308%22%2C%22site_primary_subcatid_309%22%2C" + "%22site_primary_subcatid_311%22%2C%22site_primary_subcatid_312%22%2C%22site_primary_subcatid_504%22%2C" + "%22site_primary_subcatid_505%22%2C%22site_primary_subcatid_506%22%2C%22site_primary_subcatid_508%22%2C" + "%22site_primary_subcatid_509%22%2C%22site_primary_subcatid_510%22%2C%22site_primary_subcatid_511%22%2C" + "%22site_primary_subcatid_512%22%2C%22site_primary_subcatid_513%22%2C%22site_primary_subcatid_514%22%2C" + "%22site_primary_subcatid_516%22%2C%22site_primary_subcatid_520%22%2C%22site_primary_subcatid_532%22%2C" + "%22site_primary_subcatid_536%22%5D%7D%7D%2C%7B%22regionid%22%3A%7B%22%24in%22%3A%5B8%5D%7D%7D%5D%7D%2C" + "%22options%22%3A%7B%22limit%22%3A100%2C%22skip%22%3A0%2C%22count%22%3Atrue%2C%22castDocs%22%3Afalse%2C" + "%22fields%22%3A%7B%22recid%22%3A1%2C%22title%22%3A1%2C%22primary_category%22%3A1%2C%22address1%22%3A1%2C" + "%22city%22%3A1%2C%22url%22%3A1%2C%22isDTN%22%3A1%2C%22latitude%22%3A1%2C%22longitude%22%3A1%2C" + "%22primary_image_url%22%3A1%2C%22qualityScore%22%3A1%2C%22rankOrder%22%3A1%2C%22weburl%22%3A1%2C" + "%22dtn.rank%22%3A1%2C%22yelp.rating%22%3A1%2C%22yelp.url%22%3A1%2C%22yelp.review_count%22%3A1%2C" + "%22yelp.price%22%3A1%2C%22booking_price_avg%22%3A1%2C%22booking_price_total%22%3A1%2C%22booking_full%22%3A1%7D%2C" + "%22hooks%22%3A%5B%5D%2C%22sort%22%3A%7B%22rankorder%22%3A1%2C%22sortcompany%22%3A1%7D%7D%7D" + f"&token={token}" + ) + + # Make the API request + api_response = context.request.get(api_url) + + # Parse JSON data + json_body = api_response.json() + + # Extract the restaurant data + restaurants_data = json_body.get("docs", {}).get("docs", []) + + data = [] + for item in restaurants_data: + name = item.get("title") + category = item.get("primary_category", {}).get("subcatname") + address = item.get("address1") + coordinates = [item.get("latitude"), item.get("longitude")] + image_url = item.get("primary_image_url") + web_url = item.get("weburl") + + data.append({ + "Name": name, + "Category": category, + "Address": address, + "Coordinates": coordinates, + "Image URL": image_url, + "Web URL": web_url, + }) + + browser.close() + return data if __name__ == "__main__": scrape_restaurants() \ No newline at end of file diff --git a/src/data/scripts/populate_db.py b/src/data/scripts/populate_db.py index 59a4f7d2..fd5e8717 100644 --- a/src/data/scripts/populate_db.py +++ b/src/data/scripts/populate_db.py @@ -24,7 +24,7 @@ def populate_db(): # Insert restaurants restaurants = scrape_restaurants() for restaurant in restaurants: - insert_restaurant(restaurant['Name'], restaurant['Category'], restaurant['Address'], restaurant['Coordinates'][0], printer['Coordinates'][1], restaurant['Image URL'], restaurant['Web URL']) + insert_restaurant(restaurant['Name'], restaurant['Category'], restaurant['Address'], restaurant['Coordinates'][0], restaurant['Coordinates'][1], restaurant['Image URL'], restaurant['Web URL']) if __name__ == "__main__": populate_db() \ No newline at end of file diff --git a/src/data/transit.db b/src/data/transit.db index 48ec910f63d194baf807aa610649b87cf79ca2e9..d9d091ea08425909688869e175a248005f0dd782 100644 GIT binary patch literal 49152 zcmeHw3v?UTc_tu8qy)bFlq}1PMwYFIr6K3ed(@*N0Rw=dDN+(iNl}~_x-)Z!garZ= z0E(iW*d?_|wr#q}Hs|DVvU?IYP13sUde4cQ?lwuQI?ZM`@%FUICQX~#o6T|dtW!6M z)8=Kj&3<zj@ASvc0(*4j~x|+ zSS+@Pf4%tE_(|Zw#>N5u##=+ZCs4g4t9-Sd-J|I-7X?f<2|i~Vot`AOfQ z>%Apghb^z@0*34F$Q zc4GQq+dE2hjtSG**G~(lCq_@5&bGhm(i)p7SC)b^+Fzochx@3m^GcD{3bh4~3ApFu z?QbfUOEcGdtuRMtSbm{WT<0SfnbUlwdHegd?yha4yW_D!$zzwR7mIMC@^qnA4vzCJ zDCJ4Z!RBRuWB0bf!T6P(0pjzOLaD|o)yCgVZCE#+{h5I-mH}p=T)IBk9;>>Q!hG|j zHPHQIQC-DncvIIld+>VrG$_lH#-py2iLPx24#dv|xYPlyvWp9>|9^nH0}Q+hHQCU$ExYr2P&diS zljcM2%Wvi~rH}u*f&YqrA_O7?A_O7?A_O7?A_O7?A_O7?A_O7?A_O7?UaJTsIbo6i z|5{z%h^`2M2!RNJ2!RNJ2!RNJ2!RNJ2!RNJ2!RNJ00Ex=j}Lq&hX13V2!RNJ2!RNJ z2!RNJ2!RNJ2!RNJ2!RNJ2!RNJ*9HOu@x<Y>B5{8-FR_2BawONX5EDaN)Dk8~C zrtTT0?24MC=^|BhDoU2he3ddsk`2}L*w6#>GmUF5E-nsXxGKNh;8H8M_9bISwx_zg zSLor9!rUC&CD>)(XG{=RDF~m@q;|Rv3Yqd^sfK@`Fg^e2uO7ZSB@}AL*HewF{cIHG zPg^O?l=VExWHMs-WrmaP284&W?C4F#ltBVqmPT_Jjq8@b@$>Jm3MUH> zK8RJOjlY^~2xl=qcNHsOA>RM=*Pa6wZ$9$;Be!=33qQDpsyL79QxT&~F&s5dvRclR zjFf0+^t5PeMn=?AR@zk3dP_7eIm-amb zK)%`YH&dgX0ptxdpS29rkTRAC2qe+YnyN@F&Cpdlmj!R=Ig5x4=je`!1$VG*NS+@X zCIwqavL{L$2vgN513ze%VVHy(L-VEPc40L-Utj^v!GPbP+1gxjr8k+3k!?pn{n?RB zh0!@-no)t4JmF;d0!vj3kO7J$WLrQ05KP#OXTcsb#UFh5JcPlQpZetR*3O`jnLe3U zvuVwiY)#DSCJ{+m%Ze#oSG24`j7(b1=D?ULb2P`(RVoqLS4f^8yAG2?-;jI}2r*Gn z2-6MU(FibM7Hfrra|0$Y)43X>^B^_cDz|J&#zwZs0LMe)cDYz&GmHxbA&uF}#VX`T zLrjEX1PF93?_M6bIyLxPcgp9!(-{Qp>9p9m4tU{MUbRecyJgCj4n~($J0;3F*%Eaf zBE>RnO_6Pq)38JMITEoX-Lzaq3*O_X9(IUqnW9W|CYq2Yx}=z{NPpnHULWufHG%0v?; zh6I}~EzAT0hE6s73``1v!$R-bOtkU4gS~i zhG7uba-}v5EYI*H$|UM5rsp>Jfot`EAwZuOkQeTd;jUy%+YWJgrF$k_uF+Bf^oF}( zw!Bbf;X{aL!YQ9FltaRgW)JnMfB5Ua1-ica24)%*` zxt?M&PbV%_RdX#qK6RN>E?)@Haq57$f%XmX7+%@FDH%IJxPmE+r3>x_=-kqbFbxx_ zI?gV0D+~NGO)(}M${40lJI8<_#wXu@&t?Gkw|8Io{8VRv(`UJtPNenCpIOt&7_y-#mXTI0F*pbO(7=HoQ%xalIH;ayK>k{erYfGt zrOL`3iDYd5*Z~mwrqNWD&CN5RE;`R}9fP6nv^0!|_&q^u_yU8XkcE7!EOCZ7D~v#- zmFgA<+4;s#y&n*~M7%v6GReX_%9|twD-4Rol2cjHNat)(PD?VZ5P}^8(^NB|Kd9!* zs!UCZYO;#=P+eA_aeW0ES2lfB^gRHe=!VK9VyLRs5UUN5LIe6+CF;ul4awN>7*{Y0 zcB%wjJIiRLRuwYfg~}@+q(anW+kF&$V*aD|0it*R^&Nja*BLJ1cU38h$AbG3{c_pM z%OsoAWjQ0}(ivHlWi2Iw+?-{lv2SeKR1FC_7lymo{=*B~KR(BPa56 zeBEb_y%QW8zTUQXOrKf?gK;9c9$06MzXzqs;0E^`z^~*T__YPTYbJOpANGH=|KIjM(qHZ$>(~1S`hMK^t-e3$ z`*h#qeedaeOCRf->@)lJ^=Ts4 zv!2iQe4^)LJ&Qd~&ygOnX9q4C{X_^v2t)`(2t)`(2t)`(2t)|{Qy_3_VmN*lK8bLW z&Cjz6!cPce9J(>FH-4sYi8iAPR`?9vTSdsGd;wo1X5N_?jh`;LMJC)^W_W(2QYaP? zkeHe;a|bLz;H8#?@$y`mKdBEV-1zitd2YUn?_}fg;&hQQ9^4Rk^gIk9 zpM}qYYhPlOlGdNth4Xn53E_WFy*Y89e#kRRft#oFCI*A&CUI+WUt(u)aJ*bwsFH07 zB{<5#0jLVW=i;n6sqry3D;8-*@b;dl3~x^uIF}Z1o7F;5m~4I{;1H*N8u2+MlzoAp zA_jLw-7@gVjKBUM2V-gx(L-UJ6${lW2Wr%1f*nN0r)JCZ2kuB5j!zZlSh@H+-rv z13>Gsfj#_=C)p)d#CHguqZO}u_W;_TCz;3JS48OPE>7(vt_kHD%6AF3#AKH0o2s6?sj<&@2TFw*LA#5 zynv``$=ks%HIA^)>N{`e_chKc__n`*(ZGszFt&4o#%X0in8erPm2TrakHZS6b!Qua zqiuTq`Xj>!+WuS-eLiP?&>WZ#(OEgopA)q!Advt6$)dw!c^HU(Be(}qwCM4u&s{q zsOXKiC#?7}x>yjhwb?>>KIDTS)O7tywM`6tx#Sv^}5Bz|pnC%G#z5`Wn&#}>XA>jwNca~xV?-`#w{8{yd2cyJfrqHDsQ zn{G(l9d8ZyZRC8=9u`aR<>f(_wouT98x#AtvmaR|6dzgO~fEt7UloBP7bR*qWpi9|Bv$j+z^WL|55(G6lN`>{C|}HZzMCL{QqjL zW|aT0*Q`eQ|0w?-<^NHw_-f?;mk=6iDc}wL|6{TKf8ST@EhIm?<@=levitA5{-JBn zrVSf!OxPRL__3eLAO075@^ax0RQ3&Os4LlWV!V89=&lj$L~S2JT?@6@_R1{fnW?A$ z7&RtO{+qWJXlH6n#*gJkr;m->c?6tO2*IL5z)>B^Z0`^trl+Z{>^O|6?ltv`2s&Os zj1wV9)Xwk-WKcuXyCWIfNw`6K-*__`d7b)A#Db|;UaULB4}APfvCp8Q^~&$5_W$S% znHk(#1)TvoJ1wJ#x3z*sLW#GgY8joR4IA~nsAqK~mozJA__1RuZ51>M(OnPqS(fW4 zs+s5Lm7rX%ENN3wu!kj<`7>Sd9#fz3WHGS*-V8KHK#X zy)!^qR-+9>C)8^r%CH>MXDH%io{y5cdRb7tMs==0TLi*#X*;TwcT9nri&N`Hl1EX$ z%nREkv%S7>H44;Hj(h5@;-7vVbbk8$u_J1S=+wh*7niwKJDIwJ(mKN-D3UT%vX;y+ zD(+C8L1`(UTiJD6GPa8^-<93=YJfagI+VU&+tiVAKJ-6PP%4zqCl7q7GbHiLia}gn z3}fG-7!=8gC=heY3ni2?;+aL}Yi(1tE){-i8W8-sZ7f9la)N&`80ccZ3Sa3^mgg zJQ}$BUH@;Kg6f4}-WodL_T>I~p9M{X~nMFlEzs7jcsOWrk`7es~W4v@7F()H%8 z$=IpgT$1e=edWAIn5P=%fqOy}lnJ&K*G@n4@JD_GQlHrQ=dWJOV^LR7t3{a4@SMWl7Y;(lI!8Iu3TTaz(yHaP$R+LVdW}%)W^l zgd*=-S{}a{Ef~BbW%=TXmULXGt{~T^+O_uZ^Jl*NCy;o*{JYO~zqvEi!tD)O7F|!$ z+nLytnWPz}rs#_5G|PD#0fPsSJe&z~hb=nm%C4JwVtWT4;`Ce@3?*nipV(MdS(b&V zp_aPdgTm1uvwEytW5tHTYGybaPe6ioec-MqZoyK&kQ_RDpfki{c?05oOGX1)V6mXb zhWBgArlPr$t`grut4TwHHHe29Hp_3fh4dpXotvkHnbKiPL&Jdy+|)fTEH-R|!>Wu% z1XDsQoT==&z?K%v74I-UZi77N~C`vGapKJ~!j(s*GyJ)0RzO^#Mm zse481%%#iDCG!oI^!kgAbL^Z|n6FLxw2+(Ati{X)>!dN^yOk3c&la4qu?ewOGOgLE z`4ml+E0?mVPys2@HSQnHQc;euEkFJxewm-NFL}ve&SjC z8=XOabbKVQOS+P`GL~v+N~%qlYUy+aHlT^^Xr>I&@^B83n|)BhvAag?5ZA#<8omqj z%8?tZRu@Yltspf>We0L;&kf0#HO75~`?Bk&wO^rVpKG7ciCArp0;|ej*}Cg7fc00^ zTOXb53|P4ecqwlgIbG3pyA3HksVGJcO$ug4Hbo1y=!%Sb{d&h(aO|NKvz5PHL-I^t zC#t4U=NhYH7AbzQg}GPG4}fRJ&?5%vp9$G_piTGe`<7n9Zn}6w&Z98h9noH&vgxj7W-#23`rz zD8493u7eIWbb&)+O3;~TJ0iZ$Jf+pFYu*7^uslB;-q$eq{|93KIoAJ~zOVH@)$@4r zqg%eX`HAkYcYSfwQyZU4{O?3!1AqCS-_Od_fn;p-0F?X6TgoqAb<|#iRFx$uXtZqh zC9kXD-OmjE(1Fg|`pzFe{jtvMrQj~bdTo@5yK;G9R9>@kFvN}a&1I0Zl(kcaYD+mA zFIm22D6-PnT*0yGx1026&{KUwq13e$qv_>wjw+l~ondGcE?#7K=r9sCsdM zRv4xa)Y$Oy#e0&m!-J=wN#8PZy+W;qw3cPHt#xQ!9LK+Q`tGN=rSzufuI}y(j+O%B zf$%18-jZoJhTDz|DjmQ<=}O-Hv$VWWo5!|ihlhgV|zFvMUR1nF`$zfL_} zX!sfJonYv;<+WWXi3?TOwf(kGIqzF~=DA@kum0H=?|q^}%L`=^jnr4 zDArJShY}7zFj*TS%6@bf8$Wp4Up&aWvEP&1x8dQ=kV;ha=kq1@DJBoJYf4v_DMC?} zYj{r660F-4A@f#g5k-387>dBINbKQ^jLo|uo}PkHxLOJsMl;Y6+B%M8-TiuW`pP%I z1S%hVaQmab(itkN!L773n>!-mUY;g3jfLPC27em_6m7M-ffxk>ciuI9km0A+139a^ zd3dGg?qqE5P6H_9N7|}Cd4(o=Rauot76b@s_w&Har=}i%>@8sCPgKvn`@5Z?y?z7v zw3aqfW-2Xavz9Do&7302T28|-hE&EfQ?`*&L`-BbD7wh0CRx7h=J~OQo@UW84MmhF zh7g#pPYo(j*V7Qml$$PaW9wJKYS$%R>>BL+3U3E6Sb3kW*|Q0JDz7Y`=0BZjuO=Bd=aI(tt-4^H$s#a z!3v)pTeo^QsNuyzqx{t_mtZjkJ~F}zVKgr=CHek7YH$6$x1addi&rQATzl92-3L3g z=2Pjsj2>}0W%IyIig4ph7gZ^TovurUmQp0cCSU-ZqoJ2xp=cwQg7?6JRz>Q$7{=nm zv@<;ekt`;=%+MsNwyk$1tXr-IWy`^0S5aa;ynO0T#51?U(OOZwUl8uBscIB*2FCCx&QKgyovdHkEP!Ji=CnCWC~Q}RNc@se7FjR5phf1 z5T%TX|8zB-!+ZqEMgW$dly^T%Nj+xA0tH+<`B%#P+Ki^ zF4HOHW#%C!rKMnX6-kj-ZWqA%JGmEq;qq*vr54-0fHI``c%a zzU{03y+e@HOW;Lb0uLbv6!@qJo222u7?v%XlA@_*E=`maWO>?#ki*m&1*21B)6=O8 zA%|nQ%@A^`Eb1^}`OpZA9-+*@C>B}u%~ovqPz|(b1a}S%^PdSKuY56$x=3 zEd!R7QyE3gr4&V^mgVUPd{K(Tg{uPGN@e_c; zTMjUECg?Z_ML&BTH~P3Oe{pP`_Jew~FR%%k5^-Id;K0p~#lDNZ^!*Rr^V=O7p@FPh zX!xiu$ZVAmT)C;LD@iJbimX)!CAJ8o7VR2g4@H=cJT%x7)5ex9PUz|N`YC?uP{BT= z6frPBsL!r~771t2RBP4!Lbk-4aM0JnZN#P%IkYx*2z%mHm_@?}It4y~LP>k*(D4ln zWzj7{ECf*3vd3Jb&1BU$4wxkH|Ko3r4cydE`X+iOdq$G}7H{+WxWTQ!gMfLpO&p*cA9DD(iUV=PP1Wsr$tO)Q49l~G6t%-ikIid zCN-Fd!FisDaW(M55p*ye2P0b?MTVKOR$TMurum?;4#Jv-nWvRIEg0#9yDD^~t;z1% zp*}p4*E*qOT@f{Q>#uje01@@X8wQRJbVhi$Ot!(mBSlSzVyZr!1E#yG>vATY(XBK_ zm%t$>L{=LCn&8+%oTXK?!M=4cqme+axf)%I(iWzCNEX*;r?Y1YEq#aA z zwzjo&-B8ZjNDg$I-B_bV87wI(x9eT?5EREE{-QBL>D z9xWMjcW|5j?Af297q(u00_&)4uHw6WfyvJ=mX1n0n=!)YcnSnwX%QZ3PCs8KG@qZ6YOBcN1PFY7Sha3^ATCnCidNyjkY&xXSSgJbwQZNd)&&>TV#&VdMY z+c+N?Nfai4q!u54c~rsNw=hM0BqX}o>Kybtt1>*P&^65i=^uy;n zLo{rxFcj%wY1_zwOYZxa6e_EzGHZq+5w!@Y!M!#|P7Nmy8}5s#%bX#+O6dk zk_J!BMz<`l5#v*Wvp2o|zupE)vVZ+M4?fx%N+wAj`2-Bh&1A)#hICR+N@I)W4BJ2` zMUis^xnn~_4TXx(s^MX=bi>Q@W6NLCm8hWe|u>ou6w-|S=#;GC9nAYU9Zs(K~#*O(BQ?Qnb;+`41GUw-H|}$Tem5AM z;FW{na>L8#hxn*hL?)LP&aFE|A>?@i*#()`CcTb1<@5gw2)y*I&;7!C8&~<+=4L}7 z*HcIygPqkTzdVI%#Bd$YbTL$~byY(tjPzK+p%?+N#$jB(UxZN_MioY3p9)2#L(K`0IhaXo!>K}Z0?ya4&JsLS9iUQv@LM_jeJ(HO(Rg86(kW*0hIkojL z+wz5jP`5!839^bdrdwR=mmmfC>c>*~4diuVsX05F3F&H#)C;Yy2RNohuvQ)x-njQ=bA;ea<9AW^R`6d=Z@ zPIgm`1ofnYxb^ac8(jRhl;N?c6KN{U1S2csm` z9~_DW3eQMLT3nT9@+TF3o5bjJCLs}M7b^NnNlh$#m%r+k?G;>|zuv~ksf2Wyb|i_% z%uz9uR`lah!?IDg@s1m0xw@@eZ6i7^xdEDVBqOEbQqf^~0ZK(*o}Ut@rACb0a$K_U zbuO3f#4258QXW%9RhG2;o3a8q`H%kuaEpnADruVTk0Lz(_fWgbxVnIAF{txqjk@!1 zS!7ft%_;}}YHL<7!Y#rj!Yb#p&P$y2ju#y_I%1Ax_Q&m$_Va6gR&!g;VVq*UECehB zECehBECehBECehBECfEL2=vx+j3>zR2c$F!Br>^YC%;4=)!LieNmQAWR1!`!l3;pL zjEjw=e}uT(Iy}wYfzCY>9rd3+6{$ac*FS#!&wNWbW@8`EN77MZ6e}?lojA^BLPuM6Ys24a`@8j{@YGs_%DC>?Td%~ zP7IInd{|5+^x>UtJ4h@g&*HIDPR}nY;^eSX9dGstLT7g6KAFlcj1>@ge|G~E4VLQi+4)A>6yh@)!#rdS7l6_)IB6}4L zs2VbfH<6I(L#PKf`juo-g3XJV+401dPkw#PsferY;OncT-WrbCUVH@#x2NT!B|ZE8 zf8M_rl9ruU@%^&h)v&b(x{9g1tu3uPh<;qcVboZ-tK-)&_VT~}!=uyQDvsFzr}v3T zC7sqWZ*jMf9#Kt5sl}}6ge{dfLBvBhKKA;R-IdS*J`c&MxROj|Wq8^@-0e5Nn)$Ey zA>pl)@BU)i%V(9?r>F@rE1?HUoaYGOeYR_*72y&cz+z>7a?*&>U+Kq5R!1ym6t=%W}+mgp)rhsyPX5 zEhG|`^>zd)5e8Jm^);`){0u}qK77Y#ANH4V%$EHSA*wTa>ChEvb+;0~I6aeD%$SZ} zw>@{u7=-AFbI8Uqu?Ww5$5lBlrbIX{lRytcM=LC8lBmWfb6B>zn@JQ+oJnzddeQpS z^%u)SH;zL5%gjGLP*;r#UhDi6Bm6{oRJdQbO_&fygrLwNoG;V~Hs>?Whn@F1Z+G5+ zQ>>STfQ5jCfQ5jCfQ5jCfQ5jCfQ5jCfQ5jCz#oOcO4eg5)nTh$!fvz~4KmunS>j+_ zwtV+Xphx*;mXn@r?n*hD*iqX#x0&qA+3kgA(+k#6q2~5awZLw`2l;kR6+OY+o2jHH zm|877RWjc<;dZjeY=3MUq@tGHY%Azxlsnn=wuQz7dmg*RR@h`H+swwQ&ZT8Q*Z<3e zpE1H)_{Vx#2v`VM2v`VM2v`VM2v`VM2v`VM2v`VM2v`XGe?`Dv#@5@6T^3h3Y-mj6 z_W{(>=YNl_gAq;(*9y(fx3Shg?W}dY;`p*7>{w%e)&3>>LHn|r=W1@Q*eW?0sybffuY9NSiOSK+75vlu34SN{26qd05m#Svx*}b1LHU{TZvO{GnY|q)gfJ=QWUeU{V#&MjdBoi>18bJ&ZQ;lR3C`&W*q=_^%gurIe;^GY0 zOCV@q^GiIPQ6-(d0WJs`=nsuFG=MQ8{`|kCKY)#KFL~r^x9^?fm`*Rx56el=P7?XE zvs4*hQUsZ06B(xB2(How9?#ySqNoexl`I$P!dZ}4z#ht;NjqAFW1EOi)TC*uND~Q( zV;B$0Q1V(Zpq5ZlRUiZI5GZ5AqN;+iR7ybp+K>=dlxfn|(grbCjNO0FF_;4yPS?A= z8pK?jrP1X$W2NAN1i!4tGjx*?ctLw4Z3>d4t0SZr>ZN>gqTMh#tg@_IYW5H>%w{g?hpzyR({Y=-iUs1?ST7Eq1GDRLMenGFgK!w2H@9&O@cShV zTu8%>dBk#lH@J(hpIPf~+IIj3jY5o!y>jy(Vo)oxMNb8Y^NC3S1K^Wo zam-dKD?T|Vs_COS(o`wb4N3iASCIvHI&OWwde;%ab6)BzpC8;20y}OOE_^(*SV2 z{qrweg8wR|&#c$*72=91kz+L?eHH4EE|b zF6agNC`VygGte)T1g)Y65aF5BgalPq(VR8%CJ?}2N;{oYu(wb*tZ=ZYenC#QhIqiG zG-#dcp8A)=-ailcf$|8%80^(-6k)_(8Jt3|n$8yld`45osECiWJU)CUv_Tv-{#Dq^ zG0TrZ%%I<|OE@bfd{TN&Lelh!a*`(3CgP*s@0YOq5Ik8>?qH$#sWmSSLFVVb`@P3% z1KmjCA)X&hib(hzHP8rhH|V)W7*Wa|Ef@w+`J%5=@%nZ{j0iE4sQU6u7OK)Znxk9? ztkvBCz}~o{{$IZat)ACbzBlM=N9KY3XkHU32Hq!4VjHGjb&Yas6iz)=dOT1GzB4LQ zN$;60)Rl45;tAg8SRbW83Sa#4Ltnkr%`ro$bA56WEMHKEX-OgtUhk$sqGeG*Jg1WL z4W$jv`xCw$%PKA9sukQ}){l+C!7G*=?r;JntGoj-ULnqArbz-QWe zw*e-^RiB6?{E(Yd6QOpF>-P%eUwn#QcT&BVJPJt<{&wU$Pwi~tn5IrDNu4KqU`*~_ zQv`bM8qgzp_^t#^lNz&$&AQ|Y{n5|<%LryYkjKokPW zye?I!ggHa-zAvN6al?3+b!%$dw;zEtoj=^U9{G#%782)-Q;gWAVOWSr2ME|(pG9TV zsn~;rV@5o5Xj)Dw7(FYeCX70B)(zB>fKL&@q@4ITL;JvL9QgFsoRu{CY=SDSuvoNjdhP3 zvNnA4<~dNdT^C*Tx$9pZSj#aH!~x|ulU^yM%2Sw|fIW;VWsrAsnipOIQ;=L>bzOHW z@(&)+xw##y5zxEApEl^%g~Rfy&>GO`B^RGsjgdO@c;_lGkKwys5a?x{UZ0-|VAR6Y z%_vA6I&T_==-KaEiGT*zzZdf&6v6Z)b{(7z48jMXnSjl4zg_+tj&mD#)NxEbbngMh zKOs&ihLepsM-TRB=RDGdo9VzSP+gw__pQLikq@YyDtxDCi(I^RgV%fl~r;%+n_@gD7Q)0YleG@VB9fi<$iCwRd}#Vfq6Q_sB^4;@KHsTuA7?r{6mH z9t`+a`%&RbzNOeZf??14cF+aNa1=Whx5+xSE19@A9>boQ|7UqDZD>VrZME!ZsbkceI47~f@p)*&xMzAel0QY>KNcSWF zapDgRL;`!r-hnXj5A;W(;g~-f9OyqQSIxIeqkQKcy0Y7ReAq5P)6Sce>8wmqu=vj8!rrMlscT9}WszN|6dn{-3q%kBjwLv$Q;>~nNE8XPO^@7T}SpSM3|zu$hB{U-a>_9^=%_Wkw?>|5N_M%8 z_L{xau!070xBiK_*SnjVV{X^B(^Cq1ubxfpknN+JsBWr6zW>=w<(m7O+m^9S_@J;& z*_1tN;inEZ(htl%#0}I`QyXx713QA_bLs67YDq!U&PAo0+imMMvk@@oKeA`GmfDtY zg{`KX_rmtpDr#ME&uXO|i{C~Qs*avf+*n#cRV-)^EvKQD>-sFCn-og5a+ZFA4dRRJ z4^))y&(s#u^o13y8#Mjmwn`;cs< { + // Open the database + const db = new sqlite3.Database(dbPath, (err) => { + if (err) { + console.error(err.message); + return reject(err); + } + console.log("Connected to the SQLite database."); + }); + + // Fetch printers + db.all("SELECT * FROM restaurants", (err, rows) => { + if (err) { + console.error(err.message); + return reject(err); + } + db.close((err) => { + if (err) console.error(err.message); + console.log("Closed the database connection."); + }); + + resolve(rows); + }); + }); +} + +export default { fetchAllLibraries, fetchAllPrinters, fetchAllRestaurants }; From b789388ffc3044f6218ed7597a82e0c33bd57fab Mon Sep 17 00:00:00 2001 From: Lauren Ah-Hot Date: Wed, 26 Mar 2025 17:30:40 -0400 Subject: [PATCH 3/6] temp commit on master --- src/data/.DS_Store | Bin 0 -> 6148 bytes .../db/__pycache__/database.cpython-312.pyc | Bin 0 -> 2471 bytes .../db/__pycache__/models.cpython-312.pyc | Bin 0 -> 1912 bytes .../__pycache__/libraries.cpython-312.pyc | Bin 0 -> 1680 bytes .../__pycache__/printers.cpython-312.pyc | Bin 0 -> 1734 bytes .../__pycache__/restaurants.cpython-312.pyc | Bin 0 -> 3925 bytes src/data/scrapers/restaurants.py | 70 ++++++++++++++++++ 7 files changed, 70 insertions(+) create mode 100644 src/data/.DS_Store create mode 100644 src/data/db/__pycache__/database.cpython-312.pyc create mode 100644 src/data/db/__pycache__/models.cpython-312.pyc create mode 100644 src/data/scrapers/__pycache__/libraries.cpython-312.pyc create mode 100644 src/data/scrapers/__pycache__/printers.cpython-312.pyc create mode 100644 src/data/scrapers/__pycache__/restaurants.cpython-312.pyc create mode 100644 src/data/scrapers/restaurants.py diff --git a/src/data/.DS_Store b/src/data/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..8230c96cdfcc0bdb1765b56334ab786240d9e22e GIT binary patch literal 6148 zcmeH~L2uJA6vv;tLfuVB6#{7&q)1$=QpNyHT(U9_To(ifKqXlU1cF_XbUjp+a)uAX zl~2NV;ROF@ds3T%5Eq0hKgs?-+wZ0IueBW$kr>U=+eAGgk|?b0T~xm?9_O-QE!VRL z75t1O*9m-n*s&Y);t6;H|1SYvyDds7d7SIB#`+!nFlN5IiAm<=)Wv*Frg>IY!{Im4 z+G$_9+>sr*CtpTy^*pNLYF3Wo;yJgTXq_h)ew;kbCi8Lc+C#0XI8&3tBxLCXAupa~ zD%JCmo~g7nxse@^UD+M?_7{u8qx=2f;O=nQ4;DuU!+vmh=iYMJmDg|HdT=^^pMOyL zlSM7C9~ryi^g=$vF>^VO3YBa125pYLN{Td9l+kNCr3rnc8R`-}B@O5uYE7!a=nL{z z8A(K)Q9&_Hky}u?9xrbEDSk`%Zg6O@?l?{vIDv1268}Q0R2$J-!l*H;2BBSW;s22W zsSN&LbA^iU4NhqOrMWdCc@OaB33vjYz$yXW9|9EC&}nVeY#pe~6#&{sw=wwq`p-Fm z)6i*c6(cZVQ=v9hxGRRR>F5t!ThF2^DhDhgReY+e@fsd7{3V1 literal 0 HcmV?d00001 diff --git a/src/data/db/__pycache__/database.cpython-312.pyc b/src/data/db/__pycache__/database.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bdff2262bdb64cd5f7d141cd9ab5b96f125ef71a GIT binary patch literal 2471 zcmds3&2Jk;6rZ)%-d#IMl{O&01S17CHF9l1Q@Kcpkdn}*NgYVi3S5x2-kCUS*1KkA z4Ry3giBuFt9Fkj738^`7NXb9IkvkHK6bVIB2_eCSTa<9>iJA4TZ6YV3S4N&UZ{B>o znfHG4TmRhKn*hB0`rdElLL7iUL?&Bx$KIDGnP z_725_W-Yx=aCXcw>Q!vJH2c0oY;0NC(OPW`f0+$#lr`N2-Ow)w-ODB2_#E3PONo(1 zx~peV$!|+K#hF@z^_Q_*M5SW;R7Ai-<(2kxu>}51o?jbT9r-S|mmJ)c2ZC zk$yjD-*criXgjB(g?stq zU0~K8Z$whj#lZ=i2i1i-w9FFGNdua;>p*t_LlGrEI*TL$gEEny9iN$l(=%}5Mt*u` z98Totrh}y>rtp&G7`jMl8tMoUOlcZg+~C#`=9*)deKpnE@b2i;o$*_l4q*zukBz#ExxuMcO_#+C%b_?GXKLt^DS=OJ3VbrH<&()GwWOEFmGb8f znl~aMM6A#BqfiWDK%VEu9x?GI7b5eo>?L1a8r$#fU%R(@@7vFoKKNZutmK#T2Ov72 z9>hWN!upNPkGC4T+M9nX(b$!P1c+%XlgpD2Ztu(L%B|&F-%S1A08=9zDwuQwOud!g z?tlrF{tGV%Q-hebCV0a9JsW0-$XO~*#Cv7Qjt$8LZXp9gJSW7dAP5deUgR@HeHIW* zZoRPm_O5nSfFO4=2s+=bU>Tn^e&9-1)=)gE^>o(oWH08$=amlbP|n~ErE)fR zTx@X%f9Nh}W!-kWp<&|;G_+T9K-hYfGX*CRTy{v~bQ(!Rv#OVIu}-Ws{0f(ZavHT9 z=a?p5G$sqDObwlq^E?x6$S|$tg(-oT4j73z1?&i6lTP{rI%!{wyvAo;B*;DnMs)OnlewPgK0r9VZ-sR#T zDoN54(DwvroD6&V)^e*k@vYo^>rw7u?$OA@k?m`HFAwh}uP&+kiJrBg)uFZYYI^QtB1Z${Y~EiMQY;*|g3^YF*H3ygPB%UGJLN zG1OKnIZ~@&aWJ=138_7T@~3bk5(--h5FZC_7U5KJU}pW%xUvfp6(jAueeeC=ypMVF zzDcK(fUO_j{h^wP0q_fFMvL*`I{A)~2fzZB;DGXg3S|KcHF1bIMQYM1X81b;nV5o% z9SQ^Kh?e6stY4X89k(J@^e{Tmlx0i22g(VYv|_i<13=^mz=^}5nE+ZsOUT53=e*XA zWV7ugCCK${nzbtC5ET-B9{*1MV%q~ihY$TUjF*QS3vCDh?yN9)Ujfi0xLP zKn(pObL7)=Fw#SD~I?-B4liGSs$os9rB_>)X(=D+Cdnop*i= z=T^1_i<+*!r~hYlm8<{3;QOq8~iR_Vw&i6 z*7*M>Q&wJzn(Qsn|DLRi>Uj2DvVOx=kF=&_`4X;qvIXoqs$!!_oGH#e!IiOQT%kd8 zXc;Mx{Er;=7~Tvd({o*H(m*P#8dvlB`$61n65k_%gzsRpNih?6^}0=inCW;v&O`!{ z+YmlgeLij=d|Fm^eAY;c!-|ZRf@e1CnAKWY^N5QbN69xD1^lrxoMwtmZy^(17_t{C z$h?hROYw=RScoFUswj2O!j7M9w1Tu5zHejjya^u&GIzbpwhs6;b*8t{UHLM1lzOdG zI8HD0Zgy{e_F?DpPh#>w+t>O)UX=d?axBqb1hLdiQHrBd40R5~5(n%1>-RQ~Mfu>$ z{+0V1C(IKFWW%T<+ck_#itvdVL_9wb8;ITtB+DkeEdmKu{7}NcyS9T3Bam0~+p1A0 zmIATn*)EAQ(=Z8sAU86MuK}6o(mA$0mEUHmHy!*AIm=i+*!>UK){hE;@C3|10f~Mb z%+2?5-5gK+i#HzS9_AjcJY4zet)o{j9;GgIhV~TkbA@e&_Mx(8T``;ebr78db literal 0 HcmV?d00001 diff --git a/src/data/scrapers/__pycache__/libraries.cpython-312.pyc b/src/data/scrapers/__pycache__/libraries.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3781a20b60e24a7dc72decc6d6f662d9503f6137 GIT binary patch literal 1680 zcmbVMO-vg{6rTO@4_+IC6JVRvDYOLY6xbF;X-Y#QimQT(qS7>~iV~sq&SKVCdzYDU z`O#W(r5;R+1W2s_DH0r0ap|$g9x7E;FSwR!Ym!Ql+6#wDw5W&X)LG-jIdIBIGw;p& z`R1FaH}gv*5&~>|9{e@^mjJ*Y?#3H@Lpgq#DO&&n$eCcqmAuJi`5B%AP?!;*_yw2| zm$`)R&`r3N;ArGsUCk3?E^pqp^7hJy^Mu%VL{SzO7l$+p<#f{=(qUdvQ&yfRS=Gi0 zG?1nA2d$6A9~T?E+5sDop~2yX$xdCrV? z0nflB5xvcRBd!35i%=g9$OJvTDIvGtKq_z!mywztPlG&UWiXm?yu+Uv=krd@M7-Rl zgnrLk5YnI^IzkfM`w4)8&+(lZA2^NolcFT+`-hFyx(jf>{VKbm@7}3qh6{ey+?Gr{zp;5szCEtcNr{4K)59t8^hmIz0Z) zDgRA=Eh>V7R0ug@rpxm;r4w+1js#mD2p%d2)_xX&Bi?4X9GivEB1lAUtw{4EYYy2e z!aAZpqNYrp@&=@>r(OMV-R2`H3ocWhAU5Hl+qG`ff)ybY3LUx)d05}_1KkOM2B$1d zb$g!%#vnvG##9=&ECh|5%3eq!OoK>Y$m^J3Dx`Hng*zWjAs3X2L|-CQoHKHe2Hka5 zHBBm*dXD-qL59tQZ5o97=1fZ^)UVpMo`WZl*tT+5ry^!=7y0UN$Xdiygep;~z;r5V zmWgS^Y!IBysac%{)CS9#)?_*B9+yi~Zi!0Im~oilc0V(oYRVl}J*=2&9_dQb((+k7 zN3b$}@?R)pww=^JQwUOX*dU5Q=2cA{Y-|Tps&=1UK*2~;T$&2jkZRZQ>i-D)G=}VD z+IC`|H9aGGmGN-jKfb`qUj%Z z(`ET)DN>7dZY*r}Zw+h?RAas6SZ^iPR|?ePJ$pSnpX}b)Cyxi<6~bN&zX(}@lM literal 0 HcmV?d00001 diff --git a/src/data/scrapers/__pycache__/printers.cpython-312.pyc b/src/data/scrapers/__pycache__/printers.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8a76130d5079888837b05be5af987ed3bb1c9c4e GIT binary patch literal 1734 zcmbVM-A^M`6un=VJH)J|9lqu7h**mxL zk!eyBABvIyiDn_$1RgQ`6F&H0)Q8p@Tko!kiN5fL5+B?r@01SZffv2$J?GqW&-wLt z&)jpr4~K(*j!*nQX1?VC_}y8!gKZioFH++Xkb%r-V9wF3#^l&JmH{$1$I1MsV2)p9 zlAa?cVJ69t@Y{-*$Lc~}yJzIhwT~7tHmy-XSYBQplnkURnl`A&c|lAYc`W2a(-LGA zDH1l&s_>R3t}1AJa`t_s>bO@DO{^OFc+b$Qle1JbyslP|B{f+#l#fb|&XW~N{t>{8 zyP0(l<2tCu^uVX%uo+hX*g>vVV#4M$E=bwpR{3M*}W8 zUAEoLZ0W(@faNyr^EGGv)z1fV%9!e`TgKa<~w*gV{?h)~+SDx#%gd z2_-fIB=!NVcLCxnZj24xm~D0=!h=Gn0Bt_o>F&3*?X&%MNRB+_+zbY6{J;a7zejNy zIty?aB%?EHa1rOUK~qGQf`|u;X-y%lDwDRedw##+^Wd~0uM!p`I^`qh?jq-c9XU7h z$gv0`60pYhcPBaGzhg+E(+vcs6iY&CqcsSeGz=uGx`-7k86tk9Jj^Q=wg{I|FyUrD zxPu&E!eeCx6MjL}W#V__yr^k}(-fU}EKJd;GBp(w&w^%%n0Q6gRCGBRb|5X&&@F}V z7VT){X}}?4*&@6wVv%svCcI>5771%jlJmNlQwWsLF^x(srt_MrFOiP(9*2aUASUvRP6u}>n&P{vWemq%? zbQhnFR`0A2v^~)V(Qon!tI;W>(g7AeZJg#tNi?MH8fIurw*f=qwAwvnJT3>O?r^io`g$VuQzTZa{8}P)33k@H390Q;+cOeXUKU?xs3$6bh^NbQ*q2O4D$p; Vegz52!KW}q?u5V2MA0DK{1;E5o)Z87 literal 0 HcmV?d00001 diff --git a/src/data/scrapers/__pycache__/restaurants.cpython-312.pyc b/src/data/scrapers/__pycache__/restaurants.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..733477b7cf38fcaa0abbdb1213824a6b1c9b70c5 GIT binary patch literal 3925 zcma)9Pi))P8Gn>0iLxb2mL1D~k~*%dw$>WclB3y8=dMnSWLb+0SW^@YFc38PEK#ON zmZU7plh7S@P_P1-PfoB6>9$LT?2w(7<4(KmVoe>Sy=+5(UACLnU?`B&K2oG&S+Ow2INARvv$ACW$S@Z?91%OUpB{yEX-YE|h9ogeE*bR+kP{)+Es z=og-&-%nF{mL2m=y&=v3n6@@!XL4Q5ews1n>jKlmj=)#1wU zOEs8oOraAH!2v#px)#DGSQzdiMovJ4T*L|Tuq)4cs*w{gBQB;IJpmEvB1S7i`FMZ+ zE1^obGE#|DqLooG`txqRDD?TSX#{MZ>eq2O0bCpsJg`aO_)CTvb7hj~%PhOsx3GfmC7#ImJQX>ms~BvUeTyudFCYC-xJ?9RPQ zryy8OE@magU49rm9vJDe?Ip93Sh4?rY&OAloRw5X zatUqE0mYBdb~Vve>U3GjhqHC2=E&8REiD#~2}P}44H4lKJ>ha(nf*CCUtJJfNw3u{*U5@Y}h z?iGz!x+U#Uer4lz`^H!!vXyf)3mNL1%nDz?gmL*o7`NgiD7$4Qqpotd zw#475(1M@PH*5=692%0qdWU;t_=*Z~O9}{#z2J3=7Uw_u{s()3kMX9}SqSUa4_Cqn zIUR1uVuAQ&zNiQ}GOXAYQcS#S62=;6piJnjYN{k!;w6*YQgzOh3Ro?gB%tF@i*V}^ zFI=01wzX*MdLw>b)372EpCGFSUWgG&BY{m_Eg4uR0YU9}pl$0TcG@`DHX3Ys42iQY zUJ~Fn$zn4JS)HylXxZmBRk2K>){V;-ft#kmiM+`}=Is`)!2+Q>3GSvn;G~)i3op_` zf-f=~fXrGBjTj{7eBbjNvmZh91`D}@9|~}w6bl$UvsYAI!LrP*YuX0hVNHk!u35J8 z8{~pBys*g&uqu%?bb+;^WDP;*HOt{66*-86}yYm(-4vM&R~22ZwNm^e+y1F zg41oZ<{Q7uw_;-l$^B$AHeZj;Ke%}qn{UL@wLmL2)r=+Tu|y;G28<#T&B$y$GTVsE z)#w%zx)Zz|JdCklGD|RMGUw{dxx3<*%zWFArswX@-kWVsy;Yxj>+piqn9A2iogwV2wc7iw$+P#Fd(7e7#l~c&cHLFnZA`pb zyY@5`z4QKU3)4FLT5IOj)@-tM?se$=Cmfg>`7a8DBkfUiCeeNsR(mIWJN$X16&d^b zhZNM|YpWPvSHFJ3Sg z?H5kqyS+i}vRld)lR54;=ouJBdJ3B7%(n`vSd{U*`Xv}z?~TR+G;J?MQBP3h2@3xm iv0u^eQseFSJctg}_Wtah`YS#07kUzkMNjK*1NwiJV;z40 literal 0 HcmV?d00001 diff --git a/src/data/scrapers/restaurants.py b/src/data/scrapers/restaurants.py new file mode 100644 index 00000000..d3307302 --- /dev/null +++ b/src/data/scrapers/restaurants.py @@ -0,0 +1,70 @@ +import requests +import pprint +from playwright.sync_api import sync_playwright + +def scrape_restaurants(): + with sync_playwright() as p: + browser = p.chromium.launch(headless=True) + context = browser.new_context() + page = context.new_page() + page.goto("https://www.visitithaca.com") + page.wait_for_timeout(1000) + + # Get a fresh token from token endpoint + token_response = context.request.get("https://www.visitithaca.com/plugins/core/get_simple_token/") + + token = token_response.text() + + # Build your API request URL with the new token + api_url = ( + "https://www.visitithaca.com/includes/rest_v2/plugins_listings_listings/find/" + "?json=%7B%22filter%22%3A%7B%22%24and%22%3A%5B%7B%22filter_tags%22%3A%7B%22%24in%22%3A%5B" + "%22site_primary_subcatid_307%22%2C%22site_primary_subcatid_308%22%2C%22site_primary_subcatid_309%22%2C" + "%22site_primary_subcatid_311%22%2C%22site_primary_subcatid_312%22%2C%22site_primary_subcatid_504%22%2C" + "%22site_primary_subcatid_505%22%2C%22site_primary_subcatid_506%22%2C%22site_primary_subcatid_508%22%2C" + "%22site_primary_subcatid_509%22%2C%22site_primary_subcatid_510%22%2C%22site_primary_subcatid_511%22%2C" + "%22site_primary_subcatid_512%22%2C%22site_primary_subcatid_513%22%2C%22site_primary_subcatid_514%22%2C" + "%22site_primary_subcatid_516%22%2C%22site_primary_subcatid_520%22%2C%22site_primary_subcatid_532%22%2C" + "%22site_primary_subcatid_536%22%5D%7D%7D%2C%7B%22regionid%22%3A%7B%22%24in%22%3A%5B8%5D%7D%7D%5D%7D%2C" + "%22options%22%3A%7B%22limit%22%3A100%2C%22skip%22%3A0%2C%22count%22%3Atrue%2C%22castDocs%22%3Afalse%2C" + "%22fields%22%3A%7B%22recid%22%3A1%2C%22title%22%3A1%2C%22primary_category%22%3A1%2C%22address1%22%3A1%2C" + "%22city%22%3A1%2C%22url%22%3A1%2C%22isDTN%22%3A1%2C%22latitude%22%3A1%2C%22longitude%22%3A1%2C" + "%22primary_image_url%22%3A1%2C%22qualityScore%22%3A1%2C%22rankOrder%22%3A1%2C%22weburl%22%3A1%2C" + "%22dtn.rank%22%3A1%2C%22yelp.rating%22%3A1%2C%22yelp.url%22%3A1%2C%22yelp.review_count%22%3A1%2C" + "%22yelp.price%22%3A1%2C%22booking_price_avg%22%3A1%2C%22booking_price_total%22%3A1%2C%22booking_full%22%3A1%7D%2C" + "%22hooks%22%3A%5B%5D%2C%22sort%22%3A%7B%22rankorder%22%3A1%2C%22sortcompany%22%3A1%7D%7D%7D" + f"&token={token}" + ) + + # Make the API request + api_response = context.request.get(api_url) + + # Parse JSON data + json_body = api_response.json() + + # Extract the restaurant data + restaurants_data = json_body.get("docs", {}).get("docs", []) + + data = [] + for item in restaurants_data: + name = item.get("title") + category = item.get("primary_category", {}).get("subcatname") + address = item.get("address1") + coordinates = [item.get("latitude"), item.get("longitude")] + image_url = item.get("primary_image_url") + web_url = item.get("weburl") + + data.append({ + "Name": name, + "Category": category, + "Address": address, + "Coordinates": coordinates, + "Image URL": image_url, + "Web URL": web_url, + }) + + browser.close() + return data + +if __name__ == "__main__": + scrape_restaurants() \ No newline at end of file From e50e1175c32c3595042a099f7923f6bcb70b9717 Mon Sep 17 00:00:00 2001 From: Lauren Ah-Hot Date: Wed, 26 Mar 2025 17:30:40 -0400 Subject: [PATCH 4/6] Ignore .DS_Store and Python bytecode files --- .gitignore | 7 +++++++ src/data/.DS_Store | Bin 6148 -> 0 bytes src/data/db/__pycache__/database.cpython-312.pyc | Bin 2471 -> 0 bytes src/data/db/__pycache__/models.cpython-312.pyc | Bin 1912 -> 0 bytes .../__pycache__/libraries.cpython-312.pyc | Bin 1680 -> 0 bytes .../__pycache__/printers.cpython-312.pyc | Bin 1734 -> 0 bytes .../__pycache__/restaurants.cpython-312.pyc | Bin 3925 -> 0 bytes 7 files changed, 7 insertions(+) delete mode 100644 src/data/.DS_Store delete mode 100644 src/data/db/__pycache__/database.cpython-312.pyc delete mode 100644 src/data/db/__pycache__/models.cpython-312.pyc delete mode 100644 src/data/scrapers/__pycache__/libraries.cpython-312.pyc delete mode 100644 src/data/scrapers/__pycache__/printers.cpython-312.pyc delete mode 100644 src/data/scrapers/__pycache__/restaurants.cpython-312.pyc diff --git a/.gitignore b/.gitignore index 41635879..ad4d0412 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,10 @@ service-account-credentials.json # File Types *.env *.zip + +# Python bytecode +__pycache__/ +*.pyc + +# macOS +.DS_Store \ No newline at end of file diff --git a/src/data/.DS_Store b/src/data/.DS_Store deleted file mode 100644 index 443fc85bcd1a16b5e456ed69c38cc567673255b2..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeH~&u`N(6vv;tLfuVB6=KpZNRhZ!rHlcZxP&qeTo(ifKqXlU1TwoM>3XOt%C&lf9z5@Kl@w{J zD5KYONE7-%Gx!ocB@O5uye8F{_cQ9w&MT31L^JdZKln{EXw|X*QXUd)FRnRmGW_6s90c zCrEkmEK{kTkMvBXrKyeVfb7cdxVN)d?Csy{2fKHM%YLxf-yQaYz1w$}%dXtMar6G+ z_X4Eli6>L*C^NB-jY+0F+YIPKUn+a}|G3mT+=vRX7m6C3Jjct^1iIG7% z)ibE%w zEF&AbL=@^O*J>XKQeJ|Y#Lr3PQ&2W7|TD6 z`SE9QqefhZFa6Xn8v0~P;FFsIpG<4|WU@#t>E>~D`Ev)2 z8xavA)?@lnCy5baYB z;vjip?dHbkoAn*-y}y-c?CL=R#I)t{rSV61_GESW_R{SiCjM`LsS*wqOgaIk*355b zzywSGg%^ZfgP7F@c*6X>9A=2fSt?G%du7SC4ao&=A$>x;BE+d62o6Us@|mK(3<$^f_Ft5i1S9!zH1dMlHuV zrhylY$-*g9L#O0C&qNC{Olx{!O5mjpMj}oDJ3`o?liq+%S{Ea4^I4a8btk#_PmFSI zP2CvSyuS6tFWRr!9c^4d>W*|Grc&KRe2ZAbahY7z={!@AN&Jf`ddc-IYOy$LTDVwb z>Wz`*py>`GRAD>vSMl6{XW>-z2+L%Ydqi|SsYYjt2{U^TsxUVFIN cx9ROBh8C4WIU>Dts3xV}!`Hbu62kZU4`O&7e*gdg diff --git a/src/data/db/__pycache__/models.cpython-312.pyc b/src/data/db/__pycache__/models.cpython-312.pyc deleted file mode 100644 index 8e1a0893ca69f5df68df07bf36e5fe64e40efc1f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1912 zcmcgs&2Jk;6rZ)%-nAVErOGAcG7^W>QtA~@WDbbn#9MHaY+7d{wJvBi-krGXu6NDs z7-}n(9H~{XIG9_hgw!5E`BS(N35CrC@p0g05l$5cX4W6Xm0ggi7-{G2d++z=eaxHp zZ91IFvL!wM`904<>;i=$2A#ca3Ac0X;m3Ta$ndNYEx=q z38NDPFar_!M?~}-0@P9yp@z&T3g!dU(l64t#7wNMtP_kV_8~@Cs%o~(=Nc=lbge#2Fdw5DYF60Ui&1?)PiVxvi%Db7B{m9b`Ap+R$K z87Ywbj~(_H-V7wub6srGKq{;m*Yo-ZLELN--y?y9?_sk^F%x+8x=n+a>3BZQL;{i9 z5I$9XK5ig)<}xOij0+lXEy7Y)mmBih>IOZ$u}AW{E0H0W{OSkAQN34vX?8! zyo+5+@rkKeh$6+RD0R=mj-PF`g0vaFZ)5Ph2_FbDcfH594)`T?p|{#y{VI2ydaYA9 zNiXzncW;0GQRnK9V)9Ts(E31Ll>Y#7EYV*CvD8dailb5tbrHl8hZ_eQ54KK3`S9Ao zwTGK$%o7M?!>A+MHH=J(@QE5kJU_PEt6#r;{OXnC)LR|-Bstex?k@LMx+`DYd%E~^|2TQ2BmFKe c3k&^{0K~b^+TTT&eh?QPZug@=ycl}_3rCTpVE_OC diff --git a/src/data/scrapers/__pycache__/libraries.cpython-312.pyc b/src/data/scrapers/__pycache__/libraries.cpython-312.pyc deleted file mode 100644 index 3781a20b60e24a7dc72decc6d6f662d9503f6137..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1680 zcmbVMO-vg{6rTO@4_+IC6JVRvDYOLY6xbF;X-Y#QimQT(qS7>~iV~sq&SKVCdzYDU z`O#W(r5;R+1W2s_DH0r0ap|$g9x7E;FSwR!Ym!Ql+6#wDw5W&X)LG-jIdIBIGw;p& z`R1FaH}gv*5&~>|9{e@^mjJ*Y?#3H@Lpgq#DO&&n$eCcqmAuJi`5B%AP?!;*_yw2| zm$`)R&`r3N;ArGsUCk3?E^pqp^7hJy^Mu%VL{SzO7l$+p<#f{=(qUdvQ&yfRS=Gi0 zG?1nA2d$6A9~T?E+5sDop~2yX$xdCrV? z0nflB5xvcRBd!35i%=g9$OJvTDIvGtKq_z!mywztPlG&UWiXm?yu+Uv=krd@M7-Rl zgnrLk5YnI^IzkfM`w4)8&+(lZA2^NolcFT+`-hFyx(jf>{VKbm@7}3qh6{ey+?Gr{zp;5szCEtcNr{4K)59t8^hmIz0Z) zDgRA=Eh>V7R0ug@rpxm;r4w+1js#mD2p%d2)_xX&Bi?4X9GivEB1lAUtw{4EYYy2e z!aAZpqNYrp@&=@>r(OMV-R2`H3ocWhAU5Hl+qG`ff)ybY3LUx)d05}_1KkOM2B$1d zb$g!%#vnvG##9=&ECh|5%3eq!OoK>Y$m^J3Dx`Hng*zWjAs3X2L|-CQoHKHe2Hka5 zHBBm*dXD-qL59tQZ5o97=1fZ^)UVpMo`WZl*tT+5ry^!=7y0UN$Xdiygep;~z;r5V zmWgS^Y!IBysac%{)CS9#)?_*B9+yi~Zi!0Im~oilc0V(oYRVl}J*=2&9_dQb((+k7 zN3b$}@?R)pww=^JQwUOX*dU5Q=2cA{Y-|Tps&=1UK*2~;T$&2jkZRZQ>i-D)G=}VD z+IC`|H9aGGmGN-jKfb`qUj%Z z(`ET)DN>7dZY*r}Zw+h?RAas6SZ^iPR|?ePJ$pSnpX}b)Cyxi<6~bN&zX(}@lM diff --git a/src/data/scrapers/__pycache__/printers.cpython-312.pyc b/src/data/scrapers/__pycache__/printers.cpython-312.pyc deleted file mode 100644 index 8a76130d5079888837b05be5af987ed3bb1c9c4e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1734 zcmbVM-A^M`6un=VJH)J|9lqu7h**mxL zk!eyBABvIyiDn_$1RgQ`6F&H0)Q8p@Tko!kiN5fL5+B?r@01SZffv2$J?GqW&-wLt z&)jpr4~K(*j!*nQX1?VC_}y8!gKZioFH++Xkb%r-V9wF3#^l&JmH{$1$I1MsV2)p9 zlAa?cVJ69t@Y{-*$Lc~}yJzIhwT~7tHmy-XSYBQplnkURnl`A&c|lAYc`W2a(-LGA zDH1l&s_>R3t}1AJa`t_s>bO@DO{^OFc+b$Qle1JbyslP|B{f+#l#fb|&XW~N{t>{8 zyP0(l<2tCu^uVX%uo+hX*g>vVV#4M$E=bwpR{3M*}W8 zUAEoLZ0W(@faNyr^EGGv)z1fV%9!e`TgKa<~w*gV{?h)~+SDx#%gd z2_-fIB=!NVcLCxnZj24xm~D0=!h=Gn0Bt_o>F&3*?X&%MNRB+_+zbY6{J;a7zejNy zIty?aB%?EHa1rOUK~qGQf`|u;X-y%lDwDRedw##+^Wd~0uM!p`I^`qh?jq-c9XU7h z$gv0`60pYhcPBaGzhg+E(+vcs6iY&CqcsSeGz=uGx`-7k86tk9Jj^Q=wg{I|FyUrD zxPu&E!eeCx6MjL}W#V__yr^k}(-fU}EKJd;GBp(w&w^%%n0Q6gRCGBRb|5X&&@F}V z7VT){X}}?4*&@6wVv%svCcI>5771%jlJmNlQwWsLF^x(srt_MrFOiP(9*2aUASUvRP6u}>n&P{vWemq%? zbQhnFR`0A2v^~)V(Qon!tI;W>(g7AeZJg#tNi?MH8fIurw*f=qwAwvnJT3>O?r^io`g$VuQzTZa{8}P)33k@H390Q;+cOeXUKU?xs3$6bh^NbQ*q2O4D$p; Vegz52!KW}q?u5V2MA0DK{1;E5o)Z87 diff --git a/src/data/scrapers/__pycache__/restaurants.cpython-312.pyc b/src/data/scrapers/__pycache__/restaurants.cpython-312.pyc deleted file mode 100644 index e1afe133b39669fa542eeb9c5796e88091c8ec20..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3925 zcma)9Pi))P8Gn>0iLxb2mL1D~k~*%dw$>WclB3y8=e9|VWLb+0SW^@YFc38PEK#ON zmZU7plh7S@P_P1-PfoB6>9$LX^pKsF<4(KmVoe>Sy=+5(UDlh{U?`B&K2oG&S+OKSLS}*)Rm7(27CgloUa~V`c6_j^_bH(tT_Z=EIU6K0I^T4rDr7BcD(2sE~S3MO^ zK6rxMTk+MfH*Y!XlKtLLgJ|cYx;mMC2l|$^K{d1zv)vD4JIzQHr=tk}n{T1IY z(XTyKKdj;Zv|sUe?FTA>{HYV{GZp5A5e6jmz^TNG<=L*h8qA{VP$jsIe*7DRs>7Ax z7iuuwm_o-Of&+XGbuEOCu`t|4j2wdqxrk%rVOO5@R3pb=MqEradJH1cMT}O4^6~!s zS3;F=Wuy|RL@T3W^jF<_QRuVZ(FoW))vx1n0=PINcwm#l$E^{CK0xy*g$%0Xq0q;O zf=?Okct1uZYGL&H4cMGa4)dan4P$pEXPTOEfn`gj(&DycNTy`wc!6IO)PnSH*qyr< zPC&4lT+B*}!3wI5*(^3WLn>%8=1g@9D=dG`kI+f#k|M}O5gV+I4U^kWyY@I)GE7Oy z8r=@NDJdfRPTo+JwZzJ$L^{1G$tKpJlUTmknMtJ2^NQ%qWG)?saVDQNj$kF_IW(Py zm19oRr2?;)Iit8C@TMek%S$U3D18~);}EL@5pNDcq*4PB5X6abWR}hkL}UgczB>>x zXtkN8L95NAmIiqln2$_qU_MgI1HBB)SZZKC(gWABJTTH_+e>CWv10!L*=&OAI4h}& zs^D1LzZ(^^iHZuwtv=*2ey3{b4N{oQsiZD|ABS7^%&E6 zW$TB!=pH{#s3p8{)PiU#ix#*?yNqQGW~~FRfBvY24j6AsxWpZ*kmJ&p!Xkv;_%>8^ z3#?n`nSeQdyVp;z)KpErH>0k6vnY23Kux=(Ik46WxZ`IYro?Hgl_&?1nH_-y8os9PSFE@Y^4GAn!m6UOBWVcd$7pzM~JjJnF* z+7f@KLJNLEU$-q>c4$Zf7dzY|!;|t{v^e|WkKW%6e1tcw&O%tXez+1w z$mwuH77N5L^F>9-kzvKIkYeH;lQ7mm17$*IRZ}I=5-*wDrmAzMRKRM{Bmo_NQiNNN zc;VV4w5>&3*BkNknuZmT_yk!s@Is7G8VPLZYRSMl2?%P(18rL$vD3!Ew$WhAV@RBJ z@sa?qNfw()$m(>ZLCZe3p^9Y^wQgL#2;4LcPUKAU)g(d6v@r+c3^qFd4Hu5U#^9~ z;HkOhc%nX@XpFyB3&Qwx;@;ZbwfhqfV~v^f&6(Bu%<7|wKf+-BfSG&}8LiQe$6u+f zx5nc8t9z?=t~SQ3QG8~9XK&}=Z2IA)Mtr3if2$sU>(Tp-__f;iTNBg!xxL(--Nr<+ zcC|G#dvEpb>gPXb%&-r~|1kOc$w%L5q^=xHU#-2@nmm1vxyu~Poo`HLYS&!FoyNrL zwX07;(cACcvM{aFue4@fZp|iJXI_QQf5L&Ok^iDVIMNHSmEcV#(17>+COlEQJYYZ+(H7{sr^?p91P_WT8N z(SG3szS|qrF1w{{F`47OKu^Ii(o@hpW4>2V#iESg)-S-&dT%rqplN$4ih7J9k5Tv^ ji2a6sn;LJw>p^s=w)+?F)ZgfdztWRXEP7gh6VU$yj&B}V From 549c05970b8e6d42f78c599ab493bc506aac6812 Mon Sep 17 00:00:00 2001 From: Nicole Qiu Date: Wed, 16 Apr 2025 18:25:16 -0400 Subject: [PATCH 5/6] update db and fix scrapers with updated cornell urls --- src/data/scrapers/libraries.py | 35 ++++++++++++-------- src/data/scrapers/printers.py | 58 +++++++++++++++------------------ src/data/transit.db | Bin 49152 -> 49152 bytes 3 files changed, 49 insertions(+), 44 deletions(-) diff --git a/src/data/scrapers/libraries.py b/src/data/scrapers/libraries.py index 51a7eb84..20fe79be 100644 --- a/src/data/scrapers/libraries.py +++ b/src/data/scrapers/libraries.py @@ -1,26 +1,32 @@ -import requests +from playwright.sync_api import sync_playwright from bs4 import BeautifulSoup -# URL of the CU Print directory page -URL = "https://www.cornell.edu/about/maps/directory/?notes=Library&caption=%20Libraries" +URL = "https://www.cornell.edu/about/maps/directory/?layer=Library&sublayer=" def scrape_libraries(): - # Send a GET request to fetch the HTML content - response = requests.get(URL) - soup = BeautifulSoup(response.text, 'html.parser') + with sync_playwright() as p: + browser = p.chromium.launch(headless=True) + page = browser.new_page() + page.goto(URL, wait_until="networkidle") - # Locate the table + # Get the rendered HTML after JS loads + content = page.content() + browser.close() + + soup = BeautifulSoup(content, 'html.parser') table = soup.find("table", {"id": "directoryTable"}) - rows = table.find("tbody").find_all("tr") + if not table: + print("Could not find the table.") + return [] - # Extract data + rows = table.find("tbody").find_all("tr") data = [] for row in rows: cols = row.find_all("td") if len(cols) < 3: continue - - location_name = cols[0].text.strip().split('\n\n\n')[0] + + location_name = cols[0].text.strip() address = cols[1].text.strip() coordinates_string = cols[2].text.strip() coordinates = [float(x) for x in coordinates_string.split(', ')] @@ -30,5 +36,8 @@ def scrape_libraries(): "Address": address, "Coordinates": coordinates }) - - return data \ No newline at end of file + + return data + +if __name__ == "__main__": + scrape_libraries() \ No newline at end of file diff --git a/src/data/scrapers/printers.py b/src/data/scrapers/printers.py index e972046f..1cc7242b 100644 --- a/src/data/scrapers/printers.py +++ b/src/data/scrapers/printers.py @@ -1,37 +1,33 @@ -import requests -from bs4 import BeautifulSoup - -# URL of the CU Print directory page -URL = "https://www.cornell.edu/about/maps/directory/?layer=CUPrint&caption=%20CU%20Print%20Printers" # Replace with the actual URL +from playwright.sync_api import sync_playwright def scrape_printers(): - # Send a GET request to fetch the HTML content - response = requests.get(URL) - soup = BeautifulSoup(response.text, 'html.parser') + with sync_playwright() as p: + browser = p.chromium.launch(headless=True) + page = browser.new_page() + page.goto("https://www.cornell.edu/about/maps/directory/?layer=CUPrint") + + # Wait for the dynamic table to load + page.wait_for_selector("table#directoryTable") + + rows = page.query_selector_all("table#directoryTable > tbody > tr") + data = [] - # Locate the table - table = soup.find("table", {"id": "directoryTable"}) - rows = table.find("tbody").find_all("tr") + for row in rows: + cols = row.query_selector_all("td") + if len(cols) < 3: + continue + location = cols[0].inner_text().strip() + description = cols[1].inner_text().strip() + coordinates = [float(x.strip()) for x in cols[2].inner_text().split(",")] - # Extract data - data = [] - for row in rows: - cols = row.find_all("td") - if len(cols) < 3: # Ensure row has enough columns - continue - - location_name = cols[0].text.strip() - description = cols[1].text.strip() - - # Extract coordinates from the hyperlink tag inside - coordinates_link = cols[2].find("a") - coordinates_string = coordinates_link.text.strip() if coordinates_link else "" - coordinates = [float(x) for x in coordinates_string.split(', ')] + data.append({ + "Location": location, + "Description": description, + "Coordinates": coordinates + }) + browser.close() + return data - data.append({ - "Location": location_name, - "Description": description, - "Coordinates": coordinates - }) - return data \ No newline at end of file +if __name__ == "__main__": + scrape_printers() \ No newline at end of file diff --git a/src/data/transit.db b/src/data/transit.db index d9d091ea08425909688869e175a248005f0dd782..40bc3f7a57c6f7357dc8f4110cb4bea1453b7d98 100644 GIT binary patch delta 7069 zcmbVR3v?URnI3s!jcrLYdB2F`o1{q^Ck~NqIhKioCE2l!EnAlCIDrN-md5tr@r)Ts zah;G%OqQjPb`Pl!3l!QOo~>KTAxjGd_E4A7Wm~!hTDmNBO92d+*7KQ@d~^Z6zvR^P zPIg)3E{KCa{ed$jeIL#;`zSkT2Hh4WA|h3!|t7~@43>h<)^3L zsW1*KaWVrjjtdOya$HnJ5=*5MvXmsEnjldrDT#V~l!PQIbDehw>DT3nQE_}+(n&P2 zjdZ3)#kjbQ^z0|z`bOW5mbRwPFYCVTs=3(OSD6 zfos^&K#YEE@`6zQ){(}_V{@^}=f3ezU-_#;f|EIVfa5yHwGl~`NQ9nDlFPJFHJQaE z!TyVyIsI&K8L*J$vP4vj>V!{;ec*V5+i|IzjDYXJCo-MQ_Z z%@vjsF>yq)B=FXgHZhqZfkV=H#}NwtB`rPo&f@qx-#dN9V5r>5Z0Uj-MSa2yCyHSx zb>2D>62~Xf@W#B(Xne2!pYIy7g}Af~TC{UqSX8rG)Yk1J=I43LK(&Ao8s7uJk-z)% z564?L0KgU9vKrSELp3$2^)-C5CU?G;{;!V#@w?fN-W(4(VTb{ai_#pSuJG6IBm=4) zXcsic+}pqXV=F-3{KfEd!9b~#*$_53?lMhJh?W=)bvwu)>CyDmC|tXZv?I#L+d300VSPSH{+%j{mGmb8m{LQ*dzrXL7pUxLw|xb2zOuM0Vx%w}MOwDDw0i)XU2 z)_-9TUn32~>Ky~MB0wH^`K{f-3RFm+>wQM}P&hLzJSNNvw+SZ%MK~Zd3G0MqsJrj- zXHhsG1El|>R{Dxtx%>l}ll za>~l9V*11G=2FAEfB{feBg$>H8CF*k;GT)#A7) zlIm&{aTx|q8Kk$Tdw+Gcwus&A=+{)4gf$%?nyOO#{;Ngm26}$n#a`ryrQ~sZr_j6X zGDE*SNMDZ0=wwso%U3R8w>o0+(MeHH9nR=;z{6HKvSp&5iW5y62G-&byCeDiU_-A zs&j|=WE4i1Ck(6dwo`@jYv6ABj%^B4q~=X*pCc?QhG-rG}+!4Ib!Iq!q7gs$nAetk-hItA;+Xijj5KR;p~bIBDao zuV(k-b4pevt2tg=%m!zsv!m#{RWj&EnG+4BBQd+y2 zjX7u+B^{Eg%VWgsbEq3OBgjClq3UKE*y_dX7Doq4jBHj1YNSaw>Hjim^5V5$U02Gyq%W_!t6k=}Ok;2x)0O zRnsL-)dah6uTwURx( zQ4;FyiS2L_R|(qYrXPhEU)rMz(}Fsc-Wd zap^MlV#j%1bwLe#RpHZ5YLG0&%xiWf3)#)(R^u`qLdH$RYDzVv9(4_QtLeFBYIXJ9$3Tujr*hq^_-If}O5?QG)zCb_c~B_j zBqbgTDL-gC3R(a%mDlfz5we#9BC2WQq`tNuIL6Q;4<1ER2SsoCC~(AL7kO>67$zj^ z@w8%1AgzX!90P{5_2)kTF{pfI>i+MzcBMcK100v%TF(Pf(5g~&^uX!9>t<)%P(;}L z#KrG~CSVCnQn@}{e6|*JahMtrdKk23)Ykf1+8Y0*_4y@~CBMAG6^;3=xVaLDR*>_?<1#7y7vx5a8-CdlI;#@YIhr|2je!e3`FhIc+X1}&DX#_i`84d z5ZT#}v7;G)_WV_BAv@C`iLKSU@v583_r(xXBIcEJj)EEa9Bh6a5y&Y~#)ysP+M^vp zLD)<~pz4EP|IJMZf#mc{zx;i7AC$ZT(1ir9STZCmT_&K5BbLnmiow_Nqn*XSz>xpd zrU>khqLjz|ilO@VBFNL~hy+nq(e$-5ClbclF`UiuL^}jRoiP&8wP7il#N;9>m=+C6 zTR@Te4u=pS#?Lp55+yin!0y>yrF08VCg!!MY{qf+} zAsAw6+kMeaC$sD*P(V?$mNp6!1cYf)0{ae&vO-gB6DSlSp+~}k0lNz{eP}CQd(kU> z0R6&uAAe?1TL*HwkK+pHt}UJyGr1j7w7b*lYa7ik{`wEnpXZF&T60koZU+uxBVv(c zi_2!EED$n3=FKu(=WT?=-uhbQ8+Smgmy#?06bbG|B4R+xZ`Z{h3`(Q2Y9t7)$95Ll zmDJ3f_GZY08dyNei&64*G^O#@(@AgN!l5u-y9hV`?Gv|O?RPSLD17s9d83HT&#!XU zrwEcEDt3ZMX8F=|c*M^4wg0{I+jrM*YecYY#%1R7>x-y?<`N;Y&dKce(f9w7 zdC0aWSl2|_Tw+iHVmiX0o;ch7;L;;|38F2@aRqbJ{NX@j&hkqQ2_$6U?y7ujZqXu2}T);bCTFC5* z*kL^T#U}qU#3&NJU`oz`Sy;9qnr5+khA36c=r2z`P`?zc>JpC2jMMg^tp2$o5&qfl zX8!~YzS(eCI2FX-1$?mne3Q3rh6uG=M8mJ_)iyb}eaDlY-$oWWnNIis@0***{2?9N z2zyGmMY25-logPZv8fC}K#m?h3yN-TPaE+^fOS-?>@L3ev=vU2SZ7!8<_f(5i(~H= zoV3P&BX9n<^|2$^pSjZ7z3;}hw+2@hF9?b+&atn?FO{S>?y)1W6N_G&r1`sqpkfrx zSMz0Qdz~LjWcHs`QPgm07Z$!7Lcq$cA&SL__V$Oe{_xW=U;;kTpP^udvbw`03o cSp$&FV#M}Krb57_+4zyriEz&ugMNCqgXSJutT|AT>@`#%HUUH+qd-Moc-QQZG| ztu_l9v~q9$&t1gG+?d8PS(00Zk#({e_enN=RyJ{G*~t#P5|d5&gc#W-ck=}?H>NU9 zmgaxTX2`@Q?kW$K%;OW`X9ilw0kV=8LJg{n9| aBeODNQEG8XVrfxgUP*BYUK182Bme-}8a7M- From 0bc4485c7d2b504826b92ee41bed46d76c42538c Mon Sep 17 00:00:00 2001 From: Nicole Qiu Date: Wed, 16 Apr 2025 22:10:24 -0400 Subject: [PATCH 6/6] update swagger --- src/swagger.json | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/src/swagger.json b/src/swagger.json index 9aae0b90..0ee1b8c6 100644 --- a/src/swagger.json +++ b/src/swagger.json @@ -49,10 +49,7 @@ ], "responses": { "200": { - "description": "{\"success\": true, \"data\": [\"id\": 1, \"location\": \"Africana Studies and Research Center\", \"address\": \"310 Triphammer Rd, Ithaca, NY 14850\", \"latitude\": 42.4574, \"longitude\": -76.4823]}", - "schema": { - "$ref": "#/definitions/BusStop" - } + "description": "{\"success\": true, \"data\": [\"id\": 1, \"location\": \"Africana Studies and Research Center\", \"address\": \"310 Triphammer Rd, Ithaca, NY 14850\", \"latitude\": 42.4574, \"longitude\": -76.4823]}" } } } @@ -66,10 +63,21 @@ ], "responses": { "200": { - "description": "{\"success\": true, \"data\": [{\"id\": 1, \"location\": \"Akwe:kon\", \"description\": \"Color - Room 115\", \"latitude\": 42.4563, \"longitude\": -76.4806}]}", - "schema": { - "$ref": "#/definitions/BusStop" - } + "description": "{\"success\": true, \"data\": [{\"id\": 1, \"location\": \"Akwe:kon\", \"description\": \"Color - Room 115\", \"latitude\": 42.4563, \"longitude\": -76.4806}]}" + } + } + } + }, + "/api/v1/restaurants": { + "get": { + "summary": "Returns a list of all restaurants in Ithaca.", + "description": "A list of all restaurants.", + "produces": [ + "application/json" + ], + "responses": { + "200": { + "description": "{\"success\": true, \"data\": [{\"id\": 1, \"name\": \"Alley Cat Cafe\", \"category\": \"Coffee\", \"address\": \"112 N Cayuga St.\", \"latitude\": 42.4407309, \"longitude\": -76.4950526, \"image_url\": \"https://assets.simpleviewinc.com/simpleview/image/upload/crm/ithacany/Alley-Cat-Logo_1872F7A2-5056-A36A-09631186ACCA298F-1872f6fe5056a36_1872f7f6-5056-a36a-098fa97e4ad0cd49.jpg\", \"web_url\": \"https://www.alleycatithaca.com\"}]}" } } }