66
77import requests
88from bs4 import BeautifulSoup
9-
9+ import re
1010from py_common .Logger import Logger
1111from py_common .Production import Production
1212import py_common .utils as utils
13+ from datetime import datetime
1314
1415########################
1516### GLOBAL VARIABLES ###
@@ -86,7 +87,7 @@ def scrape(platform):
8687 # get rows; for each rows, get the name of the prod and the internal link
8788 for link in links :
8889 demozoo_internal_link = baseurl + "/" + link .get ("href" )
89-
90+ print ( demozoo_internal_link )
9091 # building slug: all lowercase, each word separated by hyphen, no special character
9192 slug = utils .build_slug (link .text )
9293
@@ -115,7 +116,21 @@ def scrape(platform):
115116 elif slug in globalgameslist :
116117 logger .write ("[WARN]" , " " + slug + " already in entries folder!" )
117118
def parse_date(date_string):
    """Extract a release date from free text and return it as ``YYYY-MM-DD``.

    Three precisions are recognised, tried from most to least specific:
    ``"12 March 1994"``, ``"March 1994"`` and ``"1994"``. Month names may be
    full or abbreviated (English). Components missing from a partial date
    default to ``1`` (``datetime.strptime`` behaviour), so ``"March 1994"``
    becomes ``"1994-03-01"`` and ``"1994"`` becomes ``"1994-01-01"``.

    Args:
        date_string: Text containing a date, possibly surrounded by other
            words (for example a leading label).

    Returns:
        The date formatted with ``"%Y-%m-%d"``.

    Raises:
        ValueError: If no part of ``date_string`` can be parsed as a date.
    """
    # Each entry pairs a search pattern with the strptime formats able to
    # parse what it matches. The digit guards keep a year from being cut out
    # of a longer run of digits.
    candidates = (
        (r"(?<!\d)\d{1,2} [A-Za-z]+ \d{4}(?!\d)", ("%d %B %Y", "%d %b %Y")),
        (r"[A-Za-z]+ \d{4}(?!\d)", ("%B %Y", "%b %Y")),
        (r"(?<!\d)\d{4}(?!\d)", ("%Y",)),
    )

    for pattern, formats in candidates:
        for match in re.finditer(pattern, date_string):
            date_part = match.group(0)
            for fmt in formats:
                try:
                    parsed_date = datetime.strptime(date_part, fmt)
                except ValueError:
                    # The word before the year may be a label rather than a
                    # month (e.g. "Released 1994"); fall through to the next
                    # format or a less specific pattern.
                    continue
                return parsed_date.strftime("%Y-%m-%d")

    raise ValueError(f"No recognizable date found in: {date_string}")
131+
118132def scrape_page (slug , url , platform ):
133+ demozoo_url = url
119134 '''
120135 given a slug and demozoo production url, it returns an object containing everything useful
121136 to build a file hierarchy
@@ -131,6 +146,17 @@ def scrape_page(slug, url, platform):
131146 # getting title
132147 title = str .strip (soup .find ('div' , {"class" : "production_title focus_title" }).findChildren ("h2" )[0 ].text )
133148
149+ date_string = str .strip (soup .find ('ul' , {"class" : "attributes" }).findChildren ("li" )[0 ].text )
150+
151+ release_date = None
152+
153+ try :
154+ release_date = parse_date (date_string )
155+ print (date_string , "->" , parse_date (date_string ))
156+ except :
157+ print ("nodate" )
158+
159+
134160 logger .write ("[INFO]" , " Adding: " + title + " ..." )
135161
136162 # getting developer
@@ -198,7 +224,7 @@ def scrape_page(slug, url, platform):
198224
199225 files = [f"{ slug } .{ platform .lower ()} " ]
200226
201- return Production (title , slug , developer , platform , typetag , screenshots , files , video , repository = source , url = url )
227+ return Production (title , slug , developer , platform , typetag , screenshots , files , video , date = release_date , repository = source , url = demozoo_url )
202228
203229def main ():
204230 for platform in PLATFORMS .keys ():
0 commit comments