Skip to content

Commit 26719e8

Browse files
committed
fix: Update changelog for version 2.3.4 and adjust scraper logging
1 parent a95f403 commit 26719e8

File tree

4 files changed

+68
-32
lines changed

4 files changed

+68
-32
lines changed

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
# Changelog
22

3+
## v2.3.4
4+
5+
- Fixed some scrapers
6+
- Optimized enrollment process
7+
38
## v2.3.3
49

510
- Improved all scrapers for better performance.

base.py

Lines changed: 45 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424

2525
rich_traceback_install()
2626

27-
VERSION = "v2.3.3"
27+
VERSION = "v2.3.4"
2828

2929

3030
log_file_path = "duce.log"
@@ -248,7 +248,7 @@ def append_to_list(self, title: str, link: str):
248248
target.append(course)
249249

250250
def fetch_page(self, url: str, headers: dict = None) -> requests.Response:
251-
return requests.get(url, headers=headers, timeout=(10, 30))
251+
return requests.get(url, headers=headers, timeout=(30, 30))
252252

253253
def parse_html(self, content: str):
254254
return bs(content, "lxml")
@@ -627,18 +627,41 @@ def en(self):
627627
def cj(self):
628628
try:
629629
self.set_attr("length", 4)
630+
631+
headers = {
632+
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:137.0) Gecko/20100101 Firefox/137.0",
633+
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
634+
"Accept-Language": "en-US,en;q=0.5",
635+
# 'Accept-Encoding': 'gzip, deflate, br, zstd',
636+
"DNT": "1",
637+
"Sec-GPC": "1",
638+
"Alt-Used": "www.coursejoiner.com",
639+
"Connection": "keep-alive",
640+
# 'Cookie': 'ezosuibasgeneris-1=f7de3a73-8edf-4957-6bd7-c03d6192a105; cf_clearance=Vo1nMPpI9BOvaSvzT1RuuxxHQDU.SjH0Gvy_1Q5A8eA-1745306395-1.2.1.1-a7L2AgL7rcy4jHX.whQY0bjrjQwiz78KBWIOzX6_b8wBevOqdlK5yNLXDzSk1KJao2pu7ogq5pFL.TfdYmOQY3hz5c3Zk8BvRZVu0fyENuYVk1PNX.Q.UswXoe.LOSzsPpOBzySIOo5frr2Wv.ez2dE9GvPfPKG_a3WgmI.da5J94k2bQrs2w5tGdPlZgBNNuXlln_g9hIWQf8FNPXNjYQajWhZMZRJEqrwN6J8axTX8InJ_Fpt4wJaP6AvwcE28Lw6sgnWHLjVlrSdW9u.ZmTXvB7rDVVF5fKTSydwn5v0iI_4ch8TQPx6gFD_JHdnhTuVyzp64J.cKe1Uh53n_.DbRv8sCkUP9lfl_I2VGlog; ezoictest=stable; ezopvc_664594=1; ezoab_664594=mod24-c; active_template::664594=pub_site.1745306394; ezoadgid_664594=-1; wssplashchk=c03df4b443bf0a1a0365c55282e792b435f0599b.1745309995.1',
641+
"Upgrade-Insecure-Requests": "1",
642+
"Sec-Fetch-Dest": "document",
643+
"Sec-Fetch-Mode": "navigate",
644+
"Sec-Fetch-Site": "cross-site",
645+
"Priority": "u=4",
646+
"Pragma": "no-cache",
647+
"Cache-Control": "no-cache",
648+
# Requests doesn't support trailers
649+
# 'TE': 'trailers',
650+
}
630651
with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
631652
future_page = [
632653
executor.submit(
633654
self.fetch_page,
634655
f"https://www.coursejoiner.com/wp-json/wp/v2/posts?categories=74&per_page=100&page={page}",
656+
headers=headers
635657
)
636658
for page in range(1, 5)
637659
]
638660
for i, future in enumerate(
639661
concurrent.futures.as_completed(future_page)
640662
):
641-
content = future.result().json()
663+
content = future.result()
664+
content = content.json()
642665
if not content:
643666
logger.debug("No more coupons")
644667
break
@@ -972,8 +995,7 @@ def get_session_info(self):
972995
headers=headers,
973996
)
974997
r = r.json()
975-
if self.debug:
976-
logger.info(r)
998+
logger.debug(r)
977999
if not r["header"]["isLoggedIn"]:
9781000
raise LoginException("Login Failed")
9791001

@@ -1147,20 +1169,20 @@ def check_course(self):
11471169
url = f"https://www.udemy.com/api-2.0/course-landing-components/{self.course.course_id}/me/?components=purchase"
11481170
if self.course.coupon_code:
11491171
url += f",redeem_coupon&couponCode={self.course.coupon_code}"
1150-
try:
1151-
r = None
1152-
r = self.client.get(url).json()
1153-
except Exception as e:
1154-
logger.error(f"Error fetching course data: {e}")
1155-
logger.error(f"Course ID: {self.course.course_id}")
1156-
logger.error(f"Coupon Code: {self.course.coupon_code}")
1157-
logger.error(f"URL: {url}")
1158-
logger.error("Response:" + str(r))
1159-
logger.exception("Exception occurred")
1160-
if self.debug:
1161-
os.makedirs("test/", exist_ok=True)
1162-
with open("test/check_course.json", "w") as f:
1163-
json.dump(r, f, indent=4)
1172+
1173+
for _ in range(3):
1174+
try:
1175+
r = self.client.get(url)
1176+
r = r.json()
1177+
break
1178+
except requests.exceptions.ConnectionError:
1179+
r = None
1180+
except Exception as e:
1181+
logger.error(f"Error fetching course data: {e}")
1182+
logger.error(f"Course ID: {self.course.course_id}")
1183+
logger.error(f"Coupon Code: {self.course.coupon_code}")
1184+
logger.error(f"URL: {url}")
1185+
r = None
11641186
amount = (
11651187
r.get("purchase", {})
11661188
.get("data", {})
@@ -1261,7 +1283,7 @@ def start_new_enroll(
12611283
self.valid_courses.append(self.course)
12621284
logger.info("Added for enrollment")
12631285

1264-
if len(self.valid_courses) >= 30:
1286+
if len(self.valid_courses) >= 20:
12651287
self.bulk_checkout()
12661288
self.valid_courses.clear()
12671289
self.update_progress()
@@ -1308,11 +1330,11 @@ def bulk_checkout(self):
13081330
# "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:119.0) Gecko/20100101 Firefox/119.0",
13091331
"Accept": "application/json, text/plain, */*",
13101332
"Accept-Language": "en-US",
1311-
# "Referer": f"https://www.udemy.com/payment/checkout/express/course/{self.course.course_id}/?discountCode={self.course.coupon_code}",
1312-
"Referer": "https://www.udemy.com/payment/checkout/express/",
1333+
"Referer": f"https://www.udemy.com/payment/checkout/express/course/{self.course.course_id}/?discountCode={self.course.coupon_code}",
1334+
# "Referer": "https://www.udemy.com/payment/checkout/express/",
13131335
"Content-Type": "application/json",
13141336
"X-Requested-With": "XMLHttpRequest",
1315-
"x-checkout-is-mobile-app": "true",
1337+
"x-checkout-is-mobile-app": "false",
13161338
# "Origin": "https://www.udemy.com",
13171339
"Host": "www.udemy.com",
13181340
"DNT": "1",

cli.py

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -181,20 +181,27 @@ def create_scraping_thread(site: str):
181181
scraper, f"{code_name}_error"
182182
):
183183
current = getattr(scraper, f"{code_name}_progress")
184-
udemy.progress.update(task_id, completed=current, total=getattr(scraper, f"{code_name}_length"))
184+
udemy.progress.update(
185+
task_id,
186+
completed=current,
187+
total=getattr(scraper, f"{code_name}_length"),
188+
)
185189
time.sleep(0.1)
186-
187-
udemy.progress.update(task_id, completed=getattr(scraper, f"{code_name}_length"))
188-
logger.debug(f"Courses Found {code_name}: {len(getattr(scraper, f'{code_name}_data'))}")
189-
190+
191+
udemy.progress.update(
192+
task_id, completed=getattr(scraper, f"{code_name}_length")
193+
)
194+
logger.debug(
195+
f"Courses Found {code_name}: {len(getattr(scraper, f'{code_name}_data'))}"
196+
)
197+
190198
if getattr(scraper, f"{code_name}_error"):
191199
raise Exception(f"Error in: {site}")
192200
except Exception:
193201
error = getattr(scraper, f"{code_name}_error", traceback.format_exc())
194202
handle_error(f"Error in {site}", error=error, exit_program=True)
195203

196204

197-
198205
if __name__ == "__main__":
199206
try:
200207
logger.info("Starting CLI application")
@@ -288,7 +295,7 @@ def create_scraping_thread(site: str):
288295
udemy.scraped_data = scraper.get_scraped_courses(create_scraping_thread)
289296
total_courses = len(udemy.scraped_data)
290297
console.print(f"[green]Found {total_courses} courses to process[/green]")
291-
298+
292299
layout = create_layout()
293300
layout["header"].update(create_header())
294301
layout["footer"].update(create_footer())
@@ -306,6 +313,7 @@ def update_progress():
306313
)
307314
layout["main"]["stats"].update(create_stats_panel(udemy))
308315
live.update(layout)
316+
309317
udemy.update_progress = update_progress
310318

311319
try:

gui.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,6 @@ def create_scraping_thread(site: str):
5555
if getattr(scraper, f"{code_name}_length") == -1:
5656

5757
raise Exception(f"Error in: {site}")
58-
5958
main_window[f"p{site}"].update(0, max=getattr(scraper, f"{code_name}_length"))
6059
while not getattr(scraper, f"{code_name}_done") and not getattr(
6160
scraper, f"{code_name}_error"
@@ -66,7 +65,9 @@ def create_scraping_thread(site: str):
6665
)
6766

6867
time.sleep(0.1)
69-
logger.info(f"Courses Found {code_name}: {len(getattr(scraper, f'{code_name}_data'))}")
68+
logger.info(
69+
f"Courses Found {code_name}: {len(getattr(scraper, f'{code_name}_data'))}"
70+
)
7071
if getattr(scraper, f"{code_name}_error"):
7172
raise Exception(f"Error in: {site}")
7273
except Exception:

0 commit comments

Comments
 (0)