Skip to content

1496 add rate limit base method #1497

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ The following changes are not yet released, but are code complete:
Features:
- Add error handling for scrapers with expected results #1447
- Add a check to verify ACMS user data is loaded before querying attachment pages #1495
- Add support for configurable working hours and rate limits in all scrapers #1496

Changes:
- Expanded ACMS URL matching to support both HTTP and HTTPS protocols.
Expand Down
23 changes: 23 additions & 0 deletions juriscraper/AbstractSite.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import hashlib
import json
import sys
from datetime import date, datetime, timedelta

import certifi
import pytz
import requests

from juriscraper.lib.date_utils import (
Expand Down Expand Up @@ -79,6 +81,14 @@ def __init__(self, cnt=None, **kwargs):
# indicates whether the scraper should have results or not to raise an error
self.should_have_results = False

# Default working hours, can be overridden by subclasses
self.working_hours = (0, 24)
# use print(pytz.all_timezones) to get a list of time zones
self.time_zone = "America/Los_Angeles"

# indicates the rate limit for the scraper, in seconds
self.rate_limit = 0

# Sub-classed metadata
self.court_id = None
self.url = None
Expand Down Expand Up @@ -354,8 +364,21 @@ def _make_html_tree(self, text):
"""
return get_html_parsed_text(text)

def is_within_working_hours(self) -> bool:
    """Check whether the current time falls inside the scraper's working hours.

    The window is ``self.working_hours``, an ``(start_hour, end_hour)`` tuple
    of whole hours evaluated in ``self.time_zone``. ``end_hour`` is exclusive,
    so the default ``(0, 24)`` always returns True. A window whose start is
    later than its end (e.g. ``(22, 6)``) is treated as wrapping past
    midnight; the original comparison made such a window always False.

    :return: True if within working hours, False otherwise.
    """
    now = datetime.now(pytz.timezone(self.time_zone)).time()
    start, end = self.working_hours
    if start <= end:
        # Normal same-day window, e.g. (9, 17).
        return start <= now.hour < end
    # Overnight window, e.g. (22, 6): active 22:00-23:59 and 00:00-05:59.
    return now.hour >= start or now.hour < end

def _download(self, request_dict=None):
"""Download the latest version of Site"""
if not self.is_within_working_hours():
raise sys.exit("Attempted to download outside of working hours.")

if request_dict is None:
request_dict = {}
self.downloader_executed = True
Expand Down
3 changes: 3 additions & 0 deletions sample_caller.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from collections import defaultdict
from datetime import datetime
from optparse import OptionParser
from time import sleep
from urllib import parse

import requests
Expand Down Expand Up @@ -177,6 +178,8 @@ def get_binary_content(download_url: str, site, exceptions) -> bytes:

# Note that we do a GET even if site.method is POST. This is
# deliberate.

sleep(site.rate_limit)
r = s.get(
download_url,
verify=has_cipher, # WA has a certificate we don't understand
Expand Down
Loading