-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathreddit_comment_scraper.py
More file actions
64 lines (50 loc) · 2.11 KB
/
reddit_comment_scraper.py
File metadata and controls
64 lines (50 loc) · 2.11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import praw
import logging
from datetime import datetime
import pandas as pd
import emoji
import time
from prawcore.exceptions import RequestException, ResponseException
# Scrape the top posts of one subreddit and save every comment that contains
# at least one emoji to a CSV file. The CSV is rewritten after each post so an
# interrupted run still leaves usable output on disk.

# Log at INFO so per-post progress is visible on the console.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()

# Reddit API credentials; fill these in before running.
client_id = ''
client_secret = ''
user_agent = ''

# Single source of truth for the output path, so the file that is written and
# the file named in the log messages can never disagree.
OUTPUT_CSV = 'reddit_comments_with_emojis50.csv'
CSV_COLUMNS = ['post_id', 'comment_body', 'comment_created_utc']

# Bound the per-post retry loop so one permanently failing post cannot hang
# the whole scrape.
MAX_ATTEMPTS_PER_POST = 5
DEFAULT_RETRY_WAIT_SECONDS = 10


def _parse_wait_seconds(message, default=DEFAULT_RETRY_WAIT_SECONDS):
    """Return the wait time read from the second-to-last word of *message*.

    The number is treated as seconds, as the original script did.
    NOTE(review): the exact wording/unit of the rate-limit message is not
    visible here -- confirm against the API. Falls back to *default* when the
    message is too short, the token is not an integer, or it is negative.
    """
    try:
        wait = int(message.split(' ')[-2])
    except (IndexError, ValueError):
        return default
    return wait if wait >= 0 else default


def _emoji_comment_rows(post):
    """Return one row dict per comment of *post* whose body contains an emoji.

    Rows are built in a fresh list (not appended to the global ``data``) so a
    failure part-way through can be retried without producing duplicates.
    """
    post.comments.replace_more(limit=20)
    rows = []
    for comment in post.comments.list():
        if emoji.emoji_count(comment.body) > 0:
            rows.append({
                'post_id': post.id,
                'comment_body': comment.body,
                # gmtime keeps the timestamp in UTC without relying on the
                # deprecated datetime.utcfromtimestamp.
                'comment_created_utc': time.strftime(
                    '%Y-%m-%d %H:%M:%S', time.gmtime(comment.created_utc)
                ),
            })
    return rows


def _write_csv(rows):
    """Write *rows* to OUTPUT_CSV and return the DataFrame that was saved.

    Columns are passed explicitly so the header is present even with no rows.
    """
    frame = pd.DataFrame(rows, columns=CSV_COLUMNS)
    frame.to_csv(OUTPUT_CSV, index=False)
    return frame


try:
    reddit = praw.Reddit(client_id=client_id,
                         client_secret=client_secret,
                         user_agent=user_agent)
    logger.info("Authenticated as: %s", reddit.user.me())

    # Put the subreddit name here.
    subreddit = reddit.subreddit('')
    data = []

    for post in subreddit.top(limit=100000):
        for attempt in range(1, MAX_ATTEMPTS_PER_POST + 1):
            try:
                rows = _emoji_comment_rows(post)
            except (RequestException, ResponseException) as e:
                if 'RATELIMIT' in str(e):
                    wait_time = _parse_wait_seconds(str(e))
                    logger.info("Rate limit hit. Waiting for %s seconds.", wait_time)
                else:
                    logger.error("RequestException or ResponseException: %s", e)
                    wait_time = DEFAULT_RETRY_WAIT_SECONDS
                time.sleep(wait_time)
                continue

            # Success: commit this post's rows and checkpoint to disk.
            for row in rows:
                print(row['comment_body'])
            data.extend(rows)
            df = _write_csv(data)
            logger.info("Data saved to %s after processing post %s", OUTPUT_CSV, post.id)
            break
        else:
            # Every attempt failed; skip this post rather than looping forever.
            logger.error("Giving up on post %s after %d failed attempts",
                         post.id, MAX_ATTEMPTS_PER_POST)

    # Final save so the completion message is true even when no post was
    # processed successfully.
    df = _write_csv(data)
    logger.info("Scraping completed and data saved to %s", OUTPUT_CSV)
except praw.exceptions.PRAWException as e:
    logger.error("PRAWException: %s", e)
except Exception as e:
    # Top-level boundary: keep the traceback so unexpected failures are debuggable.
    logger.exception("Exception: %s", e)