-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathinstagram_post_scraper.py
More file actions
77 lines (59 loc) · 2.74 KB
/
instagram_post_scraper.py
File metadata and controls
77 lines (59 loc) · 2.74 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
"""
Instagram Post Scraper
Extract structured JSON data from any public Instagram post using the
Crawlbase 'instagram-post' scraper — likes, comments, captions, media, and more.
Blog reference: https://crawlbase.com/blog/how-to-scrape-instagram-data-using-python/
"""
from crawlbase import CrawlingAPI
import json
# Set your Crawlbase token
crawlbase_token = 'YOUR_CRAWLBASE_TOKEN'

# URL of the Instagram post to scrape
instagram_post_url = 'https://www.instagram.com/p/B5LQhLiFFCX'

# Options for Crawling API — use the instagram-post scraper
options = {
    'scraper': 'instagram-post',
}

# Create a Crawlbase API instance with your token
api = CrawlingAPI({'token': crawlbase_token})


def _as_dict(value):
    """Return *value* when it is a dict, otherwise an empty dict.

    Guards against JSON ``null`` (or any other unexpected type) in a field
    the summary code reads with ``.get``.
    """
    return value if isinstance(value, dict) else {}


def _as_list(value):
    """Return *value* when it is a list, otherwise an empty list."""
    return value if isinstance(value, list) else []


def _format_count(value):
    """Render a counter with thousands separators.

    Non-numeric values are printed as-is (``None`` becomes ``'N/A'``) instead
    of raising from the ``:,`` format specifier.
    """
    if isinstance(value, (int, float)):
        return f"{value:,}"
    return 'N/A' if value is None else str(value)


def _parse_body(body):
    """Decode the response body into a dict.

    Accepts text or bytes containing JSON, or an already-decoded dict.

    Raises:
        ValueError: if the body is not valid JSON (``json.JSONDecodeError`` and
            ``UnicodeDecodeError`` are both subclasses) or does not decode to
            a JSON object.
    """
    # json.loads accepts bytes/bytearray as well as str; checking only for str
    # would leave a bytes body undecoded and break every later ``.get`` call.
    if isinstance(body, (str, bytes, bytearray)):
        body = json.loads(body)
    if not isinstance(body, dict):
        raise ValueError(f"expected a JSON object, got {type(body).__name__}")
    return body


def _print_post_summary(data):
    """Print the key fields of a scraped post held in the dict *data*."""
    # NOTE(review): the field names below are taken as-is from the original
    # script; confirm against a live response that they sit at the top level
    # of the payload rather than under a nested 'body' key.
    print("=== Post Details ===")
    posted_by = _as_dict(data.get('postedBy'))
    print(f"Posted by : @{posted_by.get('accountUserName', 'N/A')}")
    print(f"Profile : {posted_by.get('accountLink', 'N/A')}")

    location = _as_dict(data.get('postLocation'))
    if location:
        print(f"Location : {location.get('locationName', 'N/A')}")

    caption = _as_dict(data.get('caption'))
    print(f"\nCaption : {caption.get('text', 'N/A')}")
    tags = _as_list(caption.get('tags'))
    if tags:
        hashtags = [
            str(tag.get('hashtag'))
            for tag in tags
            if isinstance(tag, dict) and tag.get('hashtag')
        ]
        print(f"Hashtags : {', '.join(hashtags)}")

    print(f"\nLikes : {_format_count(data.get('likesCount', 0))}")
    print(f"Comments : {_format_count(data.get('repliesCount', 0))}")
    print(f"Date : {data.get('dateTime', 'N/A')}")

    media = _as_dict(data.get('media'))
    print(f"\nImages : {len(_as_list(media.get('images')))}")
    print(f"Videos : {len(_as_list(media.get('videos')))}")

    replies = _as_list(data.get('replies'))
    if replies:
        print(f"\n=== Top Comments ({len(replies)} total) ===")
        # Only the first three replies are shown, each truncated to 80 chars.
        for reply in replies[:3]:
            reply = _as_dict(reply)
            text = reply.get('text') or ''
            print(f" @{reply.get('accountUserName')}: {str(text)[:80]}")


try:
    # Send a GET request to crawl the URL with options
    response = api.get(instagram_post_url, options=options)
except Exception as e:
    # Boundary handler: the client's exception types are not visible here, so
    # anything raised by the request itself is reported rather than propagated.
    print(f"API request error: {str(e)}")
else:
    # NOTE(review): the Crawlbase Python client appears to expose the HTTP
    # status as 'status_code' (per its README); 'statusCode' is kept as a
    # fallback so either spelling works — confirm against the installed version.
    status_code = response.get('status_code', response.get('statusCode', 0))
    if status_code == 200:
        try:
            data = _parse_body(response.get('body', {}))
        except ValueError as e:
            print(f"Could not parse response body: {e}")
        else:
            # Display key post information
            _print_post_summary(data)
            # Save full data to JSON file
            try:
                with open('post_data.json', 'w', encoding='utf-8') as f:
                    json.dump(data, f, indent=2, ensure_ascii=False)
            except OSError as e:
                print(f"Could not write post_data.json: {e}")
            else:
                print("\n✓ Full post data saved to post_data.json")
    else:
        print(f"Request failed with status code: {status_code}")