Skip to content

Commit 96d85f2

Browse files
committed
tags_stream : add author's name
1 parent 068cfd8 commit 96d85f2

File tree

2 files changed

+31
-3
lines changed

2 files changed

+31
-3
lines changed

source-twitter-fetcher/source_twitter_fetcher/schemas/tags_stream.json

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,18 @@
1414
"author_id": {
1515
"type": ["null", "string"]
1616
},
17+
"author_username": {
18+
"type": ["null", "string"],
19+
"description": "The Twitter handle/username of the tweet author (e.g., 'john_doe')"
20+
},
21+
"author_name": {
22+
"type": ["null", "string"],
23+
"description": "The display name of the tweet author (e.g., 'John Doe')"
24+
},
25+
"author_verified": {
26+
"type": ["null", "boolean"],
27+
"description": "Whether the tweet author is verified"
28+
},
1729
"conversation_id": {
1830
"type": ["null", "string"]
1931
},

source-twitter-fetcher/source_twitter_fetcher/tags_stream.py

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@ def request_params(
4545
params = {
4646
"query": tag,
4747
"tweet.fields": "text,public_metrics,author_id,referenced_tweets,created_at",
48+
"expansions": "author_id",
49+
"user.fields": "username,name,verified,public_metrics",
4850
"max_results": 100
4951
}
5052
params.update({"start_time": self.start_time.strftime("%Y-%m-%dT%H:%M:%SZ")})
@@ -59,10 +61,24 @@ def parse_response(
5961
**kwargs
6062
) -> Iterable[Mapping]:
6163
logger.debug("Full response %s", response.json())
62-
if 'data' in response.json():
63-
data = response.json()['data']
64+
response_data = response.json()
65+
66+
# Map user_id to user info for quick lookup: user data is returned separately in the includes.users array, so it must be joined to each tweet manually using author_id as the key
67+
users_map = {}
68+
if 'includes' in response_data and 'users' in response_data['includes']:
69+
for user in response_data['includes']['users']:
70+
users_map[user['id']] = user
71+
72+
if 'data' in response_data:
73+
data = response_data['data']
6474
for t in data:
65-
# Add the tag that matched this tweet
6675
t["matched_tag"] = stream_slice["tag"]
76+
77+
if t.get('author_id') and t['author_id'] in users_map:
78+
user_info = users_map[t['author_id']]
79+
t["author_username"] = user_info.get('username')
80+
t["author_name"] = user_info.get('name')
81+
t["author_verified"] = user_info.get('verified')
82+
6783
yield t
6884
time.sleep(2) # Rate limiting protection

0 commit comments

Comments
 (0)