tags_stream : add author's name

c200bzh · c200bzh · commit 96d85f2c179c · 2025-06-20T13:40:11.000+01:00
diff --git a/source-twitter-fetcher/source_twitter_fetcher/schemas/tags_stream.json b/source-twitter-fetcher/source_twitter_fetcher/schemas/tags_stream.json
@@ -14,6 +14,18 @@
     "author_id": {
       "type": ["null", "string"]
     },
+    "author_username": {
+      "type": ["null", "string"],
+      "description": "The Twitter handle/username of the tweet author (e.g., 'john_doe')"
+    },
+    "author_name": {
+      "type": ["null", "string"],
+      "description": "The display name of the tweet author (e.g., 'John Doe')"
+    },
+    "author_verified": {
+      "type": ["null", "boolean"],
+      "description": "Whether the tweet author is verified"
+    },
     "conversation_id": {
       "type": ["null", "string"]
     },
diff --git a/source-twitter-fetcher/source_twitter_fetcher/tags_stream.py b/source-twitter-fetcher/source_twitter_fetcher/tags_stream.py
@@ -45,6 +45,8 @@ def request_params(
         params = {
             "query": tag,
             "tweet.fields": "text,public_metrics,author_id,referenced_tweets,created_at",
+            "expansions": "author_id",
+            "user.fields": "username,name,verified,public_metrics",
             "max_results": 100
         }
         params.update({"start_time": self.start_time.strftime("%Y-%m-%dT%H:%M:%SZ")})
@@ -59,10 +61,24 @@ def parse_response(
         **kwargs
     ) -> Iterable[Mapping]:
         logger.debug("Full response %s", response.json())
-        if 'data' in response.json():
-            data = response.json()['data']
+        response_data = response.json()
+        
+        # Build a user_id -> user info map for quick lookup: user data is returned separately in the includes.users array, so it must be joined to each tweet manually using author_id as the key.
+        users_map = {}
+        if 'includes' in response_data and 'users' in response_data['includes']:
+            for user in response_data['includes']['users']:
+                users_map[user['id']] = user
+        
+        if 'data' in response_data:
+            data = response_data['data']
             for t in data:
-                # Add the tag that matched this tweet
                 t["matched_tag"] = stream_slice["tag"]
+                
+                if t.get('author_id') and t['author_id'] in users_map:
+                    user_info = users_map[t['author_id']]
+                    t["author_username"] = user_info.get('username')
+                    t["author_name"] = user_info.get('name')
+                    t["author_verified"] = user_info.get('verified')
+                
                 yield t
         time.sleep(2)  # Rate limiting protection