diff --git a/METHODS.md b/METHODS.md index 23f7d25..beb8ef0 100644 --- a/METHODS.md +++ b/METHODS.md @@ -107,5 +107,21 @@ 102 | UnstoppableDomains | | | 103 | memory.lol | [memory_lol](https://github.com/soxoj/socid-extractor/search?q=test_memory_lol) | | 104 | Duolingo API | [duolingo_api](https://github.com/soxoj/socid-extractor/search?q=test_duolingo_api) | | +105 | TikTok | [tiktok_hydration](https://github.com/soxoj/socid-extractor/search?q=test_tiktok_hydration) | | +106 | TikTok (legacy SIGI_STATE) | | | +107 | Picsart API | [picsart_api](https://github.com/soxoj/socid-extractor/search?q=test_picsart_api) | | +108 | TwitchTracker | [twitchtracker](https://github.com/soxoj/socid-extractor/search?q=test_twitchtracker) | | +109 | Chess.com API | [chess_com](https://github.com/soxoj/socid-extractor/search?q=test_chess_com) | | +110 | Roblox user API | [roblox_user_api](https://github.com/soxoj/socid-extractor/search?q=test_roblox_user_api) | | +111 | Roblox username lookup API | [roblox_username_lookup](https://github.com/soxoj/socid-extractor/search?q=test_roblox_username_lookup) | | +112 | MyAnimeList profile | [myanimelist](https://github.com/soxoj/socid-extractor/search?q=test_myanimelist) | | +113 | XVideos profile | [xvideos](https://github.com/soxoj/socid-extractor/search?q=test_xvideos) | | +114 | lnk.bio | [lnk_bio](https://github.com/soxoj/socid-extractor/search?q=test_lnk_bio) | | +115 | Fandom MediaWiki API | [fandom_mediawiki](https://github.com/soxoj/socid-extractor/search?q=test_fandom_mediawiki) | | +116 | Substack public profile API | [substack](https://github.com/soxoj/socid-extractor/search?q=test_substack) | | +117 | hashnode GraphQL API | [hashnode](https://github.com/soxoj/socid-extractor/search?q=test_hashnode) | | +118 | Rarible API | [rarible](https://github.com/soxoj/socid-extractor/search?q=test_rarible) | | +119 | CSSBattle | [cssbattle](https://github.com/soxoj/socid-extractor/search?q=test_cssbattle) | | +120 | Max (max.ru) profile 
| [max_ru](https://github.com/soxoj/socid-extractor/search?q=test_max_ru) | | -The table has been updated at 2025-11-02 15:04:53.659694 UTC +The table has been updated at 2026-03-25 diff --git a/README.md b/README.md index d1a83c2..09aadbc 100644 --- a/README.md +++ b/README.md @@ -60,6 +60,17 @@ The latest development version can be installed directly from GitHub: - Tumblr - TikTok - GitHub +- Chess.com +- Roblox +- MyAnimeList +- Substack +- Hashnode +- Fandom wikis (MediaWiki API) +- Rarible +- CSSBattle +- Max (max.ru) +- TwitchTracker +- lnk.bio ...and many others. diff --git a/socid_extractor/schemes.py b/socid_extractor/schemes.py index 7225278..5bf39ae 100644 --- a/socid_extractor/schemes.py +++ b/socid_extractor/schemes.py @@ -2130,5 +2130,84 @@ 'links': lambda x: x.get('links') or x.get('socialLinks'), }, }, + 'Fandom MediaWiki API': { + 'url_hints': ('fandom.com',), + 'flags': ['"batchcomplete"', '"query"', '"users"'], + 'regex': r'^(\{[\s\S]*\})$', + 'extract_json': True, + 'fields': { + 'uid': lambda x: x.get('query', {}).get('users', [{}])[0].get('userid'), + 'username': lambda x: x.get('query', {}).get('users', [{}])[0].get('name'), + }, + 'url_mutations': [{ + 'from': r'https?://(?P<wiki>[^/]+)\.fandom\.com/wiki/User:(?P<username>[^/?#]+)', + 'to': 'https://{wiki}.fandom.com/api.php?action=query&list=users&ususers={username}&format=json', + }], + }, + 'Substack public profile API': { + 'url_hints': ('substack.com',), + 'flags': ['"handle"', '"profile_set_up_at"'], + 'regex': r'^(\{[\s\S]*\})$', + 'extract_json': True, + 'fields': { + 'uid': lambda x: x.get('id'), + 'username': lambda x: x.get('handle'), + 'fullname': lambda x: x.get('name'), + 'bio': lambda x: x.get('bio'), + 'image': lambda x: x.get('photo_url'), + }, + 'url_mutations': [{ + 'from': r'https?://substack\.com/@(?P<username>[^/?#]+)', + 'to': 'https://substack.com/api/v1/user/{username}/public_profile', + }], + }, + 'hashnode GraphQL API': { + 'url_hints': ('hashnode.com', 'gql.hashnode.com'), + 'flags':
['"data"', '"user"'], + 'regex': r'^(\{[\s\S]*\})$', + 'extract_json': True, + 'fields': { + 'username': lambda x: x.get('data', {}).get('user', {}).get('username') if x.get('data', {}).get('user') else None, + 'fullname': lambda x: x.get('data', {}).get('user', {}).get('name') if x.get('data', {}).get('user') else None, + }, + 'url_mutations': [{ + 'from': r'https?://hashnode\.com/@(?P<username>[^/?#]+)', + 'to': 'https://gql.hashnode.com?query=%7Buser(username%3A%20%22{username}%22)%20%7B%20name%20username%20%7D%7D', + }], + }, + 'Rarible API': { + 'url_hints': ('rarible.com',), + 'flags': ['"createDate"', '"owner"', '"ref"'], + 'regex': r'^(\{[\s\S]*\})$', + 'extract_json': True, + 'fields': { + 'rarible_id': lambda x: x.get('id'), + 'rarible_owner': lambda x: x.get('owner'), + 'rarible_ref': lambda x: x.get('ref'), + 'rarible_type': lambda x: x.get('type'), + 'created_at': lambda x: x.get('createDate'), + }, + 'url_mutations': [{ + 'from': r'https?://rarible\.com/(?P<username>[^/?#]+)$', + 'to': 'https://rarible.com/marketplace/api/v4/urls/{username}', + }], + }, + 'CSSBattle': { + 'url_hints': ('cssbattle.dev',), + 'flags': ['__NEXT_DATA__', 'cssbattle.dev'], + 'regex': r'<script id="__NEXT_DATA__" type="application/json">(.+?)</script>', + 'extract_json': True, + 'fields': { + 'cssbattle_id': lambda x: x.get('props', {}).get('pageProps', {}).get('player', {}).get('id'), + 'cssbattle_username': lambda x: x.get('props', {}).get('pageProps', {}).get('player', {}).get('username'), + 'cssbattle_games_played': lambda x: x.get('props', {}).get('pageProps', {}).get('player', {}).get('gamesPlayed'), + 'cssbattle_score': lambda x: x.get('props', {}).get('pageProps', {}).get('player', {}).get('score'), + }, + }, + 'Max (max.ru) profile': { + 'url_hints': ('max.ru',), + 'flags': ['channel:{title:"'], + 'regex': r'channel:\{title:"(?P<max_title>[^"]*)",description:"(?P<max_description>[^"]*)",icon:"(?P<max_icon>[^"]*)",participantsCount:(?P<max_participants_count>\d+)\}', + }, } diff --git a/tests/test_socid_improvements.py b/tests/test_socid_improvements.py index fc9f9bc..6810911 100644 ---
a/tests/test_socid_improvements.py +++ b/tests/test_socid_improvements.py @@ -270,3 +270,142 @@ def test_lnk_bio_next_data_fixture(): assert info.get('username') == 'fixture' assert info.get('fullname') == 'Fixture User' assert 'example.org' in info.get('links', '') + + +def test_fandom_mediawiki_api_json(): + """Fandom MediaWiki API: extract userid and canonical username from user query response.""" + body = json.dumps({ + "batchcomplete": "", + "query": { + "users": [ + {"userid": 22693, "name": "Red"} + ] + } + }) + info = extract(body) + assert info.get('uid') == '22693' + assert info.get('username') == 'Red' + + +def test_fandom_mediawiki_api_missing_user(): + """Fandom MediaWiki API: missing user has no userid — scheme should still match but yield empty uid.""" + body = json.dumps({ + "batchcomplete": "", + "query": { + "users": [ + {"name": "NonexistentUser12345", "missing": ""} + ] + } + }) + info = extract(body) + # missing user has no userid → uid should be absent or empty + assert info.get('username') == 'NonexistentUser12345' + assert not info.get('uid') + + +def test_substack_public_profile_api_json(): + """Substack public profile API: extract user fields from JSON response.""" + body = json.dumps({ + "id": 188506911, + "name": "Philip", + "handle": "user23", + "photo_url": "https://substack-post-media.s3.amazonaws.com/photo.jpg", + "bio": "Been Internettin' since 1997", + "profile_set_up_at": "2023-12-11T03:04:51.141Z", + }) + info = extract(body) + assert info.get('uid') == '188506911' + assert info.get('username') == 'user23' + assert info.get('fullname') == 'Philip' + assert info.get('bio') == "Been Internettin' since 1997" + assert 'substack-post-media' in info.get('image', '') + + +def test_hashnode_graphql_api_json(): + """hashnode GraphQL API: extract username and fullname from GraphQL response.""" + body = json.dumps({ + "data": { + "user": { + "name": "Melwin D'Almeida", + "username": "melwinalm" + } + } + }) + info = extract(body) + assert 
info.get('username') == 'melwinalm' + assert info.get('fullname') == "Melwin D'Almeida" + + +def test_hashnode_graphql_api_null_user(): + """hashnode GraphQL API: null user (unclaimed) should yield empty result.""" + body = json.dumps({ + "data": { + "user": None + } + }) + info = extract(body) + assert not info.get('username') + assert not info.get('fullname') + + +def test_rarible_api_json(): + """Rarible API: extract user ownership info from marketplace API response.""" + body = json.dumps({ + "createDate": "2020-07-21T15:18:51.758+00:00", + "id": "blue", + "owner": "0x0000000000000000000000000000000000000000", + "ref": "0x65d472172e4933aa4ddb995cf4ca8bef72a46576", + "type": "USER", + "version": 0, + }) + info = extract(body) + assert info.get('rarible_id') == 'blue' + assert info.get('rarible_owner') == '0x0000000000000000000000000000000000000000' + assert info.get('rarible_ref') == '0x65d472172e4933aa4ddb995cf4ca8bef72a46576' + assert info.get('rarible_type') == 'USER' + assert info.get('created_at') == '2020-07-21T15:18:51.758+00:00' + + +def test_cssbattle_next_data_fixture(): + """CSSBattle: extract player stats from __NEXT_DATA__ embedded JSON.""" + next_data = { + "props": { + "pageProps": { + "player": { + "id": "8wBrf63WLOOv8JuCeknfYk7t94B3", + "username": "beo", + "gamesPlayed": 55, + "score": 1234.56, + } + } + } + } + html = ( + '<html><head><title>CSSBattle</title></head><body>' + '<script id="__NEXT_DATA__" type="application/json">' + json.dumps(next_data) + '</script>' + '<footer>cssbattle.dev footer</footer></body></html>' + ) + info = extract(html) + assert info.get('cssbattle_id') == '8wBrf63WLOOv8JuCeknfYk7t94B3' + assert info.get('cssbattle_username') == 'beo' + assert info.get('cssbattle_games_played') == '55' + assert info.get('cssbattle_score') == '1234.56' + + +def test_max_ru_sveltekit_profile(): + """Max (max.ru): extract channel info from SvelteKit hydration JS object.""" + html = ( + '<html><head></head><body>' + '<script>const data = [{channel:{title:"Ирина Волк",description:"Канал генерал-лейтенанта",icon:"https://i.oneme.ru/i?r=fixture",participantsCount:15599}}];</script>' + '</body></html>' + ) + info = extract(html) + assert info.get('max_title') == 'Ирина Волк' + assert info.get('max_description') == 'Канал генерал-лейтенанта' + assert 'oneme.ru' in info.get('max_icon', '') + assert
info.get('max_participants_count') == '15599'