Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 17 additions & 1 deletion METHODS.md
Original file line number Diff line number Diff line change
Expand Up @@ -107,5 +107,21 @@
102 | UnstoppableDomains | | |
103 | memory.lol | [memory_lol](https://github.com/soxoj/socid-extractor/search?q=test_memory_lol) | |
104 | Duolingo API | [duolingo_api](https://github.com/soxoj/socid-extractor/search?q=test_duolingo_api) | |
105 | TikTok | [tiktok_hydration](https://github.com/soxoj/socid-extractor/search?q=test_tiktok_hydration) | |
106 | TikTok (legacy SIGI_STATE) | | |
107 | Picsart API | [picsart_api](https://github.com/soxoj/socid-extractor/search?q=test_picsart_api) | |
108 | TwitchTracker | [twitchtracker](https://github.com/soxoj/socid-extractor/search?q=test_twitchtracker) | |
109 | Chess.com API | [chess_com](https://github.com/soxoj/socid-extractor/search?q=test_chess_com) | |
110 | Roblox user API | [roblox_user_api](https://github.com/soxoj/socid-extractor/search?q=test_roblox_user_api) | |
111 | Roblox username lookup API | [roblox_username_lookup](https://github.com/soxoj/socid-extractor/search?q=test_roblox_username_lookup) | |
112 | MyAnimeList profile | [myanimelist](https://github.com/soxoj/socid-extractor/search?q=test_myanimelist) | |
113 | XVideos profile | [xvideos](https://github.com/soxoj/socid-extractor/search?q=test_xvideos) | |
114 | lnk.bio | [lnk_bio](https://github.com/soxoj/socid-extractor/search?q=test_lnk_bio) | |
115 | Fandom MediaWiki API | [fandom_mediawiki](https://github.com/soxoj/socid-extractor/search?q=test_fandom_mediawiki) | |
116 | Substack public profile API | [substack](https://github.com/soxoj/socid-extractor/search?q=test_substack) | |
117 | hashnode GraphQL API | [hashnode](https://github.com/soxoj/socid-extractor/search?q=test_hashnode) | |
118 | Rarible API | [rarible](https://github.com/soxoj/socid-extractor/search?q=test_rarible) | |
119 | CSSBattle | [cssbattle](https://github.com/soxoj/socid-extractor/search?q=test_cssbattle) | |
120 | Max (max.ru) profile | [max_ru](https://github.com/soxoj/socid-extractor/search?q=test_max_ru) | |

The table has been updated at 2026-03-25
11 changes: 11 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,17 @@ The latest development version can be installed directly from GitHub:
- Tumblr
- TikTok
- GitHub
- Chess.com
- Roblox
- MyAnimeList
- Substack
- Hashnode
- Fandom wikis (MediaWiki API)
- Rarible
- CSSBattle
- Max (max.ru)
- TwitchTracker
- lnk.bio

...and many others.

Expand Down
79 changes: 79 additions & 0 deletions socid_extractor/schemes.py
Original file line number Diff line number Diff line change
Expand Up @@ -2130,5 +2130,84 @@
'links': lambda x: x.get('links') or x.get('socialLinks'),
},
},
    # Fandom wiki user lookup via the standard MediaWiki action API
    # (action=query&list=users). The whole response body is one JSON object,
    # captured by the permissive ^({...})$ regex and parsed as JSON.
    'Fandom MediaWiki API': {
        'url_hints': ('fandom.com',),
        # All three markers appear in a list=users JSON response.
        'flags': ['"batchcomplete"', '"query"', '"users"'],
        'regex': r'^(\{[\s\S]*\})$',
        'extract_json': True,
        'fields': {
            # query.users is a one-element list for a single-user lookup;
            # a nonexistent user carries a "missing" marker and no userid,
            # so uid comes back empty while the echoed name is still present.
            'uid': lambda x: x.get('query', {}).get('users', [{}])[0].get('userid'),
            'username': lambda x: x.get('query', {}).get('users', [{}])[0].get('name'),
        },
        # Rewrite a human-facing User: page URL into the API endpoint
        # that returns the JSON consumed above.
        'url_mutations': [{
            'from': r'https?://(?P<wiki>[^/]+)\.fandom\.com/wiki/User:(?P<username>[^/?#]+)',
            'to': 'https://{wiki}.fandom.com/api.php?action=query&list=users&ususers={username}&format=json',
        }],
    },
    # Substack's public (unauthenticated) profile endpoint; the response is
    # a flat JSON object describing the account.
    'Substack public profile API': {
        'url_hints': ('substack.com',),
        # "profile_set_up_at" is specific enough to distinguish this payload.
        'flags': ['"handle"', '"profile_set_up_at"'],
        'regex': r'^(\{[\s\S]*\})$',
        'extract_json': True,
        'fields': {
            'uid': lambda x: x.get('id'),
            'username': lambda x: x.get('handle'),
            'fullname': lambda x: x.get('name'),
            'bio': lambda x: x.get('bio'),
            'image': lambda x: x.get('photo_url'),
        },
        # NOTE(review): only substack.com/@handle profile URLs are rewritten;
        # <name>.substack.com publication subdomains are not covered here —
        # confirm whether that is intentional.
        'url_mutations': [{
            'from': r'https?://substack\.com/@(?P<username>[^/?#]+)',
            'to': 'https://substack.com/api/v1/user/{username}/public_profile',
        }],
    },
    # Hashnode public GraphQL endpoint; the profile URL is rewritten into a
    # GET query (URL-encoded) asking for the user's name and username.
    'hashnode GraphQL API': {
        'url_hints': ('hashnode.com', 'gql.hashnode.com'),
        'flags': ['"data"', '"user"'],
        'regex': r'^(\{[\s\S]*\})$',
        'extract_json': True,
        'fields': {
            # GraphQL returns {"data": {"user": null}} for an unclaimed
            # username; the explicit truthiness guard prevents calling
            # .get() on None in that case.
            'username': lambda x: x.get('data', {}).get('user', {}).get('username') if x.get('data', {}).get('user') else None,
            'fullname': lambda x: x.get('data', {}).get('user', {}).get('name') if x.get('data', {}).get('user') else None,
        },
        # The 'to' template is the percent-encoded query
        # {user(username: "<name>") { name username }}.
        'url_mutations': [{
            'from': r'https?://hashnode\.com/@(?P<username>[^/?#]+)',
            'to': 'https://gql.hashnode.com?query=%7Buser(username%3A%20%22{username}%22)%20%7B%20name%20username%20%7D%7D',
        }],
    },
    # Rarible marketplace "urls" endpoint: resolves a vanity profile slug to
    # the owning wallet address and related metadata.
    'Rarible API': {
        'url_hints': ('rarible.com',),
        'flags': ['"createDate"', '"owner"', '"ref"'],
        'regex': r'^(\{[\s\S]*\})$',
        'extract_json': True,
        'fields': {
            # "id" here is the vanity slug, not a numeric user id.
            'rarible_id': lambda x: x.get('id'),
            # "owner"/"ref" are hex wallet addresses in observed responses.
            'rarible_owner': lambda x: x.get('owner'),
            'rarible_ref': lambda x: x.get('ref'),
            'rarible_type': lambda x: x.get('type'),
            'created_at': lambda x: x.get('createDate'),
        },
        # The $ anchor restricts the rewrite to single-segment profile URLs
        # (rarible.com/<slug>), avoiding item/collection paths.
        'url_mutations': [{
            'from': r'https?://rarible\.com/(?P<username>[^/?#]+)$',
            'to': 'https://rarible.com/marketplace/api/v4/urls/{username}',
        }],
    },
    # CSSBattle player pages are a Next.js app: player stats live in the
    # embedded __NEXT_DATA__ JSON blob, which the regex captures from its
    # <script> tag (non-greedy, so it stops at the closing tag).
    'CSSBattle': {
        'url_hints': ('cssbattle.dev',),
        'flags': ['__NEXT_DATA__', 'cssbattle.dev'],
        'regex': r'<script id="__NEXT_DATA__" type="application/json">([\s\S]+?)</script>',
        'extract_json': True,
        'fields': {
            # Drill into props.pageProps.player with empty-dict fallbacks.
            # NOTE(review): if "player" is present but null these lambdas
            # would raise — assumed the extractor tolerates/ignores that;
            # confirm against non-player cssbattle.dev pages.
            'cssbattle_id': lambda x: x.get('props', {}).get('pageProps', {}).get('player', {}).get('id'),
            'cssbattle_username': lambda x: x.get('props', {}).get('pageProps', {}).get('player', {}).get('username'),
            'cssbattle_games_played': lambda x: x.get('props', {}).get('pageProps', {}).get('player', {}).get('gamesPlayed'),
            'cssbattle_score': lambda x: x.get('props', {}).get('pageProps', {}).get('player', {}).get('score'),
        },
    },
    # Max (max.ru) channel pages embed an unquoted-key JS object in their
    # SvelteKit hydration script; it is not valid JSON, so the channel
    # fields are pulled straight out with named regex groups instead of
    # extract_json. Group names become the output field names.
    'Max (max.ru) profile': {
        'url_hints': ('max.ru',),
        'flags': ['channel:{title:"'],
        # Matches channel:{title:"...",description:"...",icon:"...",participantsCount:N}
        # exactly in that key order; presumably the page always serializes
        # the keys in this order — verify against live pages.
        'regex': r'channel:\{title:"(?P<max_title>[^"]*)",description:"(?P<max_description>[^"]*)",icon:"(?P<max_icon>[^"]*)",participantsCount:(?P<max_participants_count>\d+)\}',
    },
}

139 changes: 139 additions & 0 deletions tests/test_socid_improvements.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,3 +270,142 @@ def test_lnk_bio_next_data_fixture():
assert info.get('username') == 'fixture'
assert info.get('fullname') == 'Fixture User'
assert 'example.org' in info.get('links', '')


def test_fandom_mediawiki_api_json():
    """Fandom MediaWiki API: a list=users response yields userid and canonical name."""
    payload = {
        "batchcomplete": "",
        "query": {"users": [{"userid": 22693, "name": "Red"}]},
    }
    info = extract(json.dumps(payload))
    assert info.get('uid') == '22693'
    assert info.get('username') == 'Red'


def test_fandom_mediawiki_api_missing_user():
    """Fandom MediaWiki API: a missing-user record still matches but carries no uid."""
    payload = {
        "batchcomplete": "",
        "query": {"users": [{"name": "NonexistentUser12345", "missing": ""}]},
    }
    info = extract(json.dumps(payload))
    # The "missing" marker means the API returned no userid for this name.
    assert info.get('username') == 'NonexistentUser12345'
    assert not info.get('uid')


def test_substack_public_profile_api_json():
    """Substack public profile API: all user fields are extracted from the JSON body."""
    profile = {
        "id": 188506911,
        "name": "Philip",
        "handle": "user23",
        "photo_url": "https://substack-post-media.s3.amazonaws.com/photo.jpg",
        "bio": "Been Internettin' since 1997",
        "profile_set_up_at": "2023-12-11T03:04:51.141Z",
    }
    info = extract(json.dumps(profile))
    assert info.get('uid') == '188506911'
    assert info.get('username') == 'user23'
    assert info.get('fullname') == 'Philip'
    assert info.get('bio') == "Been Internettin' since 1997"
    assert 'substack-post-media' in info.get('image', '')


def test_hashnode_graphql_api_json():
    """hashnode GraphQL API: username and display name come out of the data.user node."""
    response = {
        "data": {
            "user": {"name": "Melwin D'Almeida", "username": "melwinalm"},
        },
    }
    info = extract(json.dumps(response))
    assert info.get('username') == 'melwinalm'
    assert info.get('fullname') == "Melwin D'Almeida"


def test_hashnode_graphql_api_null_user():
    """hashnode GraphQL API: a null user (unclaimed handle) must not produce fields."""
    response = {"data": {"user": None}}
    info = extract(json.dumps(response))
    # The scheme's lambdas guard against data.user being null.
    assert not info.get('username')
    assert not info.get('fullname')


def test_rarible_api_json():
    """Rarible API: vanity-slug resolution exposes owner/ref addresses and metadata."""
    response = {
        "createDate": "2020-07-21T15:18:51.758+00:00",
        "id": "blue",
        "owner": "0x0000000000000000000000000000000000000000",
        "ref": "0x65d472172e4933aa4ddb995cf4ca8bef72a46576",
        "type": "USER",
        "version": 0,
    }
    info = extract(json.dumps(response))
    assert info.get('rarible_id') == 'blue'
    assert info.get('rarible_owner') == '0x0000000000000000000000000000000000000000'
    assert info.get('rarible_ref') == '0x65d472172e4933aa4ddb995cf4ca8bef72a46576'
    assert info.get('rarible_type') == 'USER'
    assert info.get('created_at') == '2020-07-21T15:18:51.758+00:00'


def test_cssbattle_next_data_fixture():
    """CSSBattle: player stats are read out of the embedded __NEXT_DATA__ JSON."""
    player = {
        "id": "8wBrf63WLOOv8JuCeknfYk7t94B3",
        "username": "beo",
        "gamesPlayed": 55,
        "score": 1234.56,
    }
    next_data = {"props": {"pageProps": {"player": player}}}
    html = ''.join([
        '<!DOCTYPE html><html><head><title>CSSBattle</title></head><body>',
        '<link rel="canonical" href="https://cssbattle.dev/player/beo" />',
        '<script id="__NEXT_DATA__" type="application/json">',
        json.dumps(next_data),
        '</script>cssbattle.dev footer</body></html>',
    ])
    info = extract(html)
    assert info.get('cssbattle_id') == '8wBrf63WLOOv8JuCeknfYk7t94B3'
    assert info.get('cssbattle_username') == 'beo'
    assert info.get('cssbattle_games_played') == '55'
    assert info.get('cssbattle_score') == '1234.56'


def test_max_ru_sveltekit_profile():
    """Max (max.ru): channel fields are parsed from the SvelteKit hydration JS object."""
    hydration = (
        '__sveltekit_start({data:[null,{type:"data",data:'
        '{channel:{title:"Ирина Волк",description:"Канал генерал-лейтенанта",'
        'icon:"https://i.oneme.ru/i?r=abc123",participantsCount:15599}}'
        ',uses:{url:1}},null]})'
    )
    html = f'<!DOCTYPE html><html><head></head><body><script>{hydration}</script></body></html>'
    info = extract(html)
    assert info.get('max_title') == 'Ирина Волк'
    assert info.get('max_description') == 'Канал генерал-лейтенанта'
    assert 'oneme.ru' in info.get('max_icon', '')
    assert info.get('max_participants_count') == '15599'
Loading