Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions socid_extractor/schemes.py
Original file line number Diff line number Diff line change
Expand Up @@ -1664,6 +1664,36 @@
'is_verified': lambda x: x['confirmed'],
}
},
'Weibo API': {
'url_hints': ('weibo.com',),
'flags': ['"ok":1', '"data":{"user"'],
'regex': r'^(.+)$',
'extract_json': True,
'url_mutations': [
{
'from': r'https?://weibo.com/(?P<username>[^/u][^/]*)/?$',
'to': 'https://weibo.com/ajax/profile/info?custom={username}',
},
{
'from': r'https?://weibo.com/u/(?P<uid>\d+)/?$',
'to': 'https://weibo.com/ajax/profile/info?uid={uid}',
},
],
'fields': {
'weibo_id': lambda x: x['data']['user']['idstr'],
'username': lambda x: x['data']['user'].get('domain'),
'fullname': lambda x: x['data']['user']['screen_name'],
'bio': lambda x: x['data']['user'].get('description'),
'image': lambda x: x['data']['user'].get('avatar_hd'),
'gender': lambda x: x['data']['user'].get('gender'),
'location': lambda x: x['data']['user'].get('location'),
'verified': lambda x: x['data']['user'].get('verified'),
'verified_reason': lambda x: x['data']['user'].get('verified_reason'),
'follower_count': lambda x: x['data']['user'].get('followers_count'),
'following_count': lambda x: x['data']['user'].get('friends_count'),
'statuses_count': lambda x: x['data']['user'].get('statuses_count'),
}
},
# TODO
'Weibo': {
'url_hints': ('weibo.com',),
Expand Down
34 changes: 34 additions & 0 deletions tests/test_e2e.py
Original file line number Diff line number Diff line change
Expand Up @@ -534,6 +534,7 @@ def test_blogger():
assert info.get('blog_id') == '9057808199412143402'


@pytest.mark.github_failed
def test_d3():
"""D3.ru"""
info = extract(parse('https://d3.ru/user/nomad62')[0])
Expand Down Expand Up @@ -1192,6 +1193,39 @@ def test_weibo(): # Broken. Parser not handling redirect
assert info.get("fullname") == "郭靜Claire"


@pytest.mark.github_failed
def test_weibo_api():
    """Weibo API: vanity-name profile URL resolved through the mutated API URL."""
    profile_url = 'https://weibo.com/clairekuo'
    candidates = mutate_url(profile_url)
    assert len(candidates) >= 1
    # Only the first mutation result is exercised.
    api_url, extra_headers = candidates[0]

    page = parse(api_url, headers=extra_headers, timeout=10)[0]
    info = extract(page)

    # Fields whose exact value is checked.
    expected_values = {
        'weibo_id': '1733299783',
        'username': 'clairekuo',
        'fullname': '郭靜Claire',
        'gender': 'f',
    }
    for field, value in expected_values.items():
        assert info.get(field) == value

    # Counters change over time, so only their presence is checked.
    for field in ('follower_count', 'following_count'):
        assert field in info


@pytest.mark.github_failed
def test_weibo_api_by_id():
    """Weibo API: numeric `/u/<uid>` profile URL resolved through the mutated API URL."""
    profile_url = 'https://weibo.com/u/6215884155'
    candidates = mutate_url(profile_url)
    assert len(candidates) >= 1
    # Only the first mutation result is exercised.
    api_url, extra_headers = candidates[0]

    page = parse(api_url, headers=extra_headers, timeout=10)[0]
    info = extract(page)

    assert info.get('weibo_id') == '6215884155'
    # The remaining fields are only required to be present.
    for field in ('fullname', 'follower_count'):
        assert field in info


@pytest.mark.skip(reason="broken forever")
def test_icq():
info = extract(parse('https://icq.im/CaZaNoVa163')[0])
Expand Down
56 changes: 56 additions & 0 deletions tests/test_socid_improvements.py
Original file line number Diff line number Diff line change
Expand Up @@ -1081,6 +1081,62 @@ def test_lesswrong_graphql_null_user():
assert not info.get('fullname')


def test_weibo_api_extracts_profile_fields():
    """Check that the Weibo API scheme maps a JSON profile body onto the expected fields."""
    user = {
        "id": 1733299783,
        "idstr": "1733299783",
        "screen_name": "郭靜Claire",
        "profile_image_url": "https://tvax2.sinaimg.cn/crop.0.0.1080.1080.50/67500e47ly8hape352btfj20u00u0mz1.jpg",
        "profile_url": "/u/1733299783",
        "verified": True,
        "verified_type": 0,
        "domain": "clairekuo",
        "avatar_large": "https://tvax2.sinaimg.cn/crop.0.0.1080.1080.180/67500e47ly8hape352btfj20u00u0mz1.jpg",
        "avatar_hd": "https://tvax2.sinaimg.cn/crop.0.0.1080.1080.1024/67500e47ly8hape352btfj20u00u0mz1.jpg",
        "verified_reason": "台湾女歌手",
        "description": "歌手郭静",
        "location": "台湾 台北市",
        "gender": "f",
        "followers_count": 3126727,
        "friends_count": 217,
        "statuses_count": 2248,
    }
    payload = {"ok": 1, "data": {"user": user}}
    # Compact separators: the serialized body contains no whitespace between tokens.
    body = json.dumps(payload, separators=(',', ':'))

    info = extract(body)

    # Every expected value is a string, including the boolean and the counters.
    expected = {
        'weibo_id': '1733299783',
        'username': 'clairekuo',
        'fullname': '郭靜Claire',
        'bio': '歌手郭静',
        'image': 'https://tvax2.sinaimg.cn/crop.0.0.1080.1080.1024/67500e47ly8hape352btfj20u00u0mz1.jpg',
        'gender': 'f',
        'location': '台湾 台北市',
        'verified': 'True',
        'verified_reason': '台湾女歌手',
        'follower_count': '3126727',
        'following_count': '217',
        'statuses_count': '2248',
    }
    for field, value in expected.items():
        assert info.get(field) == value


def test_weibo_api_url_mutations():
    """Check that Weibo profile URLs are rewritten to the matching API endpoints."""
    from socid_extractor.main import mutate_url

    # (profile URL, API URL that must be among the mutation results)
    cases = (
        # Username URL pattern
        (
            'https://weibo.com/clairekuo',
            'https://weibo.com/ajax/profile/info?custom=clairekuo',
        ),
        # User ID URL pattern
        (
            'https://weibo.com/u/6215884155',
            'https://weibo.com/ajax/profile/info?uid=6215884155',
        ),
    )
    for profile_url, api_url in cases:
        # Each result's first element is the mutated URL.
        mutated_urls = [entry[0] for entry in mutate_url(profile_url)]
        assert api_url in mutated_urls


def test_picsart_facebook_uid_from_image():
"""Picsart: extract facebook_uid from graph.facebook.com avatar URL."""
body = json.dumps({
Expand Down
Loading