soxoj · soxoj · Apr 7, 2026 · Apr 7, 2026 · Apr 7, 2026 · Apr 7, 2026
diff --git a/socid_extractor/schemes.py b/socid_extractor/schemes.py
@@ -1239,18 +1239,58 @@
     # unactual
     'Vimeo HTML': {
         'url_hints': ('vimeo.com', 'vimeocdn.com'),
-        'flags': ['https://i.vimeocdn.com/favicon/main-touch'],
-        'regex': r'"app_config":({"user":.+?})},\"coach_notes',
-        'extract_json': True,
-        'fields': {
-            'uid': lambda x: x['user']['id'],
-            'name': lambda x: x['user']['display_name'],
-            'username': lambda x: x['user']['name'],
-            'location': lambda x: x['user']['location'],
-            'created_at': lambda x: x['user']['join_date']['raw'],
-            'account_type': lambda x: x['user']['account_type'],
-            'is_staff': lambda x: x['user']['is_staff'],
-            'links': lambda x: [a['url'] for a in x['user']['links']],
+        'flags': ['ProfilePage', 'vimeo://app.vimeo.com/users/', 'vimeocdn.com'],
+        'regex': r'<script type="application/ld\+json">\s*(\[\{[\s\S]*?\}\])\s*</script>',
+        'extract_json': True,
+        'transforms': [  # the regex captures a JSON array: unwrap its first element and re-serialize it as a string
+            json.loads,
+            lambda x: x[0] if isinstance(x, list) else x,
+            json.dumps,
+        ],
+        'fields': {
+            'uid': lambda x: x['mainEntity'].get('identifier'),
+            'username': lambda x: x['mainEntity'].get('alternateName'),
+            'fullname': lambda x: x['mainEntity'].get('name'),
+            'bio': lambda x: html.unescape(x['mainEntity'].get('description', '') or '') or None,
+            'image': lambda x: x['mainEntity'].get('image') or None,
+            'created_at': lambda x: x.get('dateCreated'),
+            'updated_at': lambda x: x.get('dateModified'),
+            'follower_count': lambda x: (x['mainEntity'].get('interactionStatistic') or {}).get('userInteractionCount'),
+            'videos_count': lambda x: (x['mainEntity'].get('agentInteractionStatistic') or {}).get('userInteractionCount'),
+            'links': lambda x: ', '.join(  # every sameAs entry except https://vimeo.com/ URLs; None when nothing is left
+                link for link in (x['mainEntity'].get('sameAs') or [])
+                if not link.startswith('https://vimeo.com/')
+            ) or None,
+            'twitter_url': lambda x: next(  # x.com is matched on the host's last two labels: a bare 'x.com' substring also hits 500px.com, dropbox.com, ...
+                (link for link in (x['mainEntity'].get('sameAs') or [])
+                 if 'twitter.com' in link or link.lower().split('//', 1)[-1].split('/', 1)[0].rsplit('.', 2)[-2:] == ['x', 'com']),
+                None,
+            ),
+            'instagram_url': lambda x: next(
+                (link for link in (x['mainEntity'].get('sameAs') or [])
+                 if 'instagram.com' in link),
+                None,
+            ),
+            'facebook_url': lambda x: next(
+                (link for link in (x['mainEntity'].get('sameAs') or [])
+                 if 'facebook.com' in link),
+                None,
+            ),
+            'youtube_url': lambda x: next(
+                (link for link in (x['mainEntity'].get('sameAs') or [])
+                 if 'youtube.com' in link),
+                None,
+            ),
+            'tiktok_url': lambda x: next(
+                (link for link in (x['mainEntity'].get('sameAs') or [])
+                 if 'tiktok.com' in link),
+                None,
+            ),
+            'linkedin_url': lambda x: next(
+                (link for link in (x['mainEntity'].get('sameAs') or [])
+                 if 'linkedin.com' in link),
+                None,
+            ),
         }
     },
     'Vimeo GraphQL API': {

diff --git a/tests/test_e2e.py b/tests/test_e2e.py
@@ -656,18 +656,29 @@ def test_eyeem():
     assert info.get('facebook_uid') == '1610716256'
 
 
-@pytest.mark.skip(reason="Broken, now API only: https://api.vimeo.com/users/alexaimephotography")
-def test_vimeo():
-    info = extract(parse('https://vimeo.com/alexaimephotography')[0])
-
-    assert info.get('uid') == '75857717'
-    assert info.get('name') == 'AlexAimePhotography'
-    assert info.get('username') == 'alexaimephotography'
-    assert info.get('location') == 'France'
-    assert info.get('created_at') == '2017-12-06 06:49:28'
-    assert info.get('is_staff') == 'False'
-    assert info.get(
-        'links') == "['https://500px.com/alexaimephotography', 'https://www.flickr.com/photos/photoambiance/', 'https://www.instagram.com/alexaimephotography/', 'https://www.youtube.com/channel/UC4NiYV3Yqih2WHcwKg4uPuQ', 'https://flii.by/alexaimephotography/']"
+def test_vimeo_html_e2e():
+    """Vimeo HTML: e2e check of the profile fields extracted from the vimeo.com/staff page."""
+    info = extract(parse('https://vimeo.com/staff')[0])
+
+    assert info.get('uid') == '152184'
+    assert info.get('username') == 'staff'
+    assert info.get('fullname') == 'Vimeo'
+    assert 'innovative video experience platform' in info.get('bio', '')
+    assert 'i.vimeocdn.com/portrait/' in info.get('image', '')
+    assert info.get('created_at') == '2007-01-18T16:40:11Z'
+    assert info.get('updated_at')
+    assert int(info.get('follower_count', 0)) >= 30000  # lower bound, not an exact value (presumably because the live count changes)
+    assert int(info.get('videos_count', 0)) >= 2000
+    # All 6 social crosslinks
+    assert info.get('twitter_url') == 'https://twitter.com/vimeo'
+    assert info.get('instagram_url') == 'https://www.instagram.com/vimeo/'
+    assert info.get('facebook_url') == 'https://www.facebook.com/Vimeo/'
+    assert info.get('youtube_url') == 'https://www.youtube.com/@vimeo/'
+    assert info.get('linkedin_url') == 'https://www.linkedin.com/company/vimeo/'
+    assert info.get('tiktok_url') == 'https://www.tiktok.com/@vimeo?lang=en'
+    # Combined `links` field excludes self vimeo.com URL
+    assert 'vimeo.com/staff' not in info.get('links', '')
+    assert 'twitter.com/vimeo' in info.get('links', '')
 
 
 @pytest.mark.skip(reason="broken")
@@ -843,17 +854,23 @@ def test_tiktok_hydration_e2e():
     assert 'fullname' in info
 
 
+@pytest.mark.github_failed
 def test_picsart_api_e2e():
     """
     Picsart API
     """
-    info = extract(parse('https://api.picsart.com/users/show/adam.json', timeout=15)[0])
+    URL = 'https://www.picsart.com/u/adam'
+    mutated = mutate_url(URL)  # presumably maps the profile URL to the API request(s) to fetch — confirm against mutate_url
+    assert len(mutated) >= 1
+    url, add_headers = mutated[0]  # each entry unpacks to (url, extra request headers)
+
+    info = extract(parse(url, headers=add_headers, timeout=15)[0])
 
     assert info.get('picsart_username') == 'adam'
-    assert info.get('fullname') == 'Adam'
-    assert info.get('picsart_id') == '184924161000102'
-    assert info.get('is_verified') == 'False'
-    assert int(info.get('follower_count')) >= 0
+    assert info.get('fullname') is not None  # presence/shape checks replace the exact values asserted before
+    assert info.get('picsart_id') is not None
+    assert info.get('is_verified') in ('True', 'False')
+    assert 'follower_count' in info
 
 
 def test_imgur_api_e2e():

diff --git a/tests/test_socid_improvements.py b/tests/test_socid_improvements.py
@@ -1870,3 +1870,102 @@ def test_hive_blog_empty_profile():
     assert info.get('bio') is None
     assert info.get('image') is None
     assert info.get('website') is None
+
+
+def test_vimeo_html_ld_json():
+    """Vimeo HTML: extract profile from ld+json ProfilePage with social crosslinks."""
+    ld_json = json.dumps([{
+        "dateCreated": "2006-12-11T19:57:24Z",
+        "dateModified": "2022-08-01T02:48:55Z",
+        "url": "https://vimeo.com/testuser",
+        "mainEntity": {
+            "@type": "Person",
+            "name": "Test User",
+            "identifier": 12345,  # numeric here; asserted below as the string '12345'
+            "alternateName": "testuser",
+            "interactionStatistic": {
+                "@type": "InteractionCounter",
+                "interactionType": "https://schema.org/FollowAction",
+                "userInteractionCount": 1621,
+            },
+            "agentInteractionStatistic": {
+                "@type": "InteractionCounter",
+                "interactionType": "https://schema.org/WriteAction",
+                "userInteractionCount": 519,
+            },
+            "description": "Bio with &#039;quotes&#039; and entities",  # HTML entities; `bio` is asserted unescaped below
+            "image": "https://i.vimeocdn.com/portrait/12345_640x640",
+            "url": "/testuser",
+            "sameAs": [
+                "https://vimeo.com/testuser",  # self-link; must not appear in `links`
+                "http://testuser.example.com",
+                "http://twitter.com/testuser",
+                "https://www.instagram.com/testuser/",
+                "https://www.facebook.com/testuser/",
+                "https://www.youtube.com/@testuser/",
+                "https://www.tiktok.com/@testuser",
+                "https://www.linkedin.com/in/testuser/",
+            ],
+        },
+        "potentialAction": {
+            "@type": "ViewAction",
+            "target": "vimeo://app.vimeo.com/users/12345",
+        },
+        "@type": "ProfilePage",
+        "@context": "http://schema.org",
+    }])
+    html_page = (
+        '<!DOCTYPE html><html><head>'
+        f'<script type="application/ld+json">{ld_json}</script>'
+        '</head><body></body></html>'
+    )
+    info = extract(html_page)
+    assert info.get('uid') == '12345'
+    assert info.get('username') == 'testuser'
+    assert info.get('fullname') == 'Test User'
+    assert info.get('bio') == "Bio with 'quotes' and entities"
+    assert 'vimeocdn.com' in info.get('image', '')
+    assert info.get('created_at') == '2006-12-11T19:57:24Z'
+    assert info.get('updated_at') == '2022-08-01T02:48:55Z'
+    assert info.get('follower_count') == '1621'  # numeric fixture values are asserted as strings
+    assert info.get('videos_count') == '519'
+    assert info.get('twitter_url') == 'http://twitter.com/testuser'
+    assert info.get('instagram_url') == 'https://www.instagram.com/testuser/'
+    assert info.get('facebook_url') == 'https://www.facebook.com/testuser/'
+    assert info.get('youtube_url') == 'https://www.youtube.com/@testuser/'
+    assert info.get('tiktok_url') == 'https://www.tiktok.com/@testuser'
+    assert info.get('linkedin_url') == 'https://www.linkedin.com/in/testuser/'
+    # vimeo.com self-link should be excluded from `links`
+    assert 'vimeo.com/testuser' not in info.get('links', '')
+    assert 'testuser.example.com' in info.get('links', '')
+
+
+def test_vimeo_html_minimal_profile():
+    """Vimeo HTML: profile with no external links and no description."""
+    ld_json = json.dumps([{
+        "dateCreated": "2020-01-01T00:00:00Z",
+        "dateModified": "2020-01-01T00:00:00Z",
+        "url": "https://vimeo.com/minimal",
+        "mainEntity": {
+            "@type": "Person",
+            "name": "Minimal",
+            "identifier": 99999,
+            "alternateName": "minimal",
+            "image": "https://i.vimeocdn.com/portrait/default",
+            "sameAs": ["https://vimeo.com/minimal"],  # only the vimeo.com self-link, no external profiles
+        },
+        "potentialAction": {"target": "vimeo://app.vimeo.com/users/99999"},
+        "@type": "ProfilePage",
+    }])
+    html_page = (
+        '<!DOCTYPE html><html><head>'
+        f'<script type="application/ld+json">{ld_json}</script>'
+        '</head><body></body></html>'
+    )
+    info = extract(html_page)
+    assert info.get('uid') == '99999'
+    assert info.get('username') == 'minimal'
+    assert info.get('fullname') == 'Minimal'
+    assert info.get('bio') is None  # the fixture has no "description" key
+    assert info.get('twitter_url') is None  # no twitter/x link in sameAs
+    assert info.get('links') is None  # nothing remains once the self-link is filtered out