Skip to content

Commit

Permalink
Tests fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
soxoj committed Dec 9, 2021
1 parent a155487 commit 1e533cf
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 10 deletions.
33 changes: 29 additions & 4 deletions socid_extractor/schemes.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,13 +75,14 @@
'following_count': lambda x: x.get('stats', {}).get('subscribersCount'),
}
},
# TODO: rework
'Yandex Market user profile': {
'flags': ['MarketNode', '{"entity":"user"'],
'regex': r'type="application/json">({"widgets":{"@MarketNode/UserReviews".+?)</script>',
'regex': r'>{"widgets":{"@MarketNode/MyArticles/ArticlesGrid.+?"collections":({"publicUser":{"\d+".+?}}})}<',
'extract_json': True,
'transforms': [
json.loads,
lambda x: list(x['collections']['user'].values())[0],
lambda x: list(x['publicUser'].values())[0],
json.dumps,
],
'fields': {
Expand Down Expand Up @@ -440,7 +441,7 @@
'flags': ['OK.startupData'],
'regex': r'path:"/(profile/)?(?P<ok_user_name_id>.+?)",state:".+?friendId=(?P<ok_id>\d+?)"',
},
'Habrahabr': {
'Habrahabr HTML (old)': {
'flags': ['habracdn.net'],
'bs': True,
'fields': {
Expand All @@ -449,6 +450,30 @@
'image': lambda x: 'http:' + x.find('div', {'class': 'user-info__stats'}).find('img').get('src'),
},
},
# Scheme entry for Habr pages that carry the author data as an embedded JSON blob.
# NOTE(review): how 'flags', 'regex', 'extract_json', 'transforms' and 'fields'
# are consumed is decided by the extraction engine, which is not visible here.
'Habrahabr JSON': {
'flags': ['habrastorage.org'],
# Captures the text that starts at {"authorRefs":{ and ends with the closing
# brace immediately preceding ,"viewport (non-greedy match).
'regex': r'({"authorRefs":{.+?}),"viewport',
'extract_json': True,
'transforms': [
# Pipeline: parse the captured text, keep the first value stored under
# 'authorRefs', then serialize that single object back to a JSON string.
# The [0] lookup raises IndexError when 'authorRefs' is empty.
json.loads,
lambda x: list(x['authorRefs'].values())[0],
json.dumps,
],
'fields': {
# These lambdas index the object directly, so a missing key raises KeyError.
'username': lambda x: x['alias'],
'about': lambda x: x['speciality'],
'birthday': lambda x: x['birthday'],
'gender': lambda x: x['gender'],
'rating': lambda x: x['rating'],
'karma': lambda x: x['scoreStats']['score'],
'fullname': lambda x: x['fullname'],
'is_readonly': lambda x: x['isReadonly'],
'location': lambda x: x['location'],
'image': lambda x: x['avatarUrl'],
# The two counters use chained .get() calls and yield None when any level is absent.
# NOTE(review): 'follower_count' reads a 'followStats' key nested inside
# 'followStats', and 'following_count' reads 'followersCount'. Both key names
# look mismatched with the field names - verify against the real payload.
'follower_count': lambda x: x.get('legacy', {}).get('followStats', {}).get('followStats'),
'following_count': lambda x: x.get('legacy', {}).get('followStats', {}).get('followersCount'),
}
},
# outdated
'Twitter HTML': {
'flags': ['abs.twimg.com', 'moreCSSBundles'],
Expand Down Expand Up @@ -886,7 +911,7 @@
},
'TikTok': {
'flags': ['tiktokcdn.com', '__NEXT_DATA__'],
'regex': r'<script id="__NEXT_DATA__" type="application/json" crossorigin="anonymous">(.+?)</script>',
'regex': r'<script id="__NEXT_DATA__"[^>]+>(.+?)</script>',
'extract_json': True,
'transforms': [
json.loads,
Expand Down
16 changes: 10 additions & 6 deletions tests/test_e2e.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,10 +109,14 @@ def test_ok():
def test_habr():
info = extract(parse('https://habr.com/ru/users/m1rko/')[0])

assert info.get('uid') == '1371978'
assert info.get('username') == 'm1rko'
assert info.get('image') == 'http://habrastorage.org/getpro/habr/avatars/4ec/bd0/85d/4ecbd085d692835a931d03174ff19539.png'

assert info.get("username") == "m1rko"
assert info.get("about") == "автор, переводчик, редактор"
assert info.get("gender") == "0"
assert info.get("rating") == "0"
assert info.get("karma") == "1236.5"
assert info.get("fullname") == "Анатолий Ализар"
assert info.get("is_readonly") == "False"
assert info.get("image") == "//habrastorage.org/getpro/habr/avatars/4ec/bd0/85d/4ecbd085d692835a931d03174ff19539.png"

@pytest.mark.github_failed
def test_habr_no_image():
Expand Down Expand Up @@ -380,7 +384,7 @@ def test_behance():
assert 'appreciations' in info


@pytest.mark.github_failed
@pytest.mark.skip(reason="non-actual, 500px requires POST requests for now")
def test_500px():
info = extract(parse('https://api.500px.com/graphql?operationName=ProfileRendererQuery&variables=%7B%22username%22%3A%22the-maksimov%22%7D&extensions=%7B%22persistedQuery%22%3A%7B%22version%22%3A1%2C%22sha256Hash%22%3A%22105058632482dd2786fd5775745908dc928f537b28e28356b076522757d65c19%22%7D%7D')[0])

Expand Down Expand Up @@ -633,7 +637,7 @@ def test_pinterest_api():
assert info.get('is_website_verified') == 'False'
assert info.get('follower_count') == '2'
assert info.get('group_board_count') == '0'
assert info.get('following_count') == '16'
assert 'following_count' in info
assert info.get('board_count') == '11'
assert int(info.get('pin_count')) > 100

Expand Down

0 comments on commit 1e533cf

Please sign in to comment.