[extractor] Extract thumbnails from JSON-LD (#2195)

Authored by: nixxo
This commit is contained in:
nixxo 2022-01-01 20:50:27 +01:00 committed by GitHub
parent 767f999b53
commit 7592749cbe
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 28 additions and 1 deletions

View file

@ -208,6 +208,32 @@ class TestInfoExtractor(unittest.TestCase):
},
{'expected_type': 'NewsArticle'},
),
(
# test multiple thumbnails in a list
r'''
<script type="application/ld+json">
{"@context":"https://schema.org",
"@type":"VideoObject",
"thumbnailUrl":["https://www.rainews.it/cropgd/640x360/dl/img/2021/12/30/1640886376927_GettyImages.jpg"]}
</script>''',
{
'thumbnails': [{'url': 'https://www.rainews.it/cropgd/640x360/dl/img/2021/12/30/1640886376927_GettyImages.jpg'}],
},
{},
),
(
# test single thumbnail
r'''
<script type="application/ld+json">
{"@context":"https://schema.org",
"@type":"VideoObject",
"thumbnailUrl":"https://www.rainews.it/cropgd/640x360/dl/img/2021/12/30/1640886376927_GettyImages.jpg"}
</script>''',
{
'thumbnails': [{'url': 'https://www.rainews.it/cropgd/640x360/dl/img/2021/12/30/1640886376927_GettyImages.jpg'}],
},
{},
)
]
for html, expected_dict, search_json_ld_kwargs in _TESTS:
expect_dict(