[extractor/common] Recognize src attribute from HTML5 media elements (#3899)

Authored by: Lesmiscore
This commit is contained in:
Lesmiscore 2022-05-29 22:48:04 +09:00 committed by GitHub
parent ee27297f82
commit 222a230871
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 23 additions and 2 deletions

View file

@@ -3197,7 +3197,8 @@ class InfoExtractor:
return f
return {}
-def _media_formats(src, cur_media_type, type_info={}):
+def _media_formats(src, cur_media_type, type_info=None):
+type_info = type_info or {}
full_url = absolute_url(src)
ext = type_info.get('ext') or determine_ext(full_url)
if ext == 'm3u8':
@@ -3215,6 +3216,7 @@ class InfoExtractor:
formats = [{
'url': full_url,
'vcodec': 'none' if cur_media_type == 'audio' else None,
+'ext': ext,
}]
return is_plain_url, formats
@@ -3241,7 +3243,8 @@ class InfoExtractor:
media_attributes = extract_attributes(media_tag)
src = strip_or_none(media_attributes.get('src'))
if src:
-_, formats = _media_formats(src, media_type)
+f = parse_content_type(media_attributes.get('type'))
+_, formats = _media_formats(src, media_type, f)
media_info['formats'].extend(formats)
media_info['thumbnail'] = absolute_url(media_attributes.get('poster'))
if media_content: