[extractor] Framework for embed detection (#4307)

This commit is contained in:
pukkandan 2022-08-01 06:52:03 +05:30
parent 47304e07dc
commit 8f97a15d1c
8 changed files with 149 additions and 77 deletions

View file

@ -705,13 +705,13 @@ def sanitize_path(s, force=False):
return os.path.join(*sanitized_path)
def sanitize_url(url):
def sanitize_url(url, *, scheme='http'):
# Prepend a scheme (`http:` by default) to protocol-less URLs to reduce
# the number of unwanted failures due to a missing protocol
if url is None:
return
elif url.startswith('//'):
return 'http:%s' % url
return f'{scheme}:{url}'
# Fix some common typos seen so far
COMMON_TYPOS = (
# https://github.com/ytdl-org/youtube-dl/issues/15649