[jsinterp] Handle new youtube signature functions

Closes #4635
This commit is contained in:
pukkandan 2022-08-14 04:51:54 +05:30
parent 1cddfdc52b
commit 8f53dc44a0
No known key found for this signature in database
GPG key ID: 7EEE9E1E817D0A39
6 changed files with 287 additions and 125 deletions

View file

@ -150,6 +150,16 @@ MONTH_NAMES = {
'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}
# From https://github.com/python/cpython/blob/3.11/Lib/email/_parseaddr.py#L36-L42
TIMEZONE_NAMES = {
'UT': 0, 'UTC': 0, 'GMT': 0, 'Z': 0,
'AST': -4, 'ADT': -3, # Atlantic (used in Canada)
'EST': -5, 'EDT': -4, # Eastern
'CST': -6, 'CDT': -5, # Central
'MST': -7, 'MDT': -6, # Mountain
'PST': -8, 'PDT': -7 # Pacific
}
# needed for sanitizing filenames in restricted mode
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
@ -1684,7 +1694,11 @@ def extract_timezone(date_str):
$)
''', date_str)
if not m:
timezone = datetime.timedelta()
m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
timezone = TIMEZONE_NAMES.get(m and m.group('tz').strip())
if timezone is not None:
date_str = date_str[:-len(m.group('tz'))]
timezone = datetime.timedelta(hours=timezone or 0)
else:
date_str = date_str[:-len(m.group('tz'))]
if not m.group('sign'):
@ -1746,7 +1760,8 @@ def unified_timestamp(date_str, day_first=True):
if date_str is None:
return None
date_str = re.sub(r'[,|]', '', date_str)
date_str = re.sub(r'\s+', ' ', re.sub(
r'(?i)[,|]|(mon|tues?|wed(nes)?|thu(rs)?|fri|sat(ur)?)(day)?', '', date_str))
pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
timezone, date_str = extract_timezone(date_str)
@ -1768,9 +1783,10 @@ def unified_timestamp(date_str, day_first=True):
with contextlib.suppress(ValueError):
dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
return calendar.timegm(dt.timetuple())
timetuple = email.utils.parsedate_tz(date_str)
if timetuple:
return calendar.timegm(timetuple) + pm_delta * 3600
return calendar.timegm(timetuple) + pm_delta * 3600 - timezone.total_seconds()
def determine_ext(url, default_ext='unknown_video'):
@ -3199,7 +3215,7 @@ def strip_jsonp(code):
r'\g<callback_data>', code)
def js_to_json(code, vars={}):
def js_to_json(code, vars={}, *, strict=False):
# vars is a dict of var, val pairs to substitute
COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
SKIP_RE = fr'\s*(?:{COMMENT_RE})?\s*'
@ -3233,14 +3249,17 @@ def js_to_json(code, vars={}):
if v in vars:
return vars[v]
if strict:
raise ValueError(f'Unknown value: {v}')
return '"%s"' % v
def create_map(mobj):
return json.dumps(dict(json.loads(js_to_json(mobj.group(1) or '[]', vars=vars))))
code = re.sub(r'new Date\((".+")\)', r'\g<1>', code)
code = re.sub(r'new Map\((\[.*?\])?\)', create_map, code)
if not strict:
code = re.sub(r'new Date\((".+")\)', r'\g<1>', code)
return re.sub(r'''(?sx)
"(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|