mirror of
https://github.com/LucBerge/yt-dlp.git
synced 2025-03-17 19:57:52 +03:00
Allow users to specify encoding in each config files (#4357)
Authored by: Lesmiscore
This commit is contained in:
parent
49afc1d84a
commit
a904a7f8c6
3 changed files with 89 additions and 10 deletions
|
@ -3485,17 +3485,19 @@ def age_restricted(content_limit, age_limit):
|
|||
return age_limit < content_limit
|
||||
|
||||
|
||||
BOMS = [
|
||||
(b'\xef\xbb\xbf', 'utf-8'),
|
||||
(b'\x00\x00\xfe\xff', 'utf-32-be'),
|
||||
(b'\xff\xfe\x00\x00', 'utf-32-le'),
|
||||
(b'\xff\xfe', 'utf-16-le'),
|
||||
(b'\xfe\xff', 'utf-16-be'),
|
||||
]
|
||||
""" List of known byte-order-marks (BOM) """
|
||||
|
||||
|
||||
def is_html(first_bytes):
|
||||
""" Detect whether a file contains HTML by examining its first bytes. """
|
||||
|
||||
BOMS = [
|
||||
(b'\xef\xbb\xbf', 'utf-8'),
|
||||
(b'\x00\x00\xfe\xff', 'utf-32-be'),
|
||||
(b'\xff\xfe\x00\x00', 'utf-32-le'),
|
||||
(b'\xff\xfe', 'utf-16-le'),
|
||||
(b'\xfe\xff', 'utf-16-be'),
|
||||
]
|
||||
|
||||
encoding = 'utf-8'
|
||||
for bom, enc in BOMS:
|
||||
while first_bytes.startswith(bom):
|
||||
|
@ -5394,6 +5396,41 @@ def read_stdin(what):
|
|||
return sys.stdin
|
||||
|
||||
|
||||
def determine_file_encoding(data):
|
||||
"""
|
||||
From the first 512 bytes of a given file,
|
||||
it tries to detect the encoding to be used to read as text.
|
||||
|
||||
@returns (encoding, bytes to skip)
|
||||
"""
|
||||
|
||||
for bom, enc in BOMS:
|
||||
# matching BOM beats any declaration
|
||||
# BOMs are skipped to prevent any errors
|
||||
if data.startswith(bom):
|
||||
return enc, len(bom)
|
||||
|
||||
# strip off all null bytes to match even when UTF-16 or UTF-32 is used
|
||||
# endians don't matter
|
||||
data = data.replace(b'\0', b'')
|
||||
|
||||
PREAMBLES = [
|
||||
# "# -*- coding: utf-8 -*-"
|
||||
# "# coding: utf-8"
|
||||
rb'(?m)^#(?:\s+-\*-)?\s*coding\s*:\s*(?P<encoding>\S+)(?:\s+-\*-)?\s*$',
|
||||
# "# vi: set fileencoding=utf-8"
|
||||
rb'^#\s+vi\s*:\s+set\s+fileencoding=(?P<encoding>[^\s,]+)'
|
||||
]
|
||||
for pb in PREAMBLES:
|
||||
mobj = re.match(pb, data)
|
||||
if not mobj:
|
||||
continue
|
||||
# preambles aren't skipped since they're just ignored when reading as config
|
||||
return mobj.group('encoding').decode(), 0
|
||||
|
||||
return None, 0
|
||||
|
||||
|
||||
class Config:
|
||||
own_args = None
|
||||
parsed_args = None
|
||||
|
@ -5445,12 +5482,17 @@ class Config:
|
|||
@staticmethod
|
||||
def read_file(filename, default=[]):
|
||||
try:
|
||||
optionf = open(filename)
|
||||
optionf = open(filename, 'rb')
|
||||
except OSError:
|
||||
return default # silently skip if file is not present
|
||||
try:
|
||||
enc, skip = determine_file_encoding(optionf.read(512))
|
||||
optionf.seek(skip, io.SEEK_SET)
|
||||
except OSError:
|
||||
enc = None # silently skip read errors
|
||||
try:
|
||||
# FIXME: https://github.com/ytdl-org/youtube-dl/commit/dfe5fa49aed02cf36ba9f743b11b0903554b5e56
|
||||
contents = optionf.read()
|
||||
contents = optionf.read().decode(enc or preferredencoding())
|
||||
res = shlex.split(contents, comments=True)
|
||||
except Exception as err:
|
||||
raise ValueError(f'Unable to parse "{filename}": {err}')
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue