Add option --use-extractors

Deprecates `--force-generic-extractor`

Closes #3234, Closes #2044

Related: #4307, #1791
This commit is contained in:
pukkandan 2022-08-24 05:42:16 +05:30
parent 5314b52192
commit fe7866d0ed
No known key found for this signature in database
GPG key ID: 7EEE9E1E817D0A39
5 changed files with 58 additions and 18 deletions

View file

@ -29,6 +29,7 @@ from .cookies import load_cookies
from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
from .downloader.rtmp import rtmpdump_version
from .extractor import gen_extractor_classes, get_info_extractor
from .extractor.common import UnsupportedURLIE
from .extractor.openload import PhantomJSwrapper
from .minicurses import format_text
from .postprocessor import _PLUGIN_CLASSES as plugin_postprocessors
@ -237,7 +238,7 @@ class YoutubeDL:
Default is 'only_download' for CLI, but False for API
skip_playlist_after_errors: Number of allowed failures until the rest of
the playlist is skipped
force_generic_extractor: Force downloader to use the generic extractor
allowed_extractors: List of regexes to match against extractor names that are allowed
overwrites: Overwrite all video and metadata files if True,
overwrite only non-video files if None
and don't overwrite any file if False
@ -477,6 +478,8 @@ class YoutubeDL:
The following options are deprecated and may be removed in the future:
force_generic_extractor: Force downloader to use the generic extractor
- Use allowed_extractors = ['generic', 'default']
playliststart: - Use playlist_items
Playlist item to start at.
playlistend: - Use playlist_items
@ -758,13 +761,6 @@ class YoutubeDL:
self._ies_instances[ie_key] = ie
ie.set_downloader(self)
def _get_info_extractor_class(self, ie_key):
ie = self._ies.get(ie_key)
if ie is None:
ie = get_info_extractor(ie_key)
self.add_info_extractor(ie)
return ie
def get_info_extractor(self, ie_key):
"""
Get an instance of an IE with name ie_key, it will try to get one from
@ -781,8 +777,19 @@ class YoutubeDL:
"""
Add the InfoExtractors returned by gen_extractors to the end of the list
"""
for ie in gen_extractor_classes():
self.add_info_extractor(ie)
all_ies = {ie.IE_NAME.lower(): ie for ie in gen_extractor_classes()}
all_ies['end'] = UnsupportedURLIE()
try:
ie_names = orderedSet_from_options(
self.params.get('allowed_extractors', ['default']), {
'all': list(all_ies),
'default': [name for name, ie in all_ies.items() if ie._ENABLED],
}, use_regex=True)
except re.error as e:
raise ValueError(f'Wrong regex for allowed_extractors: {e.pattern}')
for name in ie_names:
self.add_info_extractor(all_ies[name])
self.write_debug(f'Loaded {len(ie_names)} extractors')
def add_post_processor(self, pp, when='post_process'):
"""Add a PostProcessor object to the end of the chain."""
@ -1413,11 +1420,11 @@ class YoutubeDL:
ie_key = 'Generic'
if ie_key:
ies = {ie_key: self._get_info_extractor_class(ie_key)}
ies = {ie_key: self._ies[ie_key]} if ie_key in self._ies else {}
else:
ies = self._ies
for ie_key, ie in ies.items():
for key, ie in ies.items():
if not ie.suitable(url):
continue
@ -1426,14 +1433,16 @@ class YoutubeDL:
'and will probably not work.')
temp_id = ie.get_temp_id(url)
if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
self.to_screen(f'[{ie_key}] {temp_id}: has already been recorded in the archive')
if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': key}):
self.to_screen(f'[{key}] {temp_id}: has already been recorded in the archive')
if self.params.get('break_on_existing', False):
raise ExistingVideoReached()
break
return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process)
else:
self.report_error('no suitable InfoExtractor for URL %s' % url)
extractors_restricted = self.params.get('allowed_extractors') not in (None, ['default'])
self.report_error(f'No suitable extractor{format_field(ie_key, None, " (%s)")} found for URL {url}',
tb=False if extractors_restricted else None)
def _handle_extraction_exceptions(func):
@functools.wraps(func)