musicdlp/backend/ydl_pool.py
DarkCat09 d0ed29e685
Optimize extract_info: no process=True for YouTube
For YT, processing each video separately greatly increases extract_info time,
but it is unnecessary when only the track title is needed.

For Yandex, processing is much faster, and we NEED it to get title.
2024-05-06 21:07:19 +04:00

140 lines
3.6 KiB
Python

import asyncio
from typing import Callable, Awaitable, Iterable
from yt_dlp import YoutubeDL
from yt_dlp.postprocessor import FFmpegExtractAudioPP
import config
import id3pp
class _CreateYDL:
    """Namespace of factory functions, one per supported site.

    Every factory takes no arguments and returns a freshly constructed
    ``YoutubeDL`` object configured for that site.
    """

    @staticmethod
    def youtube() -> YoutubeDL:
        """Build the YouTube downloader.

        Uses the ``'ba'`` format selector, registers the YouTube info
        post-processor to run before the download and an audio extractor
        (preferred codec ``mp3``) to run in the post-processing stage.
        """
        options = {'format': 'ba'}
        downloader = YoutubeDL(options)

        info_pp = id3pp.InfoYouTubePP()
        downloader.add_post_processor(info_pp, when='before_dl')

        audio_pp = FFmpegExtractAudioPP(preferredcodec='mp3')
        downloader.add_post_processor(audio_pp, when='post_process')

        return downloader

    @staticmethod
    def yt_proxied() -> YoutubeDL:
        """Build the YouTube downloader and point it at the configured proxy."""
        proxied = _CreateYDL.youtube()
        proxy_url = config.get().yt_proxy
        proxied.params['proxy'] = proxy_url
        return proxied

    @staticmethod
    def yandex() -> YoutubeDL:
        """Build the Yandex downloader; it uses default options only."""
        return YoutubeDL()
# Maps a site name to the zero-argument factory that builds its YoutubeDL.
create_ydl_fn = dict(
    youtube=_CreateYDL.youtube,
    yt_proxied=_CreateYDL.yt_proxied,
    yandex=_CreateYDL.yandex,
)

# Keys view over the factory table, i.e. the known site names.
ydl_fn_keys = create_ydl_fn.keys()

# Sites for which extract_info must be called with process=True,
# because the track title is needed in its output.
NP_YDLS = {'yandex'}
class Downloader:
    """Per-site pool of lazily created ``YoutubeDL`` objects.

    ``choose_ydl`` selects (and, on first use, creates) the object for a
    site; the subsequent ``get_playlist_items`` and ``download`` calls
    operate on that selection. ``cleanup`` closes every object that has
    been created.
    """

    def __init__(
            self,
            progress_cb: Callable[[str], Awaitable],
            lyrics_cb: Callable[[list[str]], Awaitable]) -> None:
        """Create an empty pool.

        Args:
            progress_cb: Async callback taking a string. It is only stored
                on the instance here; this class does not call it itself.
            lyrics_cb: Async callback taking a list of strings. Likewise
                only stored on the instance.
        """
        # One slot per known site; a slot is filled on first choose_ydl().
        self.ydls: dict[str, YoutubeDL | None] = {
            'youtube': None,
            'yt_proxied': None,
            'yandex': None,
        }
        self.cur_ydl: YoutubeDL | None = None
        self.cur_site = ''
        self.progress_cb = progress_cb
        self.lyrics_cb = lyrics_cb

    def choose_ydl(self, site: str) -> None:
        """Make the ``YoutubeDL`` object for *site* the current one.

        The object is created and configured on first use and then reused
        on later calls for the same site.

        Args:
            site: One of the keys of ``self.ydls``.

        Raises:
            KeyError: If *site* is not a known site name.
        """
        ydl = self.ydls[site]
        cfg = config.get()

        if ydl is None:
            ydl = create_ydl_fn[site]()
            ydl.params['outtmpl']['default'] = cfg.tmpl
            ydl.add_post_processor(id3pp.ID3TagsPP(), when='post_process')

            # Use a per-site cookie file when one exists.
            cookies = cfg.cookies_dir / (site + '.txt')
            if cookies.exists():
                ydl.params['cookiefile'] = str(cookies)

            # Remember the object: without this the pool never caches
            # anything and cleanup() has nothing to close.
            self.ydls[site] = ydl

        self.cur_ydl = ydl
        self.cur_site = site

    def get_cur_ydl(self) -> YoutubeDL:
        """Return the currently selected ``YoutubeDL`` object.

        Raises:
            RuntimeError: If ``choose_ydl`` has not selected one yet.
        """
        ydl = self.cur_ydl
        if ydl is None:
            raise RuntimeError('ydl object not initialized')
        return ydl

    async def get_playlist_items(self, url: str) -> list[str]:
        """Return the track titles of the playlist at *url*.

        The blocking ``extract_info`` call runs in the event loop's
        default executor. Processing is requested only for the sites
        listed in ``NP_YDLS``.

        Raises:
            RuntimeError: If no ydl object is selected, or if
                ``extract_info`` returns ``None``.
        """
        return await asyncio.get_running_loop().run_in_executor(
            None,
            Downloader._target_get_playlist_items,
            self.get_cur_ydl(),
            url,
            self.cur_site in NP_YDLS,
        )

    @staticmethod
    def _target_get_playlist_items(ydl: YoutubeDL, url: str, process: bool) -> list[str]:
        """Executor target: extract playlist info and collect entry titles.

        For each entry the ``'track'`` value is used when that key is
        present, otherwise the ``'title'`` value.
        """
        info = ydl.extract_info(url, download=False, process=process)
        if info is None:
            raise RuntimeError('ydl.extract_info returned None')
        return [
            entry['track'] if 'track' in entry else entry['title']
            for entry in info['entries']
        ]

    async def download(
            self,
            url: str,
            playlist_items: Iterable[int] | None = None) -> int:
        """Download *url* with the currently selected ydl object.

        The blocking download runs in the event loop's default executor.

        Args:
            url: URL to download.
            playlist_items: Optional playlist indices to restrict the
                download to; when omitted or empty no restriction is set.

        Returns:
            The value returned by ``YoutubeDL.download``.

        Raises:
            RuntimeError: If no ydl object is selected.
        """
        return await asyncio.get_running_loop().run_in_executor(
            None,
            Downloader._target_download,
            self.get_cur_ydl(),
            url,
            playlist_items,
        )

    @staticmethod
    def _target_download(
            ydl: YoutubeDL,
            url: str,
            playlist_items: Iterable[int] | None = None) -> int:
        """Executor target: run the download, optionally limited to items.

        ``ydl.params['playlist_items']`` is set only for the duration of
        this call and is always removed afterwards, because the ydl object
        is shared between calls.
        """
        if playlist_items:
            ydl.params['playlist_items'] = ','.join(str(i) for i in playlist_items)
        try:
            return ydl.download(url)
        finally:
            # pop() with a default: the key is absent when no items were
            # given, and must also be cleared if download() raised.
            ydl.params.pop('playlist_items', None)

    def cleanup(self) -> None:
        """Close every ``YoutubeDL`` object that has been created."""
        for ydl in self.ydls.values():
            if ydl is not None:
                ydl.close()