Compare commits

..

4 commits

Author SHA1 Message Date
474c24e05a
Lyrics: switch to multiple search engines
Brave sometimes rate-limits the SearXNG server
2024-05-06 20:34:03 +04:00
0b0759fb3b
Lyrics: add irrelevant results filtering
Small note on how it works, copied from
raise_on_irrelevant_result() docstring:

Raises ValueError
if no words from track title are present in search result track title
and no words from artist name are present in search result artist name
2024-05-06 20:31:42 +04:00
62ebecc87f
Remove path_length config option
I misunderstood the FS limits: 255 is the limit for a filename, not for a path.
The path length is limited only by libc, to 4096.
The default outtmpl in musicdlp contains slashes, so it's a path.
But YDL treats it as a filename, so the whole
outtmpl formatting result is trimmed to path_length.
Do we really need this? I think there are no "malicious" long-named tracks :)
2024-05-06 19:43:32 +04:00
e1ef74cc1c
Simplest frontend UI 2024-05-05 19:44:37 +04:00
8 changed files with 122 additions and 14 deletions

View file

@ -13,10 +13,6 @@ class Config:
# Cookies are in Netscape CSV format, see yt-dlp docs # Cookies are in Netscape CSV format, see yt-dlp docs
self.cookies_dir = Path(os.getenv('COOKIES_DIR') or 'cookies') self.cookies_dir = Path(os.getenv('COOKIES_DIR') or 'cookies')
# Note: yt-dlp's path trimmer also counts album_path_tmpl, not only filename
# Why 235? 255 is the ext4 limit. 255 - len("/var/lib/musicdlp/") = 237, rounded down to 235
self.path_length = int(os.getenv('PATH_LENGTH') or 235)
self.tmpl = os.path.join( self.tmpl = os.path.join(
# `artists.0` instead of `artist`, because the latter can contain "feat. ..." # `artists.0` instead of `artist`, because the latter can contain "feat. ..."
os.getenv('ALBUM_PATH_TMPL') or 'music/%(artists.0)s/%(album)s', os.getenv('ALBUM_PATH_TMPL') or 'music/%(artists.0)s/%(album)s',

View file

@ -16,16 +16,16 @@ lyrics_xpath = etree.XPath('//div[@id="lyrics-root"][1]/div[@data-lyrics-contain
br_xpath = etree.XPath('.//br') br_xpath = etree.XPath('.//br')
def search(title: str, artist: str) -> str: def search(title: str, artist: str) -> tuple[str, str]:
'''Searches for Genius lyrics using SearXNG + Yahoo and returns the first URL. '''Searches for Genius lyrics using SearXNG + Yahoo
Irrelevant texts should be picked manually''' and returns the first result as tuple(title, url)'''
resp = http_pool.get().request( resp = http_pool.get().request(
'GET', 'GET',
'https://searx.dc09.ru/search', 'https://searx.dc09.ru/search',
fields={ fields={
'q': artist + ' ' + title + ' site:genius.com', 'q': artist + ' ' + title + ' site:genius.com',
'engines': 'brave', 'engines': 'brave,yahoo',
'safesearch': '0', 'safesearch': '0',
'format': 'json', 'format': 'json',
}, },
@ -34,7 +34,27 @@ def search(title: str, artist: str) -> str:
result: dict[str, str] = resp.json()['results'][0] result: dict[str, str] = resp.json()['results'][0]
del resp del resp
return result['url'] return (result['title'], result['url'])
def raise_on_irrelevant_result(res_title: str, track_track: str, track_artist: str) -> None:
    '''Raises ValueError unless the search result looks relevant to the track.

    `res_title` is the title of the search result; it is expected to have
    the form "<artist> <en dash> <track>" (parts separated by an en dash,
    U+2013, surrounded by single spaces).

    The result is considered relevant only if BOTH conditions hold:
    - at least one word of `track_artist` occurs in the result's artist part
    - at least one word of `track_track` occurs in the result's track part

    "Words" are whatever `word_regex` (defined elsewhere in this module)
    matches. Comparison is case-insensitive and substring-based.

    Raises ValueError if the separator is missing or either condition fails.'''

    # partition() splits on the first separator only, like split(maxsplit=1),
    # but lets us report a missing separator explicitly instead of failing
    # with an opaque tuple-unpacking error (which was a ValueError too)
    res_artist, separator, res_track = res_title.lower().partition(' \u2013 ')
    if not separator:
        raise ValueError(f'no " \u2013 " separator in search result title: {res_title!r}')

    artist_matches = any(
        word.group(0).lower() in res_artist
        for word in word_regex.finditer(track_artist)
    )
    if not artist_matches:
        raise ValueError(
            f'no words of artist {track_artist!r} found in search result {res_title!r}'
        )

    track_matches = any(
        word.group(0).lower() in res_track
        for word in word_regex.finditer(track_track)
    )
    if not track_matches:
        raise ValueError(
            f'no words of track {track_track!r} found in search result {res_title!r}'
        )
def parse(url: str) -> str: def parse(url: str) -> str:

View file

@ -63,7 +63,8 @@ class ID3TagsPP(PostProcessor):
file['TCON'] = id3.TCON(encoding=ENC_UTF8, text=information['genre']) file['TCON'] = id3.TCON(encoding=ENC_UTF8, text=information['genre'])
try: try:
lyr_url = genius.search(title, artists[0]) lyr_title, lyr_url = genius.search(title, artists[0])
genius.raise_on_irrelevant_result(lyr_title, title, artists[0])
file['USLT'] = id3.USLT(encoding=ENC_UTF8, text=genius.parse(lyr_url)) file['USLT'] = id3.USLT(encoding=ENC_UTF8, text=genius.parse(lyr_url))
except: except:
pass pass

View file

@ -23,15 +23,46 @@ LYR3 = '''you are gonna get yours
Another day''' Another day'''
# There are no lyrics for this song on Genius
# Maybe someday TITLE2 and ARTIST2 will need to be changed
# (But a real, existing song was chosen intentionally)
TITLE2 = 'Паруса'
ARTIST2 = 'PIZZA'
class TestGenius(TestCase): class TestGenius(TestCase):
def setUp(self) -> None: def setUp(self) -> None:
http_pool.get() http_pool.get()
def test_search_success(self) -> None: def test_search(self) -> None:
url = genius.search(TITLE, ARTIST) _, url = genius.search(TITLE, ARTIST)
self.assertEqual(url, URL) self.assertEqual(url, URL)
def test_search_success(self) -> None:
    '''The first search result for TITLE / ARTIST must pass
    the relevancy check, i.e. no exception is raised'''
    res_title, _url = genius.search(TITLE, ARTIST)
    genius.raise_on_irrelevant_result(res_title, TITLE, ARTIST)
def test_search_failure(self) -> None:
    '''The first search result for TITLE2 / ARTIST2 must be rejected
    by the relevancy check with ValueError'''
    res_title, _url = genius.search(TITLE2, ARTIST2)
    self.assertRaises(
        ValueError,
        genius.raise_on_irrelevant_result,
        res_title,
        TITLE2,
        ARTIST2,
    )
def test_relevancy_success(self) -> None:
    '''Offline relevancy check that must pass without raising.

    The part of the result title before the en dash is compared with
    track_artist, the part after it with track_track; letter case differs
    on purpose'''
    sample_result_title = 'ABC hEllo world!@ \u2013 sOmE artist123'
    genius.raise_on_irrelevant_result(
        res_title=sample_result_title,
        track_track='Artist123',
        track_artist='hello World',
    )
def test_relevancy_failure(self) -> None:
    '''Offline relevancy check that must raise ValueError
    for a result title unrelated to the requested track'''
    sample_result_title = 'DEF hEllo world@!15 \u2013 anOther artist456'
    self.assertRaises(
        ValueError,
        genius.raise_on_irrelevant_result,
        res_title=sample_result_title,
        track_track='DEF 789',
        track_artist='ABC irrelevant track title',
    )
def test_lyrics_parsing(self) -> None: def test_lyrics_parsing(self) -> None:
lyrics = genius.parse(URL) lyrics = genius.parse(URL)
self.assertTrue(lyrics.startswith(LYR1)) self.assertTrue(lyrics.startswith(LYR1))

View file

@ -63,7 +63,6 @@ class Downloader:
if ydl is None: if ydl is None:
ydl = create_ydl_fn[site]() ydl = create_ydl_fn[site]()
ydl.params['trim_file_name'] = cfg.path_length # Note: not only filename, but path in outtmpl
ydl.params['outtmpl']['default'] = cfg.tmpl ydl.params['outtmpl']['default'] = cfg.tmpl
ydl.add_post_processor(id3pp.ID3TagsPP(), when='post_process') ydl.add_post_processor(id3pp.ID3TagsPP(), when='post_process')

View file

@ -1 +1,33 @@
<!-- TODO: simple web ui, websockets --> <!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>musicdlp</title>
    <link rel="stylesheet" href="/style.css">
    <!-- script.js waits for DOMContentLoaded itself, so it can be loaded from <head> -->
    <script src="/script.js"></script>
</head>
<body>
    <!-- URL input; "Guess site" picks a value in #site-select based on the URL (see script.js) -->
    <div>
        <!-- A placeholder is not an accessible name, hence the aria-label -->
        <input type="text" id="url" placeholder="Playlist or track URL" aria-label="Playlist or track URL">
        <button type="button" id="guess-site-btn">Guess site</button>
    </div>
    <!-- Site to download from -->
    <div>
        <select id="site-select" aria-label="Site">
            <option value="youtube" selected>YouTube</option>
            <option value="yt_proxied">YT proxied</option>
            <option value="yandex">Yandex Music</option>
        </select>
    </div>
    <div>
        <button type="button" id="items-btn">Get playlist items</button>
    </div>
    <!-- Empty placeholder for the playlist items -->
    <div id="items-container"></div>
    <div>
        <!-- id added for consistency with the other buttons, so scripts can find it -->
        <button type="button" id="download-btn">Download</button>
    </div>
    <div>
        <label>Progress: <span id="progress">not implemented</span></label>
    </div>
</body>
</html>

18
frontend/script.js Normal file
View file

@ -0,0 +1,18 @@
addEventListener('DOMContentLoaded', () => {
    /** @type {HTMLInputElement} */
    const urlInput = document.getElementById('url')
    /** @type {HTMLSelectElement} */
    const siteSelect = document.getElementById('site-select')
    const guessButton = document.getElementById('guess-site-btn')

    /**
     * Selects a site in the dropdown based on substrings of the entered URL.
     * Leaves the selection untouched when the URL matches no known pattern.
     */
    const guessSite = () => {
        const link = urlInput.value
        const looksLikeYoutube =
            link.includes('/watch?v=') || link.includes('/playlist?list=')

        if (looksLikeYoutube) {
            // Do not override the proxied variant if it is already selected
            if (siteSelect.value !== 'yt_proxied') {
                siteSelect.value = 'youtube'
            }
            return
        }

        if (link.includes('://music.yandex.')) {
            siteSelect.value = 'yandex'
        }
    }

    guessButton.addEventListener('click', guessSite)
})

11
frontend/style.css Normal file
View file

@ -0,0 +1,11 @@
/* Page layout: direct children of <body> are stacked vertically
   and centered horizontally, with a small gap between rows */
body {
/* Reset the browser default margin; use a small inner padding instead */
margin: 0;
padding: 0.5rem;
/* Column flexbox: one child per row */
display: flex;
flex-direction: column;
/* Center children on the cross (horizontal) axis */
align-items: center;
/* Vertical spacing between consecutive children */
row-gap: 0.25rem;
/* Font fallback chain ending with the generic sans-serif family */
font-family: 'Noto Sans', 'Roboto', 'Ubuntu', sans-serif;
}