Lyrics: add irrelevant results filtering

Small note on how it works, copied from
raise_on_irrelevant_result() docstring:

Raises ValueError
if no words from track title are present in search result track title
and no words from artist name are present in search result artist name
This commit is contained in:
DarkCat09 2024-05-06 20:31:42 +04:00
parent 62ebecc87f
commit 0b0759fb3b
Signed by: DarkCat09
GPG key ID: 0A26CD5B3345D6E3
3 changed files with 59 additions and 7 deletions

View file

@ -16,9 +16,9 @@ lyrics_xpath = etree.XPath('//div[@id="lyrics-root"][1]/div[@data-lyrics-contain
br_xpath = etree.XPath('.//br') br_xpath = etree.XPath('.//br')
def search(title: str, artist: str) -> str: def search(title: str, artist: str) -> tuple[str, str]:
'''Searches for Genius lyrics using SearXNG + Yahoo and returns the first URL. '''Searches for Genius lyrics using SearXNG + Yahoo
Irrelevant texts should be picked manually''' and returns the first result as tuple(title, url)'''
resp = http_pool.get().request( resp = http_pool.get().request(
'GET', 'GET',
@ -34,7 +34,27 @@ def search(title: str, artist: str) -> str:
result: dict[str, str] = resp.json()['results'][0] result: dict[str, str] = resp.json()['results'][0]
del resp del resp
return result['url'] return (result['title'], result['url'])
def raise_on_irrelevant_result(res_title: str, track_track: str, track_artist: str) -> None:
    '''Raises ValueError if the search result does not look like the requested track.

    res_title is the title of a search result; it is split once
    on an en dash surrounded by spaces into "artist" (left part)
    and "track" (right part).

    The result is accepted only when BOTH conditions hold:
    - at least one word of track_artist occurs in the result artist part
    - at least one word of track_track occurs in the result track part

    Words are extracted with word_regex and compared case-insensitively
    as substrings of the corresponding part.

    ValueError is also raised when res_title has no separator,
    because the artist and track parts cannot be told apart then.'''

    # partition() instead of split() + unpacking: a missing separator
    # is reported explicitly instead of via an unpacking error
    res_artist, separator, res_track = res_title.lower().partition(' \u2013 ')
    if not separator:
        raise ValueError(f'unexpected search result title: {res_title!r}')

    artist_found = any(
        word.group(0).lower() in res_artist
        for word in word_regex.finditer(track_artist)
    )
    if not artist_found:
        raise ValueError(f'artist {track_artist!r} not found in {res_title!r}')

    track_found = any(
        word.group(0).lower() in res_track
        for word in word_regex.finditer(track_track)
    )
    if not track_found:
        raise ValueError(f'track {track_track!r} not found in {res_title!r}')
def parse(url: str) -> str: def parse(url: str) -> str:

View file

@ -63,7 +63,8 @@ class ID3TagsPP(PostProcessor):
file['TCON'] = id3.TCON(encoding=ENC_UTF8, text=information['genre']) file['TCON'] = id3.TCON(encoding=ENC_UTF8, text=information['genre'])
try: try:
lyr_url = genius.search(title, artists[0]) lyr_title, lyr_url = genius.search(title, artists[0])
genius.raise_on_irrelevant_result(lyr_title, title, artists[0])
file['USLT'] = id3.USLT(encoding=ENC_UTF8, text=genius.parse(lyr_url)) file['USLT'] = id3.USLT(encoding=ENC_UTF8, text=genius.parse(lyr_url))
except: except:
pass pass

View file

@ -23,15 +23,46 @@ LYR3 = '''you are gonna get yours
Another day''' Another day'''
# There are no lyrics for this song on Genius.
# TITLE2 and ARTIST2 may need to be changed someday
# (a real, existing song was chosen intentionally).
TITLE2 = 'Паруса'
ARTIST2 = 'PIZZA'
class TestGenius(TestCase): class TestGenius(TestCase):
def setUp(self) -> None: def setUp(self) -> None:
http_pool.get() http_pool.get()
def test_search_success(self) -> None: def test_search(self) -> None:
url = genius.search(TITLE, ARTIST) _, url = genius.search(TITLE, ARTIST)
self.assertEqual(url, URL) self.assertEqual(url, URL)
def test_search_success(self) -> None:
    '''The title found for TITLE / ARTIST must pass the relevancy check
    (no exception is expected).'''
    found_title = genius.search(TITLE, ARTIST)[0]
    genius.raise_on_irrelevant_result(found_title, TITLE, ARTIST)
def test_search_failure(self) -> None:
    '''The title found for TITLE2 / ARTIST2 must be rejected
    by the relevancy check with ValueError.'''
    found_title = genius.search(TITLE2, ARTIST2)[0]
    self.assertRaises(
        ValueError,
        genius.raise_on_irrelevant_result,
        found_title,
        TITLE2,
        ARTIST2,
    )
def test_relevancy_success(self) -> None:
    '''A result title whose parts contain the requested words
    (in a different letter case) must be accepted without an exception.'''
    # The part before the dash is checked against track_artist,
    # the part after the dash is checked against track_track
    result_title = 'ABC hEllo world!@ \u2013 sOmE artist123'
    genius.raise_on_irrelevant_result(
        result_title,
        track_track='Artist123',
        track_artist='hello World',
    )
def test_relevancy_failure(self) -> None:
    '''A result title sharing no words with the request
    on the corresponding sides of the dash must raise ValueError.'''
    # 'DEF' occurs only before the dash, while it is passed as the track
    # title, which is compared against the part after the dash
    result_title = 'DEF hEllo world@!15 \u2013 anOther artist456'
    with self.assertRaises(ValueError):
        genius.raise_on_irrelevant_result(
            result_title,
            track_track='DEF 789',
            track_artist='ABC irrelevant track title',
        )
def test_lyrics_parsing(self) -> None: def test_lyrics_parsing(self) -> None:
lyrics = genius.parse(URL) lyrics = genius.parse(URL)
self.assertTrue(lyrics.startswith(LYR1)) self.assertTrue(lyrics.startswith(LYR1))