From 0b0759fb3bb09d96ad99aba3ca95d4e81210f663 Mon Sep 17 00:00:00 2001 From: DarkCat09 Date: Mon, 6 May 2024 20:31:42 +0400 Subject: [PATCH] Lyrics: add irrelevant results filtering Small note on how it works, copied from raise_on_irrelevant_result() docstring: Raises ValueError if no words from track title are present in search result track title or no words from artist name are present in search result artist name --- backend/genius.py | 28 ++++++++++++++++++++++++---- backend/id3pp.py | 3 ++- backend/test_genius.py | 35 +++++++++++++++++++++++++++++++++-- 3 files changed, 59 insertions(+), 7 deletions(-) diff --git a/backend/genius.py b/backend/genius.py index c98f16f..7d7a119 100644 --- a/backend/genius.py +++ b/backend/genius.py @@ -16,9 +16,9 @@ lyrics_xpath = etree.XPath('//div[@id="lyrics-root"][1]/div[@data-lyrics-contain br_xpath = etree.XPath('.//br') -def search(title: str, artist: str) -> str: - '''Searches for Genius lyrics using SearXNG + Yahoo and returns the first URL. 
- Irrelevant texts should be picked manually''' +def search(title: str, artist: str) -> tuple[str, str]: + '''Searches for Genius lyrics using SearXNG + Yahoo + and returns the first result as tuple(title, url)''' resp = http_pool.get().request( 'GET', @@ -34,7 +34,27 @@ def search(title: str, artist: str) -> str: result: dict[str, str] = resp.json()['results'][0] del resp - return result['url'] + return (result['title'], result['url']) + + +def raise_on_irrelevant_result(res_title: str, track_track: str, track_artist: str) -> None: + '''Raises ValueError + if no words from track title are present in search result track title + or no words from artist name are present in search result artist name''' + + res_artist, res_track = res_title.lower().split(' \u2013 ', maxsplit=1) + if not ( + any( + word.group(0).lower() in res_artist + for word in word_regex.finditer(track_artist) + ) + and + any( + word.group(0).lower() in res_track + for word in word_regex.finditer(track_track) + ) + ): + raise ValueError def parse(url: str) -> str: diff --git a/backend/id3pp.py b/backend/id3pp.py index ae9f657..1418222 100644 --- a/backend/id3pp.py +++ b/backend/id3pp.py @@ -63,7 +63,8 @@ class ID3TagsPP(PostProcessor): file['TCON'] = id3.TCON(encoding=ENC_UTF8, text=information['genre']) try: - lyr_url = genius.search(title, artists[0]) + lyr_title, lyr_url = genius.search(title, artists[0]) + genius.raise_on_irrelevant_result(lyr_title, title, artists[0]) file['USLT'] = id3.USLT(encoding=ENC_UTF8, text=genius.parse(lyr_url)) except: pass diff --git a/backend/test_genius.py b/backend/test_genius.py index 1b7cfd6..0db4653 100644 --- a/backend/test_genius.py +++ b/backend/test_genius.py @@ -23,15 +23,46 @@ LYR3 = '''you are gonna get yours Another day''' +# There are no lyrics for this song on Genius +# Maybe someday TITLE2 and ARTIST2 will need to be changed +# (A real, existing song was chosen intentionally) +TITLE2 = 'Паруса' +ARTIST2 = 'PIZZA' + + class TestGenius(TestCase): def 
setUp(self) -> None: http_pool.get() - def test_search_success(self) -> None: - url = genius.search(TITLE, ARTIST) + def test_search(self) -> None: + _, url = genius.search(TITLE, ARTIST) self.assertEqual(url, URL) + def test_search_success(self) -> None: + title, _ = genius.search(TITLE, ARTIST) + genius.raise_on_irrelevant_result(title, TITLE, ARTIST) + + def test_search_failure(self) -> None: + title, _ = genius.search(TITLE2, ARTIST2) + with self.assertRaises(ValueError): + genius.raise_on_irrelevant_result(title, TITLE2, ARTIST2) + + def test_relevancy_success(self) -> None: + genius.raise_on_irrelevant_result( + 'ABC hEllo world!@ \u2013 sOmE artist123', + 'Artist123', + 'hello World', + ) + + def test_relevancy_failure(self) -> None: + with self.assertRaises(ValueError): + genius.raise_on_irrelevant_result( + 'DEF hEllo world@!15 \u2013 anOther artist456', + 'DEF 789', + 'ABC irrelevant track title', + ) + def test_lyrics_parsing(self) -> None: lyrics = genius.parse(URL) self.assertTrue(lyrics.startswith(LYR1))