Lyrics: add irrelevant results filtering

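A short note on how it works (adapted from the raise_on_irrelevant_result() docstring): ValueError is raised if no word from the track title is present in the search result's track title, or if no word from the artist name is present in the search result's artist name. In other words, a result is accepted only when both the artist and the track title match on at least one word.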
2024-05-06 20:31:42 +04:00 · 2024-05-06 20:31:42 +04:00 · 0b0759fb3b
commit 0b0759fb3b
parent 62ebecc87f
3 changed files with 59 additions and 7 deletions
--- a/backend/genius.py
+++ b/backend/genius.py
@ -16,9 +16,9 @@ lyrics_xpath = etree.XPath('//div[@id="lyrics-root"][1]/div[@data-lyrics-contain
 br_xpath = etree.XPath('.//br')


-def search(title: str, artist: str) -> str:
-    '''Searches for Genius lyrics using SearXNG + Yahoo and returns the first URL.
-    Irrelevant texts should be picked manually'''
+def search(title: str, artist: str) -> tuple[str, str]:
+    '''Searches for Genius lyrics using SearXNG + Yahoo
+    and returns the first result as tuple(title, url)'''

    resp = http_pool.get().request(
        'GET',
@ -34,7 +34,27 @@ def search(title: str, artist: str) -> str:
    result: dict[str, str] = resp.json()['results'][0]
    del resp

-    return result['url']
+    return (result['title'], result['url'])
+
+
+def raise_on_irrelevant_result(res_title: str, track_track: str, track_artist: str) -> None:
+    '''Raises ValueError
+    if no words from track title are present in search result track title
+    or no words from artist name are present in search result artist name'''
+
+    res_artist, res_track = res_title.lower().split(' \u2013 ', maxsplit=1)
+    if not (
+        any(
+            word.group(0).lower() in res_artist
+            for word in word_regex.finditer(track_artist)
+        )
+        and
+        any(
+            word.group(0).lower() in res_track
+            for word in word_regex.finditer(track_track)
+        )
+    ):
+        raise ValueError


 def parse(url: str) -> str:
--- a/backend/id3pp.py
+++ b/backend/id3pp.py
@ -63,7 +63,8 @@ class ID3TagsPP(PostProcessor):
            file['TCON'] = id3.TCON(encoding=ENC_UTF8, text=information['genre'])

        try:
-            lyr_url = genius.search(title, artists[0])
+            lyr_title, lyr_url = genius.search(title, artists[0])
+            genius.raise_on_irrelevant_result(lyr_title, title, artists[0])
            file['USLT'] = id3.USLT(encoding=ENC_UTF8, text=genius.parse(lyr_url))
        except:
            pass
--- a/backend/test_genius.py
+++ b/backend/test_genius.py
@ -23,15 +23,46 @@ LYR3 = '''you are gonna get yours
 Another day'''


+# There are no lyrics for this song on Genius
+# Someday TITLE2 and ARTIST2 may need to be changed if lyrics get added
+# (A real, existing song was chosen intentionally)
+TITLE2 = 'Паруса'
+ARTIST2 = 'PIZZA'
+
+
 class TestGenius(TestCase):

    def setUp(self) -> None:
        http_pool.get()

-    def test_search_success(self) -> None:
-        url = genius.search(TITLE, ARTIST)
+    def test_search(self) -> None:
+        _, url = genius.search(TITLE, ARTIST)
        self.assertEqual(url, URL)

+    def test_search_success(self) -> None:
+        title, _ = genius.search(TITLE, ARTIST)
+        genius.raise_on_irrelevant_result(title, TITLE, ARTIST)
+
+    def test_search_failure(self) -> None:
+        title, _ = genius.search(TITLE2, ARTIST2)
+        with self.assertRaises(ValueError):
+            genius.raise_on_irrelevant_result(title, TITLE2, ARTIST2)
+
+    def test_relevancy_success(self) -> None:
+        genius.raise_on_irrelevant_result(
+            'ABC hEllo world!@ \u2013 sOmE artist123',
+            'Artist123',
+            'hello World',
+        )
+
+    def test_relevancy_failure(self) -> None:
+        with self.assertRaises(ValueError):
+            genius.raise_on_irrelevant_result(
+                'DEF hEllo world@!15 \u2013 anOther artist456',
+                'DEF 789',
+                'ABC irrelevant track title',
+            )
+
    def test_lyrics_parsing(self) -> None:
        lyrics = genius.parse(URL)
        self.assertTrue(lyrics.startswith(LYR1))