Lyrics: add irrelevant results filtering
Small note on how it works, adapted from the raise_on_irrelevant_result() docstring: raises ValueError unless at least one word from the track title is present in the search result's track title and at least one word from the artist name is present in the search result's artist name
This commit is contained in:
parent
62ebecc87f
commit
0b0759fb3b
3 changed files with 59 additions and 7 deletions
|
@ -16,9 +16,9 @@ lyrics_xpath = etree.XPath('//div[@id="lyrics-root"][1]/div[@data-lyrics-contain
|
|||
br_xpath = etree.XPath('.//br')
|
||||
|
||||
|
||||
def search(title: str, artist: str) -> str:
|
||||
'''Searches for Genius lyrics using SearXNG + Yahoo and returns the first URL.
|
||||
Irrelevant texts should be picked manually'''
|
||||
def search(title: str, artist: str) -> tuple[str, str]:
|
||||
'''Searches for Genius lyrics using SearXNG + Yahoo
|
||||
and returns the first result as tuple(title, url)'''
|
||||
|
||||
resp = http_pool.get().request(
|
||||
'GET',
|
||||
|
@ -34,7 +34,27 @@ def search(title: str, artist: str) -> str:
|
|||
result: dict[str, str] = resp.json()['results'][0]
|
||||
del resp
|
||||
|
||||
return result['url']
|
||||
return (result['title'], result['url'])
|
||||
|
||||
|
||||
def raise_on_irrelevant_result(res_title: str, track_track: str, track_artist: str) -> None:
    '''Raises ValueError if the search result does not match the requested track.

    res_title is the title of the search result and is expected to have
    the form "<artist> \u2013 <track>" (en dash surrounded by spaces).
    track_track is the requested track title, track_artist is the requested
    artist name.

    The result is accepted only if at least one word from track_artist occurs
    in the artist part of res_title AND at least one word from track_track
    occurs in the track part of res_title. Comparison is case-insensitive and
    substring-based. Words are whatever the module-level word_regex matches.

    ValueError is raised when res_title has no " \u2013 " separator,
    or when either the artist or the track part has no matching word.'''

    res_artist, separator, res_track = res_title.lower().partition(' \u2013 ')
    if not separator:
        # Without the separator the title cannot be split into artist and track.
        # Callers already expect ValueError for this case, so keep the same type
        # but raise it explicitly with a message.
        raise ValueError(
            f'search result title {res_title!r} has no artist/track separator'
        )

    def shares_a_word(wanted: str, found: str) -> bool:
        # True if any word of `wanted` occurs (as a substring) in the
        # already lower-cased string `found`.
        return any(
            word.group(0).lower() in found
            for word in word_regex.finditer(wanted)
        )

    # Artist is checked first, then the track title; both must match.
    if not shares_a_word(track_artist, res_artist):
        raise ValueError(
            f'no word of artist {track_artist!r} found in search result {res_title!r}'
        )
    if not shares_a_word(track_track, res_track):
        raise ValueError(
            f'no word of track title {track_track!r} found in search result {res_title!r}'
        )
|
||||
|
||||
|
||||
def parse(url: str) -> str:
|
||||
|
|
|
@ -63,7 +63,8 @@ class ID3TagsPP(PostProcessor):
|
|||
file['TCON'] = id3.TCON(encoding=ENC_UTF8, text=information['genre'])
|
||||
|
||||
try:
|
||||
lyr_url = genius.search(title, artists[0])
|
||||
lyr_title, lyr_url = genius.search(title, artists[0])
|
||||
genius.raise_on_irrelevant_result(lyr_title, title, artists[0])
|
||||
file['USLT'] = id3.USLT(encoding=ENC_UTF8, text=genius.parse(lyr_url))
|
||||
except:
|
||||
pass
|
||||
|
|
|
@ -23,15 +23,46 @@ LYR3 = '''you are gonna get yours
|
|||
Another day'''
|
||||
|
||||
|
||||
# There are no lyrics for this song on Genius
|
||||
# Maybe someday TITLE2 and ARTIST2 will need to be changed
|
||||
# (But a song that really exists was chosen intentionally)
|
||||
TITLE2 = 'Паруса'
|
||||
ARTIST2 = 'PIZZA'
|
||||
|
||||
|
||||
class TestGenius(TestCase):
|
||||
|
||||
def setUp(self) -> None:
|
||||
http_pool.get()
|
||||
|
||||
def test_search_success(self) -> None:
|
||||
url = genius.search(TITLE, ARTIST)
|
||||
def test_search(self) -> None:
|
||||
_, url = genius.search(TITLE, ARTIST)
|
||||
self.assertEqual(url, URL)
|
||||
|
||||
def test_search_success(self) -> None:
|
||||
title, _ = genius.search(TITLE, ARTIST)
|
||||
genius.raise_on_irrelevant_result(title, TITLE, ARTIST)
|
||||
|
||||
def test_search_failure(self) -> None:
|
||||
title, _ = genius.search(TITLE2, ARTIST2)
|
||||
with self.assertRaises(ValueError):
|
||||
genius.raise_on_irrelevant_result(title, TITLE2, ARTIST2)
|
||||
|
||||
def test_relevancy_success(self) -> None:
|
||||
genius.raise_on_irrelevant_result(
|
||||
'ABC hEllo world!@ \u2013 sOmE artist123',
|
||||
'Artist123',
|
||||
'hello World',
|
||||
)
|
||||
|
||||
def test_relevancy_failure(self) -> None:
|
||||
with self.assertRaises(ValueError):
|
||||
genius.raise_on_irrelevant_result(
|
||||
'DEF hEllo world@!15 \u2013 anOther artist456',
|
||||
'DEF 789',
|
||||
'ABC irrelevant track title',
|
||||
)
|
||||
|
||||
def test_lyrics_parsing(self) -> None:
|
||||
lyrics = genius.parse(URL)
|
||||
self.assertTrue(lyrics.startswith(LYR1))
|
||||
|
|
Loading…
Reference in a new issue