Compare commits
No commits in common. "eb9bada544b87d96ad81e927d8f02c140c217bca" and "d62b2699c5e0a0f98af012e8087b5a051a686fb5" have entirely different histories.
eb9bada544
...
d62b2699c5
4 changed files with 2 additions and 102 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -1,5 +1,4 @@
|
||||||
venv/
|
venv/
|
||||||
__pycache__/
|
|
||||||
.ruff_cache/
|
.ruff_cache/
|
||||||
|
|
||||||
music/
|
music/
|
||||||
|
|
56
genius.py
56
genius.py
|
@ -1,56 +0,0 @@
|
||||||
import re
|
|
||||||
|
|
||||||
from lxml import html, etree # type: ignore
|
|
||||||
import urllib3
|
|
||||||
|
|
||||||
|
|
||||||
# Matches not only latin symbols,
|
|
||||||
# but also cyrillic alphabet, Chinese hyeroglyphics...
|
|
||||||
word_regex = re.compile(r'\w+')
|
|
||||||
|
|
||||||
# Get all <div>s with lyrics
|
|
||||||
lyrics_xpath = etree.XPath('//div[@id="lyrics-root"][1]/div[@data-lyrics-container="true"]')
|
|
||||||
|
|
||||||
# Get <br>s to add newlines after them for correct text parsing
|
|
||||||
br_xpath = etree.XPath('.//br')
|
|
||||||
|
|
||||||
|
|
||||||
def search(http: urllib3.PoolManager, title: str, artist: str) -> str:
|
|
||||||
'''Searches for Genius lyrics using SearXNG + Yahoo and returns the first URL.
|
|
||||||
Irrelevant texts should be picked manually'''
|
|
||||||
|
|
||||||
resp = http.request(
|
|
||||||
'GET',
|
|
||||||
'https://searx.dc09.ru/search',
|
|
||||||
fields={
|
|
||||||
'q': artist + ' ' + title + ' site:genius.com',
|
|
||||||
'engines': 'yahoo',
|
|
||||||
'safesearch': '0',
|
|
||||||
'format': 'json',
|
|
||||||
},
|
|
||||||
)
|
|
||||||
|
|
||||||
result: dict[str, str] = resp.json()['results'][0]
|
|
||||||
del resp
|
|
||||||
|
|
||||||
return result['url']
|
|
||||||
|
|
||||||
|
|
||||||
def parse(http: urllib3.PoolManager, url: str) -> str:
|
|
||||||
'''Requests a lyrics page and parses with libxml2,
|
|
||||||
leaving only text and line breaks'''
|
|
||||||
|
|
||||||
resp = http.request('GET', url)
|
|
||||||
tree = html.document_fromstring(resp.data)
|
|
||||||
divs = lyrics_xpath(tree)
|
|
||||||
del resp, tree
|
|
||||||
|
|
||||||
return '\n'.join(_parse_text(div) for div in divs).strip()
|
|
||||||
|
|
||||||
|
|
||||||
def _parse_text(elem: html.HtmlElement) -> str:
|
|
||||||
|
|
||||||
for br in br_xpath(elem):
|
|
||||||
br.tail = '\n' + (br.tail or '')
|
|
||||||
|
|
||||||
return elem.text_content()
|
|
|
@ -1,5 +1,4 @@
|
||||||
websockets==12.0
|
websockets==12.0
|
||||||
yt-dlp==2024.4.9
|
yt-dlp>=2024.4.9
|
||||||
mutagen==1.47.0
|
|
||||||
urllib3==2.2.1
|
|
||||||
lxml==5.2.1
|
lxml==5.2.1
|
||||||
|
mutagen==1.47.0
|
||||||
|
|
|
@ -1,42 +0,0 @@
|
||||||
from unittest import TestCase
|
|
||||||
import urllib3
|
|
||||||
|
|
||||||
import genius
|
|
||||||
|
|
||||||
|
|
||||||
TITLE = 'A Line In The Sand'
|
|
||||||
ARTIST = 'Linkin Park'
|
|
||||||
URL = 'https://genius.com/Linkin-park-a-line-in-the-sand-lyrics'
|
|
||||||
|
|
||||||
LYR1 = '''[Intro: Mike Shinoda]
|
|
||||||
Today, we stood on the wall'''
|
|
||||||
|
|
||||||
LYR2 = '''little did we know
|
|
||||||
|
|
||||||
[Instrumental Break]
|
|
||||||
|
|
||||||
[Verse 1:'''
|
|
||||||
|
|
||||||
LYR3 = '''you are gonna get yours
|
|
||||||
|
|
||||||
[Chorus: Chester Bennington]
|
|
||||||
Another day'''
|
|
||||||
|
|
||||||
|
|
||||||
class TestGenius(TestCase):
|
|
||||||
|
|
||||||
def setUp(self) -> None:
|
|
||||||
self.http = urllib3.PoolManager()
|
|
||||||
|
|
||||||
def test_search_success(self) -> None:
|
|
||||||
url = genius.search(self.http, TITLE, ARTIST)
|
|
||||||
self.assertEqual(url, URL)
|
|
||||||
|
|
||||||
def test_lyrics_parsing(self) -> None:
|
|
||||||
lyrics = genius.parse(self.http, URL)
|
|
||||||
self.assertTrue(lyrics.startswith(LYR1))
|
|
||||||
self.assertTrue(LYR2 in lyrics)
|
|
||||||
self.assertTrue(LYR3 in lyrics)
|
|
||||||
|
|
||||||
def tearDown(self) -> None:
|
|
||||||
self.http.clear()
|
|
Loading…
Add table
Reference in a new issue