445 lines
10 KiB
Python
Executable file
445 lines
10 KiB
Python
Executable file
#!/usr/bin/env python3
|
||
|
||
import os
|
||
import sys
|
||
import shutil
|
||
from pathlib import Path
|
||
|
||
import logging
|
||
|
||
import mimetypes
|
||
import subprocess
|
||
|
||
from typing import TypedDict
|
||
from typing import Optional, Any
|
||
|
||
import re
|
||
|
||
import requests
|
||
from bs4 import BeautifulSoup # type: ignore
|
||
|
||
from mutagen.id3 import ID3 # type: ignore
|
||
from mutagen.id3 import TPE1, TIT2, TALB
|
||
from mutagen.id3 import TYER, TRCK
|
||
from mutagen.id3 import USLT, APIC
|
||
|
||
BASEURL = 'https://www.azlyrics.com'
|
||
USERAGENT = (
|
||
'Mozilla/5.0 (X11; Linux x86_64; rv:109.0) '
|
||
'Gecko/20100101 Firefox/110.0'
|
||
)
|
||
|
||
LYRICS_ROW = '.main-page>.row>.col-xs-12'
|
||
|
||
safename_re = re.compile(
|
||
r'[^A-Za-z0-9А-ЯЁа-яё \'".,()\[\]&!#$@_~=*+-]'
|
||
)
|
||
|
||
session = requests.Session()
|
||
session.headers['User-Agent'] = USERAGENT
|
||
|
||
|
||
class ParseResult(TypedDict):
|
||
title: str
|
||
artist: str
|
||
album: str
|
||
year: int
|
||
track_no: int
|
||
tracks: int
|
||
lyrics: str
|
||
cover: Optional[bytes]
|
||
cover_mime: Optional[str]
|
||
|
||
|
||
class ParseError(Exception):
|
||
|
||
EDIT = 'edit'
|
||
|
||
def __init__(self, parsing_obj: str) -> None:
|
||
|
||
super().__init__(
|
||
f'Unable to parse {parsing_obj}'
|
||
)
|
||
self.parsing_obj = parsing_obj
|
||
|
||
|
||
parsed = ParseResult(
|
||
title='', artist='',
|
||
album='', year=0,
|
||
track_no=0, tracks=0,
|
||
lyrics='',
|
||
cover=None,
|
||
cover_mime=None,
|
||
)
|
||
|
||
|
||
def main() -> None:
|
||
|
||
global parsed
|
||
|
||
copy = int(sys.argv[1]) == 1
|
||
file = sys.argv[2]
|
||
|
||
title = conv_title(file)
|
||
print(
|
||
'Enter new title to correct it, '
|
||
'or press Enter to continue',
|
||
'"!--" without quotes means that '
|
||
'you want to enter info and lyrics manually',
|
||
sep='\n',
|
||
)
|
||
print('Title:', title)
|
||
correct = input().strip()
|
||
|
||
if correct == '!--':
|
||
manual_info_input()
|
||
|
||
else:
|
||
|
||
if correct != '':
|
||
title = correct.lower()
|
||
|
||
try:
|
||
url = search_azurl(title)
|
||
print(url)
|
||
parse_azlyrics(url)
|
||
|
||
except Exception as err:
|
||
|
||
print(err)
|
||
|
||
# pylint: disable=no-member
|
||
if isinstance(err, ParseError) \
|
||
and err.parsing_obj == ParseError.EDIT:
|
||
pass
|
||
# pylint: enable=no-member
|
||
|
||
else:
|
||
print(
|
||
'In most cases, this error means that '
|
||
'the script have received some incorrect data, '
|
||
'so you should enter song info manually.'
|
||
)
|
||
|
||
manual_info_input(False)
|
||
|
||
tagmp3(file, copy)
|
||
|
||
|
||
# pylint: disable=redefined-builtin
|
||
def input(msg: str = '', def_: Any = '') -> str:
|
||
|
||
subprocess.call(
|
||
(
|
||
f'read -e -r -i "{def_}" -p "{msg}" input; '
|
||
'echo -n "$input" >./input'
|
||
),
|
||
shell=True,
|
||
executable='bash',
|
||
)
|
||
|
||
try:
|
||
with open('./input', 'rt', encoding='utf-8') as f:
|
||
return f.read() \
|
||
.removesuffix('\n') \
|
||
.removesuffix('\r')
|
||
except Exception:
|
||
return def_
|
||
# pylint: enable=redefined-builtin
|
||
|
||
|
||
def input_num(msg: str, def_: int = 0) -> int:
|
||
|
||
try:
|
||
return int(input(msg, def_))
|
||
except ValueError:
|
||
return def_
|
||
|
||
|
||
def safename(value: str) -> str:
|
||
|
||
return safename_re.sub(' ', value)
|
||
|
||
|
||
def conv_title(file: str) -> str:
|
||
|
||
# Remove file path
|
||
title = file \
|
||
.replace('./convert/', '') \
|
||
.replace('./files/', '')
|
||
|
||
# Remove a YT ID and an extension
|
||
title = re.sub(
|
||
r'-{3}[\w_-]*\.[\w_-]*',
|
||
'', title,
|
||
)
|
||
|
||
# Remove "(Official Audio)"
|
||
title = re.sub(
|
||
r'\(.*\)',
|
||
'', title,
|
||
)
|
||
|
||
# underscore -> space
|
||
title = title \
|
||
.replace('_', ' ') \
|
||
.strip() \
|
||
.lower()
|
||
|
||
return title
|
||
|
||
|
||
def search_azurl(title: str) -> str:
|
||
|
||
print('Searching...')
|
||
|
||
page = session.get(
|
||
'https://searx.dc09.ru/search',
|
||
params={ # type: ignore
|
||
'q': f'{title} site:azlyrics.com',
|
||
'language': 'ru-RU',
|
||
'safesearch': 0,
|
||
},
|
||
)
|
||
|
||
soup = BeautifulSoup(page.text, 'html.parser')
|
||
link = soup.select_one(
|
||
'div#urls>article>h3>a'
|
||
'[href*="azlyrics.com/lyrics/"]'
|
||
)
|
||
|
||
if link is None:
|
||
raise ParseError('song URL')
|
||
|
||
return str(link.get('href'))
|
||
|
||
|
||
def parse_azlyrics(link: str) -> None:
|
||
|
||
global parsed
|
||
|
||
print('Please wait...')
|
||
|
||
page = session.get(link)
|
||
soup = BeautifulSoup(page.text, 'html.parser')
|
||
|
||
lyrics = soup.select_one(
|
||
f'{LYRICS_ROW}>div'
|
||
':not(.div-share)'
|
||
':not(.lyricsh)'
|
||
':not(.ringtone)'
|
||
)
|
||
if lyrics is None:
|
||
raise ParseError('song lyrics')
|
||
parsed['lyrics'] = lyrics.get_text().strip()
|
||
|
||
lyrics_file = Path('.') / 'lyrics.txt'
|
||
with lyrics_file.open('wt', encoding='utf-8') as f:
|
||
f.write(parsed['lyrics'])
|
||
|
||
title_elem = soup.select_one(f'{LYRICS_ROW}>b')
|
||
if title_elem is None:
|
||
raise ParseError('song title')
|
||
parsed['title'] = title_elem.get_text().strip('" ')
|
||
|
||
artist_elem = soup.select_one(f'{LYRICS_ROW}>.lyricsh>h2')
|
||
if artist_elem is None:
|
||
raise ParseError('artist name')
|
||
parsed['artist'] = artist_elem.get_text() \
|
||
.removesuffix(' Lyrics') \
|
||
.strip()
|
||
|
||
album_blocks = soup.select('.songinalbum_title')
|
||
album = None
|
||
|
||
if len(album_blocks) > 1:
|
||
album = album_blocks[-2]
|
||
elif len(album_blocks) > 0:
|
||
album = album_blocks[0]
|
||
else:
|
||
raise ParseError('album name')
|
||
|
||
album_re = re.search(
|
||
r'album:\s*"(.+?)"\s*\((\d+)\)',
|
||
album.get_text()
|
||
)
|
||
if album_re is None:
|
||
raise ParseError('album name')
|
||
|
||
parsed['album'] = album_re[1]
|
||
parsed['year'] = int(album_re[2])
|
||
|
||
cover = album.select_one('img.album-image')
|
||
|
||
if cover is not None:
|
||
|
||
cover_url = str(cover.get('src'))
|
||
if cover_url.startswith('/'):
|
||
cover_url = BASEURL + cover_url
|
||
|
||
req = session.get(cover_url)
|
||
parsed['cover'] = req.content
|
||
parsed['cover_mime'] = req.headers.get(
|
||
'Content-Type', 'image/jpeg'
|
||
)
|
||
|
||
tracklist_elem = soup.select_one('.songlist-panel')
|
||
if tracklist_elem is not None:
|
||
|
||
tracklist = tracklist_elem.select(
|
||
'.listalbum-item'
|
||
)
|
||
parsed['tracks'] = len(tracklist)
|
||
|
||
current_url = re.search(
|
||
r'/(lyrics/.+?\.html)',
|
||
link,
|
||
)
|
||
|
||
parsed['track_no'] = 0
|
||
if current_url is not None:
|
||
for i, track in enumerate(tracklist):
|
||
|
||
track_url = track.select_one('a')
|
||
if track_url is None:
|
||
continue
|
||
|
||
track_href = str(track_url.get('href'))
|
||
if current_url[0] in track_href:
|
||
parsed['track_no'] = (i + 1)
|
||
break
|
||
|
||
print('Succesfully parsed')
|
||
print('Title:', parsed['title'])
|
||
print('Artist:', parsed['artist'])
|
||
print('Album:', parsed['album'])
|
||
print('Track:', parsed['track_no'], '/', parsed['tracks'])
|
||
print('Correct something?')
|
||
|
||
if input('[y/N] ').lower() == 'y':
|
||
print('Raising ParseError') # <-- TODO
|
||
raise ParseError(ParseError.EDIT)
|
||
|
||
print()
|
||
|
||
|
||
def manual_info_input(overwrite_lyrics: bool = True) -> None:
|
||
|
||
global parsed
|
||
|
||
parsed['title'] = input('Song title: ', parsed['title'])
|
||
parsed['artist'] = input('Artist name: ', parsed['artist'])
|
||
parsed['album'] = input('Album name: ', parsed['album'])
|
||
parsed['year'] = input_num('Release year: ', parsed['year'])
|
||
parsed['track_no'] = input_num('Track #', parsed['track_no'])
|
||
parsed['tracks'] = input_num('Tracks in album: ', parsed['tracks'])
|
||
|
||
editor = os.getenv('EDITOR', 'nano')
|
||
print('Now, paste the lyrics into a text editor')
|
||
print(f'Default editor: {editor}')
|
||
print('Enter another or press Enter to continue')
|
||
other_editor = input().strip()
|
||
|
||
if other_editor != '':
|
||
editor = other_editor
|
||
|
||
try:
|
||
lyrics_file = Path('.') / 'lyrics.txt'
|
||
|
||
if overwrite_lyrics or not lyrics_file.exists():
|
||
with lyrics_file.open('wt') as f:
|
||
f.write('\n')
|
||
|
||
subprocess.call([
|
||
editor,
|
||
lyrics_file,
|
||
])
|
||
|
||
print('Reading file...')
|
||
with open('lyrics.txt', 'rt', encoding='utf-8') as f:
|
||
parsed['lyrics'] = f.read().strip()
|
||
print('Done')
|
||
|
||
except OSError as err:
|
||
logging.exception(err)
|
||
|
||
cover = input('Insert an album cover? [Y/n] ')
|
||
if cover.lower() != 'n':
|
||
try:
|
||
print(
|
||
'Download the cover and enter its path:',
|
||
'(relative path is not recommended)',
|
||
sep='\n',
|
||
)
|
||
cover_file = Path(input().strip())
|
||
|
||
with cover_file.open('rb') as f:
|
||
parsed['cover'] = f.read()
|
||
|
||
parsed['cover_mime'] = (
|
||
mimetypes.guess_type(cover_file)[0]
|
||
or 'image/jpeg'
|
||
)
|
||
except Exception as err:
|
||
logging.exception(err)
|
||
|
||
print()
|
||
|
||
|
||
def tagmp3(
|
||
file: str,
|
||
copy: bool) -> None:
|
||
|
||
global parsed
|
||
|
||
oldpath = Path(file)
|
||
newpath = oldpath
|
||
|
||
if copy:
|
||
|
||
newdir = (
|
||
Path('./tagged') /
|
||
safename(parsed['artist']) /
|
||
safename(parsed['album'])
|
||
)
|
||
os.makedirs(newdir, exist_ok=True)
|
||
|
||
newpath = newdir / safename(
|
||
f"{parsed['track_no']}. " +
|
||
f"{parsed['title']}.mp3"
|
||
)
|
||
shutil.copy(oldpath, newpath)
|
||
|
||
if parsed['cover'] is not None:
|
||
|
||
ext = mimetypes.guess_extension(
|
||
parsed['cover_mime'] or ''
|
||
) or '.jpg'
|
||
|
||
cover = newdir / f'cover{ext}'
|
||
with cover.open('wb') as f:
|
||
f.write(parsed['cover'])
|
||
|
||
id3 = ID3(str(newpath))
|
||
id3['TPE1'] = TPE1(text=parsed['artist'])
|
||
id3['TIT2'] = TIT2(text=parsed['title'])
|
||
id3['TALB'] = TALB(text=parsed['album'])
|
||
id3['TYER'] = TYER(text=f"{parsed['year']}")
|
||
id3['TRCK'] = TRCK(
|
||
text=(
|
||
f"{parsed['track_no']}/"
|
||
f"{parsed['tracks']}"
|
||
)
|
||
)
|
||
id3['USLT'] = USLT(text=parsed['lyrics'])
|
||
if parsed['cover'] is not None:
|
||
id3['APIC'] = APIC(
|
||
data=parsed['cover'],
|
||
mime=parsed['cover_mime'],
|
||
)
|
||
id3.save()
|
||
|
||
|
||
if __name__ == '__main__':
|
||
main()
|