#!/usr/bin/env python3
|
|
|
|
|
|
|
|
import os
|
|
|
|
import sys
|
|
|
|
import shutil
|
|
|
|
from pathlib import Path
|
|
|
|
|
|
|
|
import logging
|
|
|
|
|
|
|
|
import mimetypes
|
|
|
|
import subprocess
|
|
|
|
|
|
|
|
from typing import TypedDict
|
|
|
|
from typing import Optional, Any
|
|
|
|
|
|
|
|
import re
|
|
|
|
|
|
|
|
import requests
|
|
|
|
from bs4 import BeautifulSoup # type: ignore
|
|
|
|
|
|
|
|
from mutagen.id3 import ID3 # type: ignore
|
|
|
|
from mutagen.id3 import TPE1, TIT2, TALB
|
|
|
|
from mutagen.id3 import TYER, TRCK
|
|
|
|
from mutagen.id3 import USLT, APIC
|
|
|
|
|
|
|
|
# Site root, used to absolutise relative cover-image URLs.
BASEURL = 'https://www.azlyrics.com'

# Desktop Firefox User-Agent; azlyrics blocks obvious bot agents.
USERAGENT = (
    'Mozilla/5.0 (X11; Linux x86_64; rv:109.0) '
    'Gecko/20100101 Firefox/110.0'
)

# CSS selector for the main page column that holds title, artist and lyrics.
LYRICS_ROW = '.main-page>.row>.col-xs-12'

# Matches every character that is NOT allowed in generated file names
# (Latin/Cyrillic letters, digits and a set of punctuation are kept).
safename_re = re.compile(
    r'[^A-Za-z0-9А-ЯЁа-яё \'".,()\[\]&!#$@_~=*+-]'
)

# Shared HTTP session so all requests reuse one connection pool and
# carry the spoofed User-Agent header.
session = requests.Session()
session.headers['User-Agent'] = USERAGENT
|
|
|
|
|
|
|
|
|
|
|
|
class ParseResult(TypedDict):
    """Everything the script knows about one song.

    Filled by parse_azlyrics() and/or manual_info_input(), consumed by
    tagmp3().
    """

    # Song title as scraped from the page (surrounding quotes stripped).
    title: str
    # Performing artist (" Lyrics" suffix removed).
    artist: str
    # Album name.
    album: str
    # Album release year; 0 when unknown.
    year: int
    # 1-based track position within the album; 0 when unknown.
    track_no: int
    # Total number of tracks in the album; 0 when unknown.
    tracks: int
    # Full lyrics text.
    lyrics: str
    # Raw cover-image bytes, or None when no cover was obtained.
    cover: Optional[bytes]
    # MIME type of `cover` (e.g. "image/jpeg"); None when no cover.
    cover_mime: Optional[str]
|
|
|
|
|
|
|
|
|
|
|
|
class ParseError(Exception):
    """Raised when a piece of song information cannot be scraped.

    `parsing_obj` names the thing that failed to parse; the special
    value `ParseError.EDIT` is used as a control-flow signal meaning
    the user asked to edit the parsed data rather than a real failure.
    """

    # Sentinel `parsing_obj` value: user requested manual editing.
    EDIT = 'edit'

    def __init__(self, parsing_obj: str) -> None:
        self.parsing_obj = parsing_obj
        message = f'Unable to parse {parsing_obj}'
        super().__init__(message)
|
2023-02-03 19:59:13 +03:00
|
|
|
|
|
|
|
|
2023-02-08 14:00:20 +03:00
|
|
|
# Module-level scratchpad for the song currently being processed.
# Filled by parse_azlyrics() and/or manual_info_input(), finally
# consumed by tagmp3().
parsed = ParseResult(
    title='', artist='',
    album='', year=0,
    track_no=0, tracks=0,
    lyrics='',
    cover=None,
    cover_mime=None,
)
|
|
|
|
|
|
|
|
|
2023-02-03 19:59:13 +03:00
|
|
|
def main() -> None:
    """Entry point: derive the song title, gather metadata (scraped or
    manual), then tag the mp3.

    Command line: argv[1] == "1" means copy the file into ./tagged
    before tagging; argv[2] is the path of the mp3.
    """
    global parsed

    copy = int(sys.argv[1]) == 1
    file = sys.argv[2]

    title = conv_title(file)
    print(
        'Enter new title to correct it, '
        'or press Enter to continue',
        '"!--" without quotes means that '
        'you want to enter info and lyrics manually',
        sep='\n',
    )
    print('Title:', title)
    correct = input().strip()

    if correct == '!--':
        # Fully manual mode requested.
        manual_info_input()
    else:
        if correct != '':
            title = correct.lower()

        try:
            url = search_azurl(title)
            print(url)
            parse_azlyrics(url)
        except Exception as exc:
            print(exc)

            # ParseError.EDIT is a control-flow signal, not a failure:
            # the user asked to edit what was parsed, so skip the
            # explanatory message.
            # pylint: disable=no-member
            wants_edit = (
                isinstance(exc, ParseError)
                and exc.parsing_obj == ParseError.EDIT
            )
            # pylint: enable=no-member

            if not wants_edit:
                print(
                    'In most cases, this error means that '
                    'the script have received some incorrect data, '
                    'so you should enter song info manually.'
                )

            # Keep any lyrics an earlier partial parse already saved.
            manual_info_input(False)

    tagmp3(file, copy)
|
2023-02-03 19:59:13 +03:00
|
|
|
|
|
|
|
|
2023-02-08 14:00:20 +03:00
|
|
|
# pylint: disable=redefined-builtin
def input(msg: str = '', def_: Any = '') -> str:
    # Replacement for builtin input() that supports an editable,
    # pre-filled default value via bash's readline-backed `read -e -i`.
    # The entered text is passed back through a temporary ./input file
    # because `read` runs in a child shell process.
    # NOTE(review): `msg` and `def_` are interpolated into the shell
    # command; quotes/metacharacters in them would break or inject.
    # Acceptable for a personal script, unsafe for untrusted input.
    subprocess.call(
        (
            f'read -e -r -i "{def_}" -p "{msg}" input; '
            'echo -n "$input" >./input'
        ),
        shell=True,
        executable='bash',
    )

    try:
        with open('./input', 'rt', encoding='utf-8') as f:
            # Strip one trailing newline / carriage return, if present.
            return f.read() \
                .removesuffix('\n') \
                .removesuffix('\r')
    except Exception:
        # ./input missing or unreadable: fall back to the default.
        return def_
# pylint: enable=redefined-builtin
|
2023-02-08 13:07:51 +03:00
|
|
|
|
|
|
|
|
2023-02-03 19:59:13 +03:00
|
|
|
def input_num(msg: str, def_: int = 0) -> int:
    """Prompt for an integer; return ``def_`` when the entry is not numeric."""
    try:
        value = int(input(msg, def_))
    except ValueError:
        return def_
    return value
|
|
|
|
|
|
|
|
|
2023-02-07 18:49:22 +03:00
|
|
|
def safename(value: str) -> str:
    """Return *value* with file-name-unsafe characters replaced by spaces."""
    sanitized = safename_re.sub(' ', value)
    return sanitized
|
|
|
|
|
|
|
|
|
2023-02-03 19:59:13 +03:00
|
|
|
def conv_title(file: str) -> str:
    """Derive a search-friendly song title from a downloaded file path.

    Strips the known download directories, the "---<id>.<ext>" suffix
    left by the downloader, and parenthesised junk such as
    "(Official Audio)"; underscores become spaces and the result is
    lower-cased.
    """
    # Drop the known download-directory prefixes.
    name = file.replace('./convert/', '').replace('./files/', '')

    # Drop the "---<YT id>.<extension>" tail appended by the downloader.
    name = re.sub(r'-{3}[\w_-]*\.[\w_-]*', '', name)

    # Drop parenthesised annotations like "(Official Audio)".
    name = re.sub(r'\(.*\)', '', name)

    # Underscores stood in for spaces; normalise whitespace and case.
    return name.replace('_', ' ').strip().lower()
|
|
|
|
|
|
|
|
|
|
|
|
def search_azurl(title: str) -> str:
    """Find the azlyrics.com lyrics page for *title* via a SearX instance.

    :raises ParseError: when no azlyrics result link is found.
    :return: absolute URL of the lyrics page.
    """
    print('Searching...')

    response = session.get(
        'https://searx.dc09.ru/search',
        params={  # type: ignore
            'q': f'{title} site:azlyrics.com',
            'language': 'ru-RU',
            'safesearch': 0,
        },
    )

    # First result link that points at an azlyrics lyrics page.
    result = BeautifulSoup(response.text, 'html.parser').select_one(
        'div#urls>article>h3>a'
        '[href*="azlyrics.com/lyrics/"]'
    )

    if result is None:
        raise ParseError('song URL')

    return str(result.get('href'))
|
|
|
|
|
|
|
|
|
2023-02-08 13:07:51 +03:00
|
|
|
def parse_azlyrics(link: str) -> None:
    """Scrape song metadata and lyrics from an azlyrics.com page.

    Fills the global ``parsed`` dict (lyrics, title, artist, album,
    year, cover, track numbers) and dumps the lyrics to ./lyrics.txt so
    a later manual edit can reuse them.

    :param link: URL of the azlyrics lyrics page.
    :raises ParseError: when a required page element is missing, or
        with ``ParseError.EDIT`` when the user asks to edit the result.
    """
    global parsed

    print('Please wait...')

    page = session.get(link)
    soup = BeautifulSoup(page.text, 'html.parser')

    # The lyrics live in the first "plain" <div> of the main column.
    lyrics = soup.select_one(
        f'{LYRICS_ROW}>div'
        ':not(.div-share)'
        ':not(.lyricsh)'
        ':not(.ringtone)'
    )
    if lyrics is None:
        raise ParseError('song lyrics')
    parsed['lyrics'] = lyrics.get_text().strip()

    # Keep a copy on disk so manual_info_input(False) can reuse it.
    lyrics_file = Path('.') / 'lyrics.txt'
    with lyrics_file.open('wt', encoding='utf-8') as f:
        f.write(parsed['lyrics'])

    title_elem = soup.select_one(f'{LYRICS_ROW}>b')
    if title_elem is None:
        raise ParseError('song title')
    parsed['title'] = title_elem.get_text().strip('" ')

    artist_elem = soup.select_one(f'{LYRICS_ROW}>.lyricsh>h2')
    if artist_elem is None:
        raise ParseError('artist name')
    parsed['artist'] = artist_elem.get_text() \
        .removesuffix(' Lyrics') \
        .strip()

    # When the song appears in several albums the page shows multiple
    # blocks; the second-to-last one is the original album.
    album_blocks = soup.select('.songinalbum_title')
    album = None

    if len(album_blocks) > 1:
        album = album_blocks[-2]
    elif len(album_blocks) > 0:
        album = album_blocks[0]
    else:
        raise ParseError('album name')

    album_re = re.search(
        r'album:\s*"(.+?)"\s*\((\d+)\)',
        album.get_text()
    )
    if album_re is None:
        raise ParseError('album name')

    parsed['album'] = album_re[1]
    parsed['year'] = int(album_re[2])

    cover = album.select_one('img.album-image')
    if cover is not None:
        cover_url = str(cover.get('src'))
        # Relative cover URLs need the site prefix.
        if cover_url.startswith('/'):
            cover_url = BASEURL + cover_url

        req = session.get(cover_url)
        parsed['cover'] = req.content
        parsed['cover_mime'] = req.headers.get(
            'Content-Type', 'image/jpeg'
        )

    tracklist_elem = soup.select_one('.songlist-panel')
    if tracklist_elem is not None:
        tracklist = tracklist_elem.select(
            '.listalbum-item'
        )
        parsed['tracks'] = len(tracklist)

        # Locate this song inside the album's track list by matching
        # its relative URL against each track link.
        current_url = re.search(
            r'/(lyrics/.+?\.html)',
            link,
        )

        parsed['track_no'] = 0
        if current_url is not None:
            for i, track in enumerate(tracklist):
                track_url = track.select_one('a')
                if track_url is None:
                    continue

                track_href = str(track_url.get('href'))
                if current_url[0] in track_href:
                    parsed['track_no'] = (i + 1)
                    break

    print('Successfully parsed')  # FIX: message was misspelled "Succesfully"
    print('Title:', parsed['title'])
    print('Artist:', parsed['artist'])
    print('Album:', parsed['album'])
    print('Track:', parsed['track_no'], '/', parsed['tracks'])
    print('Correct something?')

    # BUG FIX: the original compared the bound method `.lower` (never
    # equal to 'y'), so answering "y" could not trigger the edit path.
    if input('[y/N] ').lower() == 'y':
        raise ParseError(ParseError.EDIT)

    print()
|
2023-02-03 19:59:13 +03:00
|
|
|
|
|
|
|
|
2023-02-08 13:07:51 +03:00
|
|
|
def manual_info_input(overwrite_lyrics: bool = True) -> None:
    """Interactively fill the global ``parsed`` dict.

    Prompts for every text/number field, opens a text editor on
    ./lyrics.txt for the lyrics, and optionally reads an album cover
    from a file on disk.

    :param overwrite_lyrics: when True (default) lyrics.txt is cleared
        before the editor opens; pass False to keep lyrics a previous
        (partially successful) parse already wrote there.
    """
    global parsed

    parsed['title'] = input('Song title: ', parsed['title'])
    parsed['artist'] = input('Artist name: ', parsed['artist'])
    parsed['album'] = input('Album name: ', parsed['album'])
    parsed['year'] = input_num('Release year: ', parsed['year'])
    parsed['track_no'] = input_num('Track #', parsed['track_no'])
    parsed['tracks'] = input_num('Tracks in album: ', parsed['tracks'])

    editor = os.getenv('EDITOR', 'nano')
    print('Now, paste the lyrics into a text editor')
    print(f'Default editor: {editor}')
    print('Enter another or press Enter to continue')
    other_editor = input().strip()

    if other_editor != '':
        editor = other_editor

    try:
        lyrics_file = Path('.') / 'lyrics.txt'

        if overwrite_lyrics or not lyrics_file.exists():
            # FIX: write with an explicit encoding to match the utf-8
            # read below (previously used the platform default).
            with lyrics_file.open('wt', encoding='utf-8') as f:
                f.write('\n')

        subprocess.call([
            editor,
            lyrics_file,
        ])

        print('Reading file...')
        with open('lyrics.txt', 'rt', encoding='utf-8') as f:
            parsed['lyrics'] = f.read().strip()
        print('Done')

    except OSError as err:
        logging.exception(err)

    cover = input('Insert an album cover? [Y/n] ')
    if cover.lower() != 'n':
        try:
            print(
                'Download the cover and enter its path:',
                '(relative path is not recommended)',
                sep='\n',
            )
            cover_file = Path(input().strip())

            with cover_file.open('rb') as f:
                parsed['cover'] = f.read()

            # Guess the MIME type from the extension; default to JPEG.
            parsed['cover_mime'] = (
                mimetypes.guess_type(cover_file)[0]
                or 'image/jpeg'
            )
        except Exception as err:
            logging.exception(err)

    print()
|
2023-02-03 19:59:13 +03:00
|
|
|
|
|
|
|
|
|
|
|
def tagmp3(
        file: str,
        copy: bool) -> None:
    """Write ID3 tags from the global ``parsed`` dict into an mp3.

    :param file: path of the source mp3.
    :param copy: when True the file is first copied to
        ``./tagged/<artist>/<album>/<track_no>. <title>.mp3`` and the
        album cover (if any) is saved next to it; the copy is what
        gets tagged.
    """
    global parsed

    oldpath = Path(file)
    newpath = oldpath

    if copy:
        newdir = (
            Path('./tagged') /
            safename(parsed['artist']) /
            safename(parsed['album'])
        )
        os.makedirs(newdir, exist_ok=True)

        newpath = newdir / safename(
            f"{parsed['track_no']}. " +
            f"{parsed['title']}.mp3"
        )
        shutil.copy(oldpath, newpath)

        # Save the cover image next to the tagged copy.
        # BUG FIX: this must only run when `newdir` exists (i.e. when
        # copying); the original referenced `newdir` unconditionally,
        # raising NameError when copy was False and a cover was set.
        if parsed['cover'] is not None:
            ext = mimetypes.guess_extension(
                parsed['cover_mime'] or ''
            ) or '.jpg'

            cover = newdir / f'cover{ext}'
            with cover.open('wb') as f:
                f.write(parsed['cover'])

    id3 = ID3(str(newpath))
    id3['TPE1'] = TPE1(text=parsed['artist'])
    id3['TIT2'] = TIT2(text=parsed['title'])
    id3['TALB'] = TALB(text=parsed['album'])
    id3['TYER'] = TYER(text=f"{parsed['year']}")
    id3['TRCK'] = TRCK(
        text=(
            f"{parsed['track_no']}/"
            f"{parsed['tracks']}"
        )
    )
    id3['USLT'] = USLT(text=parsed['lyrics'])
    if parsed['cover'] is not None:
        id3['APIC'] = APIC(
            data=parsed['cover'],
            mime=parsed['cover_mime'],
        )
    id3.save()
|
|
|
|
|
|
|
|
|
|
|
|
# Script entry point; main() reads sys.argv[1] (copy flag) and
# sys.argv[2] (mp3 path).
if __name__ == '__main__':
    main()
|