#!/usr/bin/env python3
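# Overview (added summary, inferred from the code below rather than taken from
# the original author): an interactive helper that tags MP3 files produced by
# autoytdlp. It guesses the song title from the file name, searches
# azlyrics.com through a searx instance, parses title/artist/album/year/track
# number and lyrics (with a manual-input fallback), and writes ID3 tags via
# mutagen. Assumed invocation, based on the sys.argv handling in main():
#   ./.id3tag_helper.py <copy-into-./tagged: 0|1> <path/to/file.mp3>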
import os
import sys
import shutil
from pathlib import Path
import logging
import mimetypes
import subprocess
from typing import TypedDict
from typing import Optional, Any
import re
import requests
from bs4 import BeautifulSoup # type: ignore
from mutagen.id3 import ID3 # type: ignore
from mutagen.id3 import TPE1, TIT2, TALB
from mutagen.id3 import TYER, TRCK
from mutagen.id3 import USLT, APIC
BASEURL = 'https://www.azlyrics.com'
USERAGENT = (
    'Mozilla/5.0 (X11; Linux x86_64; rv:109.0) '
    'Gecko/20100101 Firefox/110.0'
)
LYRICS_ROW = '.main-page>.row>.col-xs-12'
safename_re = re.compile(
    r'[^A-Za-z0-9А-ЯЁа-яё \'".,()\[\]&!#$@_~=*+-]'
)
session = requests.Session()
session.headers['User-Agent'] = USERAGENT
class ParseResult(TypedDict):
    title: str
    artist: str
    album: str
    year: int
    track_no: int
    tracks: int
    lyrics: str
    cover: Optional[bytes]
    cover_mime: Optional[str]
class ParseError(Exception):
    EDIT = 'edit'

    def __init__(self, parsing_obj: str) -> None:
        super().__init__(
            f'Unable to parse {parsing_obj}'
        )
        self.parsing_obj = parsing_obj
parsed = ParseResult(
    title='', artist='',
    album='', year=0,
    track_no=0, tracks=0,
    lyrics='',
    cover=None,
    cover_mime=None,
)
def main() -> None:
    global parsed
    copy = int(sys.argv[1]) == 1
    file = sys.argv[2]
    title = conv_title(file)
    print(
        'Enter new title to correct it, '
        'or press Enter to continue',
        '"!--" without quotes means that '
        'you want to enter info and lyrics manually',
        sep='\n',
    )
    print('Title:', title)
    correct = input().strip()
    if correct == '!--':
        manual_info_input()
    else:
        if correct != '':
            title = correct.lower()
        try:
            url = search_azurl(title)
            print(url)
            parse_azlyrics(url)
        except Exception as err:
            print(err)
            # pylint: disable=no-member
            if isinstance(err, ParseError) \
                    and err.parsing_obj == ParseError.EDIT:
                pass
            # pylint: enable=no-member
            else:
                print(
                    'In most cases, this error means that '
                    'the script has received some incorrect data, '
                    'so you should enter song info manually.'
                )
            manual_info_input(False)
    tagmp3(file, copy)
# pylint: disable=redefined-builtin
def input(msg: str = '', def_: Any = '') -> str:
    subprocess.call(
        (
            f'read -e -r -i "{def_}" -p "{msg}" input; '
            'echo -n "$input" >./input'
        ),
        shell=True,
        executable='bash',
    )
    try:
        with open('./input', 'rt', encoding='utf-8') as f:
            return f.read() \
                .removesuffix('\n') \
                .removesuffix('\r')
    except Exception:
        return def_
# pylint: enable=redefined-builtin
def input_num(msg: str, def_: int = 0) -> int:
    try:
        return int(input(msg, def_))
    except ValueError:
        return def_
def safename(value: str) -> str:
    return safename_re.sub(' ', value)
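# Illustrative example (not from the original): safename() swaps every
# character outside the whitelist above for a space, so a string like
# 'AC/DC: Back In Black?' would come out as 'AC DC  Back In Black '.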
def conv_title(file: str) -> str:
    # Remove file path
    title = file \
        .replace('./convert/', '') \
        .replace('./files/', '')
    # Remove a YT ID and an extension
    title = re.sub(
        r'-{3}[\w_-]*\.[\w_-]*',
        '', title,
    )
    # Remove "(Official Audio)"
    title = re.sub(
        r'\(.*\)',
        '', title,
    )
    # underscore -> space
    title = title \
        .replace('_', ' ') \
        .strip() \
        .lower()
    return title
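# Illustrative example (not from the original), using a hypothetical yt-dlp
# output name:
#   conv_title('./files/Artist_-_Song_(Official_Audio)---dQw4w9WgXcQ.opus')
#   would yield 'artist - song'.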
def search_azurl(title: str) -> str:
    print('Searching...')
    page = session.get(
        'https://searx.dc09.ru/search',
        params={  # type: ignore
            'q': f'{title} site:azlyrics.com',
            'language': 'ru-RU',
            'safesearch': 0,
        },
    )
    soup = BeautifulSoup(page.text, 'html.parser')
    link = soup.select_one(
        'div#urls>article>h3>a'
        '[href*="azlyrics.com/lyrics/"]'
    )
    if link is None:
        raise ParseError('song URL')
    return str(link.get('href'))
def parse_azlyrics(link: str) -> None:
    global parsed
    print('Please wait...')
    page = session.get(link)
    soup = BeautifulSoup(page.text, 'html.parser')

    lyrics = soup.select_one(
        f'{LYRICS_ROW}>div'
        ':not(.div-share)'
        ':not(.lyricsh)'
        ':not(.ringtone)'
    )
    if lyrics is None:
        raise ParseError('song lyrics')
    parsed['lyrics'] = lyrics.get_text().strip()
    lyrics_file = Path('.') / 'lyrics.txt'
    with lyrics_file.open('wt', encoding='utf-8') as f:
        f.write(parsed['lyrics'])

    title_elem = soup.select_one(f'{LYRICS_ROW}>b')
    if title_elem is None:
        raise ParseError('song title')
    parsed['title'] = title_elem.get_text().strip('" ')

    artist_elem = soup.select_one(f'{LYRICS_ROW}>.lyricsh>h2')
    if artist_elem is None:
        raise ParseError('artist name')
    parsed['artist'] = artist_elem.get_text() \
        .removesuffix(' Lyrics') \
        .strip()

    album_blocks = soup.select('.songinalbum_title')
    album = None
    if len(album_blocks) > 1:
        album = album_blocks[-2]
    elif len(album_blocks) > 0:
        album = album_blocks[0]
    else:
        raise ParseError('album name')
    album_re = re.search(
        r'album:\s*"(.+?)"\s*\((\d+)\)',
        album.get_text()
    )
    if album_re is None:
        raise ParseError('album name')
    parsed['album'] = album_re[1]
    parsed['year'] = int(album_re[2])

    cover = album.select_one('img.album-image')
    if cover is not None:
        cover_url = str(cover.get('src'))
        if cover_url.startswith('/'):
            cover_url = BASEURL + cover_url
        req = session.get(cover_url)
        parsed['cover'] = req.content
        parsed['cover_mime'] = req.headers.get(
            'Content-Type', 'image/jpeg'
        )

    tracklist_elem = soup.select_one('.songlist-panel')
    if tracklist_elem is not None:
        tracklist = tracklist_elem.select(
            '.listalbum-item'
        )
        parsed['tracks'] = len(tracklist)
        current_url = re.search(
            r'/(lyrics/.+?\.html)',
            link,
        )
        parsed['track_no'] = 0
        if current_url is not None:
            for i, track in enumerate(tracklist):
                track_url = track.select_one('a')
                if track_url is None:
                    continue
                track_href = str(track_url.get('href'))
                if current_url[0] in track_href:
                    parsed['track_no'] = (i + 1)
                    break
    print('Successfully parsed')
    print('Title:', parsed['title'])
    print('Artist:', parsed['artist'])
    print('Album:', parsed['album'])
    print('Track:', parsed['track_no'], '/', parsed['tracks'])
    print('Correct something?')
    if input('[y/N] ').lower() == 'y':
        raise ParseError(ParseError.EDIT)
    print()
def manual_info_input(overwrite_lyrics: bool = True) -> None:
    global parsed
    parsed['title'] = input('Song title: ', parsed['title'])
    parsed['artist'] = input('Artist name: ', parsed['artist'])
    parsed['album'] = input('Album name: ', parsed['album'])
    parsed['year'] = input_num('Release year: ', parsed['year'])
    parsed['track_no'] = input_num('Track #: ', parsed['track_no'])
    parsed['tracks'] = input_num('Tracks in album: ', parsed['tracks'])

    editor = os.getenv('EDITOR', 'nano')
    print('Now, paste the lyrics into a text editor')
    print(f'Default editor: {editor}')
    print('Enter another or press Enter to continue')
    other_editor = input().strip()
    if other_editor != '':
        editor = other_editor
    try:
        lyrics_file = Path('.') / 'lyrics.txt'
        if overwrite_lyrics or not lyrics_file.exists():
            with lyrics_file.open('wt', encoding='utf-8') as f:
                f.write('\n')
        subprocess.call([
            editor,
            lyrics_file,
        ])
        print('Reading file...')
        with open('lyrics.txt', 'rt', encoding='utf-8') as f:
            parsed['lyrics'] = f.read().strip()
        print('Done')
    except OSError as err:
        logging.exception(err)

    cover = input('Insert an album cover? [Y/n] ')
    if cover.lower() != 'n':
        try:
            print(
                'Download the cover and enter its path:',
                '(relative path is not recommended)',
                sep='\n',
            )
            cover_file = Path(input().strip())
            with cover_file.open('rb') as f:
                parsed['cover'] = f.read()
            parsed['cover_mime'] = (
                mimetypes.guess_type(cover_file)[0]
                or 'image/jpeg'
            )
        except Exception as err:
            logging.exception(err)
    print()
def tagmp3(
        file: str,
        copy: bool) -> None:
    global parsed
    oldpath = Path(file)
    newpath = oldpath
    if copy:
        newdir = (
            Path('./tagged') /
            safename(parsed['artist']) /
            safename(parsed['album'])
        )
        os.makedirs(newdir, exist_ok=True)
        newpath = newdir / safename(
            f"{parsed['track_no']}. " +
            f"{parsed['title']}.mp3"
        )
        shutil.copy(oldpath, newpath)
        # Save the cover next to the copied file
        # (newdir only exists in the copy branch)
        if parsed['cover'] is not None:
            ext = mimetypes.guess_extension(
                parsed['cover_mime'] or ''
            ) or '.jpg'
            cover = newdir / f'cover{ext}'
            with cover.open('wb') as f:
                f.write(parsed['cover'])
    id3 = ID3(str(newpath))
    id3['TPE1'] = TPE1(text=parsed['artist'])
    id3['TIT2'] = TIT2(text=parsed['title'])
    id3['TALB'] = TALB(text=parsed['album'])
    id3['TYER'] = TYER(text=f"{parsed['year']}")
    id3['TRCK'] = TRCK(
        text=(
            f"{parsed['track_no']}/"
            f"{parsed['tracks']}"
        )
    )
    id3['USLT'] = USLT(text=parsed['lyrics'])
    if parsed['cover'] is not None:
        id3['APIC'] = APIC(
            data=parsed['cover'],
            mime=parsed['cover_mime'],
        )
    id3.save()
if __name__ == '__main__':
    main()