From 435b4f637d91a8af274c68b825923ef3bf6ca28e Mon Sep 17 00:00:00 2001 From: DarkCat09 Date: Fri, 8 Apr 2022 16:22:30 +0400 Subject: [PATCH] First commit --- .gitignore | 134 ++--------------------------------------- README.md | 0 knigavuhe/__init__.py | 46 ++++++++++++++ knigavuhe/kvubook.py | 114 +++++++++++++++++++++++++++++++++++ knigavuhe/kvucol.py | 23 +++++++ knigavuhe/kvuconst.py | 2 + knigavuhe/kvusearch.py | 115 +++++++++++++++++++++++++++++++++++ pyproject.toml | 6 ++ requirements.txt | 2 + setup.py | 32 ++++++++++ tests/search.py | 6 ++ xpath.txt | 34 +++++++++++ 12 files changed, 386 insertions(+), 128 deletions(-) create mode 100644 README.md create mode 100644 knigavuhe/__init__.py create mode 100644 knigavuhe/kvubook.py create mode 100644 knigavuhe/kvucol.py create mode 100644 knigavuhe/kvuconst.py create mode 100644 knigavuhe/kvusearch.py create mode 100644 pyproject.toml create mode 100644 requirements.txt create mode 100644 setup.py create mode 100644 tests/search.py create mode 100644 xpath.txt diff --git a/.gitignore b/.gitignore index b6e4761..22c3ba1 100644 --- a/.gitignore +++ b/.gitignore @@ -1,129 +1,7 @@ -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class +# Python +__pycache__ +dist/* +*.egg-info/* -# C extensions -*.so - -# Distribution / packaging -.Python -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -pip-wheel-metadata/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. 
-*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 -db.sqlite3-journal - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -target/ - -# Jupyter Notebook -.ipynb_checkpoints - -# IPython -profile_default/ -ipython_config.py - -# pyenv -.python-version - -# pipenv -# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. -# However, in case of collaboration, if having platform-specific dependencies or dependencies -# having no cross-platform support, pipenv may install dependencies that don't work, or not -# install all needed dependencies. -#Pipfile.lock - -# PEP 582; used by e.g. github.com/David-OConnor/pyflow -__pypackages__/ - -# Celery stuff -celerybeat-schedule -celerybeat.pid - -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ -.dmypy.json -dmypy.json - -# Pyre type checker -.pyre/ +# Vim +*.swp diff --git a/README.md b/README.md new file mode 100644 index 0000000..e69de29 diff --git a/knigavuhe/__init__.py b/knigavuhe/__init__.py new file mode 100644 index 0000000..597a01f --- /dev/null +++ b/knigavuhe/__init__.py @@ -0,0 +1,46 @@ +import requests +from typing import Optional + +from .kvusearch import Search +from .kvuconst import REQUA + +__all__ = ['REQUA', 'Client', 'kvubook', 'kvucol', 'kvusearch'] + +class Client: + + def __init__(self, session:Optional[requests.Session]=None) -> None: + + self.session = session if session else 
requests.Session()
+        self.session.headers.update({'User-Agent': REQUA})
+
+    @classmethod
+    def from_credentials(cls, login:str, password:str):
+        """Log in with e-mail and password; raise ValueError if no 'auth' cookie comes back."""
+        session = requests.Session()
+        session.headers.update({'User-Agent': REQUA})
+        session.post(
+            'https://knigavuhe.org/login/',
+            data={
+                'email': login,
+                'password': password,
+                'token': '0a8fb778ee0cb5bf7e56'  # NOTE(review): hard-coded form token -- confirm it never expires
+            }
+        ).raise_for_status()
+
+        if 'auth' not in session.cookies:
+            raise ValueError('Check your username/email and password')
+
+        return cls(session)
+
+    @classmethod
+    def from_cookies(cls, auth:str):
+        """Build a client from an existing 'auth' cookie value instead of logging in."""
+        session = requests.Session()
+        session.headers.update({'User-Agent': REQUA})
+        session.cookies.set('auth', auth)
+
+        return cls(session)
+
+    def search(self, query:str, limit:int=30, page:int=1):
+        """Create a Search for `query` that shares this client's session."""
+        return Search(self.session, query, limit, page)
diff --git a/knigavuhe/kvubook.py b/knigavuhe/kvubook.py
new file mode 100644
index 0000000..fa6bab0
--- /dev/null
+++ b/knigavuhe/kvubook.py
@@ -0,0 +1,114 @@
+import re
+import datetime
+import requests
+import lxml.html
+
+from .kvucol import Collection
+from .kvuconst import BASEURL
+
+class Book:
+    """One audiobook page; attributes keep placeholder values until fetch() is called."""
+    def __init__(
+            self,
+            session:requests.Session,
+            url:str) -> None:
+        """Store the session and resolve `url` (absolute, or relative to BASEURL)."""
+        if url.startswith('http'):
+            # absolute link
+            self.url = url
+        else:
+            # relative link
+            self.url = BASEURL + url
+
+        self.id = 0
+        self.cover = ''
+        self.title = ''
+        self.author = None
+        self.reader = None
+        self.genre = None
+        self.description = ''
+        self.duration = 0
+        self.added = ''
+        self.likes = 0
+        self.dislikes = 0
+        self.favs = 0
+        self.views = 0
+
+        self.session = session
+
+    def fetch(self) -> None:
+        """Download the book page and fill in the attributes; HTTP errors raise requests.HTTPError."""
+        page = self.session.get(self.url)  # shared session keeps the User-Agent header and auth cookie
+        page.raise_for_status()
+        tree = lxml.html.fromstring(page.content)
+        info = tree.xpath('//div[@class="book_left_blocks"]')[0]
+        acts = tree.xpath('//div[@class="book_right_blocks"]')[0]
+
+        genreurl = info.xpath('//div[@class="book_genre_pretitle"]/a/@href')[0]
+        self.genre = Collection(genreurl, self.session)
+
+        pagetitle = info.xpath('//div[@class="page_title"]/h1')[0]
+        self.title = pagetitle.xpath('./span[@itemprop="name"]')[0]\
+            .text_content().strip()
+
+        authorurl = pagetitle.xpath('.//span[@itemprop="author"]/a/@href')[0]
+        self.author = Collection(authorurl, self.session)
+
+        readerurl = pagetitle.xpath('.//a[starts-with(@href,"/reader")]/@href')[0]
+        self.reader = Collection(readerurl, self.session)
+
+        self.cover = info.xpath('//div[@class="book_cover"]/img/@src')[0]
+        self.id = int(re.search(r'/covers/(\d+)/', self.cover).group(1))  # numeric id taken from the cover URL
+
+        block = info.xpath('//div[@class="book_blue_block"]')[0]  # xpath() returns a list; take the element
+        labels = block.xpath('./div[not(@class="-is_invis")]')
+
+        # tags which should be removed
+        tagdel = [
+            'Время звучания:',
+            'Добавлена:'
+        ]
+        time = labels[0].text_content()
+        time = time.replace(tagdel[0], '')
+
+        h, m, s = time.strip().split(':')
+        self.duration = int(s) + int(m)*60 + int(h)*3600  # total length in seconds
+
+        added = labels[1].text_content()
+        added = added.replace(tagdel[1], '')
+        self.added = self.convert_date(added)
+
+        descr = info.xpath('//div[@itemprop="description"]')[0]
+        self.description = descr.text_content().strip()
+
+        likes = acts.xpath('//span[@id="book_likes_count"]')[0]
+        self.likes = int(likes.text_content().strip())
+
+        dlikes = acts.xpath('//span[@id="book_dislikes_count"]')[0]
+        self.dislikes = int(dlikes.text_content().strip())
+
+        favs = acts.xpath('//span[@id="book_fave_count"]')[0]
+        self.favs = int(favs.text_content().strip())
+
+        plays = acts.xpath('//div[@class="book_action_plays"]')[0]
+        self.views = int(plays.text_content().strip())
+
+    def download(self) -> None:  # NOTE(review): stub -- 'PART' is a literal placeholder and the response is discarded
+        requests.get(f'https://s10.knigavuhe.org/3/audio/{self.id}/PART.mp3')
+
+    def convert_date(self, date:str) -> datetime.date:
+        """Parse a Russian 'day month year' string (e.g. '8 апреля 2022'); ValueError on an unknown month."""
+        d, m, y = date.strip().lower().split()
+        # Month-name stems in calendar order. 'ма' comes after 'мар', so
+        # 'марта' is taken as March while both 'май' and the genitive 'мая'
+        # (the form used in written dates) resolve to May.
+        stems = (
+            'янв', 'фев', 'мар', 'апр', 'ма', 'июн',
+            'июл', 'авг', 'сен', 'окт', 'ноя', 'дек',
+        )
+        for num, stem in enumerate(stems, start=1):
+            if m.startswith(stem):
+                # datetime.date() needs integers, not the strings from split()
+                return datetime.date(int(y), num, int(d))
+
+        raise ValueError(f'Unknown month in date: {date!r}')
diff --git a/knigavuhe/kvucol.py b/knigavuhe/kvucol.py
new file mode 100644
index 0000000..ba44746
--- /dev/null
+++ b/knigavuhe/kvucol.py
@@ -0,0 +1,23 @@
+import requests
+from typing import Optional
+
+from .kvuconst import BASEURL
+
+class Collection:
+    """A listing page identified by URL (used for genres, authors, readers and series)."""
+    def __init__(
+            self, url:str,
+            session:Optional[requests.Session]=None) -> None:
+        """Resolve `url` (absolute, or relative to BASEURL) and remember the optional session."""
+        if url.startswith('http'):
+            # absolute link
+            self.url = url
+        else:
+            # relative link
+            self.url = BASEURL + url
+
+        self.books = []
+        self.session = session
+
+    def fetch(self) -> None:
+        pass  # not implemented yet: `books` stays empty
diff --git a/knigavuhe/kvuconst.py b/knigavuhe/kvuconst.py
new file mode 100644
index 0000000..cbcd965
--- /dev/null
+++ b/knigavuhe/kvuconst.py
@@ -0,0 +1,2 @@
+BASEURL = 'https://knigavuhe.org'  # site root; relative links are appended to it
+REQUA = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.82 Safari/537.36 OPR/84.0.4316.14'
diff --git a/knigavuhe/kvusearch.py b/knigavuhe/kvusearch.py
new file mode 100644
index 0000000..1230428
--- /dev/null
+++ b/knigavuhe/kvusearch.py
@@ -0,0 +1,115 @@
+import re
+import requests
+import lxml.html
+from typing import List, Dict, Any, Union
+
+from .kvubook import Book
+from .kvucol import Collection
+from .kvuconst import BASEURL, REQUA
+
+class Search:
+    """A search query that can be run against books, authors, readers or series."""
+    def __init__(
+            self, session:requests.Session,
+            query:str, limit:int=30, page:int=1) -> None:
+        """Remember the query, its length, the result limit, the page number and the session."""
+        self.q = query
+        self.len = len(query)
+        self.lim = limit
+        self.page = page
+
+        self.session = session
+
+    def makereq(self, stype:str) -> bytes:
+        """Request the result page for section `stype` ('' means books) and return the raw body."""
+        byletter = f'/{stype}/letter/{self.q}'
+        bytext = f'/search/{stype}'
+        req = None
+
+        if (self.len < 1):
+            raise ValueError('Empty search request!')
+
+        elif (self.len == 1 and stype != ''):
+            # if the search text is one letter
+            req = self.session.get(
+                BASEURL + byletter
+            )
+
+        else:
+            req = self.session.get(
+                BASEURL + bytext,
+                params={
+                    'q': self.q,
+
'page': self.page
+                }
+            )
+
+        req.raise_for_status()
+        return req.content
+
+    def parse(self, cls, response:bytes, expr:str) -> Dict[str,Union[List[Any],int]]:
+        """Turn a result page into {'results': [cls objects], 'pages': int, 'count': int}."""
+        tree = lxml.html.fromstring(response)
+        res = tree.xpath(expr)
+        lst = []
+
+        for item in res[:self.lim]:  # at most `lim` results
+            # Book takes (session, url) while Collection takes (url, session)
+            args = (self.session, item) if cls is Book else (item, self.session)
+            lst.append(cls(*args))
+
+        pages = tree.xpath('//div[@class="pn_page_buttons"]/a[contains(@class," -page")][last()]/@data-pages')
+        # xpath() returns a list; when it is empty the result is treated as a single page
+        count = 0
+        countexp = [
+            # on different pages the count label
+            # is located in diff. places
+            tree.xpath('//div[@class="page_title"]/b'),
+            tree.xpath('//div[@class="page_title_ext_sublabel"]/b'),
+            tree.xpath('//div[@class="page_title_count"]')
+        ]
+        for c in countexp:
+            if not c:
+                continue
+            # extracting a number
+            match = re.search(r'^([\d ]+)', c[0].text_content())
+            if match:
+                count = match[0].replace(' ', '')
+
+        return {
+            'results': lst,
+            'pages': int(pages[0]) if pages else 1,
+            'count': int(count)
+        }
+
+    def books(self) -> Dict[str,Union[List[Book],int]]:
+        """Search books; 'results' holds Book objects that have not been fetched yet."""
+        return self.parse(
+            Book,
+            self.makereq(''),
+            '//div[@class="bookkitem"]/a/@href'
+        )
+
+    def authors(self) -> Dict[str,Union[List[Collection],int]]:
+        """Search authors; 'results' holds one Collection per author link."""
+        return self.parse(
+            Collection,
+            self.makereq('authors'),
+            '//div[contains(@class,"common_list_item ")]/a/@href'
+        )
+
+    def readers(self) -> Dict[str,Union[List[Collection],int]]:
+        """Search readers; 'results' holds one Collection per reader link."""
+        return self.parse(
+            Collection,
+            self.makereq('readers'),
+            '//div[contains(@class,"common_list_item ")]/a/@href'
+        )
+
+    def series(self) -> Dict[str,Union[List[Collection],int]]:
+        """Search series; 'results' holds one Collection per series link."""
+        return self.parse(
+            Collection,
+            self.makereq('series'),
+            '//div[contains(@class,"common_list_item ")]/a/@href'
+        )
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..236c211
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,6 @@
+[build-system]
+requires = [
+    "setuptools>=42",
+    "wheel"
+]
+build-backend = "setuptools.build_meta"
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index
0000000..8a4061c
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,2 @@
+lxml==4.8.0
+requests==2.25.1
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..c6465e3
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,32 @@
+import setuptools
+
+with open('README.md', 'rt', encoding='utf-8') as readme:
+    long_description = readme.read()
+
+with open('requirements.txt', 'rt', encoding='utf-8') as f:
+    # one requirement per line; blank lines and '#' comments are not requirements
+    stripped = (r.strip() for r in f)
+    requires = [r for r in stripped if r and not r.startswith('#')]
+
+setuptools.setup(
+    name='knigavuhe',
+    version='0.1.0',
+    author='Chechkenev Andrey (@DarkCat09)',
+    author_email='aacd0709@mail.ru',
+    description='Unofficial knigavuhe.org API',
+    long_description=long_description,
+    long_description_content_type='text/markdown',
+    url='https://github.com/DarkCat09/knigavuhe',
+    project_urls={
+        'Bug Tracker': 'https://github.com/DarkCat09/knigavuhe/issues',
+    },
+    classifiers=[
+        'Development Status :: 4 - Beta',
+        'Programming Language :: Python :: 3',
+        'License :: OSI Approved :: Apache Software License',
+        'Operating System :: OS Independent'
+    ],
+    install_requires=requires,
+    packages=['knigavuhe'],
+    python_requires=">=3.6",
+)
diff --git a/tests/search.py b/tests/search.py
new file mode 100644
index 0000000..f1bc487
--- /dev/null
+++ b/tests/search.py
@@ -0,0 +1,6 @@
+from knigavuhe import Client
+
+cl = Client()
+r = cl.search('Агата Кристи').books()  # Client exposes search(); books() returns a dict
+print(r)
+print(r['results'][0])
diff --git a/xpath.txt b/xpath.txt
new file mode 100644
index 0000000..f7e61be
--- /dev/null
+++ b/xpath.txt
@@ -0,0 +1,34 @@
+---
+cover = head/meta[@property="og:image"]/@content [0]
+***ID*** = re.search(r'knigavuhe.org/2/covers/(\d+)/', cover)
+
+---
+pagetitle = //div[@class="page_title"]/h1 [0]
+
+#booktitle = //span[@class="book_title_elem book_title_name"] [0].text_content()
+booktitle = //span[@itemprop="name"] [0].text_content()
+
+bookauthor = //span[@itemprop="author"]/a [0].text_content()
+
+---
+cover = //div[@class="book_cover_wrap"]/div[@class="book_cover"]/img/@src [0]
+#description = //div[@class="book_description"] [0].text_content() +description = //div[@itemprop="description"] [0].text_content() + +--- +lst = //div[@class="gm-scroll-view"]/div[@class="book_playlist_item"] +for i in lst: +***PART*** = /div[@class="book_playlist_item_name --text-overflow"] [0].text_content() + +--- +https://s10.knigavuhe.org/3/audio/***ID***/***PART***.mp3 + +--- +POST https://knigavuhe.org/play/id/20977/progress/ +trackIndex: 3 (part number in a playlist, from 0) +position: 1074.834395 (can be integer or float, from the beginning of part, in seconds) +speed: 1 (can be integer or float, from 0.5 to 3) +JSON response: +0: "flood" /* = pause */ or null /* = playing */ +1: null /* = pause */ or 1 /* = playing */