# python-scripts/habr_parser.py

import sys

import requests
from lxml import etree
from lxml import html

# HTTP headers sent with every request: a desktop Chrome 70 User-Agent string.
useragent_headers = {
    "User-agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/70.0.3538.77 Safari/537.36"
    ),
}

# Download the Habr front page and parse it into an lxml HTML tree.
# The timeout (seconds) makes requests give up instead of waiting forever
# when the server stops responding; without it the script could hang.
site_tree = html.fromstring(
    requests.get("https://habr.com/", headers=useragent_headers, timeout=10).text)

# Select the title links (<a class="news-topic__title">) of the news items
# listed inside the "new-block" container of the front page.
habr_news = site_tree.xpath(
    '//div[@class="new-block"]'
    '//ul[@class="content-list"]'
    '/li[@class="content-list__item content-list__item_news-topic"]'
    '//a[@class="news-topic__title"]')

# For every news link found on the front page: download the linked article,
# extract its body element and print the title followed by the body markup.
for habr_new in habr_news:

    # Articles are processed independently: a failure on one of them
    # (network error, link without href, page without the expected body div,
    # unparsable document) is reported on stderr and the article is skipped.
    try:
        article_url = habr_new.xpath('./@href')[0]
        # timeout (seconds) prevents one stalled download from hanging the run.
        article_page = requests.get(article_url, headers=useragent_headers, timeout=10)
        article_body = html.fromstring(article_page.text).xpath(
            '//div[@class="post__text post__text-html post__text_v1" and @id="post-content-body"]')[0]
        # encoding="unicode" makes lxml return a str with the characters
        # intact. The default serialisation is ASCII bytes, which turns every
        # non-ASCII character into a numeric character reference (&#NNNN;).
        article_text = etree.tostring(article_body, encoding="unicode")
    except (requests.RequestException, etree.LxmlError, IndexError, ValueError) as error:
        print("skipping article: {!r}".format(error), file=sys.stderr)
        continue

    print()
    print("----- beginning of article")
    print(habr_new.text_content())
    print("----- beginning of text")
    print(article_text)
    print("----- end of text")
    print("----- end of article")