# pgbot/pgbotlib/api.py

""" Some functions for api calls """

import json
import random
import re
import typing

import requests
import bs4
import fake_headers

import pgbotlib.dbstuff


class ApiWrapper:
    """Dispatches named "api" requests to the matching web call or scraper.

    Each ``get_*`` method fetches one external resource and extracts a piece
    of text or a URL from it. When a response cannot be parsed (missing
    element, no regex match, malformed JSON) the methods return ``FAILED``
    instead of raising a cryptic ``AttributeError``/``IndexError``.
    Network-level exceptions raised by ``requests`` are not caught here.
    """

    # Reply used when the api name is unknown or a response cannot be parsed.
    FAILED = 'я обосрался :<'
    # 'part' isolates the contents of a <center> element, 'gif' then pulls
    # the value of the src="..." attribute out of that fragment.
    GIF_REGEX = {
        'part': re.compile(r'(?<=\<center\>).*(?=\<\/center\>)'),
        'gif': re.compile(r'(?<=src=").*(?="\s)')
    }
    # Names of the token specs whose matches are removed from search queries.
    SEARCH_TOKENS = ['botname', '!find']

    # The db_conn annotation is quoted so it is not evaluated when the class
    # body is executed.
    def __init__(self, tokens: dict,
                 db_conn: 'pgbotlib.dbstuff.DBConn') -> None:
        """Store the token specs and DB connection, prepare shared helpers.

        NOTE(review): ``tokens`` is annotated as ``dict`` but
        ``__sanitize_search`` iterates it as ``(name, regexes)`` pairs --
        confirm the real type against the caller.
        """
        self.tokens = tokens
        self.db_conn = db_conn
        self.nonw = re.compile(r'\W')
        self.headers = fake_headers.Headers(headers=True)

    # this is the entry point for the api calls
    # if you add another api, make sure there is an entry in the table below
    # (a dispatch table is used instead of match - case because of python 3.9)
    def call(self, api: str, data: typing.Union[str, None],
             message: str) -> typing.Optional[str]:
        """Run the handler registered for ``api`` and return its result.

        Args:
            api: name of the api to call (e.g. ``'gif'``, ``'y_search'``).
            data: payload for the ``'img_url'`` api, ignored by the others.
            message: text used as the search query by ``'y_search'``.

        Returns:
            The handler's result, or ``FAILED`` when ``api`` is unknown.
            ``'y_search'`` may return ``None`` when nothing was found.
        """
        handlers = {
            'img_url': lambda: self.format_img(data),
            'gif': self.get_gif,
            'kmp': self.get_kmp,
            'fga': self.get_fga,
            'fakenews': self.get_fakenews,
            'anek': self.get_anek,
            'y_search': lambda: self.y_search(message),
        }
        handler = handlers.get(api)
        if handler is None:
            return self.FAILED
        return handler()

    def __sanitize_search(self, message: str) -> str:
        """Removes one of each of the search tokens from the query
           so that "bot find" phrase does not poison the search query

           Non-word characters are replaced with spaces first. For every
           token spec named in SEARCH_TOKENS, the first regex that matches
           has a single match removed.

           It's not guaranteed it will delete the first match though,
           and I see no point in implementing that"""
        keywords = self.nonw.sub(' ', message)
        for token_spec in self.tokens:
            if token_spec[0] not in self.SEARCH_TOKENS:
                continue
            for regex in token_spec[1]:
                cleaned, hits = regex.subn('', keywords, count=1)
                if hits:
                    keywords = cleaned
                    # only one removal per token spec
                    break
        return keywords

    def y_search(self, message: str) -> typing.Optional[str]:
        """Search Yandex images for ``message`` and pick a random result.

        Returns:
            A ``[url](...)`` link to a randomly chosen image, or ``None``
            when the page contains no usable results.
        """
        query = self.__sanitize_search(message)
        response = requests.get('https://yandex.ru/images/search',
                                timeout=30,
                                params={'text': query,
                                        'nomisspell': 1,
                                        'noreask': 1,
                                        'isize': 'medium'},
                                headers=self.headers.generate())
        parser = bs4.BeautifulSoup(response.text, 'html.parser')
        items_place = parser.find('div', {'class': 'serp-list'})
        if items_place is None:
            # No result list on the page: treat it like an empty result set.
            return None
        images = []
        for item in items_place.find_all('div', {'class': 'serp-item'}):
            raw = item.get('data-bem')
            if not raw:
                continue
            try:
                images.append(json.loads(raw)['serp-item']['img_href'])
            except (ValueError, KeyError, TypeError):
                # Skip items whose metadata is malformed or incomplete.
                continue
        if not images:
            return None
        return self.format_img(random.choice(images))

    @classmethod
    def _extract_gif(cls, html: str) -> str:
        """Return the gif URL found by GIF_REGEX in ``html``, else FAILED."""
        part = cls.GIF_REGEX['part'].search(html)
        if part is None:
            return cls.FAILED
        gif = cls.GIF_REGEX['gif'].search(part.group(0))
        if gif is None:
            return cls.FAILED
        return gif.group(0)

    def get_gif(self) -> str:
        """Fetch a random page from xdgif.ru and return its gif URL."""
        resp = requests.get("http://xdgif.ru/random/", timeout=30)
        return self._extract_gif(resp.text)

    @staticmethod
    def get_kmp() -> str:
        """Fetch a random killpls.me page and return the story text."""
        request = requests.get("https://killpls.me/random/", timeout=30)
        parser = bs4.BeautifulSoup(request.text, features="html.parser")
        # The story div is located by its exact inline style.
        result = parser.find("div", attrs={
            "style": "margin:0.5em 0;line-height:1.785em"})
        if result is None:
            return ApiWrapper.FAILED
        return result.text.strip()

    @staticmethod
    def get_fga() -> str:
        """Return the "text" field of a random fucking-great-advice.ru reply."""
        request = requests.get("http://fucking-great-advice.ru/api/random",
                               timeout=30)
        try:
            return json.loads(request.text)["text"]
        except (ValueError, KeyError, TypeError):
            return ApiWrapper.FAILED

    @staticmethod
    def get_fakenews() -> str:
        """Return a random headline scraped from news.olegmakarenko.ru."""
        request = requests.get("http://news.olegmakarenko.ru/news", timeout=30)
        parser = bs4.BeautifulSoup(request.text, features="html.parser")
        news = [item.text.strip() for item in parser.find_all(
                    "span", attrs={"class": "headlinetext"})]
        if not news:
            # random.choice raises IndexError on an empty list
            return ApiWrapper.FAILED
        return random.choice(news)

    @staticmethod
    def _extract_anek(text: str) -> str:
        """Return the stripped text of the first <content> element in ``text``.

        Returns FAILED when there is no ``<content>`` tag.
        """
        parts = text.split('<content>')
        if len(parts) < 2:
            return ApiWrapper.FAILED
        return parts[1].split('</content>')[0].strip()

    @staticmethod
    def get_anek() -> str:
        """Fetch a random joke from rzhunemogu.ru and return its text."""
        request = requests.get("http://rzhunemogu.ru/Rand.aspx?CType=11",
                               timeout=30)
        return ApiWrapper._extract_anek(request.text)

    @staticmethod
    def format_img(data: str) -> str:
        """Wrap ``data`` into a ``[url](...)`` link."""
        return f'[url]({data})'