43 lines
1.4 KiB
Python
43 lines
1.4 KiB
Python
import time
|
|
|
|
from bs4 import BeautifulSoup
|
|
from page_source import PageSource
|
|
|
|
class PageParser:
    """Pull question, answer and URL strings out of a fetched results page.

    The page is obtained by constructing ``PageSource(search_keyword=keyword)``,
    converting that object with ``str()`` and parsing the text with
    BeautifulSoup's ``html5lib`` parser. The three ``get_all_*`` methods then
    query the parsed tree by fixed CSS class strings.

    NOTE(review): the class strings look like auto-generated names from a
    search-results page and are likely to change over time -- confirm against
    the markup ``PageSource`` actually returns.
    """

    # Selector strings shared by the lookups below.
    _MARKER_CLASS = "C7GS5b rkGIWe"
    _TEXT_CLASS = "BNeawe s3v9rd AP7Wnd"
    _ANSWER_WRAPPER_CLASS = "kCrYT"
    _URL_CLASS = "BNeawe UPmit AP7Wnd UwRFLe"

    # Parsed document; remains None only when __init__ has been bypassed.
    __html = None

    def __init__(self, keyword):
        """Fetch the page for *keyword* and parse it.

        Args:
            keyword: Value forwarded to ``PageSource`` as ``search_keyword``.
        """
        source = PageSource(search_keyword=keyword)
        # NOTE(review): the reason for this one-second pause is not visible in
        # this file (presumably throttling or waiting on the fetch) -- confirm
        # before removing.
        time.sleep(1)
        self.__html = BeautifulSoup(str(source), 'html5lib')

    def _find_markers(self):
        """Return every ``span`` that marks an entry on the page."""
        return self.__html.find_all('span', attrs={"class": self._MARKER_CLASS})

    def get_all_questions(self):
        """Return the text of the first text ``div`` following each marker.

        Markers with no such ``div`` after them are skipped instead of raising
        ``AttributeError``, which matches how ``get_all_answers`` treats
        incomplete entries.

        Returns:
            A list of strings in document order.
        """
        questions = []
        for marker in self._find_markers():
            node = marker.find_next("div", attrs={"class": self._TEXT_CLASS})
            if node is None:
                continue
            questions.append(node.text)
        return questions

    def get_all_answers(self):
        """Return the answer text associated with each marker.

        For every marker the lookup is: the next sibling wrapper ``div``, then
        the first text ``div`` after it, then the next text ``div`` after that
        one; the text of this last node is collected. Markers for which any
        step finds nothing are skipped.

        Returns:
            A list of strings in document order. It may be shorter than the
            result of ``get_all_questions`` because incomplete entries are
            dropped.
        """
        text_attrs = {"class": self._TEXT_CLASS}
        answers = []
        for marker in self._find_markers():
            wrapper = marker.find_next_sibling(
                "div", attrs={"class": self._ANSWER_WRAPPER_CLASS}
            )
            if wrapper is None:
                continue
            first_text = wrapper.find_next("div", attrs=text_attrs)
            if first_text is None:
                continue
            # The answer is the second matching div, not the first
            # (presumably the first repeats the question -- verify).
            answer = first_text.find_next("div", attrs=text_attrs)
            if answer is not None:
                answers.append(answer.text)
        return answers

    def get_all_url(self):
        """Return the text of every URL ``div`` on the page.

        Returns:
            A list of strings in document order.
        """
        url_nodes = self.__html.find_all("div", attrs={"class": self._URL_CLASS})
        return [node.text for node in url_nodes]