Files
PythonPeopleAskScript/page_parser.py
2023-10-13 08:55:25 -07:00

43 lines
1.4 KiB
Python

import time
from bs4 import BeautifulSoup
from page_source import PageSource
class PageParser:
    """Extract question, answer and URL strings from a parsed results page.

    The constructor builds a ``PageSource`` for the given keyword, converts it
    to a string and parses that string with BeautifulSoup's ``html5lib``
    parser. The ``get_all_*`` methods then search the parsed tree by CSS class.

    NOTE(review): the CSS class names below look like obfuscated search-result
    markup (presumably Google's "People also ask" box) -- confirm against the
    page that ``PageSource`` actually returns, since such names change often.
    """

    # CSS class values searched for; kept byte-identical to the original code.
    _MARKER_CLASS = "C7GS5b rkGIWe"            # <span> anchoring each entry
    _TEXT_CLASS = "BNeawe s3v9rd AP7Wnd"       # <div> holding question/answer text
    _ANSWER_WRAPPER_CLASS = "kCrYT"            # sibling <div> wrapping the answer
    _URL_CLASS = "BNeawe UPmit AP7Wnd UwRFLe"  # <div> holding the URL text

    # Parsed document; assigned per instance in __init__.
    __html = None

    def __init__(self, keyword):
        """Parse the page obtained for ``keyword``.

        Args:
            keyword: Search keyword, passed to ``PageSource`` as
                ``search_keyword``.
        """
        source = PageSource(search_keyword=keyword)
        # The original code pauses one second before stringifying the source.
        # NOTE(review): presumably this gives PageSource time to finish
        # loading -- confirm whether the delay is still required.
        time.sleep(1)
        self.__html = BeautifulSoup(str(source), 'html5lib')

    def _markers(self):
        """Return every ``<span>`` whose class attribute is the entry marker."""
        return self.__html.find_all('span', attrs={"class": self._MARKER_CLASS})

    def get_all_questions(self):
        """Return the text of the first text ``<div>`` after each marker span.

        Markers with no following text ``<div>`` are skipped, mirroring how
        ``get_all_answers`` treats incomplete entries (previously this raised
        ``AttributeError``).

        Returns:
            A list of strings, in document order.
        """
        questions = []
        for marker in self._markers():
            text_div = marker.find_next("div", attrs={"class": self._TEXT_CLASS})
            if text_div is None:
                continue
            questions.append(text_div.text)
        return questions

    def get_all_answers(self):
        """Return the answer text associated with each marker span.

        For each marker the lookup is: next sibling ``<div>`` with the answer
        wrapper class, then the first text ``<div>`` after it, then the next
        text ``<div>`` after that one; the text of that last element is
        collected. If any step finds nothing, the marker is skipped.

        Returns:
            A list of strings, in document order.
        """
        text_attrs = {"class": self._TEXT_CLASS}
        answers = []
        for marker in self._markers():
            wrapper = marker.find_next_sibling(
                "div", attrs={"class": self._ANSWER_WRAPPER_CLASS}
            )
            if wrapper is None:
                continue
            first_text = wrapper.find_next("div", attrs=text_attrs)
            if first_text is None:
                continue
            # The answer is taken from the second matching <div>, not the first.
            answer_div = first_text.find_next("div", attrs=text_attrs)
            if answer_div is None:
                continue
            answers.append(answer_div.text)
        return answers

    def get_all_url(self):
        """Return the text of every ``<div>`` carrying the URL class.

        Returns:
            A list of strings, in document order.
        """
        url_divs = self.__html.find_all("div", attrs={"class": self._URL_CLASS})
        return [url_div.text for url_div in url_divs]