added csv function
This commit is contained in:
16
main.py
16
main.py
@ -1,6 +1,7 @@
|
|||||||
import requests
|
import requests
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
from urllib.parse import urlparse, urlunparse
|
from urllib.parse import urlparse, urlunparse
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
def scrape_headings(url):
|
def scrape_headings(url):
|
||||||
try:
|
try:
|
||||||
@ -20,24 +21,33 @@ def scrape_headings(url):
|
|||||||
# Find all the heading elements (h1, h2, h3, etc.)
|
# Find all the heading elements (h1, h2, h3, etc.)
|
||||||
headings = soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6'])
|
headings = soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6'])
|
||||||
|
|
||||||
# Extract and print the text from the headings
|
# Extract the text from the headings and write it to a CSV file
|
||||||
for heading in headings:
|
heading_list = [heading.text for heading in headings]
|
||||||
print(heading.text)
|
df = pd.DataFrame(heading_list)
|
||||||
|
df.to_csv('output.csv')
|
||||||
|
|
||||||
else:
|
else:
|
||||||
print(f"Failed to retrieve content from {url}. Status code: {response.status_code}")
|
print(f"Failed to retrieve content from {url}. Status code: {response.status_code}")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"An error occurred: {str(e)}")
|
print(f"An error occurred: {str(e)}")
|
||||||
|
|
||||||
|
|
||||||
|
# Prompt for a URL, scrape it, then ask whether to search again
|
||||||
def main_start():
|
def main_start():
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
url = input("Enter the URL: ")
|
url = input("Enter the URL: ")
|
||||||
scrape_headings(url)
|
scrape_headings(url)
|
||||||
|
# df = pd.DataFrame(scrape_headings(url))
|
||||||
search_again = input("Do you want to search again? y/n:").lower()
|
search_again = input("Do you want to search again? y/n:").lower()
|
||||||
if search_again == 'y':
|
if search_again == 'y':
|
||||||
|
# df.to_csv('output.csv')
|
||||||
main_start()
|
main_start()
|
||||||
else:
|
else:
|
||||||
|
# df.to_csv('output.csv')
|
||||||
exit()
|
exit()
|
||||||
|
|
||||||
|
|
||||||
main_start()
|
main_start()
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user