From e2fe8ab8e8e83a333ba6707849c45f6eedaa21a2 Mon Sep 17 00:00:00 2001
From: cid
Date: Sat, 23 Dec 2023 23:14:12 +0800
Subject: [PATCH] added csv function

---
 main.py | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/main.py b/main.py
index d3c3df4..80d8e6c 100644
--- a/main.py
+++ b/main.py
@@ -1,6 +1,7 @@
 import requests
 from bs4 import BeautifulSoup
 from urllib.parse import urlparse, urlunparse
+import pandas as pd
 
 def scrape_headings(url):
     try:
@@ -20,24 +21,33 @@ def scrape_headings(url):
             # Find all the heading elements (h1, h2, h3, etc.)
             headings = soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6'])
 
-            # Extract and print the text from the headings
-            for heading in headings:
-                print(heading.text)
+            # Extract and print the text from the headings and put to csv
+            heading_list = [heading.text for heading in headings]
+            df = pd.DataFrame(heading_list)
+            df.to_csv('output.csv')
+
         else:
             print(f"Failed to retrieve content from {url}. Status code: {response.status_code}")
     except Exception as e:
         print(f"An error occurred: {str(e)}")
 
+
+# To check if to do another search for URL
 def main_start():
+
     if __name__ == "__main__":
         url = input("Enter the URL: ")
         scrape_headings(url)
+        # df = pd.DataFrame(scrape_headings(url))
         search_again = input("Do you want to search again? y/n:").lower()
         if search_again == 'y':
+            # df.to_csv('output.csv')
             main_start()
         else:
+            # df.to_csv('output.csv')
             exit()
 
 main_start()
+
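
For reference, here is a minimal sketch of how scrape_headings might read once this patch is applied. The request/parse lines are assumptions reconstructed from the existing imports (the hunk only shows the tail of the function), and the columns=['heading'] and index=False arguments are optional refinements rather than part of the patch, which calls df.to_csv('output.csv') with pandas defaults (a numeric index column and a 0 column header).

# Sketch of the patched function; the fetch and parse steps are assumed from the imports.
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_headings(url):
    try:
        response = requests.get(url)
        if response.status_code == 200:
            soup = BeautifulSoup(response.text, 'html.parser')
            # Find all the heading elements (h1, h2, h3, etc.)
            headings = soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6'])
            # Collect the heading text and write it to output.csv
            heading_list = [heading.text for heading in headings]
            # columns=['heading'] and index=False are suggestions, not in the patch
            df = pd.DataFrame(heading_list, columns=['heading'])
            df.to_csv('output.csv', index=False)
        else:
            print(f"Failed to retrieve content from {url}. Status code: {response.status_code}")
    except Exception as e:
        print(f"An error occurred: {str(e)}")

if __name__ == "__main__":
    scrape_headings(input("Enter the URL: "))

Running this against a page that has headings should produce output.csv with one heading per row; note that pandas must be installed (pip install pandas) for the patched script to run.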