Add CSV export: write scraped headings to output.csv
This commit is contained in:
16
main.py
16
main.py
@ -1,6 +1,7 @@
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from urllib.parse import urlparse, urlunparse
|
||||
import pandas as pd
|
||||
|
||||
def scrape_headings(url):
|
||||
try:
|
||||
@ -20,24 +21,33 @@ def scrape_headings(url):
|
||||
# Find all the heading elements (h1, h2, h3, etc.)
|
||||
headings = soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6'])
|
||||
|
||||
# Extract and print the text from the headings
|
||||
for heading in headings:
|
||||
print(heading.text)
|
||||
# Extract and print the text from the headings and put to csv
|
||||
heading_list = [heading.text for heading in headings]
|
||||
df = pd.DataFrame(heading_list)
|
||||
df.to_csv('output.csv')
|
||||
|
||||
else:
|
||||
print(f"Failed to retrieve content from {url}. Status code: {response.status_code}")
|
||||
|
||||
except Exception as e:
|
||||
print(f"An error occurred: {str(e)}")
|
||||
|
||||
|
||||
# To check if to do another search for URL
def main_start():
    """Prompt for a URL, scrape its headings, and offer to repeat.

    Replaces the original self-recursive retry (which pushed one extra
    stack frame per repeated search and terminated via bare exit()) with
    a while loop that simply returns when the user declines.

    The original nested its ``if __name__ == "__main__"`` check inside
    the function body, so calling main_start() from an import was a
    no-op; that behavior is preserved here.
    """
    if __name__ != "__main__":
        # Preserve the original guard's placement: an import-time call
        # must not start prompting.
        return
    while True:
        url = input("Enter the URL: ")
        scrape_headings(url)
        search_again = input("Do you want to search again? y/n:").lower()
        if search_again != 'y':
            # Anything other than 'y' ends the session. Returning is
            # equivalent to the original exit() here (the script ends
            # either way) without raising SystemExit.
            return


main_start()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user