Add output path parameter to scrape_headings and return the scraped heading data

2023-12-31 23:50:03 -05:00
parent 8e34d94acd
commit fe38c2226f
4 changed files with 6 additions and 4 deletions
@@ -3,7 +3,7 @@ from bs4 import BeautifulSoup
 from urllib.parse import urlparse, urlunparse
 import pandas as pd

-def scrape_headings(url):
+def scrape_headings(url, output_path):
    try:
        # Check if the URL has a scheme (http/https), and add one if missing
        parsed_url = urlparse(url)
@@ -30,7 +30,8 @@ def scrape_headings(url):

            # Convert to DataFrame and save to CSV
            df = pd.DataFrame(heading_data)
-            df.to_csv('output.csv', index=False)
+            df.to_csv(output_path, index=False)
+            return heading_data

        else:
            print(f"Failed to retrieve content from {url}. Status code: {response.status_code}")
@@ -41,8 +42,9 @@ def scrape_headings(url):


 def main():
-    url = input("Enter the URL: ")ls
-    scrape_headings(url)
+    url = input("Enter the URL: ")
+    output_path = 'output.csv'
+    scrape_headings(url, output_path)
    # df = pd.DataFrame(scrape_headings(url))
    search_again = input("Do you want to search again? y/n:").lower()
    if search_again == 'y':