Add output path

This commit is contained in:
2023-12-31 23:50:03 -05:00
parent 8e34d94acd
commit fe38c2226f
4 changed files with 6 additions and 4 deletions

0
FETCH_HEAD Normal file
View File

0
__init__.py Normal file
View File

0
git Normal file
View File

10
main.py
View File

@@ -3,7 +3,7 @@ from bs4 import BeautifulSoup
from urllib.parse import urlparse, urlunparse
import pandas as pd
def scrape_headings(url):
def scrape_headings(url, output_path):
try:
# Check if the URL has a scheme (http/https), and add one if missing
parsed_url = urlparse(url)
@@ -30,7 +30,8 @@ def scrape_headings(url):
# Convert to DataFrame and save to CSV
df = pd.DataFrame(heading_data)
df.to_csv('output.csv', index=False)
df.to_csv(output_path, index=False)
return heading_data
else:
print(f"Failed to retrieve content from {url}. Status code: {response.status_code}")
@@ -41,8 +42,9 @@ def scrape_headings(url):
def main():
url = input("Enter the URL: ")ls
scrape_headings(url)
url = input("Enter the URL: ")
output_path = 'output.csv'
scrape_headings(url, output_path)
# df = pd.DataFrame(scrape_headings(url))
search_again = input("Do you want to search again? y/n:").lower()
if search_again == 'y':