Add output path
This commit is contained in:
0
FETCH_HEAD
Normal file
0
FETCH_HEAD
Normal file
0
__init__.py
Normal file
0
__init__.py
Normal file
10
main.py
10
main.py
@ -3,7 +3,7 @@ from bs4 import BeautifulSoup
|
||||
from urllib.parse import urlparse, urlunparse
|
||||
import pandas as pd
|
||||
|
||||
def scrape_headings(url):
|
||||
def scrape_headings(url, output_path):
|
||||
try:
|
||||
# Check if the URL has a scheme (http/https), and add one if missing
|
||||
parsed_url = urlparse(url)
|
||||
@ -30,7 +30,8 @@ def scrape_headings(url):
|
||||
|
||||
# Convert to DataFrame and save to CSV
|
||||
df = pd.DataFrame(heading_data)
|
||||
df.to_csv('output.csv', index=False)
|
||||
df.to_csv(output_path, index=False)
|
||||
return heading_data
|
||||
|
||||
else:
|
||||
print(f"Failed to retrieve content from {url}. Status code: {response.status_code}")
|
||||
@ -41,8 +42,9 @@ def scrape_headings(url):
|
||||
|
||||
|
||||
def main():
|
||||
url = input("Enter the URL: ")ls
|
||||
scrape_headings(url)
|
||||
url = input("Enter the URL: ")
|
||||
output_path = 'output.csv'
|
||||
scrape_headings(url, output_path)
|
||||
# df = pd.DataFrame(scrape_headings(url))
|
||||
search_again = input("Do you want to search again? y/n:").lower()
|
||||
if search_again == 'y':
|
||||
|
||||
Reference in New Issue
Block a user