import requests
from bs4 import BeautifulSoup
import pandas as pd

# Browser-like User-Agent so simple bot filters don't reject the requests.
# Defined at module level so both scrape functions can use it safely.
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                  'AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/109.0.0.0 Safari/537.36'
}


def get_user_input():
    """Collect URLs from the user until they type 'done'."""
    urls = []
    while True:
        url = input("Enter a URL (or type 'done' to finish): ")
        if url.lower() == 'done':
            break
        urls.append(url)
    return urls


def scrape_body(url):
    """Return the prettified <body> of the page, or an error message."""
    try:
        response = requests.get(url, headers=HEADERS, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        body = soup.find('body')
        return body.prettify() if body else "No body content"
    except requests.RequestException as e:
        return f"Error: {e}"


def scrape_heads(url):
    """Return the text of every h1-h6 heading on the page."""
    try:
        response = requests.get(url, headers=HEADERS, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        return [h.get_text(strip=True)
                for h in soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6'])]
    except requests.RequestException as e:
        return [f"Error: {e}"]


if __name__ == "__main__":
    urls = get_user_input()

    all_bodies = []
    all_headings = []
    for url in urls:
        all_bodies.append(scrape_body(url))
        all_headings.extend((url, h) for h in scrape_heads(url))

    # writelines() adds no separators, so join the bodies with newlines.
    with open('output.txt', 'w', encoding='utf-8') as f:
        f.write('\n'.join(all_bodies))

    headings_df = pd.DataFrame(all_headings, columns=['URL', 'Heading'])
    headings_df.to_csv('headers.csv', index=False)