Compare commits

...

1 Commits

Author SHA1 Message Date
cid
0c9cea5225 Updated main.py "added user-agent headers" 2023-12-29 15:02:15 -05:00

View File

@@ -3,6 +3,7 @@ from bs4 import BeautifulSoup
from urllib.parse import urlparse, urlunparse from urllib.parse import urlparse, urlunparse
import pandas as pd import pandas as pd
headers = {'User-Agent': 'myprogram/1.0'}
def scrape_headings(url): def scrape_headings(url):
try: try:
# Check if the URL has a scheme (http/https), and add one if missing # Check if the URL has a scheme (http/https), and add one if missing
@@ -11,7 +12,7 @@ def scrape_headings(url):
url = urlunparse(('http',) + parsed_url[1:]) url = urlunparse(('http',) + parsed_url[1:])
# Send an HTTP GET request to the specified URL # Send an HTTP GET request to the specified URL
response = requests.get(url) response = requests.get(url, headers=headers)
# Check if the request was successful # Check if the request was successful
if response.status_code == 200: if response.status_code == 200: