Compare commits
1 Commit
thierry-de
...
0c9cea5225
| Author | SHA1 | Date | |
|---|---|---|---|
| 0c9cea5225 |
3
main.py
3
main.py
@@ -3,6 +3,7 @@ from bs4 import BeautifulSoup
|
|||||||
from urllib.parse import urlparse, urlunparse
|
from urllib.parse import urlparse, urlunparse
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
|
headers = {'User-Agent': 'myprogram/1.0'}
|
||||||
def scrape_headings(url):
|
def scrape_headings(url):
|
||||||
try:
|
try:
|
||||||
# Check if the URL has a scheme (http/https), and add one if missing
|
# Check if the URL has a scheme (http/https), and add one if missing
|
||||||
@@ -11,7 +12,7 @@ def scrape_headings(url):
|
|||||||
url = urlunparse(('http',) + parsed_url[1:])
|
url = urlunparse(('http',) + parsed_url[1:])
|
||||||
|
|
||||||
# Send an HTTP GET request to the specified URL
|
# Send an HTTP GET request to the specified URL
|
||||||
response = requests.get(url)
|
response = requests.get(url, headers=headers)
|
||||||
|
|
||||||
# Check if the request was successful
|
# Check if the request was successful
|
||||||
if response.status_code == 200:
|
if response.status_code == 200:
|
||||||
|
|||||||
Reference in New Issue
Block a user