commit c246a09b8971c80155a8a920c8d69e4baeac6000 Author: cid Date: Thu Dec 21 01:17:03 2023 +0800 Add pythonProject3 diff --git a/PycharmProjects/pythonProject3/.idea/.gitignore b/PycharmProjects/pythonProject3/.idea/.gitignore new file mode 100644 index 0000000..26d3352 --- /dev/null +++ b/PycharmProjects/pythonProject3/.idea/.gitignore @@ -0,0 +1,3 @@ +# Default ignored files +/shelf/ +/workspace.xml diff --git a/PycharmProjects/pythonProject3/.idea/inspectionProfiles/profiles_settings.xml b/PycharmProjects/pythonProject3/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000..105ce2d --- /dev/null +++ b/PycharmProjects/pythonProject3/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/PycharmProjects/pythonProject3/.idea/misc.xml b/PycharmProjects/pythonProject3/.idea/misc.xml new file mode 100644 index 0000000..b344175 --- /dev/null +++ b/PycharmProjects/pythonProject3/.idea/misc.xml @@ -0,0 +1,7 @@ + + + + + + \ No newline at end of file diff --git a/PycharmProjects/pythonProject3/.idea/modules.xml b/PycharmProjects/pythonProject3/.idea/modules.xml new file mode 100644 index 0000000..1fb3c8b --- /dev/null +++ b/PycharmProjects/pythonProject3/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/PycharmProjects/pythonProject3/.idea/pythonProject3.iml b/PycharmProjects/pythonProject3/.idea/pythonProject3.iml new file mode 100644 index 0000000..d0876a7 --- /dev/null +++ b/PycharmProjects/pythonProject3/.idea/pythonProject3.iml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/PycharmProjects/pythonProject3/chromedriver b/PycharmProjects/pythonProject3/chromedriver new file mode 100755 index 0000000..84cc332 Binary files /dev/null and b/PycharmProjects/pythonProject3/chromedriver differ diff --git a/PycharmProjects/pythonProject3/main.py b/PycharmProjects/pythonProject3/main.py new file mode 100644 index 0000000..d3c3df4 
# Patch hunk header kept from the original dump:
# --- /dev/null +++ b/PycharmProjects/pythonProject3/main.py @@ -0,0 +1,43 @@
"""Print the text of every heading (h1-h6) found on a web page."""

import requests
from bs4 import BeautifulSoup
from urllib.parse import urlparse, urlunparse

HEADING_TAGS = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']


def _ensure_scheme(url, default_scheme='http'):
    """Return *url* with ``default_scheme`` applied when it lacks a usable one.

    ``urlparse`` puts a scheme-less host such as ``example.com`` in ``path``
    (not ``netloc``), and may read ``localhost:8000`` as scheme ``localhost``.
    A URL is therefore treated as complete only when it has both a scheme
    and a network location.
    """
    url = url.strip()
    parsed = urlparse(url)
    if parsed.scheme and parsed.netloc:
        return url
    if parsed.netloc:
        # Scheme-relative form ("//host/path"): only the scheme is missing.
        return urlunparse(parsed._replace(scheme=default_scheme))
    return f"{default_scheme}://{url}"


def scrape_headings(url, timeout=10):
    """Fetch *url* and print the text of each heading element on the page.

    Args:
        url: Page address. ``http://`` is prepended when no scheme is given.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        None. Headings, or a failure message, are written to standard output.
    """
    try:
        url = _ensure_scheme(url)
        # Without a timeout an unresponsive server would block forever.
        response = requests.get(url, timeout=timeout)
    except (requests.RequestException, ValueError) as e:
        # ValueError covers malformed input rejected by urlparse.
        print(f"An error occurred: {str(e)}")
        return

    if response.status_code != 200:
        print(f"Failed to retrieve content from {url}. Status code: {response.status_code}")
        return

    soup = BeautifulSoup(response.text, 'html.parser')
    for heading in soup.find_all(HEADING_TAGS):
        print(heading.text)


def main_start():
    """Prompt for URLs and print their headings until the user declines."""
    # A loop, not recursion, so repeated searches do not grow the call stack.
    while True:
        url = input("Enter the URL: ")
        scrape_headings(url)
        search_again = input("Do you want to search again? y/n:").strip().lower()
        if search_again != 'y':
            break


if __name__ == "__main__":
    main_start()