Add pythonProject3

This commit is contained in:
cid
2023-12-21 01:17:03 +08:00
commit c246a09b89
7 changed files with 75 additions and 0 deletions

3
PycharmProjects/pythonProject3/.idea/.gitignore generated vendored Normal file
View File

@ -0,0 +1,3 @@
# Default ignored files
/shelf/
/workspace.xml

View File

@ -0,0 +1,6 @@
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>

View File

@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="Black">
<option name="sdkName" value="Python 3.12 (WebScrape with URL input)" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.12 (WebScrape with URL input)" project-jdk-type="Python SDK" />
</project>

View File

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/pythonProject3.iml" filepath="$PROJECT_DIR$/.idea/pythonProject3.iml" />
</modules>
</component>
</project>

View File

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>

Binary file not shown.

View File

@ -0,0 +1,43 @@
import requests
from bs4 import BeautifulSoup
from urllib.parse import urlparse, urlunparse
def scrape_headings(url, timeout=10):
    """Fetch *url* and print the text of every heading (h1-h6) on the page.

    Args:
        url: Web address to scrape. If the scheme (http/https) is missing,
            ``http://`` is assumed.
        timeout: Seconds to wait for the HTTP response before giving up.
            Prevents the script from hanging forever on a dead host.
    """
    try:
        # Default to http:// when the user typed a bare host like "example.com".
        parsed_url = urlparse(url)
        if not parsed_url.scheme:
            url = urlunparse(('http',) + parsed_url[1:])
        # A bounded timeout is essential: requests.get() with no timeout
        # blocks indefinitely if the server never responds.
        response = requests.get(url, timeout=timeout)
        if response.status_code == 200:
            soup = BeautifulSoup(response.text, 'html.parser')
            # All six HTML heading levels, in document order.
            headings = soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6'])
            for heading in headings:
                print(heading.text)
        else:
            print(f"Failed to retrieve content from {url}. Status code: {response.status_code}")
    except Exception as e:
        # Top-level boundary for this best-effort tool: report and continue
        # rather than crash the interactive loop on bad input or network error.
        print(f"An error occurred: {str(e)}")
def main_start():
    """Interactive driver: prompt for URLs and scrape each until the user quits.

    Uses a ``while`` loop instead of the original self-recursion, so an
    arbitrarily long session cannot grow the call stack.
    """
    while True:
        url = input("Enter the URL: ")
        scrape_headings(url)
        search_again = input("Do you want to search again? y/n:").lower()
        if search_again != 'y':
            # Any answer other than 'y' ends the session (matches the
            # original behavior, which treated everything else as "no").
            break


if __name__ == "__main__":
    # Guard at module level so importing this file does not start the
    # interactive loop; running it as a script behaves exactly as before.
    main_start()