Add pythonProject3
This commit is contained in:
3
PycharmProjects/pythonProject3/.idea/.gitignore
generated
vendored
Normal file
3
PycharmProjects/pythonProject3/.idea/.gitignore
generated
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
# Default ignored files
|
||||
/shelf/
|
||||
/workspace.xml
|
||||
6
PycharmProjects/pythonProject3/.idea/inspectionProfiles/profiles_settings.xml
generated
Normal file
6
PycharmProjects/pythonProject3/.idea/inspectionProfiles/profiles_settings.xml
generated
Normal file
@ -0,0 +1,6 @@
|
||||
<component name="InspectionProjectProfileManager">
|
||||
<settings>
|
||||
<option name="USE_PROJECT_PROFILE" value="false" />
|
||||
<version value="1.0" />
|
||||
</settings>
|
||||
</component>
|
||||
7
PycharmProjects/pythonProject3/.idea/misc.xml
generated
Normal file
7
PycharmProjects/pythonProject3/.idea/misc.xml
generated
Normal file
@ -0,0 +1,7 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="Black">
|
||||
<option name="sdkName" value="Python 3.12 (WebScrape with URL input)" />
|
||||
</component>
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.12 (WebScrape with URL input)" project-jdk-type="Python SDK" />
|
||||
</project>
|
||||
8
PycharmProjects/pythonProject3/.idea/modules.xml
generated
Normal file
8
PycharmProjects/pythonProject3/.idea/modules.xml
generated
Normal file
@ -0,0 +1,8 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ProjectModuleManager">
|
||||
<modules>
|
||||
<module fileurl="file://$PROJECT_DIR$/.idea/pythonProject3.iml" filepath="$PROJECT_DIR$/.idea/pythonProject3.iml" />
|
||||
</modules>
|
||||
</component>
|
||||
</project>
|
||||
8
PycharmProjects/pythonProject3/.idea/pythonProject3.iml
generated
Normal file
8
PycharmProjects/pythonProject3/.idea/pythonProject3.iml
generated
Normal file
@ -0,0 +1,8 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<module type="PYTHON_MODULE" version="4">
|
||||
<component name="NewModuleRootManager">
|
||||
<content url="file://$MODULE_DIR$" />
|
||||
<orderEntry type="inheritedJdk" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
</module>
|
||||
BIN
PycharmProjects/pythonProject3/chromedriver
Executable file
BIN
PycharmProjects/pythonProject3/chromedriver
Executable file
Binary file not shown.
43
PycharmProjects/pythonProject3/main.py
Normal file
43
PycharmProjects/pythonProject3/main.py
Normal file
@ -0,0 +1,43 @@
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from urllib.parse import urlparse, urlunparse
|
||||
|
||||
def scrape_headings(url, timeout=10):
    """Fetch a web page and print the text of every heading (h1-h6) on it.

    Args:
        url: Address of the page. If it lacks a scheme (e.g. "example.com"
            or "localhost:8080"), "http://" is prepended before the request.
        timeout: Value passed to ``requests.get`` as its ``timeout`` argument
            so a stalled server cannot hang the program forever.

    Returns:
        None. Results and error messages are printed to stdout; URL and
        request errors are reported rather than raised.
    """
    try:
        url = url.strip()
        parsed_url = urlparse(url)

        # A bare "example.com" parses with an empty scheme and the host in
        # `path`, while "host:port" may parse with the host as the scheme and
        # no netloc. In both cases the input is really "host[...]" without a
        # scheme, so prepend one textually instead of reassembling the parts
        # (urlunparse would produce the host-less "http:example.com").
        if not (parsed_url.scheme and parsed_url.netloc):
            url = "http://" + url.lstrip("/")

        # Send an HTTP GET request to the specified URL
        response = requests.get(url, timeout=timeout)
    except (ValueError, requests.RequestException) as e:
        # ValueError covers malformed input rejected by urlparse;
        # RequestException covers connection, timeout and invalid-URL errors.
        print(f"An error occurred: {e}")
        return

    # Anything other than a plain 200 is reported as a failure
    if response.status_code != 200:
        print(f"Failed to retrieve content from {url}. Status code: {response.status_code}")
        return

    # Parse the HTML and print each heading's text in document order
    soup = BeautifulSoup(response.text, 'html.parser')
    for heading in soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6']):
        print(heading.text)
|
||||
|
||||
def main_start():
    """Interactively scrape headings from URLs until the user declines.

    Prompts for a URL, passes it to ``scrape_headings``, then asks whether to
    search again. Any answer other than "y" (case-insensitive, surrounding
    whitespace ignored) ends the program.

    Returns:
        None immediately when the module is not being run as a script.

    Raises:
        SystemExit: When the user chooses not to search again.
    """
    # The guard lives inside the function so that calling it from an
    # imported module never blocks on input().
    if __name__ != "__main__":
        return

    # Loop instead of recursing: each repeat used to add a stack frame, so a
    # long session would eventually hit RecursionError.
    while True:
        url = input("Enter the URL: ")
        scrape_headings(url)

        search_again = input("Do you want to search again? y/n:").strip().lower()
        if search_again != 'y':
            # Same effect as the interactive-shell helper exit(), without
            # depending on the site module having installed it.
            raise SystemExit
|
||||
|
||||
|
||||
# Script entry point. main_start() checks __name__ itself, so this call does
# nothing when the module is imported rather than run directly.
main_start()
|
||||
Reference in New Issue
Block a user