Spaces:
Running
Running
| import requests | |
| from bs4 import BeautifulSoup | |
def get_url_content(url: str, *, timeout: float = 30.0) -> str:
    """
    Retrieve the visible text content of a URL.

    The page is fetched with a browser-like User-Agent header, parsed as
    HTML, and reduced to its text with surrounding whitespace removed.

    :param url: The URL to retrieve content from.
    :param timeout: Seconds to wait for the server to connect / send data
        before giving up. Without a timeout, ``requests`` may block forever
        on an unresponsive host.
    :return: The stripped text of the page, or an empty string when the
        response status code is not 200.
    :raises requests.RequestException: If the request itself fails
        (connection error, timeout, invalid URL, ...). These are not caught
        here and propagate to the caller.
    """
    response = requests.get(
        url,
        headers={
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
        },
        # Bound the wait so a stalled server cannot hang the caller.
        timeout=timeout,
    )
    if response.status_code != 200:
        print(f"Failed to retrieve content from {url}. Status code: {response.status_code} - {response.reason}")
        return ""

    # Parse the HTML content using BeautifulSoup.
    parser = BeautifulSoup(response.text, 'html.parser')

    # Extract the text once: each `.text` access re-walks the whole tree.
    # Stripping an empty string already yields "", so no extra guard is needed.
    return parser.text.strip()