-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathweb_scraping_get_content.py
More file actions
24 lines (20 loc) · 931 Bytes
/
Copy pathweb_scraping_get_content.py
File metadata and controls
24 lines (20 loc) · 931 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
import requests
from bs4 import BeautifulSoup as bs
import pandas as pd
def web_scraping(list_of_urls, headers, timeout=30):
    """Fetch each URL and collect the ``<p>`` elements found in its HTML.

    Args:
        list_of_urls: Iterable of URL strings to download.
        headers: HTTP request headers forwarded to ``requests.get``.
        timeout: Seconds ``requests`` waits on the server before giving up.

    Returns:
        A list holding the ``<p>`` elements of every fetched page, in the
        order the URLs were given. The list is empty when ``list_of_urls``
        is empty.

    Raises:
        requests.exceptions.RequestException: If a request fails or times
            out.
    """
    paragraphs = []
    for url_link in list_of_urls:
        # An explicit timeout keeps a silent server from blocking forever.
        response = requests.get(url_link, headers=headers, timeout=timeout)
        html_content = bs(response.content, 'html.parser')
        page_paragraphs = html_content.find_all('p')
        print(page_paragraphs)
        # Accumulate instead of rebinding so earlier pages are not discarded.
        paragraphs.extend(page_paragraphs)
    return paragraphs
if __name__ == '__main__':
    # Script entry point: prompt for a URL and a label, scrape the page's
    # paragraphs, and save them to ./content_<label> in CSV format.
    headers = {'User-Agent': 'Mozilla/5.0 (iPad; U; CPU OS 3_2_1 like Mac OS X; en-us) AppleWebKit/531.21.10 (KHTML, like Gecko) Mobile/7B405'}
    url_user = input("Please specify an url that allows web scraping: ")
    # The search query is only used to name the output file.
    search_query = input("Please provide with a search query:")
    url = [url_user]
    content = web_scraping(url, headers=headers)
    # Store each paragraph's plain text (one row per paragraph) rather than
    # the parser's element objects.
    paragraph_texts = [paragraph.get_text() for paragraph in content]
    content_dataframe = pd.DataFrame(paragraph_texts)
    # to_csv returns None when given a path, so its result is not kept.
    content_dataframe.to_csv("./content_{}".format(search_query))