14 lines
543 B
Python
14 lines
543 B
Python
from bs4 import BeautifulSoup
|
|
import urllib.request as urllib2
|
|
import re
|
|
# User-Agent header value that linkview() sends when the caller does not
# pass its own ``user_agent`` argument.
DEFAULT_USER_AGENT = "Mozilla/5.0 (X11; SnarfBot; Linux x86_64; rv:84.0) Gecko/20100101 Firefox/84.0"
|
|
def linkview(url, user_agent=DEFAULT_USER_AGENT):
    """Return the absolute http(s) link targets found on the page at *url*.

    The page is fetched with a GET request carrying *user_agent* as its
    ``User-Agent`` header, parsed with BeautifulSoup's ``lxml`` parser, and
    scanned for ``<a>`` tags whose ``href`` starts with ``http://`` or
    ``https://``. Relative links and other schemes are skipped.

    Args:
        url: Address of the page to fetch.
        user_agent: Value for the ``User-Agent`` request header.

    Returns:
        A list of ``href`` strings in document order (duplicates are kept).

    Any exception raised by ``urlopen`` while fetching the page propagates
    to the caller.
    """
    req = urllib2.Request(url, data=None, headers={'User-Agent': user_agent})

    # Use the response as a context manager so the underlying connection is
    # closed even if parsing raises; the soup is fully built before exit.
    with urllib2.urlopen(req) as html_page:
        soup = BeautifulSoup(html_page, features="lxml")

    absolute_href = re.compile("^http[s]?://")
    return [
        anchor.get('href')
        for anchor in soup.find_all('a', attrs={'href': absolute_href})
    ]