import sys
from urllib.parse import urldefrag, urljoin, urlparse
from urllib.request import urlopen

from bs4 import BeautifulSoup
# Absolute URLs of every internal page discovered so far (filled by getLinks).
pages = set()


def getLinks(pageUrl):
    """Crawl www.80sy.com starting at ``pageUrl`` and collect internal links.

    Each newly discovered internal URL is printed, added to the module-level
    ``pages`` set, and then fetched in turn so that its own links are
    followed as well.

    Args:
        pageUrl: Where to start, either an absolute URL or a path relative
            to the site root. An empty string starts at the home page.

    Returns:
        None. Results accumulate in the global ``pages`` set as absolute
        URLs with any ``#fragment`` removed.
    """
    site_root = "https://www.80sy.com"
    site_domain = "80sy.com"

    # urljoin() returns the base unchanged when the second argument is "".
    start = urljoin(site_root, pageUrl)

    # An explicit stack replaces recursion so that a large site cannot
    # exhaust the interpreter's recursion limit.
    pending = [start]
    fetched = set()
    while pending:
        current = pending.pop()
        if current in fetched:
            continue
        fetched.add(current)

        try:
            # The context manager closes the HTTP response once it is parsed.
            with urlopen(current) as html:
                bsObj = BeautifulSoup(html, features="html.parser")
        except (OSError, ValueError) as err:
            # URLError/HTTPError are OSError subclasses; ValueError covers
            # malformed URLs. One bad page must not abort the whole crawl.
            print(f"skipping {current}: {err}", file=sys.stderr)
            continue

        for link in bsObj.find_all("a", href=True):
            try:
                # Resolve relative links against the page they appear on and
                # drop the fragment so "#top"-style anchors do not create
                # duplicate entries.
                newPage = urldefrag(
                    urljoin(current, link.attrs["href"].strip())
                ).url
                parts = urlparse(newPage)
                host = parts.hostname or ""
            except ValueError:
                continue  # unparsable href

            if parts.scheme not in ("http", "https"):
                continue  # mailto:, javascript:, tel:, ...
            # Compare the host name rather than searching the whole URL, so
            # external links that merely mention the site are rejected.
            if host != site_domain and not host.endswith("." + site_domain):
                continue
            if newPage in pages:
                continue

            print(newPage)
            pages.add(newPage)
            pending.append(newPage)
# Crawl from the site's home page, then print the collected links followed
# by their count. The links are sorted because set iteration order is
# arbitrary and would otherwise change from run to run.
getLinks("")
for page in sorted(pages):
    print(page)
print(len(pages))
# 文章转载请说明出处:八零岁月 » Python之网站地图
# 评论前必须登录!