from bs4 import BeautifulSoup
import time
import requests
# HTTP request headers passed to every ``requests.get`` call in this file.
# The User-Agent identifies the client as a desktop Chrome browser; it is
# written in the canonical "product (comment) product ..." layout so it
# matches what a real Chrome build sends.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/53.0.2785.143 Safari/537.36'
    ),
}
def judgement_sex(class_name):
    """Translate a CSS class list into a gender label.

    Args:
        class_name: Value compared against ``['member_ico1']``. Presumably
            the ``class`` attribute list of an icon element as returned by
            BeautifulSoup -- confirm against the caller.

    Returns:
        The Chinese character for "female" (U+5973) when ``class_name`` is
        exactly ``['member_ico1']``; otherwise the character for "male"
        (U+7537).
    """
    # The labels are spelled as Unicode escapes so they stay correct even if
    # the source file is saved or read with the wrong encoding (the previous
    # literals had been corrupted into Latin-1 mojibake).
    female = '\u5973'
    male = '\u7537'
    return female if class_name == ['member_ico1'] else male
def get_links(url):
    """Download a listing page and call ``get_info`` on each link it contains.

    The page is fetched with the module-level ``headers``, parsed with the
    ``lxml`` parser, and every anchor matched by the
    ``#page_list > ul > li > a`` selector has its ``href`` handed to
    ``get_info``. ``url`` is printed once the page has been fetched and
    parsed.

    Args:
        url: Address of the listing page to fetch.

    Raises:
        requests.exceptions.RequestException: If the request fails, including
            a ``Timeout`` when the server does not respond within 30 seconds.
    """
    # A timeout keeps one unresponsive server from stalling the crawl forever;
    # requests waits indefinitely when none is given.
    wb_data = requests.get(url, headers=headers, timeout=30)
    soup = BeautifulSoup(wb_data.text, 'lxml')
    print(url)
    for link in soup.select('#page_list > ul > li > a '):
        href = link.get("href")
        if not href:
            # Anchor without a usable target: nothing to fetch, and passing
            # None on would only make the detail request fail.
            continue
        get_info(href)
def get_info(url):
print(url)
wb_data = requests.get(url,headers=headers)
soup = BeautifulSoup(wb_data.text, 'lxml')
titles = soup.select('body > div.wrap.clearfix.con_bg > div.con_l > div.pho_info > h4 > em')
addresses = soup.select('#introducePart > div:nth-child(2) > div.info_r > div.intro_item_content > p')
prices = soup.select('#pricePart > div.day_l > span')
imgs = soup.select('#imgMouseCusor')