python爬虫89
时间: 2023-08-28 13:23:12 浏览: 158
python爬虫
以下是一段简单的示例代码,可用于访问 89 免费代理网站并获取代理 IP:
```python
import requests
from bs4 import BeautifulSoup
def get_proxies():
url = 'http://www.89ip.cn/index_{}.html'
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.150 Safari/537.36'
}
proxies = []
for page in range(1, 6): # 获取前5页的代理IP
response = requests.get(url.format(page), headers=headers)
if response.status_code == 200:
soup = BeautifulSoup(response.text, 'html.parser')
table = soup.find('table', attrs={'class': 'layui-table'})
rows = table.find_all('tr')[1:] # 第一行为表头,跳过
for row in rows:
columns = row.find_all('td')
ip = columns[0].text.strip()
port = columns[1].text.strip()
proxy = f'{ip
```
阅读全文