代理ip汇总
- 编辑时间: 2020-05-12 23:28:09
- 浏览量: 1190
- 作者: makerroot
- 文章分类: python后端
- 评论数: 暂无评论
代理 IP 频繁出问题，大家如果有可用的代理源，欢迎粘贴一下链接。走在一起是缘分，一起在走是幸福。
# NOTE: this snippet expects `requests`, `json`, `re`, `BeautifulSoup` (bs4)
# and `etree` (lxml) to be in scope; the imports are not part of the snippet.

# Seconds to wait for each proxy-list site. Without a timeout `requests`
# waits indefinitely, so one dead site would hang the whole collection run.
_REQUEST_TIMEOUT = 10


def proxy_pool():
    """Collect proxies published on proxygather.com.

    Returns:
        A list of ``{'http': 'ip:port'}`` dicts. The list is empty when the
        page does not answer with HTTP 200.
    """
    url = "https://proxygather.com/zh"
    resp = requests.get(url, timeout=_REQUEST_TIMEOUT)
    proxy_list = []
    if resp.status_code != 200:
        return proxy_list

    soup = BeautifulSoup(resp.content, 'lxml')
    # The slice bounds are tied to the page layout: only the <script> tags
    # between the 7th and the last four are parsed as proxy records.
    for script in soup.select('script')[7:-4]:
        # Strip the tag and the `gp.insertPrx(...)` call wrapper so that only
        # the JSON argument is left.
        payload = (
            str(script)
            .replace('<script type="text/javascript">', '')
            .replace('</script>', '')
            .replace('gp.insertPrx(', '')
            .replace(');', '')
        )
        entry = json.loads(payload)
        # PROXY_PORT is parsed as a hexadecimal string.
        port = int(entry['PROXY_PORT'], 16)
        proxy_list.append({'http': entry['PROXY_IP'] + ':' + str(port)})
    return proxy_list


def proxy_pool_temp():
    """Collect high-anonymity proxies from the cn-proxy.com table.

    Only rows whose third cell reads '高度匿名' are kept. A proxy listening on
    port 443 is stored under the 'https' key, every other one under 'http'.

    Returns:
        A list of single-entry dicts mapping scheme to ``'ip:port'``. The
        list is empty when the page does not answer with HTTP 200.
    """
    url = "http://cn-proxy.com/archives/218"
    resp = requests.get(url, timeout=_REQUEST_TIMEOUT)
    proxy_list = []
    if resp.status_code != 200:
        return proxy_list

    soup = BeautifulSoup(resp.content, 'lxml')
    # The first two <tr> elements are skipped, as in the original index loop.
    for row in soup.find_all("tr")[2:]:
        cells = row.find_all('td')
        if len(cells) <= 2:
            continue
        if cells[2].string != '高度匿名':
            continue
        host = cells[0].string
        port = cells[1].string
        # The port lives in the second cell. The previous code compared the
        # third cell (already known to be '高度匿名') with '443', so the
        # https branch could never be taken.
        scheme = 'https' if port == '443' else 'http'
        proxy_list.append({scheme: host + ':' + port})
    return proxy_list


def get_ips1(self):
    """Collect proxies from the first two xicidaili.com listing pages.

    Reads ``self.header`` and sends it as the request headers.

    Returns:
        A list of ``'http://ip:port'`` strings.
    """
    ips = []
    for page in range(1, 3):
        url = "http://www.xicidaili.com/wt/" + str(page)
        response = requests.get(
            url, headers=self.header, timeout=_REQUEST_TIMEOUT)
        # The encoding must be set before `.text` is read, otherwise it has
        # no influence on how the body is decoded.
        response.encoding = 'utf8'
        root = etree.HTML(response.text)
        ip_table = root.xpath("//*[@id='ip_list']")[0]
        # The first <tr> is skipped (presumably the header row).
        for row in ip_table.xpath("./tr")[1:]:
            cells = row.xpath("./td/text()")
            ips.append("http://" + cells[0] + ":" + cells[1])
    return ips


def get_ips2(self):
    """Collect proxies from the ip-adress.com proxy list.

    Reads ``self.header`` and sends it as the request headers.

    Returns:
        A list of ``'http://' + ip + port`` strings.
    """
    ips = []
    url = "https://www.ip-adress.com/proxy-list"
    response = requests.get(
        url, headers=self.header, timeout=_REQUEST_TIMEOUT)
    # The encoding must be set before `.text` is read, otherwise it has no
    # influence on how the body is decoded.
    response.encoding = 'utf8'
    root = etree.HTML(response.text)
    ip_table = root.xpath("/html/body/main/table")[0]
    for row in ip_table.xpath("./tbody/tr")[1:]:
        host = row.xpath("./td/a/text()")[0]
        port = row.xpath("./td/text()")[0]
        # NOTE(review): no ':' is inserted between host and port; presumably
        # the cell text already starts with ':' -- verify against the page.
        ips.append("http://" + host + port)
    return ips


def get_ips3(self):
    """Collect proxies from pages 2 to 29 of 66ip.cn.

    Reads ``self.header`` and sends it as the request headers. IP cells and
    purely numeric cells are matched separately and paired in order of
    appearance.

    Returns:
        A list of ``'http://ip:port'`` strings.
    """
    # Raw strings avoid invalid-escape warnings for `\d` and `\.`; the
    # capture groups return the cell content without the <td> tags.
    ip_cell = re.compile(r"<td>(\d+\.\d+\.\d+\.\d+)</td>")
    port_cell = re.compile(r"<td>(\d+)</td>")

    ips = []
    for page in range(2, 30):
        url = "http://www.66ip.cn/" + str(page) + ".html"
        response = requests.get(
            url, headers=self.header, timeout=_REQUEST_TIMEOUT)
        response.encoding = 'gb2312'
        html = response.text
        for host, port in zip(ip_cell.findall(html), port_cell.findall(html)):
            ips.append("http://" + host + ':' + port)
    return ips
上一篇: python版登录验证码自动识别
下一篇: geohash编码
评论列表
暂无评论
热门阅读
10你有酒我有故事
提交评论