python搭建免费的百度baidu搜索引擎结果http服务api示例代码

代码语言:python

所属分类:web系统

代码描述:python搭建免费的百度baidu搜索引擎结果http服务api示例代码,通过flask和selenium抓取实现。

代码标签: python 搭建 免费 百度 baidu 搜索 引擎 结果 http 服务 api 示例 代码

下面为部分代码预览,完整代码请点击下载或在bfwstudio webide中打开

#!/usr/local/python3/bin/python3
# -*- coding: utf-8 -*-
from flask import Flask, request, jsonify
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from time import sleep
from bs4 import BeautifulSoup
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import requests
import time

# Flask application object for this module; `__name__` is passed as the
# import name. NOTE(review): route registrations are not visible in this
# excerpt — presumably they are attached to `app` further down the file.
app = Flask(__name__)




def fetch_json_from_baidu_link(baidu_link, timeout=10):
    """Resolve a Baidu result link and fetch its target through r.jina.ai as JSON.

    The link is requested with browser-like headers and redirects are
    followed; the final URL is then prefixed with ``https://r.jina.ai/`` and
    requested again with ``Accept: application/json``.

    Args:
        baidu_link: URL to resolve (typically a Baidu redirect link).
        timeout: Seconds to wait on each of the two HTTP requests before
            giving up. Passed straight to ``requests.get``.

    Returns:
        The decoded JSON payload of the second request, or ``None`` when
        either request fails or the response body is not valid JSON.
    """
    # Browser-like headers for the redirect-resolution request.
    browser_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
        'Accept-Encoding': 'gzip, deflate, br',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1'
    }
    # Header set for the second request: ask for a JSON response.
    json_headers = {
        'Accept': 'application/json'
    }

    try:
        # Follow redirects to discover the real destination URL. This call
        # lives inside the try so that its network failures are reported and
        # turned into None exactly like failures of the second request.
        redirect_response = requests.get(
            baidu_link,
            headers=browser_headers,
            allow_redirects=True,
            timeout=timeout,
        )
        real_url = "https://r.jina.ai/" + redirect_response.url

        # Without a timeout a stalled server would block the caller forever.
        reader_response = requests.get(
            real_url, headers=json_headers, timeout=timeout
        )

        # Raises HTTPError for 4xx/5xx status codes.
        reader_response.raise_for_status()

        return reader_response.json()

    except requests.exceptions.HTTPError as http_err:
        print(f'HTTP error occurred: {http_err}')
    except requests.exceptions.ConnectionError as conn_err:
        print(f'Connection error occurred: {conn_err}')
    except requests.exceptions.Timeout as timeout_err:
        print(f'Timeout error occurred: {timeout_err}')
    except requests.exceptions.RequestException as req_err:
        print(f'An error occurred: {req_err}')
    except ValueError as json_err:
        # Older requests versions raise a plain ValueError (not a
        # RequestException) when the body is not valid JSON.
        print(f'Invalid JSON in response: {json_err}')

    return None
    

# 爬取数据
def scrape_data(keyword):

    chromeOptions = webdriver.ChromeOptions()
    chromeOptions.add_argument("--headless")
    chromeOptions.add_argument("--remote-debugging-port=9222")
    chromeOptions.add_argument('--no-sandbox')
    browser = webdriver.Chrome('/usr/bin/chromedriver',chrome_options=chromeOptions)
    
    browser.get("https://www.baidu.com/")               #进入相关网站
    #保存网站截屏图片   
    
    list_result=[]
    browser.find_element_by_id('kw').send_keys(keyword,Keys.RETURN)  # 输入框
    browser.find.........完整代码请登录后点击上方下载按钮下载查看

网友评论0