Python 解析网页中的 table 表格并下载为 CSV 的代码
代码语言:Python
所属分类:web系统
代码描述:Python 解析网页中的 table 表格并下载为 CSV 的代码
下面为部分代码预览,完整代码请点击下载或在bfwstudio webide中打开
#!/usr/local/python3/bin/python3
# -*- coding: utf-8 -*-
import requests
import pandas as pd
from bs4 import BeautifulSoup as bs

# User-Agent header value sent with every request.
USER_AGENT = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36"
# US English
LANGUAGE = "en-US,en;q=0.5"


def get_soup(url):
    """Fetch `url` and return its body parsed as a BeautifulSoup object.

    The request is sent with the module-level USER_AGENT and LANGUAGE
    header values. The response body is parsed with the built-in
    "html.parser" backend regardless of the HTTP status code.

    Args:
        url: Address of the page to download.

    Returns:
        A BeautifulSoup object built from the raw response content.

    Raises:
        requests.RequestException: propagated unchanged from `session.get`.
    """
    # The context manager closes the session (and its pooled connections)
    # even if the request or the parsing raises.
    with requests.Session() as session:
        # Set the User-Agent as a regular browser.
        session.headers['User-Agent'] = USER_AGENT
        # Request English content (optional).
        session.headers['Accept-Language'] = LANGUAGE
        session.headers['Content-Language'] = LANGUAGE
        response = session.get(url)
        return bs(response.content, "html.parser")


def get_all_tables(soup):
    """Return every `<table>` element found in `soup`."""
    return soup.find_all("table")


def get_table_headers(table):
    """Return the stripped text of each `<th>` in the table's first `<tr>`.

    Args:
        table: A parsed `<table>` element.

    Returns:
        A list of header strings; empty when the table has no `<tr>` or the
        first `<tr>` has no `<th>` cells.
    """
    first_row = table.find("tr")
    # find() returns None when there is no match; without this guard a table
    # that has no rows would raise AttributeError on the next line.
    if first_row is None:
        return []
    return [th.text.strip() for th in first_row.find_all("th")]


# NOTE: the page this preview was taken from cuts the listing off inside
# get_table_rows(). The visible fragment is kept below exactly as shown,
# commented out, because the rest of the function is not available here.
# (Original page notice: log in and use the download button above to view
# the complete code.)
#
# def get_table_rows(table):
#     """Given a table, returns all its rows"""
#     rows = []
#     for tr in table.find_all("tr")[1:]:
#         cells = []
#         # grab all td tags in this table row
#         tds = tr.find_all("td")
#         if len(tds) == 0:
#             # i.........
网友评论0