"""Scrape the PandaTV "all live rooms" listing with Selenium and save it as CSV.

Running the module as a script opens Firefox, walks every page of
http://www.panda.tv/all, collects one row per live room and writes the rows
to ``live_rooms_number.csv``.
"""
import csv

from selenium import webdriver
from selenium.webdriver.common.by import By

# Default location of the CSV file written by save_to_csv / read by read_from_csv.
CSV_PATH = 'live_rooms_number.csv'

# Column names written as the first CSV row.
CSV_HEADER = ['id', 'nickname', 'cate', 'number']

# Pager link whose text is parsed as the total number of pages.
LAST_PAGE_XPATH = '//div[@class="page-component"]/a[7]'

# NOTE(review): the original code left this locator blank, so the lookup could
# never work.  This assumes the "next page" control is the last <a> inside the
# pager container -- confirm against the live page markup.
NEXT_PAGE_XPATH = '//div[@class="page-component"]/a[last()]'


def get_pages_numger(browser):
    """Return the total number of listing pages as an int.

    The value is read from the text of the pager link matched by
    ``LAST_PAGE_XPATH``.

    Raises:
        selenium.common.exceptions.NoSuchElementException: the link is absent.
        ValueError: the link text is not an integer.
    """
    # find_element (singular) returns one element; the plural form returns a
    # list, which has no ``.text`` attribute.
    last_page_link = browser.find_element(By.XPATH, LAST_PAGE_XPATH)
    return int(last_page_link.text)


def get_next_page_buttun(browser):
    """Return the element that navigates to the next listing page."""
    return browser.find_element(By.XPATH, NEXT_PAGE_XPATH)


def get_rooms_number_in_a_page(browser):
    """Collect one row per live room shown on the current page.

    Returns:
        A list of ``[id, nickname, cate, number]`` lists of strings, one per
        ``<li data-id=...>`` element on the page.
    """
    rows = []
    for item in browser.find_elements(By.XPATH, '//li[@data-id]'):
        room_id = item.get_attribute("data-id")
        # The leading "." scopes each search to this <li>.  A bare "//" starts
        # at the document root and would return the first room's spans for
        # every row.
        nickname = item.find_element(By.XPATH, './/span[@class="video-nickname"]')
        # Audience count.
        number = item.find_element(By.XPATH, './/span[@class="video-number"]')
        cate = item.find_element(By.XPATH, './/span[@class="video-cate"]')
        # Store the visible text rather than the WebElement objects so the
        # rows remain usable after the browser is closed and serialize to CSV.
        rows.append([room_id, nickname.text, cate.text, number.text])
    return rows


def get_rooms_number_in_all_pages(browser, pages_number):
    """Scrape ``pages_number`` consecutive pages, then close the browser.

    Args:
        browser: WebDriver already showing the first listing page.
        pages_number: how many pages to scrape, starting at the current one.

    Returns:
        The concatenated rows of every page, in page order.

    The browser window is closed before returning, including when scraping
    fails part-way through.
    """
    rows = []
    try:
        for page_index in range(pages_number):
            print('第{}页'.format(page_index + 1))
            # Scrape the page currently displayed.
            rows.extend(get_rooms_number_in_a_page(browser))
            # Move on to the next page; there is nothing to click after the
            # last one.
            # NOTE(review): no explicit wait follows the click -- if the list
            # is refreshed asynchronously a wait may be needed; confirm.
            if page_index + 1 < pages_number:
                get_next_page_buttun(browser).click()
    finally:
        browser.close()
    return rows


def save_to_csv(rooms_number, path=CSV_PATH):
    """Write a header row followed by ``rooms_number`` to a CSV file.

    Args:
        rooms_number: iterable of ``[id, nickname, cate, number]`` rows.
        path: destination file; defaults to ``live_rooms_number.csv``.
    """
    # newline='' lets the csv module control line endings itself; UTF-8 keeps
    # non-ASCII nicknames writable regardless of the platform default encoding.
    with open(path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f, lineterminator='\n')
        writer.writerow(CSV_HEADER)  # header row
        writer.writerows(rooms_number)


def read_from_csv(path=CSV_PATH):
    """Return the data rows of the CSV file, without the header row.

    Args:
        path: file to read; defaults to ``live_rooms_number.csv``.

    Returns:
        A list of rows, each a list of strings.  Empty when the file holds
        only a header or nothing at all.
    """
    with open(path, 'r', newline='', encoding='utf-8') as f:
        rows = list(csv.reader(f))
    return rows[1:]  # drop the header row


def get_rooms_number():
    """Open Firefox on the PandaTV listing and scrape every page.

    Returns:
        The rows produced by ``get_rooms_number_in_all_pages``.

    Raises:
        AssertionError: the loaded page title does not contain '熊猫TV'.
    """
    browser = webdriver.Firefox()
    try:
        browser.get('http://www.panda.tv/all')
        # Raised explicitly (rather than via ``assert``) so the sanity check
        # is not stripped when Python runs with -O.
        if '熊猫TV' not in browser.title:
            raise AssertionError(
                'unexpected page title: {!r}'.format(browser.title))
        pages_number = get_pages_numger(browser)
    except Exception:
        # get_rooms_number_in_all_pages has not taken over the browser yet,
        # so close it here to avoid leaking the window.
        browser.close()
        raise
    return get_rooms_number_in_all_pages(browser, pages_number)


if __name__ == '__main__':
    rooms_number = get_rooms_number()
    save_to_csv(rooms_number)
    # To reuse a previous scrape instead of hitting the site again:
    # rooms_number = read_from_csv()