-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathyahoo_selenium.py
More file actions
36 lines (28 loc) · 1.1 KB
/
yahoo_selenium.py
File metadata and controls
36 lines (28 loc) · 1.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
#@title Yahoo search for "OpenAI"
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup
# Search Yahoo Taiwan for a keyword with a headless Chrome session and print
# the text of every <h3> found inside the element with id "web".

# Browser configuration.
chrome_options = Options()
chrome_options.add_argument('--headless')  # headless mode: no browser window
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--disable-dev-shm-usage')
wd = webdriver.Chrome(options=chrome_options)

# A browser process is running from this point on. The try/finally makes sure
# it is shut down even when an element lookup or the wait below raises;
# otherwise the headless Chrome process would be left behind.
try:
    url = 'https://tw.yahoo.com/'
    wd.get(url)

    # Interact with the page: type the keyword into the search box and
    # click the search button.
    element = wd.find_element(By.ID, 'header-search-input')
    key_word = 'OpenAI'
    element.send_keys(key_word)
    wd.find_element(By.ID, 'header-desktop-search-button').click()

    # Wait up to 5 seconds for the element with id "web" to be present
    # (presumably the container holding the search results -- confirm against
    # the live page). Raises TimeoutException if it never appears.
    WebDriverWait(wd, 5).until(
        expected_conditions.presence_of_element_located((By.ID, 'web')))

    # From here on it is BeautifulSoup's job: parse the current page source
    # and print the text of each <h3> under div#web.
    soup = BeautifulSoup(wd.page_source, "html.parser")
    links = soup.select('div#web h3')
    for link in links:
        print(link.get_text())
finally:
    wd.quit()