关于python:Selenium Scrolling Dilemma

Selenium Scrolling Dilemma

这是我能找到的唯一一段能够向下滚动到页面末尾的代码,其他代码都不起作用。问题是,while True 循环永远不会结束:即使已经滚动到页面底部,它仍会继续尝试向下滚动,因此永远执行不到后面的打印步骤。如何结束 while True 循环并打印结果?谢谢。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
 from selenium import webdriver

    # Question snippet: open the TradingView screener in Firefox, scroll to
    # the bottom of the page, then print the text of every ticker link.
    url = 'http://www.tradingview.com/screener'
    driver = webdriver.Firefox()
    driver.get(url)

    # Get scroll height
    # NOTE(review): last_height is assigned here but never read again in
    # this snippet.
    last_height = driver.execute_script("return document.body.scrollHeight")

    # NOTE(review): this loop has no break and no exit condition, so it
    # scrolls forever and the statements after it are never reached.
    while True:
        # Scroll down to bottom
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

    # will give a list of all tickers
    tickers = driver.find_elements_by_css_selector('a.tv-screener__symbol')

    # Print "Row" followed by the text of each ticker element.
    for index in range(len(tickers)):
       print("Row" + tickers[index].text +"")
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
Errors I'm receiving


>>> from selenium import webdriver
>>> url = '
http://www.tradingview.com/screener'
>>> driver = webdriver.Firefox()
>>> driver.get(url)
>>>
>>> # Get scroll height
... last_height = driver.execute_script("return document.body.scrollHeight")
>>>
>>> selector = '
.js-field-total.tv-screener-table__field-value--total'
>>> matches = driver.find_element_by_css_selector(selector)
>>> matches = int(matches.text.split()[0])
>>>
>>> visible_rows = 0
>>> scrolls = 0
>>>
>>> while visible_rows < matches:
...
  File"<stdin>", line 2

    ^
IndentationError: expected an indented block
>>>     driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
  File"<stdin>", line 1
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    ^
IndentationError: unexpected indent
>>>
>>>     # Wait 10 scrolls before updating row information
...     if scrolls == 10:
  File"<stdin>", line 2
    if scrolls == 10:
    ^
IndentationError: unexpected indent
>>>         table = driver.find_elements_by_class_name('
tv-data-table__tbody')
  File"<stdin>", line 1
    table = driver.find_elements_by_class_name('
tv-data-table__tbody')
    ^
IndentationError: unexpected indent
>>>         visible_rows = len(table[1].find_elements_by_tag_name('
tr'))
  File"<stdin>", line 1
    visible_rows = len(table[1].find_elements_by_tag_name('
tr'))
    ^
IndentationError: unexpected indent
>>>         scrolls = 0
  File"<stdin>", line 1
    scrolls = 0
    ^
IndentationError: unexpected indent
>>>
>>>     scrolls += 1
  File"<stdin>", line 1
    scrolls += 1
    ^
IndentationError: unexpected indent
>>>
>>> # will give a list of all tickers
... tickers = driver.find_elements_by_css_selector('
a.tv-screener__symbol')
>>>
>>> for index in range(len(tickers)):
...    print("Row" + tickers[index].text +"")
...


在 TICKER 列标题下方,页面会显示表中共有多少行(匹配项)。因此,一种做法是将当前可见的行数与总行数进行比较:当可见行数达到总行数时,退出循环。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
from selenium import webdriver
from selenium.webdriver.common.by import By

# Print every ticker symbol listed on the TradingView screener.
#
# The script keeps scrolling to the bottom of the page until the number of
# table rows it can see reaches the total that the page itself reports, and
# only then collects the ticker links.

url = 'http://www.tradingview.com/screener'
driver = webdriver.Firefox()
driver.get(url)

# The first whitespace-separated token of the total field's text is parsed
# as the expected number of rows.
selector = '.js-field-total.tv-screener-table__field-value--total'
total_field = driver.find_element(By.CSS_SELECTOR, selector)
matches = int(total_field.text.split()[0])

visible_rows = 0
scrolls = 0

# NOTE: the only exit condition is visible_rows reaching matches; if the
# page never renders that many rows, this loop does not terminate.
while visible_rows < matches:

    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

    # Wait 10 scrolls before updating row information
    if scrolls == 10:
        table = driver.find_elements(By.CLASS_NAME, 'tv-data-table__tbody')
        # NOTE(review): index 1 presumably picks the tbody that holds the
        # data rows -- confirm against the page markup.
        visible_rows = len(table[1].find_elements(By.TAG_NAME, 'tr'))
        scrolls = 0

    scrolls += 1

# will give a list of all tickers
tickers = driver.find_elements(By.CSS_SELECTOR, 'a.tv-screener__symbol')

for ticker in tickers:
    print("Row" + ticker.text)

编辑:由于您的环境似乎无法使用前面的解决方案,您可以尝试下面这种不同的方法。该页面每次加载150行。因此,我们不必统计可见行的数量,而是取预期的总匹配数/行数(例如4894),将其除以150,得到需要滚动的次数。只要滚动的次数不少于这个值,理论上所有的行都应该已经可见,然后就可以继续执行后面的代码。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
from time import sleep

from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Print every ticker symbol listed on the TradingView screener.
#
# Instead of counting rendered rows, this version reads the total number of
# matches from the page, derives how many scrolls are needed to load them
# all, scrolls that many times, and then collects the ticker links.

ROWS_PER_LOAD = 150     # the page loads 150 rows at a time
EXTRA_SCROLLS = 5       # extra scrolls just to be sure
DEFAULT_MATCHES = 4895  # fallback when the total cannot be read
SCROLL_PAUSE = 2        # seconds to pause after each scroll for loading
WAIT_TIMEOUT = 10       # seconds to wait for the total field to be visible

url = 'http://www.tradingview.com/screener'
driver = webdriver.Chrome('./chromedriver')
driver.get(url)

try:
    selector = '.js-field-total.tv-screener-table__field-value--total'
    condition = EC.visibility_of_element_located((By.CSS_SELECTOR, selector))
    total_field = WebDriverWait(driver, WAIT_TIMEOUT).until(condition)
    # The first whitespace-separated token of the field's text is the count.
    matches = int(total_field.text.split()[0])
except (TimeoutException, Exception):
    # Deliberately best-effort: a timed-out wait or any failure while
    # parsing the field falls back to the default instead of aborting.
    print ('Problem finding matches, setting default...')
    matches = DEFAULT_MATCHES

# Divide matches by the rows loaded per scroll to determine the number of
# times we need to scroll, then add the safety margin.
num_loops = matches // ROWS_PER_LOAD + EXTRA_SCROLLS

for _ in range(num_loops):
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    sleep(SCROLL_PAUSE)  # Pause briefly to allow loading time

# will give a list of all tickers
tickers = driver.find_elements(By.CSS_SELECTOR, 'a.tv-screener__symbol')

n_tickers = len(tickers)

# Report whether the number of collected tickers equals the expected total.
msg = 'Correct ' if n_tickers == matches else 'Incorrect '
msg += 'number of tickers ({}) found'
print(msg.format(n_tickers))

for ticker in tickers:
    print("Row" + ticker.text)