关于cookies:雅虎财经历史数据下载器网址无效

Yahoo Finance Historical data downloader url is not working

我一直使用下面的URL从雅虎财经获取历史数据,但从2017年5月16日起,该URL不再可用。

http://real-chart.finance.yahoo.com/table.csv?s=AAL&a=04&b=01&c=2017&d=04&e=02&f=2017&g=d&ignore=.csv

似乎他们已经更改了URL,新的URL是:

https://query1.finance.yahoo.com/v7/finance/download/aal?period1=1494873000&period2=1494959400&interval=1d&events=history&crumb=l0aetookocj

新的URL中带有一个与会话cookie配套的参数crumb。有没有办法用Java以编程方式获取这个cookie和crumb?


我最近写了一个简单的python脚本来下载一个股票的历史数据。下面是一个如何调用它的示例:python get_quote_history.py --symbol=ibm --from=2017-01-01 --to=2017-05-25 -o ibm.csv 这将下载IBM从2017-01-01到2017-05-25的历史价格,并将其保存在ibm.csv文件中。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import re
import urllib2
import calendar
import datetime
import getopt
import sys
import time

crumble_link = 'https://finance.yahoo.com/quote/{0}/history?p={0}'
crumble_regex = r'CrumbStore":{"crumb":"(.*?)"}'
cookie_regex = r'Set-Cookie: (.*?); '
quote_link = 'https://query1.finance.yahoo.com/v7/finance/download/{}?period1={}&period2={}&interval=1d&events=history&crumb={}'


def get_crumble_and_cookie(symbol):
    """Fetch the crumb token and session cookie Yahoo issues for *symbol*.

    Requests the page built from ``crumble_link``, then extracts the cookie
    from the response headers with ``cookie_regex`` and the crumb from the
    page body with ``crumble_regex``.

    Args:
        symbol: ticker symbol substituted into ``crumble_link``.

    Returns:
        A ``(crumb, cookie)`` tuple of strings.

    Raises:
        urllib2.URLError: propagated from ``urllib2.urlopen``.
        ValueError: if the cookie or the crumb cannot be found in the
            response (previously an opaque ``AttributeError`` on ``None``).
    """
    link = crumble_link.format(symbol)
    response = urllib2.urlopen(link)
    # Python 2 responses are not context managers, so close explicitly.
    try:
        header_text = str(response.info())
        text = response.read()
    finally:
        response.close()

    cookie_match = re.search(cookie_regex, header_text)
    if cookie_match is None:
        raise ValueError("no Set-Cookie header in response from {}".format(link))

    crumb_match = re.search(crumble_regex, text)
    if crumb_match is None:
        raise ValueError("no crumb found in page {}".format(link))

    return crumb_match.group(1), cookie_match.group(1)


def download_quote(symbol, date_from, date_to):
    """Download the daily price history of *symbol* as raw CSV text.

    Args:
        symbol: ticker symbol to download.
        date_from: first day of the range, formatted ``YYYY-MM-DD``.
        date_to: end of the range, formatted ``YYYY-MM-DD``; it is converted
            to a timestamp as-is and sent as ``period2``.

    Returns:
        The response body on success, or ``""`` if all 5 attempts failed.

    Raises:
        ValueError: if either date does not match ``YYYY-MM-DD``.
    """
    # Parse both dates up front so malformed input fails before any request.
    day_format = "%Y-%m-%d"
    time_stamp_from = calendar.timegm(
        datetime.datetime.strptime(date_from, day_format).timetuple())
    time_stamp_to = calendar.timegm(
        datetime.datetime.strptime(date_to, day_format).timetuple())

    max_attempts = 5
    attempts = 0
    while attempts < max_attempts:
        try:
            # A fresh crumb/cookie pair is fetched on every attempt. This is
            # inside the ``try`` so that a network error while fetching the
            # pair is retried like a failed download instead of aborting.
            crumble_str, cookie_str = get_crumble_and_cookie(symbol)
            link = quote_link.format(symbol, time_stamp_from, time_stamp_to, crumble_str)
            r = urllib2.Request(link, headers={'Cookie': cookie_str})
            response = urllib2.urlopen(r)
            try:
                text = response.read()
            finally:
                response.close()
        except urllib2.URLError:
            print("{} failed at attempt # {}".format(symbol, attempts))
            attempts += 1
            # Back off a little longer each time, but do not sleep after the
            # final attempt because nothing follows it.
            if attempts < max_attempts:
                time.sleep(2 * attempts)
        else:
            print("{} downloaded".format(symbol))
            return text
    return ""

if __name__ == '__main__':
    # Command-line entry point:
    #   --symbol=SYM --from=YYYY-MM-DD --to=YYYY-MM-DD -o OUTPUT_FILE
    from_arg = "from"
    to_arg = "to"
    symbol_arg = "symbol"
    output_arg = "o"
    opt_list = (from_arg + "=", to_arg + "=", symbol_arg + "=")
    try:
        options, args = getopt.getopt(sys.argv[1:], output_arg + ":", opt_list)
    except getopt.GetoptError as err:
        # Without parsed options there is nothing to run; stop here instead
        # of falling through to a NameError on `options`.
        print(err)
        sys.exit(2)

    # Empty defaults so that an omitted option is detected below.
    from_val = ""
    to_val = ""
    symbol_val = ""
    output_val = ""
    for opt, value in options:
        # Long options arrive as "--name", the short one as "-o".
        if opt[2:] == from_arg:
            from_val = value
        elif opt[2:] == to_arg:
            to_val = value
        elif opt[2:] == symbol_arg:
            symbol_val = value
        elif opt[1:] == output_arg:
            output_val = value

    required = (("--symbol", symbol_val), ("--from", from_val),
                ("--to", to_val), ("-o", output_val))
    missing = [name for name, val in required if not val]
    if missing:
        sys.stderr.write("missing required option(s): {}\n".format(", ".join(missing)))
        sys.exit(2)

    print("downloading {}".format(symbol_val))
    text = download_quote(symbol_val, from_val, to_val)
    if not text:
        # download_quote returns an empty string after exhausting its
        # retries; do not create an empty file and report success.
        sys.exit(1)

    with open(output_val, 'wb') as f:
        f.write(text)
    print("{} written to {}".format(symbol_val, output_val))


Andrea Galeazzi的回答非常出色;我在其基础上增加了下载拆分和分红数据的选项,并针对Python 3做了调整。

另外还做了修改,使"to:date"当天包含在返回的结果中;以前的代码返回的数据截至但不包括"to:date"。只是行为不同而已!

而且要注意,雅虎在价格取整、列顺序和拆分语法方面做了一些小的改动。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
## Downloaded from
## https://stackoverflow.com/questions/44044263/yahoo-finance-historical-data-downloader-url-is-not-working
## Modified for Python 3
## Added --event=history|div|split   default = history
## changed so"to:date" is included in the returned results
## usage: download_quote(symbol, date_from, date_to, events).decode('utf-8')

import re
from urllib.request import urlopen, Request, URLError
import calendar
import datetime
import getopt
import sys
import time

crumble_link = 'https://finance.yahoo.com/quote/{0}/history?p={0}'
crumble_regex = r'CrumbStore":{"crumb":"(.*?)"}'
cookie_regex = r'Set-Cookie: (.*?); '
quote_link = 'https://query1.finance.yahoo.com/v7/finance/download/{}?period1={}&period2={}&interval=1d&events={}&crumb={}'


def get_crumble_and_cookie(symbol):
    """Fetch the crumb token and session cookie Yahoo issues for *symbol*.

    Requests the page built from ``crumble_link``, then extracts the cookie
    from the response headers with ``cookie_regex`` and the crumb from the
    UTF-8 decoded page body with ``crumble_regex``.

    Args:
        symbol: ticker symbol substituted into ``crumble_link``.

    Returns:
        A ``(crumb, cookie)`` tuple of strings.

    Raises:
        URLError: propagated from ``urlopen``.
        ValueError: if the cookie or the crumb cannot be found in the
            response (previously an opaque ``AttributeError`` on ``None``).
    """
    link = crumble_link.format(symbol)
    # The context manager closes the connection even if reading fails.
    with urlopen(link) as response:
        header_text = str(response.info())
        text = response.read().decode("utf-8")

    cookie_match = re.search(cookie_regex, header_text)
    if cookie_match is None:
        raise ValueError("no Set-Cookie header in response from {}".format(link))

    crumb_match = re.search(crumble_regex, text)
    if crumb_match is None:
        raise ValueError("no crumb found in page {}".format(link))

    return crumb_match.group(1), cookie_match.group(1)


def download_quote(symbol, date_from, date_to, events='history'):
    """Download Yahoo Finance data for *symbol* as raw CSV bytes.

    Args:
        symbol: ticker symbol to download.
        date_from: first day of the range, formatted ``YYYY-MM-DD``.
        date_to: last day of the range, formatted ``YYYY-MM-DD``. One day is
            added before converting to a timestamp so that this day itself
            is included in the returned results.
        events: value of the ``events`` query parameter; the file header
            lists ``history``, ``div`` and ``split``. Defaults to
            ``'history'``.

    Returns:
        The undecoded response body on success, or ``b''`` if all 5
        attempts failed. Callers decode it themselves, e.g.
        ``download_quote(...).decode('utf-8')``.

    Raises:
        ValueError: if either date does not match ``YYYY-MM-DD``.
    """
    # Parse both dates up front so malformed input fails before any request.
    day_format = "%Y-%m-%d"
    first_day = datetime.datetime.strptime(date_from, day_format)
    next_day = datetime.datetime.strptime(date_to, day_format) + datetime.timedelta(days=1)
    time_stamp_from = calendar.timegm(first_day.timetuple())
    time_stamp_to = calendar.timegm(next_day.timetuple())

    max_attempts = 5
    for attempt in range(max_attempts):
        try:
            # A fresh crumb/cookie pair is fetched on every attempt. This is
            # inside the ``try`` so that a network error while fetching the
            # pair is retried like a failed download instead of aborting.
            crumble_str, cookie_str = get_crumble_and_cookie(symbol)
            link = quote_link.format(symbol, time_stamp_from, time_stamp_to, events, crumble_str)
            request = Request(link, headers={'Cookie': cookie_str})
            with urlopen(request) as response:
                text = response.read()
        except URLError:
            print("{} failed at attempt # {}".format(symbol, attempt))
            # Back off a little longer each time, but do not sleep after the
            # final attempt because nothing follows it.
            if attempt + 1 < max_attempts:
                time.sleep(2 * (attempt + 1))
        else:
            print("{} downloaded".format(symbol))
            return text
    return b''

if __name__ == '__main__':
    # Command-line entry point:
    #   --symbol=SYM --from=YYYY-MM-DD --to=YYYY-MM-DD [--event=EVENT] -o OUTPUT_FILE
    from_arg = "from"
    to_arg = "to"
    symbol_arg = "symbol"
    event_arg = "event"
    output_arg = "o"
    opt_list = (from_arg + "=", to_arg + "=", symbol_arg + "=", event_arg + "=")
    try:
        options, args = getopt.getopt(sys.argv[1:], output_arg + ":", opt_list)
    except getopt.GetoptError as err:
        # Without parsed options there is nothing to run; stop here instead
        # of falling through to a NameError on `options`.
        print(err)
        sys.exit(2)

    symbol_val = ""
    from_val = ""
    to_val = ""
    output_val = ""
    event_val = "history"
    for opt, value in options:
        # Long options arrive as "--name", the short one as "-o".
        if opt[2:] == from_arg:
            from_val = value
        elif opt[2:] == to_arg:
            to_val = value
        elif opt[2:] == symbol_arg:
            symbol_val = value
        elif opt[2:] == event_arg:
            event_val = value
        elif opt[1:] == output_arg:
            output_val = value

    # Report omitted options explicitly; otherwise the empty defaults reach
    # strptime("") or open("") and fail with an unrelated-looking error.
    required = (("--symbol", symbol_val), ("--from", from_val),
                ("--to", to_val), ("-o", output_val))
    missing = [name for name, val in required if not val]
    if missing:
        print("missing required option(s): {}".format(", ".join(missing)), file=sys.stderr)
        sys.exit(2)

    print("downloading {}".format(symbol_val))
    text = download_quote(symbol_val, from_val, to_val, event_val)
    if not text:
        # download_quote returns b'' after exhausting its retries; signal
        # the failure to the calling process instead of exiting with 0.
        sys.exit(1)

    with open(output_val, 'wb') as f:
        f.write(text)
    print("{} written to {}".format(symbol_val, output_val))


我已经让它工作了,现在只需要解析csv。因为我在语法上遇到过麻烦,所以想把代码分享出来。

1
2
3
4
5
6
7
8
9
10
11
12
' Placeholder crumb/cookie pair; substitute the values obtained from Yahoo.
Dim crumb As String
Dim cookie As String
crumb = "xxxx"
cookie = "yyyy"

' Download URL for SIRI with a fixed period1/period2 range, daily interval,
' price history events, and the crumb appended as the last query parameter.
Dim urlStock As String
urlStock = "https://query1.finance.yahoo.com/v7/finance/download/SIRI?"
urlStock = urlStock & "period1=1274158800&"
urlStock = urlStock & "period2=1495059477&"
urlStock = urlStock & "interval=1d&events=history&crumb=" & crumb

' Synchronous GET (third argument False) carrying the cookie as a request header.
Dim http As MSXML2.XMLHTTP
Set http = New MSXML2.ServerXMLHTTP
With http
    .Open "GET", urlStock, False
    .setRequestHeader "Cookie", cookie
    .send
End With


我写了一个轻量级的脚本,它汇集了这个线程中的许多建议来解决这个问题。https://github.com/andrewrporter/yahoo-historical

但是,还有更好的解决方案,如https://github.com/ranaroussi/fix-yahoo-finance

希望这些资源有帮助!


您可以在Chrome中手动获取并保存crumb/cookie对,也可以使用类似的方法通过代码生成它。然后,只需在Java中设置Cookie请求头,并在URL中传入对应的crumb。


回答很好,Andrea,我已经在您的代码中添加了允许下载多个股票的代码。(Python2.7)

文件1:down.py

1
2
3
4
5
6
7
8
9
import os

import subprocess

# ticker.csv holds one ticker symbol per line. The file is closed as soon as
# it has been read, and blank lines are dropped so that no download is
# started with an empty symbol.
with open("ticker.csv", "r") as myfile:
    tickers = [line.strip() for line in myfile]

for ticker in tickers:
    if not ticker:
        continue
    # Pass the arguments as a list (no shell) so that whitespace or shell
    # metacharacters in a ticker line cannot alter the command being run.
    cmd = [
        "python", "get_quote_history.py",
        "--symbol=%s" % ticker,
        "--from=2017-01-01",
        "--to=2017-05-25",
        "-o", "%s.csv" % ticker,
    ]
    subprocess.call(cmd)

文件2:ticker.csv(每行一个股票代码),例如:AAPL、MSFT

文件3:get_quote_history.py


我在Excel/VBA中为此问题开发了以下解决方案。关键的挑战是创建crumb/cookie对。一旦创建了它,你就可以重新使用它来调用雅虎的历史价格。

下面是获取crumb/cookie的关键代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
Sub GetYahooRequest(strCrumb As String, strCookie As String)
    ' Sends a sample request to Yahoo and stores the resulting crumb and
    ' cookie in the two arguments; both are assigned by this routine.
    '   strCrumb  - receives strExtractCrumb applied to the response text
    '   strCookie - receives the part of the Set-Cookie response header
    '               before its first ";"

    Const SAMPLE_URL As String = "https://finance.yahoo.com/lookup?s=%7B0%7D"

    Dim yahooRequest As WinHttp.WinHttpRequest
    Set yahooRequest = New WinHttp.WinHttpRequest

    ' Opened asynchronously (third argument True), so wait for the response
    ' before reading anything from it.
    yahooRequest.Open "GET", SAMPLE_URL, True
    yahooRequest.setRequestHeader "Content-Type", "application/x-www-form-urlencoded; charset=UTF-8"
    yahooRequest.send
    yahooRequest.waitForResponse

    strCrumb = strExtractCrumb(yahooRequest.responseText)

    Dim cookieParts As Variant
    cookieParts = Split(yahooRequest.getResponseHeader("Set-Cookie"), ";")
    strCookie = cookieParts(0)

End Sub

有关演示如何提取雅虎历史价格的Excel工作簿示例,请参阅我的网站上的以下雅虎历史价格摘录。