多线程批量下载ASCAT风场数据

FTP 无法访问,只能通过 HTTP 接口下载。
主页:http://www.remss.com/missions/ascat/
说明:http://data.remss.com/ascat/metopa/readme_ASCAT_metopA.txt
bytemap格式数据:http://data.remss.com/ascat/metopa/bmaps_v02.1/

from bs4 import BeautifulSoup
import requests
import threading
import os
import time

# Upper bound compared against the number of live threads when throttling starts.
threadslimit = 50
# Number of downloads started so far; download() increments it to label its output.
# (A module-level ``global`` statement has no effect, so none is used here.)
count = 0

# Host serving the data files.
domain = r"http://data.remss.com"
# Path prefix of the monthly index pages; a two-digit month number is appended.
urlprefix = r"/ascat/metopa/bmaps_v02.1/y2018/m"

# Serializes updates to the shared download counter across worker threads.
_count_lock = threading.Lock()


def download(url):
    """Download one file over HTTP into the ``download`` folder under the CWD.

    The file is stored under the last path segment of ``url``. Start and
    completion are reported on stdout, tagged with a sequence number taken
    from the module-level ``count``.

    Args:
        url: Absolute HTTP URL of the file to fetch.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection errors or timeouts.
        OSError: If the target directory or file cannot be created or written.
    """
    global count
    # Increment-then-read is not atomic; without the lock two threads can end
    # up reporting the same sequence number.
    with _count_lock:
        count += 1
        task_id = count
    filename = url.split('/')[-1]
    print("【%03d】Downloading: %s" % (task_id, filename))
    target_dir = os.path.join(os.getcwd(), 'download')
    # Create the output folder on demand instead of failing when it is missing.
    os.makedirs(target_dir, exist_ok=True)
    # The timeout keeps a stalled connection from blocking this worker forever;
    # the context managers release the connection and file even on errors.
    with requests.get(url, stream=True, timeout=60) as r:
        # Refuse to save an HTTP error page under the data file's name.
        r.raise_for_status()
        with open(os.path.join(target_dir, filename), 'wb') as f:
            for chunk in r.iter_content(chunk_size=1024 * 1024):
                if chunk:
                    f.write(chunk)
    print("【%03d】download completed." % task_id)
  
# Build one (not yet started) download thread per matching link found on the
# twelve monthly index pages.
threads = []
for m in range(1, 13):
    # Index page for month m, e.g. ".../y2018/m01".
    url = domain + urlprefix + "%02d" % m
    # The context manager closes the response even if parsing fails; the
    # timeout keeps a stalled listing request from hanging the whole script.
    with requests.get(url, timeout=60) as r:
        r.encoding = "utf-8"
        # Name the parser explicitly so the result does not depend on which
        # optional parsers are installed (and bs4 does not warn about guessing).
        soup = BeautifulSoup(r.text, "html.parser")
    print(m)
    # "a[href]" skips anchors without an href, which would raise KeyError below.
    for item in soup.select("a[href]"):
        if item['href'].endswith('1.gz'):
            # The href is appended to the domain as-is.
            downlink = domain + item['href']
            threads.append(threading.Thread(target=download, args=(downlink, )))

# Start the prepared threads one by one, pausing after each start for as long
# as the number of live threads exceeds threadslimit.
print('Threads initialization complited, downloading...')
for worker in threads:
    worker.start()
    # Re-check every 100 ms until the live-thread count is within the limit.
    while len(threading.enumerate()) > threadslimit:
        time.sleep(0.1)

发表评论

电子邮件地址不会被公开。