"""Threaded "download a CSV, convert it to XML" demos.

Two versions of the same exercise live in this file:

* Version 1 downloads stock tables from Yahoo Finance (``download``,
  ``csvToxml``, ``handle``) and shows two ways of starting threads.
* Version 2 downloads paged price data from Intrinio (``download_csv``,
  ``csv_to_xml``, ``download_and_save``) and times the threaded run.
"""
import base64
import csv
import time
from io import StringIO
from threading import Thread
from xml.etree.ElementTree import Element, ElementTree, SubElement

import requests

from xml_pretty import pretty


# ---------------------------------------------------------------------------
# Version 1: Yahoo Finance stock tables
# ---------------------------------------------------------------------------

def download(url):
    """Fetch *url* and return its body as an in-memory text file.

    Returns:
        A ``StringIO`` holding the response text when the response status
        is OK, otherwise ``None``.
    """
    response = requests.get(url, timeout=3)
    if response.ok:
        # csv.reader consumes text, so wrap the decoded body, not raw bytes.
        return StringIO(response.text)
    return None


def csvToxml(scsv, fxml):
    """Convert CSV text read from *scsv* into XML written to *fxml*.

    The first CSV row supplies the tag names; every following row becomes
    a ``<Row>`` element under a ``<Data>`` root.

    Args:
        scsv: Iterable of CSV text lines (e.g. the result of ``download``).
        fxml: Path or binary file object accepted by ``ElementTree.write``.
    """
    reader = csv.reader(scsv)
    # Spaces are removed from the header names so they can be used as XML
    # tags. An empty input yields no headers and therefore an empty <Data>.
    # NOTE(review): the replace() arguments were lost in the original text;
    # (' ', '') is the reconstruction - confirm.
    headers = [h.replace(' ', '') for h in next(reader, [])]

    root = Element('Data')
    for row in reader:
        row_elem = SubElement(root, 'Row')
        for tag, text in zip(headers, row):
            cell = SubElement(row_elem, tag)
            cell.text = text

    pretty(root)
    ElementTree(root).write(fxml)


def handle(sid):
    """Download the table for stock *sid* and save it as ``<code>.xml``.

    *sid* is an integer stock id; it is left-padded with zeros to six
    characters to form both the query symbol and the output file name.
    Nothing is written when the download fails.
    """
    print('Download...(%d)' % sid)
    code = str(sid).rjust(6, '0')
    url = 'http://table.finance.yahoo.com/table.csv?s=%s.sz' % code
    rf = download(url)
    if rf is None:
        return

    print('convert to xml...(%d)' % sid)
    fname = code + '.xml'
    with open(fname, 'wb') as wf:
        csvToxml(rf, wf)


# Approach 1: hand the target function to a Thread object.
t = Thread(target=handle, args=(1,))  # thread that processes the first stock
t.start()  # must be *called*; a bare ``t.start`` never runs the thread


# Approach 2: subclass Thread and override run().
class MyThread(Thread):
    """Thread that runs ``handle`` for a single stock id."""

    def __init__(self, sid):
        Thread.__init__(self)  # call the parent constructor
        self.sid = sid

    def run(self):
        handle(self.sid)


threads = []
for i in range(1, 11):
    t = MyThread(i)
    threads.append(t)
    t.start()

for t in threads:
    # join() blocks until the child thread exits: the main thread does not
    # move on (so the print below does not happen) until run() has finished.
    t.join()

print('main thread')

# I/O-bound work is like a supermarket ordering stock, e.g. the download
# step above.
# CPU-bound work is like hauling the goods around, e.g. converting the CSV
# into an XML file.
# Python threads are a poor fit for CPU-intensive work because of the
# global interpreter lock (GIL); they are only suitable for I/O-bound work.

if __name__ == '__main__':
    url = 'http://table.finance.yahoo.com/table.csv?s=000001.sz'
    rf = download(url)
    if rf:
        with open('000001.xml', 'wb') as wf:
            csvToxml(rf, wf)


# ---------------------------------------------------------------------------
# Version 2: Intrinio price pages
# ---------------------------------------------------------------------------

# NOTE(review): credential committed in source - consider loading it from
# the environment or a config file instead.
apikey = 'OjZlY2MzYTQwNGVlMTI3Y2VkYjMyYTZiNzJiYzdlOTFk'


def download_csv(page_number):
    """Download one page of AAPL prices as CSV text.

    Returns:
        A ``StringIO`` with the response text when the response status is
        OK; ``None`` when it is not OK or the request timed out.
    """
    print('download csv data [page=%s]' % page_number)
    url = (
        'https://api.intrinio.com/prices.csv'
        '?api_key=%s&identifier=AAPL&page_size=20&page_number=%s'
        '&start_date=2017-09-28&end_date=2020-09-28'
    ) % (apikey, page_number)
    # Alternative kept from the original: send the key as a header instead.
    # auth = b'Basic ' + base64.b64encode(b'%s' % apikey)
    # headers = {'Authorization' : auth}
    try:
        # Without a timeout a stalled connection would block this thread,
        # and the join() in the main thread, forever.
        response = requests.get(url, timeout=10)
    except requests.exceptions.Timeout:
        return None
    if response.ok:
        return StringIO(response.text)
    return None


def csv_to_xml(csv_file, xml_path):
    """Convert CSV text from *csv_file* into an indented XML file.

    The first CSV row supplies the tag names; every following row becomes
    a ``<Row>`` element under a ``<Data>`` root. Indentation is produced by
    hand through the ``text``/``tail`` whitespace of each element.

    Args:
        csv_file: Iterable of CSV text lines.
        xml_path: Destination passed to ``ElementTree.write``.
    """
    print('Convert csv data to %s' % xml_path)
    reader = csv.reader(csv_file)
    headers = next(reader, [])  # empty input -> no headers, empty <Data>

    root = Element('Data')
    root.text = '\n\t'
    root.tail = '\n'
    for row in reader:
        book = SubElement(root, 'Row')
        book.text = '\n\t\t'
        book.tail = '\n\t'

        last = None
        for tag, text in zip(headers, row):
            last = SubElement(book, tag)
            last.text = text
            last.tail = '\n\t\t'
        # The last child is followed by the closing </Row>, which sits one
        # level shallower. Rows without any cell have no child to adjust.
        if last is not None:
            last.tail = '\n\t'

    ElementTree(root).write(xml_path, encoding='utf8')


def download_and_save(page_number, xml_path):
    """Download page *page_number* and write it to *xml_path* as XML.

    The download is retried until it succeeds; there is no retry limit.
    """
    # I/O-bound part
    csv_file = None
    while csv_file is None:
        csv_file = download_csv(page_number)
    # CPU-bound part; write to the path the caller asked for.
    csv_to_xml(csv_file, xml_path)


# NOTE: this rebinds the name ``MyThread`` used by version 1 above.
class MyThread(Thread):
    """Thread that downloads one page and saves it as XML."""

    def __init__(self, page_number, xml_path):
        super().__init__()
        self.page_number = page_number
        self.xml_path = xml_path

    def run(self):
        download_and_save(self.page_number, self.xml_path)


if __name__ == '__main__':
    t0 = time.time()

    thread_list = []
    for i in range(1, 6):
        t = MyThread(i, 'data%s.xml' % i)
        t.start()
        thread_list.append(t)

    for t in thread_list:
        t.join()

    # Sequential variant, for comparing the elapsed time:
    # for i in range(1, 6):
    #     download_and_save(i, 'data%s.xml' % i)

    print(time.time() - t0)
    print('main thread end.')
|
|
2020-11-10 15:18:20
评论
举报
|
|
|