Python 默认以单线程执行代码。对于 I/O 密集型任务(如网络请求、等待响应),多线程可以显著减少程序的运行时间。通常的串行写法是:
# Serial baseline: call `function` on each element, one after another.
for iteratee in iterator:
    function(iteratee)
如果不关心迭代器中各元素的执行顺序,可以改用线程池并发执行:
from multiprocessing.dummy import Pool
# Thread pool: `threads_num` worker threads share the calls to `function`.
# The `with` block shuts the pool down once map() has returned.
with Pool(threads_num) as p:
    p.map(function, iterator)
线程并发进行,执行顺序与迭代器中元素的顺序无关;但 `map` 的返回结果仍按迭代器的顺序排列。下面的例子对比了两种写法:
import time
import numpy as np
from multiprocessing.dummy import Pool
# Work items for the demo: the five letters 'a' .. 'e'.
iterator = [*'abcde']
# Seed NumPy's global random generator before any pause length is drawn.
np.random.seed(0)
def pause_print_return(iteratee, max_pause: float = 1.0):
    """Sleep for a random time, print ``iteratee``, then return it.

    The pause stands in for a slow, blocking operation so that the serial and
    threaded versions can be compared.

    Args:
        iteratee: The item to print and hand back unchanged.
        max_pause: Upper bound, in seconds, of the random pause. The pause is
            drawn uniformly from ``[0, max_pause)``; the default of 1.0 keeps
            the original behaviour.

    Returns:
        ``iteratee`` itself.
    """
    # np.random.rand() yields a float in [0, 1); scale it to the requested bound.
    time.sleep(np.random.rand() * max_pause)
    print(iteratee)
    return iteratee
# %%
# foo
# Threaded version: 16 worker threads call pause_print_return concurrently.
with Pool(16) as p:
    # map() returns the results in input order, whatever order the threads
    # happen to finish (and print) in.
    return_result = p.map(pause_print_return, iterator)
# %%
# bar
# Serial version of the same work, for comparison with foo.
# The accumulator must be the list that is appended to: start from an empty
# `return_result` so this cell does not extend the list left over from foo.
return_result = []
for iteratee in iterator:
    return_result.append(pause_print_return(iteratee))
| code block | execute time | output | return_result |
|---|---|---|---|
| foo | 726ms | edacb | ['a', 'b', 'c', 'd', 'e'] |
| bar | 2.88s | abcde | ['a', 'b', 'c', 'd', 'e'] |
通过对比可见:多线程下的执行(打印)顺序与迭代器的顺序无关,但返回结果的顺序与迭代器一致,且总耗时明显减少。
应用1:爬虫
import requests
import pandas as pd
from multiprocessing.dummy import Pool
# Page numbers 1..20 as plain ints (matches crawler's `page: int`); a range
# needs no NumPy, which this snippet does not import.
pages = range(1, 21)
def crawler(page: int, timeout: float = 10.0) -> pd.DataFrame:
    """Download one page of the report list and return its first table.

    Args:
        page: Page number, substituted into the ``p`` query parameter.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block its worker thread
            indefinitely.

    Returns:
        The first HTML table found on the page, with rows containing missing
        values dropped.

    Raises:
        requests.RequestException: On connection failure, timeout, or an HTTP
            error status.
        ValueError: If pandas finds no table in the page.
    """
    # Local import keeps this snippet self-contained.
    from io import StringIO

    # NOTE(review): 'lastest' is kept exactly as in the original URL;
    # presumably it is the site's own spelling. Confirm before changing it.
    html = requests.get(
        f'http://stock.finance.sina.com.cn/stock/go.php/vReport_List/kind/lastest/index.phtml?p={page}',
        timeout=timeout,
    )
    # Fail with a clear HTTP error rather than an obscure parsing error.
    html.raise_for_status()
    # Passing literal HTML to read_html is deprecated; wrap it in a file-like
    # object instead.
    dataframe = pd.read_html(StringIO(html.text))[0].dropna()
    return dataframe
# Fetch all pages concurrently; map() returns the frames in page order.
with Pool(16) as p:
    frames = p.map(crawler, pages)
# Stack the per-page tables into one frame indexed by the '发布日期' column.
result = pd.concat(frames).set_index('发布日期')
应用2:网络扫描
import telnetlib
from multiprocessing.dummy import Pool
def get_ip_status(ip, port=8, timeout=3.0):
    """Print a message if a TCP port on ``ip`` accepts a connection.

    Prints ``'<ip> port <port> is open'`` when the connection succeeds and
    prints nothing otherwise.

    Args:
        ip: Host name or address to probe.
        port: TCP port to probe. Defaults to 8, the value the original code
            hard-coded.
        timeout: Seconds to wait for the connection attempt, so a host that
            drops packets silently cannot stall a worker thread.

    Returns:
        None.
    """
    # NOTE: telnetlib is deprecated since Python 3.11 and removed in 3.13
    # (PEP 594); socket.create_connection() is the long-term replacement.
    server = telnetlib.Telnet()
    try:
        server.open(ip, port, timeout)
    except OSError:
        # Refused, timed-out and unreachable hosts all surface as OSError.
        # A closed port is an expected scan result, not a failure: letting it
        # propagate would abort the whole Pool.map() run.
        pass
    else:
        print(f'{ip} port {port} is open')
    finally:
        server.close()
# Candidate addresses 122.6.186.0 through 122.6.186.255.
host = [f'122.6.186.{i}' for i in range(256)]
# Probe the addresses with 16 worker threads.
with Pool(16) as p:
    p.map(get_ip_status, host)
除此之外,更优雅的多线程工具是标准库中的 `concurrent.futures.ThreadPoolExecutor`。
于2022-08-13修改