import time
from concurrent import futures


def sum_range(start, end):
    """Return the sum of every integer in the half-open range [start, end)."""
    acc = 0
    for n in range(start, end):
        acc += n
    return acc


if __name__ == '__main__':
    # --- Parallel run: split the range across two worker processes. ---
    t0 = time.time()
    with futures.ProcessPoolExecutor(2) as executor:
        first_half = executor.submit(sum_range, 1, 50000001)
        second_half = executor.submit(sum_range, 50000001, 100000001)
        # .result() blocks until each worker finishes; add the partial sums.
        total = first_half.result() + second_half.result()
    t1 = time.time()
    print("并行总和为:", total)
    print("并行用时:", t1 - t0, "秒")

    # --- Serial run: one direct call over the full range, for comparison. ---
    t0 = time.time()
    serial_total = sum_range(1, 100000001)
    t1 = time.time()
    print("串行总和为:", serial_total)
    print("串行用时:", t1 - t0, "秒")
时间: 2023-11-12 10:09:03 浏览: 62
这是一个Python程序,用于演示并行计算和串行计算的效率比较。程序中定义了一个函数`sum_range`,它会计算从`start`到`end`的整数的总和。在主函数中,我们使用`concurrent.futures`模块中的`ProcessPoolExecutor`来创建一个进程池,然后使用`executor.submit`方法提交两个任务,每个任务计算一半的整数和。最后,我们等待两个任务完成,并将它们的结果相加得到最终的总和。同时我们也计算串行计算的结果和用时。
这个程序可以帮助我们比较并行计算和串行计算的效率,因为并行计算可以同时利用多个CPU核心来加速计算,从而提高计算速度。
相关问题
import time
from concurrent import futures


# Define a function that sums the integers in a given range.
def sum_range(start, end):
    """Return the sum of all integers i with start <= i < end."""
    total = 0
    # Walk the range and accumulate.
    for i in range(start, end):
        total += i
    return total


# BUG FIX: the original read `if name == 'main':`, which raises NameError at
# runtime — the dunder underscores were stripped by markdown rendering.
if __name__ == '__main__':
    start_time = time.time()
    # Create a process pool running at most 2 worker processes at once.
    with futures.ProcessPoolExecutor(2) as executor:
        # Submit the two half-range tasks to the pool.
        task1 = executor.submit(sum_range, 1, 50000001)
        task2 = executor.submit(sum_range, 50000001, 100000001)
        # Fetch both task results and add them together.
        result = [task1.result(), task2.result()]
        total = sum(result)
    end_time = time.time()
    # Report the parallel total and elapsed time.
    print("并行总和为:", total)
    print("并行用时:", end_time - start_time, "秒")
    start_time = time.time()
    # Serial baseline: a single direct call over the whole range.
    result = sum_range(1, 100000001)
    end_time = time.time()
    # Report the serial total and elapsed time.
    print("串行总和为:", result)
    print("串行用时:", end_time - start_time, "秒")
# (Original follow-up question on the page: analyse the flow of the program above.)
这个程序是一个并行计算整数和的示例,具体流程如下:
1. 导入time和futures模块。
2. 定义一个计算某一范围内整数和的函数sum_range,遍历范围内的整数并相加,返回总和。
3. 在主程序中,获取程序开始时间。
4. 创建一个进程池,最多同时执行2个进程,将任务1和任务2提交到进程池中并执行。
5. 获取任务1和任务2的结果并相加,得到并行计算的总和。
6. 输出并行计算结果和用时。
7. 计算串行计算的整数和,输出串行计算结果和用时。
8. 程序结束。
在这个示例中,使用了Python的concurrent.futures模块实现了并行计算,这个模块提供了线程池和进程池两种并发机制,可以方便地进行并行计算。同时,这个示例也展示了并行计算的速度优势,比串行计算更快。
import requests
from bs4 import BeautifulSoup
from threading import Thread


def crawl_books(start, end):
    """Crawl Dangdang search-result pages [start, end) and print each book's
    title, author, price and review count."""
    session = requests.Session()
    for i in range(start, end):
        # Search URL (percent-encoded keyword), one result page per i.
        url = 'http://search.dangdang.com/?key=%BC%C6%CB%E3%BB%FA&act=input&page_index={}'.format(i)
        try:
            response = session.get(url, timeout=10)
        except requests.exceptions.Timeout:
            print('Timeout occurred when accessing: ' + url)
            continue  # skip this result page on timeout
        page = response.text
        soup = BeautifulSoup(page, 'lxml')
        # NOTE(review): find() returns None if the result list is absent; the
        # next line would then raise AttributeError — confirm the page always
        # contains <ul class="bigimg">.
        books = soup.find('ul', class_='bigimg')
        for book in books.find_all('li'):
            title = book.find('a', class_='pic').get('title')
            author = book.find('p', class_='search_book_author').text
            price = book.find('p', class_='price').find('span', class_='search_now_price').text
            # Detail-page link; 'https:' is prepended, so the href presumably
            # lacks a scheme — verify against the actual markup.
            book_link = 'https:' + book.find('a', class_='pic').get('href')
            try:
                response = session.get(book_link, timeout=10)
            except requests.exceptions.Timeout:
                print('Timeout occurred when accessing: ' + book_link)
                continue  # skip this book on timeout
            page = response.text
            soup = BeautifulSoup(page, 'lxml')
            comment_num_tag = soup.find('a', class_='review_num')
            if comment_num_tag:
                comment_num = comment_num_tag.text
            else:
                comment_num = '0'  # no review link found => report zero
            print(title, author, price, comment_num)


# Spawn one thread per 10-page slice (pages 1-100), then wait for all of them.
threads = []
for i in range(1, 101, 10):
    t = Thread(target=crawl_books, args=(i, i+10))
    threads.append(t)
    t.start()
for t in threads:
    t.join()
# Follow-up question from the page: "optimise once more so that the review
# counts can actually be obtained."
可以使用线程池来优化代码,使用`concurrent.futures`模块中的`ThreadPoolExecutor`类来实现线程池。在每个任务中获取评论数量后,将其保存到一个列表中,在所有任务执行完后,统计评论数量的总和。代码如下:
```python
import requests
from bs4 import BeautifulSoup
from concurrent.futures import ThreadPoolExecutor
def crawl_books(start, end):
    """Crawl Dangdang search-result pages [start, end) and collect review counts.

    For every book on each result page, prints the title, author, price and
    review count, and returns the list of numeric review counts.
    """
    session = requests.Session()
    comments = []  # review counts gathered across all pages in this range
    for i in range(start, end):
        # Search URL (percent-encoded keyword), one result page per i.
        url = 'http://search.dangdang.com/?key=%BC%C6%CB%E3%BB%FA&act=input&page_index={}'.format(i)
        try:
            response = session.get(url, timeout=10)
        except requests.exceptions.Timeout:
            print('Timeout occurred when accessing: ' + url)
            continue  # skip this result page on timeout
        page = response.text
        soup = BeautifulSoup(page, 'lxml')
        # NOTE(review): find() returns None if the result list is absent; the
        # next line would then raise AttributeError — confirm the page always
        # contains <ul class="bigimg">.
        books = soup.find('ul', class_='bigimg')
        for book in books.find_all('li'):
            title = book.find('a', class_='pic').get('title')
            author = book.find('p', class_='search_book_author').text
            price = book.find('p', class_='price').find('span', class_='search_now_price').text
            # Detail-page link; 'https:' is prepended, so the href presumably
            # lacks a scheme — verify against the actual markup.
            book_link = 'https:' + book.find('a', class_='pic').get('href')
            try:
                response = session.get(book_link, timeout=10)
            except requests.exceptions.Timeout:
                print('Timeout occurred when accessing: ' + book_link)
                continue  # skip this book on timeout
            page = response.text
            soup = BeautifulSoup(page, 'lxml')
            comment_num_tag = soup.find('a', class_='review_num')
            if comment_num_tag:
                # NOTE(review): assumes the tag text is purely numeric; int()
                # raises ValueError on text such as '1234条评论' — confirm.
                comment_num = int(comment_num_tag.text)
            else:
                comment_num = 0  # no review link found => treat as zero reviews
            comments.append(comment_num)
            print(title, author, price, comment_num)
    return comments
if __name__ == '__main__':
    # BUG FIX: crawl_books takes (start, end), but the original call
    #   executor.map(crawl_books, range(1, 101, 10))
    # supplied only ONE iterable, so every task failed with
    # "missing 1 required positional argument: 'end'".
    # Executor.map zips its iterables, so provide the matching end points too:
    # (1, 11), (11, 21), ..., (91, 101) — same 10-page slices as before.
    starts = range(1, 101, 10)
    ends = range(11, 111, 10)
    with ThreadPoolExecutor(max_workers=10) as executor:
        # Each task crawls one 10-page slice and returns its comment counts.
        results = executor.map(crawl_books, starts, ends)
        # Flatten the per-task lists into one list of comment counts.
        comments = [num for result in results for num in result]
    total_comments = sum(comments)
    print('总评论数:', total_comments)
```
这样可以更加高效地爬取数据,并且得到评论数量的总和。
阅读全文