用Python编写一个B树类,实现以下功能:读取“电影评分数据.xlsx”的“Split Rows”页,列名分别为“Title”、“Overview”、“genre”、“Vote Average”、“Vote Count”;根据电影评分记录中的“Vote Average”属性值构建B树索引,并基于该B树索引实现对电影评分“Vote Average”的范围查询。如果评分“Vote Average”相同,则根据“Vote Count”属性值降序排序。把排序后范围内的电影的所有信息(包括“Title”、“Overview”、“genre”、“Vote Average”、“Vote Count”)保存到“result_3.csv”里。例如:查询所有评分在5.5到6.7之间的电影。
时间: 2023-10-22 10:07:52 浏览: 90
[i][0]:
i += 1
if i < len(x.keys) and k == x.keys[i][0]:
return x.keys[i]
elif x.leaf:
return None
else:
return self.search(k, x.child[i])
else以下是一个使用Python编写的B树类,实现了您所描述的功能:
```python
import pandas as pd
:
return self.search(k, self.root)
def range_query(self, start, end, x=None):
if isinstance(x, Bclass BTreeNode:
def __init__(self, leaf=False):
self.leaf = leaf
self.keys = []
self.childTreeNode):
results = []
i = 0
while i < len(x.keys) and start > x.keys[i][0]:
= []
class BTree:
def __init__(self, t):
self.root = BTreeNode(True)
self.t = t
i += 1
if x.leaf:
while i < len(x.keys) and start <= x.keys[i][0] def insert(self, k):
root = self.root
if len(root.keys) == (2 * self.t) - 1 <= end:
results.append(x.keys[i])
i += 1
elif i < len(x.keys) and start <= x.keys:
temp = BTreeNode()
self.root = temp
temp.child.insert(0, root)
self.split_child(temp, [i][0]:
results.extend(self.range_query(start, end, x.child[i]))
if i < len(x.child):
results.extend0)
self.insert_non_full(temp, k)
else:
self.insert_non_full(root, k)
def insert_non_full(self(self.range_query(start, end, x.child[i]))
return results
else:
return self.range_query(start, end, self.root, x, k):
i = len(x.keys) - 1
if x.leaf:
x.keys.append((None,)
def create_btree(file_path):
# 读取电影评分数据
df = pd.read_excel(file_path, sheet None, None, k))
x.keys.sort(key=lambda tup: (tup[3], -tup[4]), reverse=True)
_name='Split Rows')
movie_records = df[['Title', 'Overview', 'genre', 'Vote Average', 'Vote Count']].values.tolist else:
while i >= 0 and k < x.keys[i][3]:
i -= 1
i += 1()
# 创建B树索引
btree = BTree(t=3) # 设置B树的度为3
if len(x.child[i].keys) == (2 * self.t) - 1:
self.split_child(x, i)
for record in movie_records:
key = (record[3], record[4]) # 评分和评分数量组 if k > x.keys[i][3]:
i += 1
self.insert_non_full(x.child[i], k)
def成键值
btree.insert(key)
return btree
def save_results(results, file_path):
with open(file_path, split_child(self, x, i):
t = self.t
y = x.child[i]
z = BTreeNode(y.leaf 'w', newline='', encoding='utf-8') as csvfile:
writer = csv.writer(csvfile)
writer.writerow(['Title', ')
x.child.insert(i + 1, z)
x.keys.insert(i, y.keys[t - 1])
z.keys =Overview', 'genre', 'Vote Average', 'Vote Count'])
writer.writerows(results)
if __name__ == '__main__':
b y.keys[t: (2 * t) - 1]
y.keys = y.keys[0: t - 1]
iftree = create_btree('电影评分数据.xlsx')
results = btree.range_query(5.5, 6.7 not y.leaf:
z.child = y.child[t: 2 * t]
y.child = y.child[0: t -)
save_results(results, 'result_3.csv')
```
请确保在运行代码之前安装了以下依赖 1]
def range_query(self, x, min_val, max_val, result):
i = 0
while i <库:
- pandas(读取 .xlsx 文件还需要安装 openpyxl)
- csv(Python 标准库自带,无需单独安装)
该代码会读取名为"电影评分数据.xlsx"的文件中的"Split len(x.keys) and min_val > x.keys[i][3]:
i += 1
if x.leaf:
while i Rows"页,提取相关列的数据并构建B树索引。然后,它将执行范围查询,找 < len(x.keys) and min_val <= x.keys[i][3] <= max_val:
result.append(x.keys[i])
i +=到评分在指定范围内的电影,并将结果保存在名为"result_3.csv"的文件中。
阅读全文