import ast
import json
from dataclasses import dataclass
from typing import List

import faiss
import numpy as np
import pandas as pd
from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

# Example query payload that appeared in the original snippet:
# ["text", "六十一岁还能办什么保险"]


@dataclass
class FAQ:
    """One FAQ entry: a question title, its similar questions, and the answer."""

    title: str
    sim_questions: List[str]
    answer: str
    faq_id: int


# ---------------------------------------------------------------------------
# 1. Load the raw Q&A pairs and keep the first row seen for each title.
# ---------------------------------------------------------------------------
ori_data = pd.read_csv('baoxianzhidao_filter.csv')
data = []
exist_titles = set()
# `row_idx` is the positional row number; it doubles as the FAQ id. It is
# deliberately NOT called `index`, because that name is used for the faiss
# index further down.
for row_idx, (_, row_dict) in enumerate(ori_data.iterrows()):
    title = row_dict['title']
    if title in exist_titles:
        continue
    data.append(FAQ(title=title, answer=row_dict['reply'],
                    sim_questions=[title], faq_id=row_idx))
    exist_titles.add(title)

# faq_id -> FAQ, so the answer lookup in ask_faq() is a dict access rather
# than a scan over every entry.
faq_by_id = {faq.faq_id: faq for faq in data}

# ---------------------------------------------------------------------------
# 2. Embed every FAQ title in batches of `bsz`.
# ---------------------------------------------------------------------------
pipeline_ins = pipeline(Tasks.faq_question_answering,
                        'damo/nlp_mgimn_faq-question-answering_chinese-base')
bsz = 32
sentence_list = [faq.title for faq in data]
all_sentence_vecs = []
for start in range(0, len(sentence_list), bsz):
    batch = sentence_list[start:start + bsz]
    all_sentence_vecs.extend(pipeline_ins.get_sentence_embedding(batch))

# ---------------------------------------------------------------------------
# 3. Build the inner-product index over the title embeddings.
# ---------------------------------------------------------------------------
vecs = np.asarray(all_sentence_vecs, dtype='float32')
if vecs.ndim != 2 or vecs.shape[0] == 0:
    raise ValueError(
        'No FAQ embeddings were produced; check baoxianzhidao_filter.csv')
# NOTE(review): the right-hand side of `hidden_size = ...` was lost in the
# original snippet (only the commented-out alternative
# `pipeline_ins.model.bert.config.hidden_size` survived). Taking the width
# from the embedding matrix itself avoids depending on model internals.
hidden_size = vecs.shape[1]
index = faiss.IndexFlatIP(hidden_size)
index.add(vecs)


def ask_faq(input, history=None, top_k=30):
    """Answer a question by dense retrieval followed by FAQ ranking.

    Args:
        input: The user's question. (The name shadows the builtin but is kept
            so existing keyword callers keep working.)
        history: List to append the formatted answer to. A new list is
            created when omitted, so separate calls no longer share state
            through a mutable default argument.
        top_k: Number of candidate FAQs retrieved from the index before
            ranking. Capped at the number of indexed entries.

    Returns:
        ``history`` with one string appended, formatted as
        ``"<answer>|(pred_title:<title>,pred_score:<score>)"``. Answer and
        title are empty strings if the predicted label matches no FAQ.
    """
    if history is None:
        history = []

    # step1: get sentence vector of query
    query_vec = pipeline_ins.get_sentence_embedding([input])[0]
    query_vec = np.asarray(query_vec, dtype='float32').reshape([1, -1])

    # step2: faq dense retrieval
    # NOTE(review): the call expression was missing in the original snippet
    # (`_, indices =, k=30)`); restored as a search on the index built above.
    k = min(top_k, index.ntotal)
    _, indices = index.search(query_vec, k=k)

    # step3: build support set
    support_set = []
    for i in indices.tolist()[0]:
        if i < 0:
            # faiss pads missing neighbours with -1; data[-1] would silently
            # pick the last FAQ, so skip those slots.
            continue
        faq = data[i]
        support_set.append({"text": faq.title, "label": faq.faq_id, "index": i})

    # step4: faq ranking
    rst = pipeline_ins(input={"query_set": input, "support_set": support_set})
    rst = rst[OutputKeys.OUTPUT][0][0]
    pred_label = rst['label']
    pred_score = rst['score']

    # get answer by faq_id
    matched = faq_by_id.get(pred_label)
    pred_answer = matched.answer if matched is not None else ""
    pred_title = matched.title if matched is not None else ""

    history.append((f'{pred_answer}|(pred_title:{pred_title},pred_score:{pred_score:.3f})'))
    return history

# Request that accompanied this snippet on the page: "优化这段代码"
# (i.e. "optimize this code").
时间: 2023-06-04 18:04:28 浏览: 240
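这段代码是一个 Python 脚本:先用 pandas 读取 CSV 文件中的保险问答数据,按标题去重后构建 FAQ 对象(包含问题、答案、相似问题和 FAQ ID);再用 modelscope 的 FAQ 问答 pipeline 计算每个问题标题的句向量,并用 faiss 建立内积索引;`ask_faq` 函数对用户输入先做向量检索(取前 30 个候选),再交给 pipeline 排序,并把得分最高的答案追加到 history 中返回。代码中还导入了 ast 和 json,但实际并未使用。具体实现过程请参考上述代码。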
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from seaborn.external.kde import gaussian_kde
from scipy import stats
from typing import *
from sklearn.neighbors import KernelDensity
from scipy import interpolate

sns.set()

# Load the spreadsheet and pull the power column out as a 1-D array.
df = pd.read_excel("D:\\pythonProject\\data\\冬天.xls")
power = df["功率"]
power = np.array(power)
print(power)

# Kernel density estimate of the power values.
data = np.array(df['功率'])
density = gaussian_kde(data)

# Plot the estimated density on an evenly spaced grid of 60 points.
x = np.linspace(min(data), max(data), 60)
plt.plot(x, density(x))

# Prepare the spline knots. splrep with s=0 interpolates and needs strictly
# increasing x, so sort the samples and drop duplicates first (np.unique
# does both); the raw column order is arbitrary.
x = np.unique(data)
if x.size < 4:
    raise ValueError(
        'A cubic B-spline fit needs at least 4 distinct x values, '
        f'got {x.size}')
y = density(x)

# Fit an interpolating cubic B-spline to the density curve.
tck = interpolate.splrep(x, y, k=3, s=0)

# Evaluate the fitted curve on a finer grid.
x_new = np.linspace(x.min(), x.max(), 500)
y_new = interpolate.splev(x_new, tck, der=0)

# Save knots, coefficients and degree under explicit names. Passing the
# (t, c, k) tuple positionally asks NumPy to build one array from two
# vectors and a scalar, which is ragged and rejected by recent NumPy.
np.savez('tck.npz', t=tck[0], c=tck[1], k=tck[2])
解释以下代码:

import numpy as np
from numpy import sqrt, pi, exp
from numpy import fft
from scipy.constants import (e as e0, epsilon_0 as eps0, h as h,
                             hbar as hbar, electron_mass as m0, c as c0)
from scipy.linalg import null_space
import scipy.sparse as sparse
import scipy.sparse.linalg as splg
try:
    from . import OneDQuantum as onedq
except OSError:
    onedq = None
    print('C library is not compiled. Features are limited.')
from . import Material
import copy
from typing import List, Tuple, Union
- `import numpy as np`:导入 NumPy 库,并将其命名为 `np`,这是一个常用的科学计算库,提供了大量用于数组和矩阵操作的函数。
- `from numpy import sqrt, pi, exp`:从 NumPy 中导入函数 `sqrt`、`exp` 和常数 `pi`;前两者分别用于计算平方根和指数,`pi` 是圆周率的数值。
- `from numpy import fft`:从 NumPy 中导入 `fft` 模块,该模块提供了快速傅里叶变换(FFT)的函数。
- `from scipy.constants import ...`:从 SciPy 库的 `constants` 模块中导入一些常数,例如电子电荷、真空介电常数、普朗克常数等。
- `from scipy.linalg import null_space`:从 SciPy 库的 `linalg` 模块中导入 `null_space` 函数,用于计算矩阵的零空间。
- `import scipy.sparse as sparse`:导入 SciPy 库的 `sparse` 模块,该模块提供了稀疏矩阵的处理功能。
- `import scipy.sparse.linalg as splg`:导入 SciPy 库的 `sparse.linalg` 模块,该模块提供了稀疏矩阵的线性代数运算函数。
- `try...except...`:尝试通过相对导入(`.` 表示当前包)导入名为 `OneDQuantum` 的模块,并将其命名为 `onedq`;如果导入时抛出 `OSError`(从提示信息看,对应 C 库尚未编译的情况),则将变量 `onedq` 设置为 `None`,并打印一条提示信息。其他类型的异常不会被捕获。
- `from . import Material`:从当前包中导入名为 `Material` 的模块。
- `import copy`:导入 Python 标准库中的 `copy` 模块,用于对象的浅复制和深复制操作。
- `from typing import List, Tuple, Union`:从 Python 标准库的 `typing` 模块中导入 `List`、`Tuple`、`Union` 这几个类型,用于函数参数和返回值的类型提示。