使用python编写基于紫外-可见光谱的水质COD检测代码,读取三份excel文件,分别为10mg/L,15mg/L,20mg/L的标准溶液,excel文件中第一列数据为波长,第二列为透过率,对数据通过导数法进行预处理,得到一阶导数谱图,并绘制在同一个折线图上,再计算一阶导数谱的排列熵,通过排列熵算法进行特征波长的提取,最后,利用提取的特征波长处的特征值与对应的COD浓度进行PLS建模,得到COD的预测模型
时间: 2023-10-05 09:09:51 浏览: 96
首先,需要安装并导入所需的库,包括`pandas`、`numpy`、`matplotlib`、`scipy`和`sklearn`。可以使用以下代码安装和导入这些库:
```python
!pip install pandas numpy matplotlib scipy scikit-learn
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import savgol_filter
from sklearn.cross_decomposition import PLSRegression
```
接下来,读取并处理excel数据,得到一阶导数谱图。可以使用以下代码读取数据并绘制谱图:
```python
# 读取excel数据
data1 = pd.read_excel('10mg_L.xlsx')
data2 = pd.read_excel('15mg_L.xlsx')
data3 = pd.read_excel('20mg_L.xlsx')
# 获取波长和透过率数据
wavelength = data1.iloc[:, 0].values
tr1 = data1.iloc[:, 1].values
tr2 = data2.iloc[:, 1].values
tr3 = data3.iloc[:, 1].values
# 对透过率数据进行预处理,得到一阶导数谱图
def get_derivative_spectrum(tr):
# 对透过率数据进行Savitzky-Golay滤波
tr_smoothed = savgol_filter(tr, 31, 3)
# 计算一阶导数
dy = np.diff(tr_smoothed)
dx = np.diff(wavelength)
dy_dx = dy / dx
# 对一阶导数进行Savitzky-Golay滤波
dy_dx_smoothed = savgol_filter(dy_dx, 31, 3)
return dy_dx_smoothed
ds1 = get_derivative_spectrum(tr1)
ds2 = get_derivative_spectrum(tr2)
ds3 = get_derivative_spectrum(tr3)
# 绘制一阶导数谱图
plt.plot(wavelength[1:], ds1, label='10mg/L')
plt.plot(wavelength[1:], ds2, label='15mg/L')
plt.plot(wavelength[1:], ds3, label='20mg/L')
plt.xlabel('Wavelength (nm)')
plt.ylabel('First Derivative')
plt.legend()
plt.show()
```
接下来,计算一阶导数谱的排列熵,并通过排列熵算法提取特征波长。可以使用以下代码实现:
```python
# 计算一阶导数谱的排列熵
def get_permutation_entropy(ds):
# 对一阶导数谱进行离散化
ds_discrete = np.digitize(ds, np.histogram(ds, bins=20)[1])
# 计算排列熵
n = len(ds_discrete)
pe = 0
for m in range(2, 7):
count = {}
for i in range(n - m + 1):
seg = tuple(ds_discrete[i:i+m])
if seg in count:
count[seg] += 1
else:
count[seg] = 1
pe_m = 0
for seg in count:
p = count[seg] / (n - m + 1)
pe_m -= p * np.log(p)
pe += pe_m
return pe
# 提取特征波长
pe1 = get_permutation_entropy(ds1)
pe2 = get_permutation_entropy(ds2)
pe3 = get_permutation_entropy(ds3)
f1 = wavelength[np.argmin(np.abs(ds1 - pe1))]
f2 = wavelength[np.argmin(np.abs(ds2 - pe2))]
f3 = wavelength[np.argmin(np.abs(ds3 - pe3))]
print('Feature wavelengths:', f1, f2, f3)
```
最后,利用特征波长处的特征值与对应的COD浓度进行PLS建模,得到COD的预测模型。可以使用以下代码实现:
```python
# 读取COD浓度数据
cod1 = 10
cod2 = 15
cod3 = 20
# 构建PLS模型
X_train = np.array([ds1[np.argmin(np.abs(wavelength - f1))], ds2[np.argmin(np.abs(wavelength - f2))], ds3[np.argmin(np.abs(wavelength - f3))]]).reshape(3, 1)
y_train = np.array([cod1, cod2, cod3]).reshape(3, 1)
pls = PLSRegression(n_components=1)
pls.fit(X_train, y_train)
# 预测COD浓度
X_test = np.array([ds1[np.argmin(np.abs(wavelength - f1))], ds2[np.argmin(np.abs(wavelength - f2))], ds3[np.argmin(np.abs(wavelength - f3))]]).reshape(3, 1)
y_pred = pls.predict(X_test)
print('Predicted COD concentrations:', y_pred[:, 0])
```
这样就完成了基于紫外-可见光谱的水质COD检测代码的编写。完整代码如下:
```python
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import savgol_filter
from sklearn.cross_decomposition import PLSRegression
# 读取excel数据
data1 = pd.read_excel('10mg_L.xlsx')
data2 = pd.read_excel('15mg_L.xlsx')
data3 = pd.read_excel('20mg_L.xlsx')
# 获取波长和透过率数据
wavelength = data1.iloc[:, 0].values
tr1 = data1.iloc[:, 1].values
tr2 = data2.iloc[:, 1].values
tr3 = data3.iloc[:, 1].values
# 对透过率数据进行预处理,得到一阶导数谱图
def get_derivative_spectrum(tr):
# 对透过率数据进行Savitzky-Golay滤波
tr_smoothed = savgol_filter(tr, 31, 3)
# 计算一阶导数
dy = np.diff(tr_smoothed)
dx = np.diff(wavelength)
dy_dx = dy / dx
# 对一阶导数进行Savitzky-Golay滤波
dy_dx_smoothed = savgol_filter(dy_dx, 31, 3)
return dy_dx_smoothed
ds1 = get_derivative_spectrum(tr1)
ds2 = get_derivative_spectrum(tr2)
ds3 = get_derivative_spectrum(tr3)
# 绘制一阶导数谱图
plt.plot(wavelength[1:], ds1, label='10mg/L')
plt.plot(wavelength[1:], ds2, label='15mg/L')
plt.plot(wavelength[1:], ds3, label='20mg/L')
plt.xlabel('Wavelength (nm)')
plt.ylabel('First Derivative')
plt.legend()
plt.show()
# 计算一阶导数谱的排列熵
def get_permutation_entropy(ds):
# 对一阶导数谱进行离散化
ds_discrete = np.digitize(ds, np.histogram(ds, bins=20)[1])
# 计算排列熵
n = len(ds_discrete)
pe = 0
for m in range(2, 7):
count = {}
for i in range(n - m + 1):
seg = tuple(ds_discrete[i:i+m])
if seg in count:
count[seg] += 1
else:
count[seg] = 1
pe_m = 0
for seg in count:
p = count[seg] / (n - m + 1)
pe_m -= p * np.log(p)
pe += pe_m
return pe
# 提取特征波长
pe1 = get_permutation_entropy(ds1)
pe2 = get_permutation_entropy(ds2)
pe3 = get_permutation_entropy(ds3)
f1 = wavelength[np.argmin(np.abs(ds1 - pe1))]
f2 = wavelength[np.argmin(np.abs(ds2 - pe2))]
f3 = wavelength[np.argmin(np.abs(ds3 - pe3))]
print('Feature wavelengths:', f1, f2, f3)
# 读取COD浓度数据
cod1 = 10
cod2 = 15
cod3 = 20
# 构建PLS模型
X_train = np.array([ds1[np.argmin(np.abs(wavelength - f1))], ds2[np.argmin(np.abs(wavelength - f2))], ds3[np.argmin(np.abs(wavelength - f3))]]).reshape(3, 1)
y_train = np.array([cod1, cod2, cod3]).reshape(3, 1)
pls = PLSRegression(n_components=1)
pls.fit(X_train, y_train)
# 预测COD浓度
X_test = np.array([ds1[np.argmin(np.abs(wavelength - f1))], ds2[np.argmin(np.abs(wavelength - f2))], ds3[np.argmin(np.abs(wavelength - f3))]]).reshape(3, 1)
y_pred = pls.predict(X_test)
print('Predicted COD concentrations:', y_pred[:, 0])
```
阅读全文