import numpy as np import pandas as pd def read_asc(filepath): usecols = [0, 4] # 仅读取第0列和第4列 chunksize = 100000 # 每次读取100000行数据 for chunk in pd.read_csv(filepath, skiprows=4, encoding="gbk", engine='python', sep=' ', delimiter=None, index_col=False, header=None, skipinitialspace=True, usecols=usecols, chunksize=chunksize): file = np.array(chunk) data = read_message(file) filter_step_size(data) def read_message(file): mask = file[:, 1] == "107" data = file[mask] return data def filter_step_size(data): diff = np.diff(data[:, 0].astype(float)) # 将第0列转换为浮点数类型 mask = np.logical_and(0.090 < diff, diff < 0.110) success_sum = np.count_nonzero(mask) fail_sum = len(mask) - success_sum result = np.column_stack((data[:-1, 0], data[1:, 0], diff)) result = result[~mask] print("报文通过数: {}".format(success_sum)) print("报文未通过数: {}".format(fail_sum)) print("未通过前一项值:未通过后一项值:差值:") print(result) if __name__ == '__main__': read_asc("E:\\package\\107(2).asc") 增加条件,在读取asc文件后,读取第四列="Rx",第五列="1a7"的第一列数据
时间: 2024-04-15 21:27:05 浏览: 109
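你可以在`read_message`函数中添加一个条件来筛选第四列为"Rx"、第五列为"1a7"的数据。注意原代码的`usecols = [0, 4]`只读入了两列,还需要让`read_asc`把用于判断的列一并读入;`usecols`筛选后数组的列会按顺序重新编号,`read_message`中使用的索引必须是数组中的实际位置。修改后的代码如下: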
```python
def read_message(file):
    """Keep only the rows marked "Rx" whose identifier column is "1a7".

    Args:
        file: 2-D array holding one parsed chunk. It is expected to have
            been loaded in ``read_asc`` with ``usecols = [0, 3, 4]``;
            pandas returns only the selected columns, so inside the array
            they sit at positions 0, 1 and 2 (file columns 0, 3 and 4).

    Returns:
        The rows of ``file`` whose position-1 value is "Rx" and whose
        position-2 value is "1a7", in their original order.
    """
    # Index by position inside the array, not by the column number in the
    # file: file[:, 3] / file[:, 4] do not exist in a 3-column array and
    # raise "index 3 is out of bounds for axis 1 with size 3".
    # Comparing as text keeps the test elementwise for any column dtype.
    is_rx = file[:, 1].astype(str) == "Rx"
    is_target_id = file[:, 2].astype(str) == "1a7"
    return file[is_rx & is_target_id]
```
这样,`read_message`函数将会返回第四列为"Rx"和第五列为"1a7"的数据。
相关问题
import numpy as np import pandas as pd def read_asc(filepath): usecols = [0, 4] # 仅读取第0列和第4列 chunksize = 100000 # 每次读取100000行数据 for chunk in pd.read_csv(filepath, skiprows=4, encoding="gbk", engine='python', sep=' ', delimiter=None, index_col=False, header=None, skipinitialspace=True, usecols=usecols, chunksize=chunksize): file = np.array(chunk) data = read_message(file) filter_step_size(data) def read_message(file): mask = file[:, 1] == "107" data = file[mask] return data def filter_step_size(data): diff = np.diff(data[:, 0].astype(float)) # 将第0列转换为浮点数类型 mask = np.logical_and(0.090 < diff, diff < 0.110) success_sum = np.count_nonzero(mask) fail_sum = len(mask) - success_sum result = np.column_stack((data[:-1, 0], data[1:, 0], diff)) result = result[~mask] print("报文通过数: {}".format(success_sum)) print("报文未通过数: {}".format(fail_sum)) print("未通过前一项值:未通过后一项值:差值:") print(result) if __name__ == '__main__': read_asc("E:\\package\\107(2).asc") 增加条件,将读取第0列和第四列,并读取第四列值为107的数据。改为读取第0列,第三列和第四列,并读取第三列为Rx且第四列为107的数据
修改后的代码如下:
```python
import numpy as np
import pandas as pd
def read_asc(filepath):
    """Stream an ASC log in chunks and check the spacing of the matching rows.

    The file is parsed as space-separated text (GBK encoded, first 4 lines
    skipped, no header row). Each chunk is filtered by ``read_message`` and
    the surviving rows are handed to ``filter_step_size``, which prints its
    report once per chunk.

    Args:
        filepath: Path of the ``.asc`` file to read.
    """
    # File columns 0, 3 and 4; after loading they are positions 0, 1, 2.
    usecols = [0, 3, 4]
    chunksize = 100000  # rows parsed per chunk, to bound memory use
    reader = pd.read_csv(filepath, skiprows=4, encoding="gbk", engine='python', sep=' ', delimiter=None,
                         index_col=False, header=None, skipinitialspace=True, usecols=usecols,
                         chunksize=chunksize)

    # Last matching row of the previous chunk. Prepending it to the next
    # chunk lets the interval that straddles a chunk boundary be checked
    # too; without it one interval per boundary is silently skipped.
    previous_last = None
    for chunk in reader:
        data = read_message(np.array(chunk))
        if len(data):
            if previous_last is not None:
                data = np.vstack((previous_last, data))
            # Copy so the whole filtered chunk is not kept alive by a view.
            previous_last = data[-1:].copy()
        filter_step_size(data)
def read_message(file):
    """Keep only the rows marked "Rx" whose identifier column is "107".

    Args:
        file: 2-D array holding one parsed chunk, loaded with
            ``usecols = [0, 3, 4]``. pandas returns only the selected
            columns, so inside the array they sit at positions 0, 1 and 2
            (file columns 0, 3 and 4).

    Returns:
        The rows of ``file`` whose position-1 value is "Rx" and whose
        position-2 value is "107", in their original order.
    """
    # Index by position inside the 3-column array: position 1 is file
    # column 3 and position 2 is file column 4. file[:, 3] does not exist
    # and raises "index 3 is out of bounds for axis 1 with size 3".
    # Compare as text: when every identifier in a chunk is all digits,
    # pandas parses the column as integers and 107 == "107" is False.
    is_rx = file[:, 1].astype(str) == "Rx"
    is_target_id = file[:, 2].astype(str) == "107"
    return file[is_rx & is_target_id]
def filter_step_size(data, lower=0.090, upper=0.110):
    """Print how many consecutive rows are spaced inside the allowed window.

    The first column of ``data`` is converted to float and the difference
    between each row and the next is computed. A difference passes when it
    lies strictly between ``lower`` and ``upper``. The pass and fail counts
    are printed, followed by one line per failing pair (previous value,
    next value, difference).

    Args:
        data: 2-D array whose column 0 is convertible to float
            (presumably a timestamp in seconds - confirm against the log).
        lower: Exclusive lower bound of an accepted difference.
        upper: Exclusive upper bound of an accepted difference.

    Returns:
        None. The report is written to standard output.
    """
    first_column = data[:, 0]
    diff = np.diff(first_column.astype(float))  # spacing between neighbouring rows
    mask = np.logical_and(lower < diff, diff < upper)
    success_sum = np.count_nonzero(mask)
    fail_sum = len(mask) - success_sum
    # One row per neighbouring pair; keep only the pairs that failed.
    result = np.column_stack((first_column[:-1], first_column[1:], diff))
    result = result[~mask]
    print("报文通过数: {}".format(success_sum))
    print("报文未通过数: {}".format(fail_sum))
    print("未通过前一项值:未通过后一项值:差值:")
    print(result)
if __name__ == '__main__':
    # Script entry point: analyse the ASC log at this fixed Windows path
    # (backslashes are escaped inside the string literal).
    read_asc("E:\\package\\107(2).asc")
```
这样修改后的代码会读取第0列、第3列和第4列,并且只保留第3列为"Rx"且第4列为"107"的行。
import numpy as np import pandas as pd def read_asc(filepath): usecols = [0, 3, 4] # 仅读取第0列和第4列 chunksize = 100000 # 每次读取100000行数据 for chunk in pd.read_csv(filepath, skiprows=4, encoding="gbk", engine='python', sep=' ', delimiter=None, index_col=False, header=None, skipinitialspace=True, usecols=usecols, chunksize=chunksize): file = np.array(chunk) data = read_message(file) filter_step_size(data) def read_message(file): mask = np.logical_and(file[:, 3] == "Rx", file[:, 4] == "1a7") data = file[mask] return data def filter_step_size(data): diff = np.diff(data[:, 0].astype(float)) # 将第0列转换为浮点数类型 mask = np.logical_and(0.090 < diff, diff < 0.110) success_sum = np.count_nonzero(mask) fail_sum = len(mask) - success_sum result = np.column_stack((data[:-1, 0], data[1:, 0], diff)) result = result[~mask] print("报文通过数: {}".format(success_sum)) print("报文未通过数: {}".format(fail_sum)) print("未通过前一项值:未通过后一项值:差值:") print(result) if __name__ == '__main__': read_asc("E:\\package\\107(2).asc") 这段代码报错”index 3 is out of bounds for axis 1 with size 3“,修改优化
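这个错误是因为访问了超出数组范围的列。`usecols = [0, 3, 4]`只读入三列,`np.array(chunk)`得到的`file`数组也只有3列,位置被重新编号为0、1、2;而`read_message`仍然按文件中的列号访问`file[:, 3]`和`file[:, 4]`,于是越界。修改`usecols`里的数值并不能解决这个问题(那只会改为读取文件中的其他列),需要让`read_message`访问的索引与`file`数组中的实际列位置一致。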
修改后的代码如下:
```python
def read_asc(filepath):
    """Stream an ASC log in chunks and check the spacing of the matching rows.

    The file is parsed as space-separated text (GBK encoded, first 4 lines
    skipped, no header row). Only file columns 0, 3 and 4 are loaded, but
    each one is stored at its original column number in the array passed to
    ``read_message``, so ``file[:, 3]`` and ``file[:, 4]`` there are valid.

    Args:
        filepath: Path of the ``.asc`` file to read.
    """
    # File columns to load. Changing these numbers (e.g. to [1, 4, 5])
    # would read different file columns; it does not fix the IndexError.
    usecols = [0, 3, 4]
    chunksize = 100000  # rows parsed per chunk, to bound memory use
    positions = sorted(usecols)  # pandas returns the columns in file order
    width = positions[-1] + 1
    reader = pd.read_csv(filepath, skiprows=4, encoding="gbk", engine='python', sep=' ', delimiter=None,
                         index_col=False, header=None, skipinitialspace=True, usecols=usecols,
                         chunksize=chunksize)

    # Last matching row of the previous chunk. Prepending it to the next
    # chunk lets the interval that straddles a chunk boundary be checked
    # too; without it one interval per boundary is silently skipped.
    previous_last = None
    for chunk in reader:
        # pandas hands back a 3-column frame (positions 0, 1, 2). Spread
        # it into an array as wide as the file columns in use, so position
        # == file column number; the unused columns stay None.
        file = np.empty((len(chunk), width), dtype=object)
        file[:, positions] = np.array(chunk)
        data = read_message(file)
        if len(data):
            if previous_last is not None:
                data = np.vstack((previous_last, data))
            # Copy so the whole filtered chunk is not kept alive by a view.
            previous_last = data[-1:].copy()
        filter_step_size(data)
```
需要注意:`usecols`中的值是文件里的列号(从0开始),而`read_message`中使用的索引必须与读入后`file`数组中的实际列位置一致。
阅读全文