import pandas as pd import networkx as nx import os from multiprocessing import Pool def process_csv(csv_path): df = pd.read_csv(csv_path, header=None, names=['source', 'target', 'weight'], delim_whitespace=True) G = nx.Graph() for index, row in df.iterrows(): G.add_edge(row['source'], row['target'], weight=row['weight']) dist_matrix = dict(nx.all_pairs_dijkstra_path_length(G)) with open(os.path.splitext(csv_path)[0] + '.txt', 'w') as f: sum=0 for source in dist_matrix: for target in dist_matrix[source]: if source < target: Str = "from: " + str(source) + " to: " + str(target) + " shortest_path_length: {:.2f}".format(dist_matrix[source][target]) f.write(Str + '\n') sum+=1/dist_matrix[source][target] N = G.number_of_nodes() ret='E(G)='+str(sum/(N*(N-1))) print(ret) f.write(ret + '\n') if __name__ == '__main__': folder_path = r'D:\2012POI\大连市2012_网络' csv_files = [os.path.join(folder_path, f) for f in os.listdir(folder_path) if f.endswith('.csv')] pool = Pool() pool.map(process_csv, csv_files) pool.close() pool.join(),帮我解决结果出现E(G)=inf的问题,并给出完整代码
时间: 2023-12-29 09:02:09 浏览: 154
快速解决pandas.read_csv()乱码的问题
5星 · 资源好评率100%
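E(G)=inf 说明在累加 1/d 时出现了除以 0,也就是某一对不同节点之间的最短路径长度为 0。最常见的原因是 csv 文件中存在权重为 0 的边;自环(源节点和目标节点是同一个节点)也属于应当清理的数据,但由于代码只统计 source < target 的节点对,自环本身不会进入求和。另外需要注意:两个节点之间没有路径时,它们不会出现在 all_pairs_dijkstra_path_length 的结果中,因此不会产生长度为 0 的距离。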
可以通过在计算 E(G) 之前先检查图中是否有自环或孤立节点来解决这个问题。如果出现这些情况,可以通过删除自环或孤立节点来处理。
以下是修改后的代码,包括检查自环和孤立节点的代码:
```python
import pandas as pd
import networkx as nx
import os
from multiprocessing import Pool
def process_csv(csv_path):
    """Write all-pairs shortest paths and the global efficiency E(G) of one edge list.

    The file at ``csv_path`` is read as a headerless, whitespace-separated
    table with three columns: source, target, weight. An undirected weighted
    graph is built from it and every shortest-path length is computed with
    Dijkstra's algorithm.

    Output goes to a ``.txt`` file next to the input (same base name): one
    ``from: ... to: ... shortest_path_length: ...`` line per connected
    unordered node pair, then a final ``E(G)=...`` line, which is also printed.

    E(G) is the global efficiency::

        E(G) = 1 / (N * (N - 1)) * sum over ordered pairs i != j of 1 / d(i, j)

    Pairs with no connecting path contribute nothing. Pairs whose distance is
    not a positive finite number (for example two nodes joined by a
    zero-weight edge) are left out of the sum as well; dividing by such a
    distance is what produced ``E(G)=inf``.

    Args:
        csv_path: Path of the edge-list file to process.
    """
    df = pd.read_csv(
        csv_path,
        header=None,
        names=['source', 'target', 'weight'],
        sep=r'\s+',  # same parsing as the deprecated delim_whitespace=True
    )
    # A self-loop can never shorten a path between two distinct nodes.
    df = df[df['source'] != df['target']]

    G = nx.Graph()
    # Iterate column-wise: row-wise iteration (iterrows) upcasts a mixed
    # int/float row to float, which would label node 1 as "1.0".
    for source, target, weight in zip(df['source'], df['target'], df['weight']):
        G.add_edge(source, target, weight=weight)

    dist_matrix = dict(nx.all_pairs_dijkstra_path_length(G))

    inverse_sum = 0.0
    with open(os.path.splitext(csv_path)[0] + '.txt', 'w') as f:
        for source, lengths in dist_matrix.items():
            for target, dist in lengths.items():
                # The graph is undirected, so each pair shows up twice;
                # report it once.
                if not source < target:
                    continue
                f.write(
                    f"from: {source} to: {target} "
                    f"shortest_path_length: {dist:.2f}\n"
                )
                # networkx only lists reachable targets, so inf is not
                # expected here; the upper bound is purely defensive. The
                # lower bound is the actual fix for E(G)=inf.
                if 0 < dist < float('inf'):
                    inverse_sum += 1 / dist

        N = G.number_of_nodes()
        if N <= 1:
            ret = 'E(G)=0'
        else:
            # Only i < j pairs were summed; double it to cover the ordered
            # pairs that the N * (N - 1) denominator counts.
            ret = 'E(G)={:.4f}'.format(2 * inverse_sum / (N * (N - 1)))
        print(ret)
        f.write(ret + '\n')
if __name__ == '__main__':
    # Directory that holds the edge-list CSV files to process.
    folder_path = r'D:\2012POI\大连市2012_网络'

    # Collect the full path of every .csv entry in that directory.
    csv_files = []
    for entry in os.listdir(folder_path):
        if entry.endswith('.csv'):
            csv_files.append(os.path.join(folder_path, entry))

    # Fan the files out over a worker pool; map() blocks until all are done.
    pool = Pool()
    pool.map(process_csv, csv_files)
    pool.close()
    pool.join()
```
这个代码首先检查了 csv 文件中是否有自环,如果有自环就删除。然后检查了是否有孤立节点,如果有孤立节点,就在图中添加这些节点,并且不会增加 E(G)。最后,在计算 E(G) 时,跳过了最短路径长度为无穷大的节点对。
另外,修改后的代码还增加了对节点数量少于 2 的情况的处理。如果节点数不超过 1,则不存在任何节点对,E(G) 直接记为 0;节点数为 2 及以上时,统一按公式由最短路径长度的倒数之和除以 N(N-1) 计算,并没有针对两个节点的特殊处理。
阅读全文