# coding=utf-8
import math
import multiprocessing
import time
import scipy.stats as stats
def calculate2(i, X, all_glb, data_array_glb):
    """Correlate row ``i`` with every later row and keep the significant pairs.

    Args:
        i: Zero-based index of the reference row.
        X: The values of row ``i``.
        all_glb: Object whose ``.value`` attribute holds the total row count.
        data_array_glb: Indexable collection of rows; ``data_array_glb[j]``
            is compared against ``X`` for every ``j`` in ``i + 1 .. count - 1``.

    Returns:
        A list of ``[i + 1, j + 1, corr, p]`` entries (one-based row numbers,
        Spearman coefficient, p-value) for each pair whose p-value is at
        most 0.05. Rows whose values sum to zero are never reported.
    """
    # The zero-sum test on the reference row does not depend on j, so decide
    # once up front instead of fetching every later row just to discard it.
    if math.fsum(X) == 0:
        return []

    row_count = all_glb.value
    result = []
    for j in range(i + 1, row_count):
        y = data_array_glb[j]
        if math.fsum(y) == 0:
            continue
        corr, p = stats.spearmanr(X, y)
        # Written as "not <=" so that a NaN p-value (spearmanr yields NaN when
        # a row is constant) is rejected too; "p > 0.05" is False for NaN and
        # would let such pairs through as if they were significant.
        if not p <= 0.05:
            continue
        result.append([i + 1, j + 1, corr, p])
    return result
if __name__ == "__main__":
    # Driver: load an integer matrix (one whitespace-separated row per line),
    # run calculate2 for every row on a process pool, and write the reported
    # pairs to a tab-separated file. Elapsed time since loading began is
    # printed after each stage.
    multiprocessing.freeze_support()
    input_file = 'D:/MASTER2016/5.CUDA/data-ID-top30-kv3.txt'
    output_file = 'D:/MASTER2016/5.CUDA/result-python.txt'
    # time.clock() no longer exists on Python 3.8+; use perf_counter when the
    # interpreter provides it and fall back to clock() on older ones.
    clock = getattr(time, 'perf_counter', None) or time.clock

    print('>> loading ...')
    start = clock()
    # `with` closes the file even if a line fails to parse. split() without
    # an argument tolerates repeated blanks; an empty line becomes an empty
    # row, which keeps row numbers aligned with line numbers and is skipped
    # by calculate2 because it sums to zero.
    with open(input_file) as data:
        data_array = [[int(token) for token in line.split()] for line in data]
    print('%s s' % (clock() - start))

    print('>> calculating ...')
    pool_size = 8
    pool = multiprocessing.Pool(processes=pool_size)
    row_count = len(data_array)
    # The row count and the matrix are handed to the workers through manager
    # proxies; each task additionally receives its own reference row directly.
    manager = multiprocessing.Manager()
    all_share = manager.Value('i', int(row_count))
    data_array_share = manager.list(data_array)
    results = [
        pool.apply_async(calculate2,
                         args=(i, data_X, all_share, data_array_share))
        for i, data_X in enumerate(data_array)
    ]
    pool.close()
    pool.join()
    print('%s s' % (clock() - start))
    data_array = None  # release the parent's copy of the matrix before saving

    print('>> saving ...')
    # res.get() re-raises any exception from the worker; the `with` block
    # guarantees the output file is still closed in that case.
    with open(output_file, 'w') as data2:
        for res in results:
            for temp in res.get():
                data2.write('X%s\tX%s\t%s\t%s\n'
                            % (temp[0], temp[1], temp[2], temp[3]))
    print('%s s' % (clock() - start))