Each result returned by the distributed processing is not made into a PN-sized matrix, but is placed as is. This lightens the processing.
import random
import math
import time
import gc
import numpy as np
from multiprocessing import Pool
np.set_printoptions(threshold=np.inf)
random.seed(1)
XX = 0
YY = 1
ZZ = 2
core = 6
#total particle num
PN = 10000
#---------------------------------------------------
#
# parameter : divide_N
#
# The number of particles per side of a small square
# divided for the splitting process when creating a
# large PN^2 region with all particles PN.
#
# The smaller the setting, the more it will be divided
# into many smaller jobs.
#---------------------------------------------------
divide_N = 1000
xyzV = [[0 for i in range(3)] for j in range(PN)]
xyzP = np.zeros((PN,3))
xyzF = np.zeros((PN,3))
def wrapper(args):
return calc_XY_start_end(*args)
def find_pair():
global core
global PN
global xyzF
global xyzP
worklist = []
thread = 0
remainder = PN % divide_N
quotient = PN // divide_N
if remainder != 0: #ex PN=11, divide_N=4, remainder=3, quotient=2
quotient = quotient + 1 # 2 -> 3
extra = divide_N - remainder # 1
X_st = 0
X_ed = 0
Y_st = 0
Y_ed = 0
Thead_num = 0
sum_f = np.zeros((PN,PN,3))
for Xn in range(0,quotient):
for Yn in range(0,Xn+1):
worklist.append([Xn, Yn, quotient, remainder, divide_N, extra])
Thead_num = Thead_num + 1
#----------------------------------------
#local_force = calc_XY_start_end(Xn, Yn, quotient, remainder, divide_N, extra)
#----------------------------------------
#start thread. results is callback in array.
p = Pool(core)
callback = p.map(wrapper, worklist)
p.close()
t1 = 0
for Th in range(0,Thead_num):
xn_l = callback[Th][0]
yn_l = callback[Th][1]
xxst = callback[Th][2]
xxed = callback[Th][3]
yyst = callback[Th][4]
yyed = callback[Th][5]
local_force = callback[Th][6]
time_sta = time.time()
sb = local_force.shape
ten = -1 * local_force.transpose(1,0,2)
tensb = ten.shape
sum_f[yyst:yyst+sb[0], xxst:xxst+sb[1]] += local_force
if xn_l != yn_l:
sum_f[xxst:xxst+tensb[0], yyst:yyst+tensb[1]] += ten
time_end = time.time()
t1 =t1 + (time_end - time_sta)
print ("=====t1=",t1)
print(np.sum(sum_f))
#mmm = sum_f.sum(axis=1)
#xyzF = xyzF + mmm
def calc_XY_start_end(Xn, Yn, quotient, remainder, divide_N, extra):
global PN
global xyzP
if Xn != quotient-1 and remainder != 0 or remainder == 0:
X_st = Xn*divide_N
X_ed = (Xn+1)*divide_N-1
else:
X_st = Xn*divide_N - (divide_N - remainder)
X_ed = (Xn+1)*divide_N - (divide_N - remainder) -1
if Yn != quotient-1 and remainder != 0 or remainder == 0:
Y_st = Yn*divide_N
Y_ed = (Yn+1)*divide_N-1
else:
Y_st = Yn*divide_N - (divide_N - remainder)
Y_ed = (Yn+1)*divide_N -(divide_N - remainder) -1
Xa = np.arange(X_st,X_ed+1)
Ya = np.arange(Y_st,Y_ed+1)
mx1,my1 = np.meshgrid(Xa,Ya)
local_xyz = xyzP[mx1]-xyzP[my1]
distance_a = np.linalg.norm(local_xyz, axis=2)
tmp_d = distance_a[:,:,np.newaxis]
tmp_f = local_xyz/tmp_d
tmp_f = np.nan_to_num(tmp_f,0)
#extra clear
if remainder != 0:
if Xn + 1 == quotient:
for dd in range(0,extra):
tmp_f[:,dd] = 0
if Yn + 1 == quotient:
for dd in range(0,extra):
tmp_f[[dd]] = 0
return Xn,Yn,X_st,X_ed,Y_st,Y_ed,tmp_f
def init_lattice():
global xyzF
global xyzP
pnum = 0
while pnum < PN:
xyzP[pnum][XX] = random.uniform(-1,1)
xyzP[pnum][YY] = random.uniform(-1,1)
xyzP[pnum][ZZ] = random.uniform(-1,1)
xyzF[pnum][XX] = random.uniform(-1,1)
xyzF[pnum][YY] = random.uniform(-1,1)
xyzF[pnum][ZZ] = random.uniform(-1,1)
pnum += 1
def results_sum():
global xyzF
pnum = 0
total_F = 0
# while pnum < PN:
# total_F = total_F + xyzF[pnum][XX]
# total_F = total_F + xyzF[pnum][YY]
# total_F = total_F + xyzF[pnum][ZZ]
# pnum += 1
#
# print (total_F)
if __name__ == "__main__":
init_lattice()
find_pair()
results_sum()
