The split process reduced the matrix size as much as possible, but it still eats up too much memory.
Multiprocessing consumes a lot of memory.
import random
import math
import time
import gc
import numpy as np
from multiprocessing import Pool
np.set_printoptions(threshold=np.inf)
random.seed(1)
XX = 0
YY = 1
ZZ = 2
#total particle num
PN = 1200
#---------------------------------------------------
#
# parameter : divide_N
#
# The number of particles per side of a small square
# divided for the splitting process when creating a
# large PN^2 region with all particles PN.
#
# The smaller the setting, the more it will be divided
# into many smaller jobs.
#---------------------------------------------------
divide_N = 50
core = 2
xyzV = [[0 for i in range(3)] for j in range(PN)]
xyzP = np.zeros((PN,3))
xyzF = np.zeros((PN,3))
def wrapper(args):
return calc_XY_start_end(*args)
def find_pair():
global PN
global xyzF
global xyzP
worklist = []
thread = 0
remainder = PN % divide_N
quotient = PN // divide_N
if remainder != 0: #ex PN=11, divide_N=4, remainder=3, quotient=2
quotient = quotient + 1 # 2 -> 3
extra = divide_N - remainder # 1
X_st = 0
X_ed = 0
Y_st = 0
Y_ed = 0
Thead_num = 0
sum_f = np.zeros((PN,PN,3))
for Xn in range(0,quotient):
for Yn in range(0,Xn+1):
worklist.append([Xn, Yn, quotient, remainder, divide_N, extra])
Thead_num = Thead_num + 1
#----------------------------------------
#local_force = calc_XY_start_end(Xn, Yn, quotient, remainder, divide_N, extra)
#----------------------------------------
p = Pool(core)
#start thread. results is callback in array.
callback = p.map(wrapper, worklist)
p.close()
for Th in range(0,Thead_num):
xn_l = callback[Th][0]
yn_l = callback[Th][1]
xxst = callback[Th][2]
xxed = callback[Th][3]
yyst = callback[Th][4]
yyed = callback[Th][5]
local_force = callback[Th][6]
if xxst != 0:
r2 = np.zeros((xxst,divide_N,3))
local_force = np.insert(local_force ,0,r2,axis=1)
if xxed+1 != PN:
r2 = np.zeros((PN - xxed - 1,divide_N,3))
local_force = np.insert(local_force ,xxed + 1,r2,axis=1)
if yyst != 0:
r2 = np.zeros((yyst,PN,3))
local_force = np.insert(local_force ,0,r2,axis=0)
if yyed+1 != PN:
r2 = np.zeros((PN - yyed - 1,PN,3))
local_force = np.insert(local_force ,yyed + 1,r2,axis=0)
del r2
if xn_l != yn_l:
sum_f = sum_f + local_force
sum_f = sum_f - local_force.transpose(1,0,2)
else:
sum_f = sum_f + local_force
del local_force
gc.collect()
#print (sum_f)
print("=====================================")
mmm = sum_f.sum(axis=1)
xyzF = xyzF + mmm
def calc_XY_start_end(Xn, Yn, quotient, remainder, divide_N, extra):
global PN
global xyzP
if Xn != quotient-1 and remainder != 0 or remainder == 0:
X_st = Xn*divide_N
X_ed = (Xn+1)*divide_N-1
else:
X_st = Xn*divide_N - (divide_N - remainder)
X_ed = (Xn+1)*divide_N - (divide_N - remainder) -1
if Yn != quotient-1 and remainder != 0 or remainder == 0:
Y_st = Yn*divide_N
Y_ed = (Yn+1)*divide_N-1
else:
Y_st = Yn*divide_N - (divide_N - remainder)
Y_ed = (Yn+1)*divide_N -(divide_N - remainder) -1
Xa = np.arange(X_st,X_ed+1)
Ya = np.arange(Y_st,Y_ed+1)
mx1,my1 = np.meshgrid(Xa,Ya)
local_xyz = xyzP[mx1]-xyzP[my1]
distance_a = np.linalg.norm(local_xyz, axis=2)
tmp_d = distance_a[:,:,np.newaxis]
tmp_f = local_xyz/tmp_d
tmp_f = np.nan_to_num(tmp_f,0)
#extra clear
if remainder != 0:
if Xn + 1 == quotient:
for dd in range(0,extra):
tmp_f[:,dd] = 0
if Yn + 1 == quotient:
for dd in range(0,extra):
tmp_f[[dd]] = 0
return Xn,Yn,X_st,X_ed,Y_st,Y_ed,tmp_f
def init_lattice():
global xyzF
global xyzP
pnum = 0
while pnum < PN:
xyzP[pnum][XX] = random.uniform(-1,1)
xyzP[pnum][YY] = random.uniform(-1,1)
xyzP[pnum][ZZ] = random.uniform(-1,1)
xyzF[pnum][XX] = random.uniform(-1,1)
xyzF[pnum][YY] = random.uniform(-1,1)
xyzF[pnum][ZZ] = random.uniform(-1,1)
pnum += 1
def results_sum():
global xyzF
pnum = 0
total_F = 0
while pnum < PN:
total_F = total_F + xyzF[pnum][XX]
total_F = total_F + xyzF[pnum][YY]
total_F = total_F + xyzF[pnum][ZZ]
pnum += 1
print (total_F)
if __name__ == "__main__":
init_lattice()
find_pair()
results_sum()
