The process of brute force calculation by Coulomb force with multiprocessing and numpy. TEST2

The split process reduced the matrix size as much as possible, but it still eats up too much memory.
Multiprocessing consumes a lot of memory.

import random
import math
import time
import gc
import numpy as np

from multiprocessing import Pool

np.set_printoptions(threshold=np.inf)
random.seed(1)

XX = 0
YY = 1
ZZ = 2

#total particle num
PN = 1200
#---------------------------------------------------
#
# parameter : divide_N
#
# The number of particles per side of a small square 
# divided for the splitting process when creating a 
# large PN^2 region with all particles PN.
#
# The smaller the setting, the more it will be divided
# into many smaller jobs.
#---------------------------------------------------
divide_N = 50
core = 2
xyzV = [[0 for i in range(3)] for j in range(PN)]
xyzP = np.zeros((PN,3))
xyzF = np.zeros((PN,3))

def wrapper(args):
  return calc_XY_start_end(*args)

def find_pair():
  global PN
  global xyzF
  global xyzP

  worklist = []
  thread   = 0

  remainder = PN %  divide_N
  quotient  = PN // divide_N

  if remainder != 0:                 #ex PN=11, divide_N=4, remainder=3, quotient=2
      quotient = quotient + 1        #   2 -> 3
  extra = divide_N - remainder       #   1

  X_st = 0
  X_ed = 0
  Y_st = 0
  Y_ed = 0

  Thead_num = 0
  sum_f = np.zeros((PN,PN,3))

  for Xn in range(0,quotient):
    for Yn in range(0,Xn+1):

      worklist.append([Xn, Yn, quotient, remainder, divide_N, extra])
      Thead_num = Thead_num + 1
      #----------------------------------------
      #local_force = calc_XY_start_end(Xn, Yn, quotient, remainder, divide_N, extra)
      #----------------------------------------

  p = Pool(core)

  #start thread. results is callback in array.
  callback = p.map(wrapper, worklist)
  p.close()


  for Th in range(0,Thead_num):

    xn_l = callback[Th][0]
    yn_l = callback[Th][1]
    xxst = callback[Th][2]
    xxed = callback[Th][3]
    yyst = callback[Th][4]
    yyed = callback[Th][5]
    local_force = callback[Th][6]

    if xxst != 0:
      r2 = np.zeros((xxst,divide_N,3))
      local_force        = np.insert(local_force     ,0,r2,axis=1)
    if xxed+1 != PN:
      r2 = np.zeros((PN - xxed - 1,divide_N,3))
      local_force      = np.insert(local_force     ,xxed + 1,r2,axis=1)

    if yyst != 0:
      r2 = np.zeros((yyst,PN,3))
      local_force      = np.insert(local_force     ,0,r2,axis=0)
    if yyed+1 != PN:
      r2 = np.zeros((PN - yyed - 1,PN,3))
      local_force      = np.insert(local_force     ,yyed + 1,r2,axis=0)

    del r2

    if xn_l != yn_l:
      sum_f = sum_f + local_force
      sum_f = sum_f - local_force.transpose(1,0,2)
    else:
      sum_f = sum_f + local_force

    del local_force
    gc.collect()

  #print (sum_f)

  print("=====================================")
  mmm = sum_f.sum(axis=1)
  xyzF = xyzF + mmm


def calc_XY_start_end(Xn, Yn, quotient, remainder, divide_N, extra):
  global PN
  global xyzP

  if Xn != quotient-1 and remainder != 0 or remainder == 0:
    X_st = Xn*divide_N
    X_ed = (Xn+1)*divide_N-1
  else:
    X_st = Xn*divide_N - (divide_N - remainder)
    X_ed = (Xn+1)*divide_N  - (divide_N - remainder) -1
  if Yn != quotient-1 and remainder != 0 or remainder == 0:
    Y_st = Yn*divide_N
    Y_ed = (Yn+1)*divide_N-1
  else:
    Y_st = Yn*divide_N - (divide_N - remainder)
    Y_ed = (Yn+1)*divide_N  -(divide_N - remainder) -1

  Xa = np.arange(X_st,X_ed+1)
  Ya = np.arange(Y_st,Y_ed+1)
  mx1,my1 = np.meshgrid(Xa,Ya)
  local_xyz  = xyzP[mx1]-xyzP[my1]
  distance_a = np.linalg.norm(local_xyz, axis=2)

  tmp_d = distance_a[:,:,np.newaxis]
  tmp_f = local_xyz/tmp_d
  tmp_f = np.nan_to_num(tmp_f,0)

  #extra clear
  if remainder != 0:
    if Xn + 1 == quotient:
      for dd in range(0,extra):
        tmp_f[:,dd] = 0
    if Yn + 1 == quotient:
      for dd in range(0,extra):
        tmp_f[[dd]] = 0

  return Xn,Yn,X_st,X_ed,Y_st,Y_ed,tmp_f



def init_lattice():
  global xyzF
  global xyzP

  pnum = 0
  while pnum < PN:
    xyzP[pnum][XX] = random.uniform(-1,1)
    xyzP[pnum][YY] = random.uniform(-1,1)
    xyzP[pnum][ZZ] = random.uniform(-1,1)
    xyzF[pnum][XX] = random.uniform(-1,1)
    xyzF[pnum][YY] = random.uniform(-1,1)
    xyzF[pnum][ZZ] = random.uniform(-1,1)
    pnum += 1


def results_sum():
  global xyzF
  pnum = 0
  total_F = 0
  while pnum < PN:
    total_F = total_F + xyzF[pnum][XX]
    total_F = total_F + xyzF[pnum][YY]
    total_F = total_F + xyzF[pnum][ZZ]
    pnum += 1

  print (total_F)


if __name__ == "__main__":

  init_lattice()
  find_pair()
  results_sum()

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

import random

import math

import time

import gc

import numpy as np

from multiprocessing import Pool

np.set_printoptions(threshold=np.inf)

random.seed(1)

XX = 0

YY = 1

ZZ = 2

#total particle num

PN = 1200

#---------------------------------------------------

# parameter : divide_N

# The number of particles per side of a small square

# divided for the splitting process when creating a

# large PN^2 region with all particles PN.

# The smaller the setting, the more it will be divided

# into many smaller jobs.

#---------------------------------------------------

divide_N = 50

core = 2

xyzV = [[0 for i in range(3)] for j in range(PN)]

xyzP = np.zeros((PN,3))

xyzF = np.zeros((PN,3))

def wrapper(args):

return calc_XY_start_end(*args)

def find_pair():

global PN

global xyzF

global xyzP

worklist = []

thread = 0

remainder = PN % divide_N

quotient = PN // divide_N

if remainder != 0: #ex PN=11, divide_N=4, remainder=3, quotient=2

quotient = quotient + 1 # 2 -> 3

extra = divide_N - remainder # 1

X_st = 0

X_ed = 0

Y_st = 0

Y_ed = 0

Thead_num = 0

sum_f = np.zeros((PN,PN,3))

for Xn in range(0,quotient):

for Yn in range(0,Xn+1):

worklist.append([Xn, Yn, quotient, remainder, divide_N, extra])

Thead_num = Thead_num + 1

#----------------------------------------

#local_force = calc_XY_start_end(Xn, Yn, quotient, remainder, divide_N, extra)

#----------------------------------------

p = Pool(core)

#start thread. results is callback in array.

callback = p.map(wrapper, worklist)

p.close()

for Th in range(0,Thead_num):

xn_l = callback[Th][0]

yn_l = callback[Th][1]

xxst = callback[Th][2]

xxed = callback[Th][3]

yyst = callback[Th][4]

yyed = callback[Th][5]

local_force = callback[Th][6]

if xxst != 0:

r2 = np.zeros((xxst,divide_N,3))

local_force = np.insert(local_force ,0,r2,axis=1)

if xxed+1 != PN:

r2 = np.zeros((PN - xxed - 1,divide_N,3))

local_force = np.insert(local_force ,xxed + 1,r2,axis=1)

if yyst != 0:

r2 = np.zeros((yyst,PN,3))

local_force = np.insert(local_force ,0,r2,axis=0)

if yyed+1 != PN:

r2 = np.zeros((PN - yyed - 1,PN,3))

local_force = np.insert(local_force ,yyed + 1,r2,axis=0)

del r2

if xn_l != yn_l:

sum_f = sum_f + local_force

sum_f = sum_f - local_force.transpose(1,0,2)

else:

sum_f = sum_f + local_force

del local_force

gc.collect()

#print (sum_f)

print("=====================================")

mmm = sum_f.sum(axis=1)

xyzF = xyzF + mmm

def calc_XY_start_end(Xn, Yn, quotient, remainder, divide_N, extra):

global PN

global xyzP

if Xn != quotient-1 and remainder != 0 or remainder == 0:

X_st = Xn*divide_N

X_ed = (Xn+1)*divide_N-1

else:

X_st = Xn*divide_N - (divide_N - remainder)

X_ed = (Xn+1)*divide_N - (divide_N - remainder) -1

if Yn != quotient-1 and remainder != 0 or remainder == 0:

Y_st = Yn*divide_N

Y_ed = (Yn+1)*divide_N-1

else:

Y_st = Yn*divide_N - (divide_N - remainder)

Y_ed = (Yn+1)*divide_N -(divide_N - remainder) -1

Xa = np.arange(X_st,X_ed+1)

Ya = np.arange(Y_st,Y_ed+1)

mx1,my1 = np.meshgrid(Xa,Ya)

local_xyz = xyzP[mx1]-xyzP[my1]

distance_a = np.linalg.norm(local_xyz, axis=2)

tmp_d = distance_a[:,:,np.newaxis]

tmp_f = local_xyz/tmp_d

tmp_f = np.nan_to_num(tmp_f,0)

#extra clear

if remainder != 0:

if Xn + 1 == quotient:

for dd in range(0,extra):

tmp_f[:,dd] = 0

if Yn + 1 == quotient:

for dd in range(0,extra):

tmp_f[[dd]] = 0

return Xn,Yn,X_st,X_ed,Y_st,Y_ed,tmp_f

def init_lattice():

global xyzF

global xyzP

pnum = 0

while pnum < PN:

xyzP[pnum][XX] = random.uniform(-1,1)

xyzP[pnum][YY] = random.uniform(-1,1)

xyzP[pnum][ZZ] = random.uniform(-1,1)

xyzF[pnum][XX] = random.uniform(-1,1)

xyzF[pnum][YY] = random.uniform(-1,1)

xyzF[pnum][ZZ] = random.uniform(-1,1)

pnum += 1

def results_sum():

global xyzF

pnum = 0

total_F = 0

while pnum < PN:

total_F = total_F + xyzF[pnum][XX]

total_F = total_F + xyzF[pnum][YY]

total_F = total_F + xyzF[pnum][ZZ]

pnum += 1

print (total_F)

if __name__ == "__main__":

init_lattice()

find_pair()

results_sum()

コメントを残す コメントをキャンセル

コメントを残すコメントをキャンセル