pythonのmultiprocessingを使いクーロン力の並列計算をするテスト

前回はクーロン力計算部分の高速化を検討した。結果としては、シンプルに2重ループを使った計算が最も速かった。今回は、このクーロン力計算部分をmultiprocessingを使ったマルチプロセスによる並列化で高速化できるか試してみる。

作成したコードが下記。結果は2.09[sec]。使用coreは4スレッド仕様なので、4プロセスでの処理を行った。前回の処理時間が約8秒であったのを考えると、理想どおり4分の1になる結果が得られた。

###########################
# multiprocessing test ####
###########################
import random
import math
import time
import scipy.special as scm
from multiprocessing import Pool

# Fixed seed so every run generates the same particle configuration.
random.seed(1)

# Column indices of one particle record:
# position (PX, PY, PZ), velocity (VX, VY, VZ), force (FX, FY, FZ).
PX, PY, PZ = 0, 1, 2
VX, VY, VZ = 3, 4, 5
FX, FY, FZ = 6, 7, 8

# Number of particles along one line.
line_num = 15

# Total number of particles.
PN = line_num * line_num * line_num

# One 9-slot record per particle
# (PX, PY, PZ, VX, VY, VZ, FX, FY, FZ), every slot starting at 0.
xyz = [[0] * 9 for _ in range(PN)]

# Number of distinct particle pairs, i.e. Coulomb terms to evaluate.
# exact=True makes scipy count with integer arithmetic, so the value is not
# routed through a floating-point approximation.
combinum = int(scm.comb(PN, 2, exact=True))

# Number of worker processes.
core = 4

def find_pair_sub(prep,pend,thread):
  """Accumulate the pairwise terms for the rows ``prep <= i < pend``.

  For every i in that range and every j with ``i < j < PN`` the vector
  ``d / r**3`` is added to particle i and subtracted from particle j, where
  d is the position of i minus the position of j and r is its length.
  Positions are read from the module-level ``xyz`` table.

  Args:
    prep: first row index handled by this call (inclusive).
    pend: end of the row range (exclusive).
    thread: chunk index supplied by the caller; not used by the computation.

  Returns:
    A new list of PN ``[fx, fy, fz]`` partial sums. ``xyz`` is not modified.

  Raises:
    ZeroDivisionError: if two particles of a visited pair have the same
      position (r == 0).
  """
  # Local result table so that no state is shared between callers.
  partial = [[0 for _ in range(3)] for _ in range(PN)]

  for i in range(prep, pend):
    pos_i = xyz[i]
    acc_i = partial[i]
    for j in range(i + 1, PN):
      pos_j = xyz[j]
      dx = pos_i[PX] - pos_j[PX]
      dy = pos_i[PY] - pos_j[PY]
      dz = pos_i[PZ] - pos_j[PZ]
      r = math.sqrt(dx*dx + dy*dy + dz*dz)

      # r**3 and the three quotients are evaluated once per pair and then
      # applied with opposite signs to the two particles.
      r3 = r*r*r
      tx = dx/r3
      ty = dy/r3
      tz = dz/r3

      acc_i[0] += tx
      acc_i[1] += ty
      acc_i[2] += tz

      acc_j = partial[j]
      acc_j[0] -= tx
      acc_j[1] -= ty
      acc_j[2] -= tz

  return partial

def wrapper(args):
  """Unpack one work item and forward it to find_pair_sub.

  Pool.map hands each worker a single object, so the
  ``[prep, pend, thread]`` work item is expanded into positional arguments
  here.
  """
  partial = find_pair_sub(*args)
  return partial


def _init_worker(shared_xyz):
  """Install the parent's particle table as the worker's global ``xyz``."""
  global xyz
  xyz = shared_xyz


def _build_worklist(pn, total_pairs, nproc):
  """Split rows 0..pn-1 into at most ``nproc`` contiguous chunks.

  Row i owns ``pn - i - 1`` pairs, so rows are grouped until the running
  pair count reaches the next multiple of ``total_pairs // nproc``; the
  final chunk takes every remaining row.

  Returns:
    A list of ``[start, end, index]`` items (``end`` exclusive) that together
    cover every row exactly once. Empty when ``pn`` is not positive.
  """
  if pn <= 0:
    return []
  if nproc <= 1:
    return [[0, pn, 0]]

  share = total_pairs // nproc
  worklist = []
  start = 0
  counted = 0
  for i in range(pn):
    if len(worklist) == nproc - 1:
      # Everything that is left belongs to the final chunk.
      break
    counted += pn - i - 1
    if counted >= share * (len(worklist) + 1):
      worklist.append([start, i + 1, len(worklist)])
      start = i + 1

  if start < pn:
    worklist.append([start, pn, len(worklist)])
  return worklist


def find_pair():
  """Compute all pairwise terms in parallel and add them to ``xyz``.

  The rows of the pair loop are split into chunks of roughly equal pair
  count, each chunk is evaluated by ``wrapper`` in a pool of ``core`` worker
  processes, and every returned partial table is added to the FX/FY/FZ slots
  of ``xyz``.
  """
  worklist = _build_worklist(PN, combinum, core)
  if not worklist:
    return

  # Hand the populated particle table to each worker explicitly. The table is
  # filled in only under the __main__ guard, so a worker that starts from a
  # fresh import of this module would otherwise see the all-zero table.
  pool = Pool(processes=core, initializer=_init_worker, initargs=(xyz,))
  try:
    partials = pool.map(wrapper, worklist)
  finally:
    # Release the worker processes even if a chunk raised.
    pool.close()
    pool.join()

  # Sum whatever number of partial tables came back, not a fixed count.
  for partial in partials:
    for particle, (fx, fy, fz) in zip(xyz, partial):
      particle[FX] += fx
      particle[FY] += fy
      particle[FZ] += fz

def init_lattice():
  """Randomise the position and force slots of every particle in ``xyz``.

  For each of the PN particles the slots PX, PY, PZ, FX, FY, FZ are assigned,
  in that order, a fresh ``random.uniform(-1, 1)`` value. The velocity slots
  are left untouched.
  """
  for index in range(PN):
    record = xyz[index]
    for slot in (PX, PY, PZ, FX, FY, FZ):
      record[slot] = random.uniform(-1,1)

# Run the benchmark only when this file is executed as a script: a
# multiprocessing worker may import this module, and that import must not
# start the computation again.
if __name__ == "__main__":
  # Fill the particle table, then evaluate all pairs in parallel.
  init_lattice()
  find_pair()

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

###########################

# multiprocessing test ####

###########################

import random

import math

import time

import scipy.special as scm

from multiprocessing import Pool

# Fixed seed so every run generates the same particle configuration.
random.seed(1)

# Column indices of one particle record:
# position (PX, PY, PZ), velocity (VX, VY, VZ), force (FX, FY, FZ).
PX, PY, PZ = 0, 1, 2
VX, VY, VZ = 3, 4, 5
FX, FY, FZ = 6, 7, 8

# Number of particles along one line.
line_num = 15

# Total number of particles.
PN = line_num * line_num * line_num

# One 9-slot record per particle
# (PX, PY, PZ, VX, VY, VZ, FX, FY, FZ), every slot starting at 0.
xyz = [[0] * 9 for _ in range(PN)]

# Number of distinct particle pairs, i.e. Coulomb terms to evaluate.
# exact=True makes scipy count with integer arithmetic, so the value is not
# routed through a floating-point approximation.
combinum = int(scm.comb(PN, 2, exact=True))

# Number of worker processes.
core = 4

def find_pair_sub(prep,pend,thread):
  """Accumulate the pairwise terms for the rows ``prep <= i < pend``.

  For every i in that range and every j with ``i < j < PN`` the vector
  ``d / r**3`` is added to particle i and subtracted from particle j, where
  d is the position of i minus the position of j and r is its length.
  Positions are read from the module-level ``xyz`` table.

  Args:
    prep: first row index handled by this call (inclusive).
    pend: end of the row range (exclusive).
    thread: chunk index supplied by the caller; not used by the computation.

  Returns:
    A new list of PN ``[fx, fy, fz]`` partial sums. ``xyz`` is not modified.

  Raises:
    ZeroDivisionError: if two particles of a visited pair have the same
      position (r == 0).
  """
  # Local result table so that no state is shared between callers.
  partial = [[0 for _ in range(3)] for _ in range(PN)]

  for i in range(prep, pend):
    pos_i = xyz[i]
    acc_i = partial[i]
    for j in range(i + 1, PN):
      pos_j = xyz[j]
      dx = pos_i[PX] - pos_j[PX]
      dy = pos_i[PY] - pos_j[PY]
      dz = pos_i[PZ] - pos_j[PZ]
      r = math.sqrt(dx*dx + dy*dy + dz*dz)

      # r**3 and the three quotients are evaluated once per pair and then
      # applied with opposite signs to the two particles.
      r3 = r*r*r
      tx = dx/r3
      ty = dy/r3
      tz = dz/r3

      acc_i[0] += tx
      acc_i[1] += ty
      acc_i[2] += tz

      acc_j = partial[j]
      acc_j[0] -= tx
      acc_j[1] -= ty
      acc_j[2] -= tz

  return partial

def wrapper(args):
  """Unpack one work item and forward it to find_pair_sub.

  Pool.map hands each worker a single object, so the
  ``[prep, pend, thread]`` work item is expanded into positional arguments
  here.
  """
  return find_pair_sub(*args)

def _init_worker(shared_xyz):
  """Install the parent's particle table as the worker's global ``xyz``."""
  global xyz
  xyz = shared_xyz


def _build_worklist(pn, total_pairs, nproc):
  """Split rows 0..pn-1 into at most ``nproc`` contiguous chunks.

  Row i owns ``pn - i - 1`` pairs, so rows are grouped until the running
  pair count reaches the next multiple of ``total_pairs // nproc``; the
  final chunk takes every remaining row.

  Returns:
    A list of ``[start, end, index]`` items (``end`` exclusive) that together
    cover every row exactly once. Empty when ``pn`` is not positive.
  """
  if pn <= 0:
    return []
  if nproc <= 1:
    return [[0, pn, 0]]

  share = total_pairs // nproc
  worklist = []
  start = 0
  counted = 0
  for i in range(pn):
    if len(worklist) == nproc - 1:
      # Everything that is left belongs to the final chunk.
      break
    counted += pn - i - 1
    if counted >= share * (len(worklist) + 1):
      worklist.append([start, i + 1, len(worklist)])
      start = i + 1

  if start < pn:
    worklist.append([start, pn, len(worklist)])
  return worklist


def find_pair():
  """Compute all pairwise terms in parallel and add them to ``xyz``.

  The rows of the pair loop are split into chunks of roughly equal pair
  count, each chunk is evaluated by ``wrapper`` in a pool of ``core`` worker
  processes, and every returned partial table is added to the FX/FY/FZ slots
  of ``xyz``.
  """
  worklist = _build_worklist(PN, combinum, core)
  if not worklist:
    return

  # Hand the populated particle table to each worker explicitly. The table is
  # filled in only under the __main__ guard, so a worker that starts from a
  # fresh import of this module would otherwise see the all-zero table.
  pool = Pool(processes=core, initializer=_init_worker, initargs=(xyz,))
  try:
    partials = pool.map(wrapper, worklist)
  finally:
    # Release the worker processes even if a chunk raised.
    pool.close()
    pool.join()

  # Sum whatever number of partial tables came back, not a fixed count.
  for partial in partials:
    for particle, (fx, fy, fz) in zip(xyz, partial):
      particle[FX] += fx
      particle[FY] += fy
      particle[FZ] += fz

def init_lattice():
  """Randomise the position and force slots of every particle in ``xyz``.

  For each of the PN particles the slots PX, PY, PZ, FX, FY, FZ are assigned,
  in that order, a fresh ``random.uniform(-1, 1)`` value. The velocity slots
  are left untouched.
  """
  for index in range(PN):
    record = xyz[index]
    for slot in (PX, PY, PZ, FX, FY, FZ):
      record[slot] = random.uniform(-1,1)

# Run the benchmark only when this file is executed as a script: a
# multiprocessing worker may import this module, and that import must not
# start the computation again.
if __name__ == "__main__":
  # Fill the particle table, then evaluate all pairs in parallel.
  init_lattice()
  find_pair()

■結果　[Results summary]

code type	時間[sec]
multiprocessing (4core)	2.09 <-(new)
itertools使用 (no1)	8.18
range記述 (no2)	7.93
xrange記述 (no3)	7.89
ループ内周でnumpy使用 (no4)	78.46

Last time I examined how to speed up the Coulomb force calculation. The fastest variant turned out to be the one that uses a simple double loop. This time I try to speed up the same Coulomb force calculation by running it in parallel across several processes with multiprocessing.

The code I wrote is shown above. The result is 2.09 [sec]. The CPU used provides 4 threads, so the calculation was run with 4 processes. Considering that the previous processing time was about 8 seconds, the time dropped to roughly one quarter, as ideally expected.

コメントを残す コメントをキャンセル

コメントを残すコメントをキャンセル