Python in the box

2月 16, 20182月 16, 2018

numpyとopenCVを使った画像のフーリエ変換と逆変換

openCVを使い画像読み込み、fftで周波数データに変換。その後逆変換で元の画像に戻すテスト。

入力に使用する画像は↓。サイズは360×240。
input data to fft

まず画像入力、グレースケールで取り込む。

###########
# code 1
###########
import cv2
from PIL import Image

# Load the picture as a single-channel (grayscale) image.
image = cv2.imread("mt_fuji.jpg", cv2.IMREAD_GRAYSCALE)
if image is None:
    # cv2.imread reports a missing or unreadable file by returning None.
    raise IOError("could not read mt_fuji.jpg")

cv2.imshow("original", image)
cv2.waitKey(0)  # block until a key is pressed
# destroyWindow() needs the name of the window to close; the original call
# without an argument raises instead of closing the window.
cv2.destroyWindow("original")

###########

# code 1

###########

import cv2

from PIL import Image

image = cv2.imread("mt_fuji.jpg", cv2.IMREAD_GRAYSCALE)

cv2.imshow("original",image)

cv2.waitKey(0)

cv2.destroyWindow()

実行すると元の画像がグレースケールで表示される。
imreadは引数2つ、1つ目は読み込みファイル、2つ目は読み込みオプションで以下3つ(1,0,-1でも指定可)。

　　cv2.IMREAD_COLOR (or 1)
　　cv2.IMREAD_GRAYSCALE (or 0)
　　cv2.IMREAD_UNCHANGED (or -1)

続けてFFT実施

###########
# code 2
###########
import cv2
import numpy as np
from PIL import Image

# Load the picture as a single-channel (grayscale) image.
image = cv2.imread("mt_fuji.jpg", cv2.IMREAD_GRAYSCALE)
if image is None:
    # cv2.imread reports a missing or unreadable file by returning None.
    raise IOError("could not read mt_fuji.jpg")

# 2-D FFT: the result is a complex array with the same shape as the image.
fimage = np.fft.fft2(image)
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(fimage)
print(fimage.shape)

###########

# code 2

###########

import cv2

import numpy as np

from PIL import Image

image = cv2.imread("mt_fuji.jpg", cv2.IMREAD_GRAYSCALE)

#cv2.imshow("original",image)

#cv2.waitKey(0)

#cv2.destroyWindow()

fimage = np.fft.fft2(image)

print fimage

print fimage.shape

np.fft.fft2()は2次元FFT。
実行結果が下記。入力が360×240(幅×高さ)の画像だったので、shapeが(240, 360)(行数×列数)のnumpy arrayで返ってくる。

[[ 1.20523050e+07      +0.        j -3.67019062e+05 +764482.77644721j
   8.97803335e+04 +528353.446206  j ... -8.56635235e+04 -151789.8195801 j
   8.97803335e+04 -528353.446206  j -3.67019062e+05 -764482.77644721j]
 [-5.06881547e+05-1884930.25115523j -4.50139049e+04 -190712.10861819j
  -1.76090294e+05 -185634.30113876j ...  8.13464047e+04  +33421.45807695j
   1.54174557e+05 +151124.32767313j  2.22512389e+05 +112510.91249437j]
 [-2.70716797e+05 +598066.58169905j  2.74010196e+05  +76140.89477773j
   3.81093895e+04 -142669.2043982 j ...  7.45069646e+04  -13227.88422994j
   2.39614427e+03 -107962.49955725j -1.21014694e+05  +16070.44342708j]
 ...
 [-4.93325081e+05+1019497.68318583j  1.10749640e+05 -185659.62071232j
  -2.02180759e+04 -244150.48215119j ... -7.68268395e+03   +7667.31543078j
   1.36815674e+05   -2094.526859  j  4.18478921e+04  -76427.43943856j]
 [-2.70716797e+05 -598066.58169905j -1.21014694e+05  -16070.44342708j
   2.39614427e+03 +107962.49955725j ... -5.75415408e+02  +50912.11759434j
   3.81093895e+04 +142669.2043982 j  2.74010196e+05  -76140.89477773j]
 [-5.06881547e+05+1884930.25115523j  2.22512389e+05 -112510.91249437j
   1.54174557e+05 -151124.32767313j ... -4.22761652e+04  +70475.447572  j
  -1.76090294e+05 +185634.30113877j -4.50139049e+04 +190712.10861819j]]
(240, 360)

[[ 1.20523050e+07 +0. j -3.67019062e+05 +764482.77644721j

8.97803335e+04 +528353.446206 j ... -8.56635235e+04 -151789.8195801 j

8.97803335e+04 -528353.446206 j -3.67019062e+05 -764482.77644721j]

[-5.06881547e+05-1884930.25115523j -4.50139049e+04 -190712.10861819j

-1.76090294e+05 -185634.30113876j ... 8.13464047e+04 +33421.45807695j

1.54174557e+05 +151124.32767313j 2.22512389e+05 +112510.91249437j]

[-2.70716797e+05 +598066.58169905j 2.74010196e+05 +76140.89477773j

3.81093895e+04 -142669.2043982 j ... 7.45069646e+04 -13227.88422994j

2.39614427e+03 -107962.49955725j -1.21014694e+05 +16070.44342708j]

...

[-4.93325081e+05+1019497.68318583j 1.10749640e+05 -185659.62071232j

-2.02180759e+04 -244150.48215119j ... -7.68268395e+03 +7667.31543078j

1.36815674e+05 -2094.526859 j 4.18478921e+04 -76427.43943856j]

[-2.70716797e+05 -598066.58169905j -1.21014694e+05 -16070.44342708j

2.39614427e+03 +107962.49955725j ... -5.75415408e+02 +50912.11759434j

3.81093895e+04 +142669.2043982 j 2.74010196e+05 -76140.89477773j]

[-5.06881547e+05+1884930.25115523j 2.22512389e+05 -112510.91249437j

1.54174557e+05 -151124.32767313j ... -4.22761652e+04 +70475.447572 j

-1.76090294e+05 +185634.30113877j -4.50139049e+04 +190712.10861819j]]

(240, 360)

変換結果をパワースペクトルで確認してみる。確認に使用したコードが下記。

###########
# code 3
###########
import cv2
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image

# Load the picture as a single-channel (grayscale) image.
image = cv2.imread("mt_fuji.jpg", cv2.IMREAD_GRAYSCALE)
if image is None:
    # cv2.imread reports a missing or unreadable file by returning None.
    raise IOError("could not read mt_fuji.jpg")

fimage = np.fft.fft2(image)

# Replace quadrant
# 1st <-> 3rd, 2nd <-> 4th (puts the zero-frequency term in the centre)
fimg = np.fft.fftshift(fimage)
# Power spectrum calculation, in dB: 20*log10(|F|).
# The +1 keeps empty bins at 0 dB instead of log(0) = -inf, which would
# otherwise wreck the colour scaling of imshow.
mag = 20*np.log10(np.abs(fimg) + 1.0)

# Left: input image, right: centred log-magnitude spectrum.
plt.subplot(121)
plt.imshow(image,cmap = 'gray')
plt.subplot(122)
plt.imshow(mag,cmap = 'gray')
plt.show()

###########

# code 3

###########

import cv2

import numpy as np

import matplotlib.pyplot as plt

from PIL import Image

image = cv2.imread("mt_fuji.jpg", cv2.IMREAD_GRAYSCALE)

#cv2.imshow("original",image)

#cv2.waitKey(0)

#cv2.destroyWindow()

fimage = np.fft.fft2(image)

#print fimage

#print fimage.shape

# Replace quadrant

# 1st <-> 3rd, 2nd <-> 4th

fimg = np.fft.fftshift(fimage)

# Power spectrum calculation

mag = 20*np.log(np.abs(fimg))

plt.subplot(121)

plt.imshow(image,cmap = 'gray')

plt.subplot(122)

plt.imshow(mag,cmap = 'gray')

plt.show()

実行結果下記
input data to fft

最後に逆変換で元の画像に戻すコード。注意として変換後の結果には複素数成分が含まれているので、実部を取り出す処理が必要。またグレースケールで表示するために0-255階調への値調整が必要になる。

###########
# code 4
###########
import cv2
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image

# Load the picture as a single-channel (grayscale) image.
image = cv2.imread("mt_fuji.jpg", cv2.IMREAD_GRAYSCALE)
if image is None:
    # cv2.imread reports a missing or unreadable file by returning None.
    raise IOError("could not read mt_fuji.jpg")

fimage = np.fft.fft2(image)

ifimage = np.fft.ifft2(fimage)
# Extract real part (ifft2 returns a complex array)
ifimage = ifimage.real
# Convert to 255 tones for gray scale.
# A bare np.uint8() cast truncates, so a round-trip value such as 254.9999
# would become 254; round to the nearest integer and clip to 0..255 first.
ifimage = np.clip(np.rint(ifimage), 0, 255).astype(np.uint8)

cv2.imshow("fft and ifft",ifimage)
cv2.waitKey(0)
# destroyWindow() needs the name of the window to close.
cv2.destroyWindow("fft and ifft")

###########

# code 4

###########

import cv2

import numpy as np

import matplotlib.pyplot as plt

from PIL import Image

image = cv2.imread("mt_fuji.jpg", cv2.IMREAD_GRAYSCALE)

#cv2.imshow("original",image)

#cv2.waitKey(0)

#cv2.destroyWindow()

fimage = np.fft.fft2(image)

#print fimage

#print fimage.shape

ifimage = np.fft.ifft2(fimage)

# Extract real part

ifimage = ifimage.real

# Convert to 255 tones for gray scale

ifimage = np.uint8(ifimage)

cv2.imshow("fft and ifft",ifimage)

cv2.waitKey(0)

cv2.destroyWindow()

実行結果が下記グレースケールでもとの画像に戻ってきた。
results fft and inv fft

Read images using openCV, convert to frequency data with fft. And then back to the original image with reverse transformation.
Code 1 is reading image by gray scale.
Code 2 is 2D fft by numpy.　Frequency distribution is returned.
Code 3 is checking Power spectrum.
Code 4 is the inverse Fourier transform by numpy.

2月 10, 20182月 10, 2018

pythonのthreadingを使いクーロン力の並列計算をするテスト

以前行った、multiprocessingでのクーロン力並列計算に引き続き、threadingを使ったクーロン力の並列計算をテストする。計算条件はmultiprocessingの時と同じくcore数を4、計算粒子数を15^3個とした。

結果、計算時間は14.36[sec]。multiprocessingでの計算は2.09[sec]であったため、大分時間がかかるというかシングルスレッドで計算していた時より時間がかかっている。threadingでは各threadがメモリを共有するために必要な情報を取得するためメモリアクセスする際に、排他ロック(GIL)が起きているのかな、と考えられる。試しにthread数を1にして実行してみると処理時間は6.92[sec]。thread化しない方が高速であった。

■結果　[Results summary]

code type	時間[sec]
threading (4threads)	14.36 <-(new!)
threading (1thread)	6.92 <-(new!)
multiprocessing	2.09
itertools使用 (no1)	8.18
range記述 (no2)	7.93
xrange記述 (no3)	7.89
ループ内周でnumpy使用 (no4)	78.46

■使用したコードは下記(use 4 threads)

###########################
# threading test ##########
###########################
import random
import math
import itertools
import time
import scipy.misc as scm
import numpy as np
import threading
from Queue import Queue

# Fixed seed so every run uses the same random particle data.
random.seed(1)

# Column indices inside one particle record: position, velocity, force.
PX = 0;PY = 1;PZ = 2;
VX = 3;VY = 4;VZ = 5;
FX = 6;FY = 7;FZ = 8;

#number of particles in a line
line_num = 15

#total particle num
PN = line_num * line_num * line_num

#ready to 9 parameters for particle 
#(PX, PY, PZ, VX, VY, VZ, FX, FY, FZ)
xyz = [[0 for i in range(9)] for j in range(PN)]

#Number of combinations of coulomb force calculation
# C(PN, 2) computed exactly with integers; scipy.misc.comb is no longer
# available in current SciPy releases.
combinum = PN * (PN - 1) // 2

#thread number(local thread num)
# 4 matches the "4 threads" run reported in the results table.
core = 4

def find_pair_sub(prep,pend,thread,q):
  """Accumulate pair forces for rows prep..pend-1 and put the result on q.

  For every i in [prep, pend) and every j in (i, PN) the term d/r**3
  (d = position[i] - position[j], r = |d|) is added to particle i and
  subtracted from particle j in a private accumulator.

  prep, pend -- half-open range of first-particle indices to process.
  thread     -- worker index; accepted for the caller's bookkeeping but
                not used in this function.
  q          -- queue that receives the PN x 3 list of partial forces.

  Raises ZeroDivisionError if two particles have identical positions.
  """
  #local results array
  xyzF = [[0 for i in range(3)] for j in range(PN)]
  fx = 0; fy = 1; fz = 2

  for i in xrange(prep,pend):
    for j in xrange(i + 1, PN):
      dx = xyz[i][PX] - xyz[j][PX]
      dy = xyz[i][PY] - xyz[j][PY]
      dz = xyz[i][PZ] - xyz[j][PZ]
      r  = math.sqrt(dx*dx + dy*dy + dz*dz)
      # r**3 is shared by all six updates below; compute it once per pair.
      r3 = r*r*r

      xyzF[i][fx] = xyzF[i][fx] + dx/r3
      xyzF[i][fy] = xyzF[i][fy] + dy/r3
      xyzF[i][fz] = xyzF[i][fz] + dz/r3
      xyzF[j][fx] = xyzF[j][fx] - dx/r3
      xyzF[j][fy] = xyzF[j][fy] - dy/r3
      xyzF[j][fz] = xyzF[j][fz] - dz/r3

  q.put(xyzF)

def find_pair():
  """Split the pair loop over worker threads and add the results to xyz.

  The rows of the i<j pair triangle are cut into consecutive ranges that
  each hold roughly combinum // core pairs. One thread runs find_pair_sub
  per range, and the partial forces it puts on the queue are summed into
  the FX/FY/FZ columns of xyz.
  """
  q = Queue()
  # target number of pairs per worker
  pw = combinum // core

  localt = 0     # pairs covered by rows 0..i so far
  thread = 0     # index of the next work item
  pre = 0        # first row of the work item being built
  worklist = []  # [first_row, end_row, worker_index]
  ppp = pw       # cumulative pair count at which the current item is closed

  for i in range(PN) :

    if core == 1:
      worklist.append([pre,PN,thread])
      break

    # row i contributes the pairs (i, i+1) .. (i, PN-1)
    localt = localt + (PN - i - 1)
    if localt >= ppp:
      worklist.append([pre,i,thread])
      ppp += pw
      thread += 1
      pre = i

  # Rows after the last cut belong to the last work item: stretch it to PN.
  if i != pre:
    prep = worklist[thread-1][0]
    worklist[thread-1] = [prep,PN,thread-1]

  # Keep our own handles on the workers. threading.enumerate() lists only
  # threads that are still alive, so a worker that finished early would be
  # missed and its result never collected.
  workers = []
  for prep, pend, index in worklist:
    worker = threading.Thread(target=find_pair_sub, args=(prep,pend,index,q))
    worker.start()
    workers.append(worker)

  for worker in workers:
    worker.join()
  # every worker has put exactly one result on the queue by now
  results = [q.get() for worker in workers]

  for partial in results:
    for i in range(PN):
      xyz[i][FX] += partial[i][0]
      xyz[i][FY] += partial[i][1]
      xyz[i][FZ] += partial[i][2]

def init_lattice():
  """Fill the position and force columns of every particle with random values.

  Each of PX, PY, PZ, FX, FY, FZ receives an independent draw from
  random.uniform(-1, 1); the velocity columns are left untouched.
  """
  for particle in range(PN):
    # same draw order as before: position first, then force
    for column in (PX, PY, PZ, FX, FY, FZ):
      xyz[particle][column] = random.uniform(-1,1)

if __name__ == "__main__":
  init_lattice()
  find_pair()

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

###########################

# threading test ##########

###########################

import random

import math

import itertools

import time

import scipy.misc as scm

import numpy as np

import threading

from Queue import Queue

random.seed(1)

PX = 0;PY = 1;PZ = 2;

VX = 3;VY = 4;VZ = 5;

FX = 6;FY = 7;FZ = 8;

#number of particles in a line

line_num = 15

#total particle num

PN = line_num * line_num * line_num

#ready to 9 parameters for particle

#(PX, PY, PZ, VX, VY, VZ, FX, FY, FZ)

xyz = [[0 for i in range(9)] for j in range(PN)]

#Number of combinations of coulomb force calculation

combinum = int(scm.comb(PN, 2))

#thread number(local thread num)

core = 20

def find_pair_sub(prep,pend,thread,q):

global xyz

#local results array

xyzF = [[0 for i in range(3)] for j in range(PN)]

fx = 0; fy = 1; fz = 2

for i in xrange(prep,pend):

for j in xrange(i + 1, PN):

dx = xyz[i][PX] - xyz[j][PX]

dy = xyz[i][PY] - xyz[j][PY]

dz = xyz[i][PZ] - xyz[j][PZ]

r = math.sqrt(dx*dx + dy*dy + dz*dz)

xyzF[i][fx] = xyzF[i][fx] + dx/(r*r*r)

xyzF[i][fy] = xyzF[i][fy] + dy/(r*r*r)

xyzF[i][fz] = xyzF[i][fz] + dz/(r*r*r)

xyzF[j][fx] = xyzF[j][fx] - dx/(r*r*r)

xyzF[j][fy] = xyzF[j][fy] - dy/(r*r*r)

xyzF[j][fz] = xyzF[j][fz] - dz/(r*r*r)

q.put(xyzF)

def find_pair():

global PN

global combinum

q = Queue()

pw = combinum // core

pl = combinum % core

localt = 0

thread = 0

pre = 0

worklist = []

ppp = pw

for i in range(PN) :

if core == 1:

worklist.append([pre,PN,thread])

break

localt = localt + (PN - i - 1)

if localt >= ppp:

worklist.append([pre,i,thread])

ppp += pw

thread += 1

pre = i

if i != pre:

prep = worklist[thread-1][0]

worklist[thread-1] = [prep,PN,thread-1]

results = []

for i in range(core):

thread = threading.Thread(target=find_pair_sub, args=(worklist[i][0],worklist[i][1],worklist[i][2],q))

thread.start()

thread_list = threading.enumerate()

main_thread = threading.currentThread()

thread_list.remove(main_thread)

for thread in thread_list:

thread.join()

results.append(q.get())

for j in range(core):

for i in range(PN):

xyz[i][FX] += results[j][i][0]

xyz[i][FY] += results[j][i][1]

xyz[i][FZ] += results[j][i][2]

def init_lattice():

global xyz

pnum = 0

while pnum < PN:

xyz[pnum][PX] = random.uniform(-1,1)

xyz[pnum][PY] = random.uniform(-1,1)

xyz[pnum][PZ] = random.uniform(-1,1)

xyz[pnum][FX] = random.uniform(-1,1)

xyz[pnum][FY] = random.uniform(-1,1)

xyz[pnum][FZ] = random.uniform(-1,1)

pnum += 1

if __name__ == "__main__":

init_lattice()

find_pair()

Following previous parallel computation of Coulomb force in multiprocessing, we test parallel computation of Coulomb force using threading. The same condition as before the multiprocessing calculation condition, the core number was set to 4 and the number of calculated particles was set to 15 ^ 3.

As a result, the calculation time was 14.36 [sec]. The multiprocessing version took 2.09 [sec], so threading is much slower, and in fact slower than the single-threaded calculation. A likely cause is the global interpreter lock (GIL): the threads share memory, but only one thread can execute Python code at a time, so the threads contend for the lock instead of running in parallel. Running with a single thread took 6.92 [sec]; it was faster not to use multiple threads.

2月 9, 20182月 9, 2018

マルチスレッドのテスト

スレッドを発行して並列化するテスト。スレッドの開始はstart(),終了待ちはjoin()で行う。すべてのスレッドの終了をチェックするためには、現在発行中の全スレッドをリストアップするenumerate()を使う。ただしこのリストの中には、メインスレッドも含まれているので、メインスレッドは抜く必要がある。メインスレッドはcurrentThread()であらわせるので、これをremoveで取り除く。取り除いた結果に対して、join()で終了待ちし、全スレッドが終了したら終了メッセージを出す。各スレッドからの返り値はQueueを使って貯めることができる。貯めたいqueueを引数に渡してやることで、スレッド終了後に全スレッドの結果を読み出すことができる。

import threading, time
import random
from Queue import Queue

def ppp(a, q):
    c = random.uniform(0,5)
    print "thread = ",a," rand = ",c
    time.sleep(c)
    q.put(c)
    print "end thread = ",a

if __name__ == "__main__":
  q = Queue()
  results = []
  for i in range(8):
    thread = threading.Thread(target=ppp, args=(i, q))  # Initialize
    thread.start() # Start


  thread_list = threading.enumerate()
  main_thread = threading.currentThread()
  thread_list.remove(main_thread)
  for thread in thread_list:
    thread.join()
    results.append(q.get())

  print results
  print "end"

import threading, time

import random

from Queue import Queue

def ppp(a, q):

c = random.uniform(0,5)

print "thread = ",a," rand = ",c

time.sleep(c)

q.put(c)

print "end thread = ",a

if __name__ == "__main__":

q = Queue()

results = []

for i in range(8):

thread = threading.Thread(target=ppp, args=(i, q)) # Initialize

thread.start() # Start

thread_list = threading.enumerate()

main_thread = threading.currentThread()

thread_list.remove(main_thread)

for thread in thread_list:

thread.join()

results.append(q.get())

print results

print "end"

■実行結果
thread = 0 rand = 1.97370804812
thread = 1 rand = 3.45525477664
thread = 2 rand = 0.651252139145
thread = 3 rand = 3.96551177159
thread = 4 rand = 3.57845628232
thread = 5 rand = 1.81758678574
thread = 6 rand = 2.31490107118
thread = 7 rand = 1.21471678276
end thread = 2
end thread = 7
end thread = 5
end thread = 0
end thread = 6
end thread = 1
end thread = 4
end thread = 3
[0.6512521391445797, 1.2147167827645151, 1.817586785739616, 1.9737080481200742, 2.3149010711817626, 3.4552547766386787, 3.5784562823201704, 3.9655117715879653]
end

A test that issues threads and parallelizes them. The start of the thread is done with start (), and the wait for completion is done with join (). To check the termination of all threads, use enumerate () which lists all the threads that are currently issuing. However, since the main thread is also included in this list, it is necessary to withdraw the main thread. The main thread can be set with currentThread (), so remove it with remove. Wait for join () to finish removing, and issue a termination message when all threads are finished. The return value from each thread can be stored using Queue. By passing the queue you want to save as an argument, you can read the results of all threads after the thread ends.

1月 30, 20181月 15, 2022

分子動力学を使用したイオントラップシミュレーションとOpenGLによる可視化(multiprocessingによる処理並列化版)

1月 29, 20189月 25, 2022

pythonのmultiprocessingを使いクーロン力の並列計算をするテスト

前回クーロン力計算部分の高速化を検討した。結果としてはシンプルに2重ループを使った計算。今回はこのクーロン力計算部分をmultiprocessingを使ったマルチプロセスによる並列化で高速化を試してみる。

作成したコードが下記。結果は2.09[sec]。使用coreは4スレッド仕様なので、4プロセスでの処理を行った。前回の処理時間が約8秒であったのを考えると、理想どおり4分の1になる結果が得られた。

###########################
# multiprocessing test ####
###########################
import random
import math
import time
import scipy.special as scm
from multiprocessing import Pool

# Fixed seed so every run uses the same random particle data.
random.seed(1)

# Column indices inside one particle record: position, velocity, force.
PX, PY, PZ, VX, VY, VZ, FX, FY, FZ = range(9)

# Particles per lattice edge and total particle count.
line_num = 15
PN = line_num ** 3

# One 9-slot record per particle:
# (PX, PY, PZ, VX, VY, VZ, FX, FY, FZ)
xyz = [[0] * 9 for _ in range(PN)]

# Number of particle pairs the force calculation has to visit.
combinum = int(scm.comb(PN, 2))

# Number of worker processes.
core = 4

def find_pair_sub(prep,pend,thread):
  """Return the partial pair forces for rows prep..pend-1.

  For every i in [prep, pend) and every j in (i, PN) the term d/r**3
  (d = position[i] - position[j], r = |d|) is added to particle i and
  subtracted from particle j.

  prep, pend -- half-open range of first-particle indices to process.
  thread     -- worker index; accepted for the caller's bookkeeping but
                not used in this function.

  Returns a PN x 3 list of [fx, fy, fz] contributions.
  Raises ZeroDivisionError if two particles have identical positions.
  """
  #local results array
  xyzF = [[0 for i in range(3)] for j in range(PN)]
  fx = 0; fy = 1; fz = 2

  for i in range(prep,pend):
    for j in range(i + 1, PN):
      dx = xyz[i][PX] - xyz[j][PX]
      dy = xyz[i][PY] - xyz[j][PY]
      dz = xyz[i][PZ] - xyz[j][PZ]
      r  = math.sqrt(dx*dx + dy*dy + dz*dz)
      # r**3 is shared by all six updates below; compute it once per pair.
      r3 = r*r*r

      xyzF[i][fx] = xyzF[i][fx] + dx/r3
      xyzF[i][fy] = xyzF[i][fy] + dy/r3
      xyzF[i][fz] = xyzF[i][fz] + dz/r3
      xyzF[j][fx] = xyzF[j][fx] - dx/r3
      xyzF[j][fy] = xyzF[j][fy] - dy/r3
      xyzF[j][fz] = xyzF[j][fz] - dz/r3

  return xyzF

def wrapper(args):
  """Unpack one work item and pass its elements to find_pair_sub.

  Pool.map hands each worker a single argument, so the work item
  (a sequence) is expanded into positional arguments here.
  """
  return find_pair_sub(*args)


def find_pair():
  """Split the pair loop over worker processes and add the results to xyz.

  The rows of the i<j pair triangle are cut into consecutive ranges that
  each hold roughly combinum // core pairs. A Pool of `core` processes
  maps wrapper() over the ranges, and the returned partial forces are
  summed into the FX/FY/FZ columns of xyz.

  NOTE(review): the workers read the module-level xyz. With a start method
  that re-imports this module instead of forking, init_lattice() (called
  only under __main__) would not have run in the workers -- confirm on
  non-fork platforms.
  """
  # target number of pairs per worker
  pw = combinum // core

  localt = 0     # pairs covered by rows 0..i so far
  thread = 0     # index of the next work item
  pre = 0        # first row of the work item being built
  #each thread work list: [first_row, end_row, worker_index]
  worklist = []
  ppp = pw       # cumulative pair count at which the current item is closed

  for i in range(PN) :
    if core == 1:
      worklist.append([pre,PN,thread])
      break

    # row i contributes the pairs (i, i+1) .. (i, PN-1)
    localt = localt + (PN - i - 1)
    if localt >= ppp:
      worklist.append([pre,i,thread])
      ppp += pw
      thread += 1
      pre = i

  # Rows after the last cut belong to the last work item: stretch it to PN.
  if i != pre:
    prep = worklist[thread-1][0]
    worklist[thread-1] = [prep,PN,thread-1]

  #make thread core num
  p = Pool(core)
  try:
    #start thread. results is callback in array.
    callback = p.map(wrapper, worklist)
  finally:
    # Always shut the pool down and wait for the workers to exit, even if
    # a worker raised, so no child processes are left behind.
    p.close()
    p.join()

  #summation each thread results
  # iterate over what was actually returned instead of assuming `core` items
  for partial in callback:
    for i in range(PN):
      xyz[i][FX] += partial[i][0]
      xyz[i][FY] += partial[i][1]
      xyz[i][FZ] += partial[i][2]

def init_lattice():
  """Give every particle random position and force components.

  The columns PX, PY, PZ, FX, FY, FZ are each set to an independent
  random.uniform(-1, 1) value, in that order; velocity columns are not
  written.
  """
  randomized_columns = (PX, PY, PZ, FX, FY, FZ)
  for index in range(PN):
    for col in randomized_columns:
      xyz[index][col] = random.uniform(-1,1)

if __name__ == "__main__":
  init_lattice()
  find_pair()

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

###########################

# multiprocessing test ####

###########################

import random

import math

import time

import scipy.special as scm

from multiprocessing import Pool

random.seed(1)

PX = 0;PY = 1;PZ = 2;

VX = 3;VY = 4;VZ = 5;

FX = 6;FY = 7;FZ = 8;

#number of particles in a line

line_num = 15

#total particle num

PN = line_num * line_num * line_num

#ready to 9 parameters for particle

#(PX, PY, PZ, VX, VY, VZ, FX, FY, FZ)

xyz = [[0 for i in range(9)] for j in range(PN)]

#Number of combinations of coulomb force calculation

combinum = int(scm.comb(PN, 2))

#thread number(local thread num)

core = 4

def find_pair_sub(prep,pend,thread):

global xyz

#local results array

xyzF = [[0 for i in range(3)] for j in range(PN)]

fx = 0; fy = 1; fz = 2

for i in range(prep,pend):

for j in range(i + 1, PN):

dx = xyz[i][PX] - xyz[j][PX]

dy = xyz[i][PY] - xyz[j][PY]

dz = xyz[i][PZ] - xyz[j][PZ]

r = math.sqrt(dx*dx + dy*dy + dz*dz)

xyzF[i][fx] = xyzF[i][fx] + dx/(r*r*r)

xyzF[i][fy] = xyzF[i][fy] + dy/(r*r*r)

xyzF[i][fz] = xyzF[i][fz] + dz/(r*r*r)

xyzF[j][fx] = xyzF[j][fx] - dx/(r*r*r)

xyzF[j][fy] = xyzF[j][fy] - dy/(r*r*r)

xyzF[j][fz] = xyzF[j][fz] - dz/(r*r*r)

return xyzF

def wrapper(args):

return find_pair_sub(*args)

def find_pair():

global PN

global combinum

pw = combinum // core

pl = combinum % core

localt = 0

thread = 0

pre = 0

#each thread work list

worklist = []

ppp = pw

for i in range(PN) :

if core == 1:

worklist.append([pre,PN,thread])

break

localt = localt + (PN - i - 1)

if localt >= ppp:

worklist.append([pre,i,thread])

ppp += pw

thread += 1

pre = i

if i != pre:

prep = worklist[thread-1][0]

worklist[thread-1] = [prep,PN,thread-1]

#make thread core num

p = Pool(core)

#start thread. results is callback in array.

callback = p.map(wrapper, worklist)

p.close()

#summation each thread results

for j in range(core):

for i in range(PN):

xyz[i][FX] += callback[j][i][0]

xyz[i][FY] += callback[j][i][1]

xyz[i][FZ] += callback[j][i][2]

def init_lattice():

global xyz

pnum = 0

while pnum < PN:

xyz[pnum][PX] = random.uniform(-1,1)

xyz[pnum][PY] = random.uniform(-1,1)

xyz[pnum][PZ] = random.uniform(-1,1)

xyz[pnum][FX] = random.uniform(-1,1)

xyz[pnum][FY] = random.uniform(-1,1)

xyz[pnum][FZ] = random.uniform(-1,1)

pnum += 1

if __name__ == "__main__":

init_lattice()

find_pair()

■結果　[Results summary]

code type	時間[sec]
multiprocessing (4core)	2.09 <-(new)
itertools使用 (no1)	8.18
range記述 (no2)	7.93
xrange記述 (no3)	7.89
ループ内周でnumpy使用 (no4)	78.46

We examined the speedup of the previous Coulomb force calculation part. As a result, more fast results calculation was using a simple double loop. In this time, I will try speeding up by multiprocessing parallel processing of this Coulomb force calculation part.

The code I wrote is below. The result is 2.09 [sec]. The CPU used has 4 hardware threads, so the work was split across 4 processes. Considering that the previous processing time was about 8 seconds, the result is the ideal one-quarter.

1月 25, 20181月 28, 2018

クーロン力計算の高速化検討

前回イオントラップシミュレーションにて、分子動力学のopenGLを使った可視化を行った。この処理にかかるほぼすべての時間はクーロン力計算の箇所であり、各粒子同士の計算が必要となる。粒子数をN個とすると、計算量は1ステップあたりNC2回が必要となる。特に今回のイオントラップの様な、閉じた系の中では周期境界条件が存在しないため、Ewald methodの様な高速化の方法も用いることができない。

今回はこのイオントラップにおけるクーロン力計算の高速化を検討してみたいと思う。まず高速化対象のコードだけを抜き出してきたのが下記。

#################
### code no1  ###
#################
import random
import math
import itertools
import time

# Fixed seed so every run uses the same random particle data.
random.seed(1)

# Column indices inside one particle record.
PX, PY, PZ = 0, 1, 2   # position
VX, VY, VZ = 3, 4, 5   # velocity
FX, FY, FZ = 6, 7, 8   # force

# Particles per lattice edge.
line_num = 15

# Total number of particles.
PN = line_num ** 3

# One 9-slot record per particle:
# (PX, PY, PZ, VX, VY, VZ, FX, FY, FZ)
xyz = [[0] * 9 for _ in range(PN)]

def find_pair():
  """Add the pairwise d/r**3 term to the force columns of xyz.

  Pairs (i, j) with i < j are produced by itertools.combinations; the
  term is added to particle i and subtracted from particle j.
  """
  for i, j in itertools.combinations(range(PN), 2):
    first = xyz[i]
    second = xyz[j]
    dx = first[PX] - second[PX]
    dy = first[PY] - second[PY]
    dz = first[PZ] - second[PZ]
    r  = math.sqrt(dx*dx + dy*dy + dz*dz)
    r3 = r*r*r

    first[FX] += dx/r3
    first[FY] += dy/r3
    first[FZ] += dz/r3
    second[FX] -= dx/r3
    second[FY] -= dy/r3
    second[FZ] -= dz/r3

def init_lattice():
  """Set the position and force columns of every particle to random values.

  Draws random.uniform(-1, 1) for PX, PY, PZ, FX, FY, FZ (in that order)
  for each of the PN particles.
  """
  for n in range(PN):
    for slot in (PX, PY, PZ, FX, FY, FZ):
      xyz[n][slot] = random.uniform(-1,1)


if __name__ == "__main__":
  init_lattice()
  find_pair()

#################

### code no1 ###

#################

import random

import math

import itertools

import time

random.seed(1)

PX = 0;PY = 1;PZ = 2;

VX = 3;VY = 4;VZ = 5;

FX = 6;FY = 7;FZ = 8;

#number of particles in a line

line_num = 15

#total particle num

PN = line_num * line_num * line_num

#ready to 9 parameters for particle

#(PX, PY, PZ, VX, VY, VZ, FX, FY, FZ)

xyz = [[0 for i in range(9)] for j in range(PN)]

def find_pair():

global PN

for element in itertools.combinations(range(PN), 2):

i = element[0]

j = element[1]

dx = xyz[i][PX] - xyz[j][PX]

dy = xyz[i][PY] - xyz[j][PY]

dz = xyz[i][PZ] - xyz[j][PZ]

r = math.sqrt(dx*dx + dy*dy + dz*dz)

xyz[i][FX] = xyz[i][FX] + dx/(r*r*r)

xyz[i][FY] = xyz[i][FY] + dy/(r*r*r)

xyz[i][FZ] = xyz[i][FZ] + dz/(r*r*r)

xyz[j][FX] = xyz[j][FX] - dx/(r*r*r)

xyz[j][FY] = xyz[j][FY] - dy/(r*r*r)

xyz[j][FZ] = xyz[j][FZ] - dz/(r*r*r)

def init_lattice():

pnum = 0

while pnum < PN:

xyz[pnum][PX] = random.uniform(-1,1)

xyz[pnum][PY] = random.uniform(-1,1)

xyz[pnum][PZ] = random.uniform(-1,1)

xyz[pnum][FX] = random.uniform(-1,1)

xyz[pnum][FY] = random.uniform(-1,1)

xyz[pnum][FZ] = random.uniform(-1,1)

pnum += 1

if __name__ == "__main__":

init_lattice()

find_pair()

上記コードの処理時間は手元の環境で約8.18[sec](10回実行した平均)
このコードのfind_pair関数内のitertools箇所を下記の様にシンプルにforループを2重で処理する様書き替えた場合が下記。

#################
### code no2  ###
#################
def find_pair():
  """Add the pairwise d/r**3 term to the force columns of xyz.

  Plain double loop over all pairs i < j (range-based variant); the term
  is added to particle i and subtracted from particle j.
  """
  for i in range(PN):
    first = xyz[i]
    for j in range(i + 1, PN):
      second = xyz[j]
      dx = first[PX] - second[PX]
      dy = first[PY] - second[PY]
      dz = first[PZ] - second[PZ]
      r  = math.sqrt(dx*dx + dy*dy + dz*dz)
      r3 = r*r*r

      first[FX] += dx/r3
      first[FY] += dy/r3
      first[FZ] += dz/r3
      second[FX] -= dx/r3
      second[FY] -= dy/r3
      second[FZ] -= dz/r3

#################

### code no2 ###

#################

def find_pair():

global PN

for i in range(PN):

for j in range(i + 1, PN):

dx = xyz[i][PX] - xyz[j][PX]

dy = xyz[i][PY] - xyz[j][PY]

dz = xyz[i][PZ] - xyz[j][PZ]

r = math.sqrt(dx*dx + dy*dy + dz*dz)

xyz[i][FX] = xyz[i][FX] + dx/(r*r*r)

xyz[i][FY] = xyz[i][FY] + dy/(r*r*r)

xyz[i][FZ] = xyz[i][FZ] + dz/(r*r*r)

xyz[j][FX] = xyz[j][FX] - dx/(r*r*r)

xyz[j][FY] = xyz[j][FY] - dy/(r*r*r)

xyz[j][FZ] = xyz[j][FZ] - dz/(r*r*r)

上記コードの処理時間は約7.93[sec](10回実行した平均)。
itertoolsを使った方が高速になるのかなと思ってたけど、シンプルにforの2重ループの方が高速な結果になった。

次に、forループ範囲をrangeコマンドで作っているのを、xrangeに変更してみる。rangeは内部でリストを生成してから要素参照するのに対して、xrangeはリストを作らず要素参照するらしい。

#################
### code no3  ###
#################
def find_pair():
  """Add the pairwise d/r**3 term to the force columns of xyz.

  Same double loop over all pairs i < j as the range variant, but the
  loop indices come from xrange.
  """
  for i in xrange(PN):     ## range -> xrange
    first = xyz[i]
    for j in xrange(i + 1, PN):  ## range -> xrange
      second = xyz[j]
      dx = first[PX] - second[PX]
      dy = first[PY] - second[PY]
      dz = first[PZ] - second[PZ]
      r  = math.sqrt(dx*dx + dy*dy + dz*dz)
      r3 = r*r*r

      first[FX] += dx/r3
      first[FY] += dy/r3
      first[FZ] += dz/r3
      second[FX] -= dx/r3
      second[FY] -= dy/r3
      second[FZ] -= dz/r3

#################

### code no3 ###

#################

def find_pair():

global PN

for i in xrange(PN): ## range -> xrange

for j in xrange(i + 1, PN): ## range -> xrange

dx = xyz[i][PX] - xyz[j][PX]

dy = xyz[i][PY] - xyz[j][PY]

dz = xyz[i][PZ] - xyz[j][PZ]

r = math.sqrt(dx*dx + dy*dy + dz*dz)

xyz[i][FX] = xyz[i][FX] + dx/(r*r*r)

xyz[i][FY] = xyz[i][FY] + dy/(r*r*r)

xyz[i][FZ] = xyz[i][FZ] + dz/(r*r*r)

xyz[j][FX] = xyz[j][FX] - dx/(r*r*r)

xyz[j][FY] = xyz[j][FY] - dy/(r*r*r)

xyz[j][FZ] = xyz[j][FZ] - dz/(r*r*r)

上記コードで約7.89[sec](10回実行した平均)。
気持ち速くなった程度だった。今回は粒子数15^3個で試しているが、数を増やしたら効果が出てくるかもしれない。

最後、numpyを使ってみる。どうしても上手い使い方が思い浮かばず、forループの中の演算を無理矢理numpyを使った処理に書き替えた。

#################
### code no4  ###
#################
def find_pair():
  """Add the pairwise d/r**3 term to the force columns of xyz using numpy.

  Positions and forces are copied into arrays once. For each particle i
  the interaction with all later particles j > i is evaluated in a single
  vectorised step instead of one numpy call per pair, and the accumulated
  forces are written back to xyz (the earlier version computed them into a
  temporary array and then discarded them).

  Coincident particles produce inf/nan entries (numpy division by zero)
  rather than raising.
  """
  pos = numpy.array([[p[PX], p[PY], p[PZ]] for p in xyz[:PN]], dtype=float)
  force = numpy.array([[p[FX], p[FY], p[FZ]] for p in xyz[:PN]], dtype=float)

  for i in range(PN - 1):
    # rows are position[i] - position[j] for every j > i
    diff = pos[i] - pos[i + 1:]
    r = numpy.sqrt((diff * diff).sum(axis=1))
    r3 = r*r*r
    contrib = diff / r3[:, numpy.newaxis]
    # particle i gains the sum of all terms, each partner j loses its own
    force[i] += contrib.sum(axis=0)
    force[i + 1:] -= contrib

  # store the result as plain Python floats, like the loop-based variants
  for i in range(PN):
    xyz[i][FX], xyz[i][FY], xyz[i][FZ] = force[i].tolist()

#################

### code no4 ###

#################

def find_pair():

global PN

vv = numpy.array([[[0 for i in range(3)] for j in range(3)] for k in range(PN)], dtype=float)

for i in xrange(PN):

vv[i,0,0] = xyz[i][PX]

vv[i,0,1] = xyz[i][PY]

vv[i,0,2] = xyz[i][PZ]

vv[i,2,0] = xyz[i][FX]

vv[i,2,1] = xyz[i][FY]

vv[i,2,2] = xyz[i][FZ]

for i in xrange(PN):

for j in xrange(i + 1, PN):

vvv = vv[i,0] - vv[j,0]

r = numpy.linalg.norm(vvv)

r3 = r*r*r

vv[i,2] = vv[i,2] + (vvv/r3)

vv[j,2] = vv[j,2] - (vvv/r3)

上記コードで約78.46[sec](10回実行した平均)。
全然駄目だった。多重ループの内周で、何度も実行されるような場所にnumpyの処理を配置するととんでもなく遅くなる。遅くなるとは思っていたが予想以上の重さ。

■結果　[Results summary]

code type	時間[sec]
itertools使用 (no1)	8.18
range記述 (no2)	7.93
xrange記述 (no3)	7.89
ループ内周でnumpy使用 (no4)	78.46

あと考えられる手法はスレッド化。これはまた今度試してみたいと思う。

We performed visualization using openGL of molecular dynamics in the previous ion trap simulation. Almost all the time required for this process is the location of the Coulomb force calculation and it is necessary to calculate each particle. Assuming that the number of particles is N, the amount of calculation needs NC2 Times per step. Especially in the closed system such as the ion trap of this time there is no periodic boundary condition, so speeding method like Ewald method can not be used.

In this time I would like to examine how to speed up the calculation of the Coulomb force in this ion trap.

First, Code No1 that it has been extracted only the code of speeding up the subject. Results time is 8.18sec.

Code No.2 is when the itertools location in the above find_pair function is rewritten with a double for loop.Results time is 7.93sec.

Code No 3 changes range to xrange.Results time is 7.89sec.

Last Code No4 use numpy.But, Since it is used many times on the inner periphery, slow processing can be expected.Results time is 78.46sec.

Another possible method is threading. I want to try this again next time.

1月 5, 20182月 3, 2018

// Browser page driven by the script, index of the next step to run, and a
// flag that is true while a navigation is in flight.
var page = new WebPage();
var testindex = 0;
var loadInProgress = false;

// Forward console output from the page to the PhantomJS console.
page.onConsoleMessage = function (msg) {
        console.log(msg);
};

// Track navigation state so the step runner only advances while idle.
page.onLoadStarted = function () {
        loadInProgress = true;
};

page.onLoadFinished = function () {
        loadInProgress = false;
};
// Ordered list of actions; the step runner executes one entry per tick
// while no page load is in progress.
var steps = [
        // 1. Open the WordPress login page.
        function() {
                page.open("http://hogehoge_wordpress.com/wp-login.php");
        },
        // 2. Fill in the credentials and submit the login form.
        function() {
                page.evaluate(function(){
                        document.getElementById("user_login").value="<user id>";
                        document.getElementById("user_pass").value="<user pass>";
                        document.querySelector('*[name="wp-submit"]').click();
                });
        },
        // 3. Open the "new post" editor.
        function() {
                page.open('http://hogehoge_wordpress.com/wp-admin/post-new.php');
        },
        // 4. Fill in title and content, then press the publish button.
        function() {
                page.evaluate(function() {
                        document.querySelector('*[name="post_title"]').value='title';
                        document.querySelector('*[name="content"]').value='contents';
                        // The button is pressed by dispatching a synthetic mouse click.
                        // (The former bare `.click;` expression never called the method
                        // and the undeclared `waitforload` flag was never read; both
                        // are removed.)
                        var a = document.querySelectorAll("#publish");
                        var e = document.createEvent('MouseEvents');
                        e.initMouseEvent('click', true, true, window, 0, 0, 0, 0, 0, false, false, false, false, 0, null);
                        a[0].dispatchEvent(e);
                });
        },
        // 5. Save a screenshot of the final page.
        function() {
                page.render('images/LastLoad.png');
        }

];

// Step runner: every 5 seconds run the next step if no page load is in
// progress, saving a screenshot after each step; exit once all steps ran.
// Declared with var so the timer handle is not an implicit global.
var interval = setInterval(function()
{
        if (!loadInProgress && typeof steps[testindex] == "function")
        {
                console.log("step " + (testindex + 1));
                steps[testindex]();
                page.render("images/step" + (testindex + 1) + ".png");
                testindex++;
        }
        if (typeof steps[testindex] != "function")
        {
                page.render('images/finalxx.png');
                console.log("Scrapping complete...!");
                // Stop the timer so no further tick can run while PhantomJS exits.
                clearInterval(interval);
                phantom.exit();
        }
}, 5000);// JavaScript Document

var page = new WebPage(), testindex = 0, loadInProgress = false;

page.onConsoleMessage = function(msg)

{

console.log(msg);

};

page.onLoadStarted = function()

{

loadInProgress = true;

};

page.onLoadFinished = function()

{

loadInProgress = false;

};

var steps = [

function() {

page.open("http://hogehoge_wordpress.com/wp-login.php");

function() {

page.evaluate(function(){

document.getElementById("user_login").value="<user id>";

document.getElementById("user_pass").value="<user pass>";

document.querySelector('*[name="wp-submit"]').click();

});

function() {

page.open('http://hogehoge_wordpress.com/wp-admin/post-new.php');

function() {

page.evaluate(function() {

document.querySelector('*[name="post_title"]').value='title';

document.querySelector('*[name="content"]').value='contents';

document.querySelector("#publish").click;

var a = document.querySelectorAll("#publish");

var e = document.createEvent('MouseEvents');

e.initMouseEvent('click', true, true, window, 0, 0, 0, 0, 0, false, false, false, false, 0, null);

a[0].dispatchEvent(e);

waitforload = true;

});

function() {

page.render('images/LastLoad.png');

}

];

interval = setInterval(function()

{

if (!loadInProgress && typeof steps[testindex] == "function")

{

console.log("step " + (testindex + 1));

steps[testindex]();

page.render("images/step" + (testindex + 1) + ".png");

testindex++;

}

if (typeof steps[testindex] != "function")

{

page.render('images/finalxx.png');

console.log("Scrapping complete...!");

phantom.exit();

}

}, 5000);// JavaScript Document

Sample script for posting to WordPress with PhantomJS.

投稿

numpyとopenCVを使った画像のフーリエ変換と逆変換

pythonのthreadingを使いクーロン力の並列計算をするテスト

マルチスレッドのテスト

分子動力学を使用したイオントラップシミュレーションとOpenGLによる可視化(multiprocessingによる処理並列化版)

pythonのmultiprocessingを使いクーロン力の並列計算をするテスト

クーロン力計算の高速化検討

分子動力学を使用したイオントラップシミュレーションとOpenGLによる可視化

３D格子状に配置した粒子をopenGLで可視化、マウスで視点移動

格子ボルツマン法を用いた流体力学のシミュレーション(openGL)

phantomjs を使い、wordpress へ記事を投稿するスクリプト