简体   繁体   中英

How to implement python script to run on “N” number of CPU CORES?

I have written a script to optimize a particular part of a structure (the scientific details can be ignored). The main purpose of the script is optimization, and most of the run time is spent in two functions, optimize() and refine(), which use only one of the 4 CPUs on my local system. I want the script to use all 4 CPUs, especially for these two functions.

I don't know much about multiprocessing or multicore programming. I tried the multiprocessing module, but it fails to use all the CPUs. If someone knows how to make a Python script run on all available CPUs, any suggestions would be really helpful.

MY SCRIPT:

import sys
import os

from modeller import *
from modeller.optimizers import molecular_dynamics,conjugate_gradients
from modeller.automodel import autosched

def optimize(atmsel, sched):
    """Optimize ``atmsel``: schedule steps, then refine(), then conjugate gradients.

    Every step of ``sched`` and the closing conjugate-gradients pass are
    run with the same limits (200 iterations, minimum atom shift 0.001).
    """
    limits = dict(max_iterations=200, min_atom_shift=0.001)
    for stage in sched:
        stage.optimize(atmsel, **limits)
    refine(atmsel)
    conjugate_gradients().optimize(atmsel, **limits)

def refine(atmsel):
    """Run a series of molecular-dynamics passes on ``atmsel``.

    Two stages are run in order. Each stage is a tuple of
    (max_iterations, equilibrate, temperatures), and one ``md.optimize``
    call is made per temperature. Velocities are initialized only on the
    very first call (``init_velocities=True``); every later call passes
    ``init_velocities=False``.
    """
    md = molecular_dynamics(cap_atom_shift=0.39, md_time_step=4.0,
                            md_return='FINAL')
    init_vel = True
    for (its, equil, temps) in ((200, 20, (150.0, 250.0, 400.0, 700.0, 1000.0)),
                                (200, 600,
                                 (1000.0, 800.0, 600.0, 500.0, 400.0, 300.0))):
        # The temperature loop must be nested inside the stage loop; at the
        # same indent level the outer loop has no body and the file does
        # not parse.
        for temp in temps:
            md.optimize(atmsel, init_velocities=init_vel, temperature=temp,
                        max_iterations=its, equilibrate=equil)
            init_vel = False

def make_restraints(mdl1, aln):
    """Clear the restraints of ``mdl1`` and rebuild them over the whole model.

    'stereo' and 'phi-psi_binormal' restraints are made first, followed
    by the '<name>_dihedral' restraints for omega and chi1..chi4, which
    additionally pass the spline range/dx/min-points settings.
    """
    restraints = mdl1.restraints
    restraints.clear()
    whole_model = selection(mdl1)

    for kind in ('stereo', 'phi-psi_binormal'):
        restraints.make(whole_model, restraint_type=kind, aln=aln,
                        spline_on_site=True)

    for angle in ('omega', 'chi1', 'chi2', 'chi3', 'chi4'):
        restraints.make(whole_model, restraint_type=angle + '_dihedral',
                        spline_range=4.0, spline_dx=0.3,
                        spline_min_points=5, aln=aln,
                        spline_on_site=True)

# Enable verbose log output.
log.verbose()

# Create the environment with a fixed random seed and set its I/O and
# energy-data options.
env = environ(rand_seed=int(-4243))
env.io.hetatm = True
env.edat.dynamic_sphere=False
env.edat.dynamic_lennard=True
env.edat.contact_shell = 4.0
env.edat.update_dynamic = 0.39

# Load the topology library.
env.libs.topology.read(file='$(LIB)/top_heav.lib')

# Load the parameter library.
env.libs.parameters.read(file='$(LIB)/par.lib')

# Read the model "3O26" and add it to a new alignment.
mdl1 = model(env, file = "3O26")
ali = alignment(env)
ali.append_model(mdl1, atom_files= "3O26.pdb", align_codes= "3O26")

# Select residue 275 of chain A.
s = selection(mdl1.chains["A"].residues["275"])

# Mutate the selected residue to ALA and append the mutated model to the
# alignment as a second entry.
s.mutate(residue_type="ALA")
ali.append_model(mdl1, align_codes="3O26")

# Regenerate the topology from the last alignment entry (the mutated one).
mdl1.clear_topology()
mdl1.generate_topology(ali[-1])

# Transfer coordinates using the alignment.
mdl1.transfer_xyz(ali)

# NOTE(review): presumably builds only the atoms that still lack
# coordinates, from internal coordinates -- confirm against the Modeller docs.
mdl1.build(initialize_xyz=False, build_method='INTERNAL_COORDINATES')

# Read the original structure again as a second model.
mdl2 = model(env, file="3O26.pdb")

# Take residue numbering for mdl1 from mdl2.
mdl1.res_num_from(mdl2,ali)

# Write the model to a temporary file and read it straight back.
# NOTE(review): presumably done to refresh the model's internal state -- confirm.
mdl1.write(file="3O26"+"ALA"+"275"+"A"+'.tmp')
mdl1.read(file="3O26"+"ALA"+"275"+"A"+'.tmp')

# Build the restraints and the optimization schedule for the model.
make_restraints(mdl1, ali)
mdl1.env.edat.nonbonded_sel_atoms=1
sched = autosched.loop.make_for_model(mdl1)

# Re-bind `s`: select_sphere(5) around the CA atom of residue 275 in
# chain A, passed through by_residue().
s = selection(mdl1.atoms['CA:'+"275"+':'+"A"].select_sphere(5)).by_residue()

# Unpick all restraints, then pick restraints for the selection `s`.
mdl1.restraints.unpick_all()
mdl1.restraints.pick(s)

s.energy()

# Randomize the selected coordinates (deviation 4.0) before optimizing.
s.randomize_xyz(deviation=4.0)

# Two optimization passes over the same selection: first with
# nonbonded_sel_atoms=2, then with nonbonded_sel_atoms=1.
mdl1.env.edat.nonbonded_sel_atoms=2
optimize(s,sched)
mdl1.env.edat.nonbonded_sel_atoms=1
optimize(s,sched)
s.energy()
# Assess chain A with assess_dope() and write the final model.
atmsel = selection(mdl1.chains["A"])
score = atmsel.assess_dope()
mdl1.write(file="hi.pdb")

# Remove the temporary file written above.
os.remove("3O26"+"ALA"+"275"+"A"+'.tmp')

from multiprocessing import Process
if __name__ == '__main__':
    # Exactly one child process is created here; it runs
    # optimize(atmsel, sched), and the parent blocks in join() until the
    # child has finished.
    p = Process(target=optimize, args=(atmsel,sched))
    p.start()
    p.join()

To run the demo, save the contents of http://files.rcsb.org/view/3o26.pdb as a file named 3O26.pdb and keep it in the same directory as the script.

Thanking you in advance

Based on @Dinesh's suggestion, I modified the code to use the pp module. It now uses all the cores, but I am getting an error that I cannot figure out.

Modified script:

import sys
import os
import pp
from modeller import *
from modeller.optimizers import molecular_dynamics, conjugate_gradients
from modeller.automodel import autosched


def optimize(atmsel, sched):
    """Run the full optimization pipeline on the selection ``atmsel``.

    Order of work: each step of ``sched``, then ``refine(atmsel)``, then a
    final conjugate-gradients pass. All optimizer calls share the same
    iteration cap and minimum atom shift.
    """
    max_iterations = 200
    min_atom_shift = 0.001

    for schedule_step in sched:
        schedule_step.optimize(atmsel, max_iterations=max_iterations,
                               min_atom_shift=min_atom_shift)

    refine(atmsel)

    final_pass = conjugate_gradients()
    final_pass.optimize(atmsel, max_iterations=max_iterations,
                        min_atom_shift=min_atom_shift)


def refine(atmsel):
    """Apply two molecular-dynamics stages to ``atmsel``.

    The first stage steps the temperature up from 150 to 1000, the second
    steps it down from 1000 to 300. One ``optimize`` call is made per
    temperature; only the very first call initializes velocities.
    """
    dynamics = molecular_dynamics(cap_atom_shift=0.39, md_time_step=4.0,
                                  md_return='FINAL')

    # Each stage: (max_iterations, equilibrate, temperatures).
    rising = (200, 20, (150.0, 250.0, 400.0, 700.0, 1000.0))
    falling = (200, 600, (1000.0, 800.0, 600.0, 500.0, 400.0, 300.0))

    first_call = True
    for iterations, equilibrate, temperatures in (rising, falling):
        for temperature in temperatures:
            dynamics.optimize(atmsel, init_velocities=first_call,
                              temperature=temperature,
                              max_iterations=iterations,
                              equilibrate=equilibrate)
            first_call = False

def make_restraints(mdl1, aln):
    """Reset the restraints on ``mdl1`` and regenerate them for every atom.

    The restraint types are made in a fixed order: 'stereo',
    'phi-psi_binormal', then the '_dihedral' variants of omega and
    chi1..chi4. Only the dihedral restraints receive the extra spline
    range/dx/min-points arguments.
    """
    rsr = mdl1.restraints
    rsr.clear()
    all_atoms = selection(mdl1)

    dihedral_options = dict(spline_range=4.0, spline_dx=0.3,
                            spline_min_points=5)

    # (restraint_type, extra keyword arguments), in the order they are made.
    plan = [(name, {}) for name in ('stereo', 'phi-psi_binormal')]
    plan += [(name + '_dihedral', dihedral_options)
             for name in ('omega', 'chi1', 'chi2', 'chi3', 'chi4')]

    for restraint_type, extra in plan:
        rsr.make(all_atoms, restraint_type=restraint_type, aln=aln,
                 spline_on_site=True, **extra)

################################### PPMODULE ############################
def main(s,sched):
    print s,"*************************************************************************"
    ppservers = ()
    if len(sys.argv) > 1:
        ncpus = int(sys.argv[1])
        job_server = pp.Server(ncpus, ppservers=ppservers)
    else:
        job_server = pp.Server(ppservers=ppservers)
    print "Starting pp with", job_server.get_ncpus(), "workers"
    job_server.submit(optimize,(s,sched,),(refine,),("from modeller.optimizers import molecular_dynamics, conjugate_gradients",))()
#################################### PPMODULE ############################

if __name__=="__main__":
    # Enable verbose log output.
    log.verbose()
    # Create the environment with a fixed random seed and set its I/O and
    # energy-data options.
    env = environ(rand_seed=int(-4345))
    env.io.hetatm = True
    env.edat.dynamic_sphere = False
    env.edat.dynamic_lennard = True
    env.edat.contact_shell = 4.0
    env.edat.update_dynamic = 0.39
    # Load the topology and parameter libraries.
    env.libs.topology.read(file='$(LIB)/top_heav.lib')
    env.libs.parameters.read(file='$(LIB)/par.lib')
    # Read the model "3O26" and add it to a new alignment.
    mdl1 = model(env, file="3O26")
    ali = alignment(env)
    ali.append_model(mdl1, atom_files="3O26.pdb", align_codes="3O26")
    # Mutate residue 275 of chain A to ALA and append the mutated model to
    # the alignment as a second entry.
    s = selection(mdl1.chains["A"].residues["275"])
    s.mutate(residue_type="ALA")
    ali.append_model(mdl1, align_codes="3O26")
    # Regenerate the topology from the last alignment entry and transfer
    # coordinates using the alignment.
    mdl1.clear_topology()
    mdl1.generate_topology(ali[-1])
    mdl1.transfer_xyz(ali)
    # NOTE(review): presumably builds only the atoms that still lack
    # coordinates -- confirm against the Modeller docs.
    mdl1.build(initialize_xyz=False, build_method='INTERNAL_COORDINATES')
    # Take residue numbering for mdl1 from a fresh read of the original file.
    mdl2 = model(env, file="3O26.pdb")
    mdl1.res_num_from(mdl2, ali)
    # Write the model to a temporary file and read it straight back.
    mdl1.write(file="3O26" + "ALA" + "275" + "A" + '.tmp')
    mdl1.read(file="3O26" + "ALA" + "275" + "A" + '.tmp')
    # Build the restraints and the optimization schedule for the model.
    make_restraints(mdl1, ali)
    mdl1.env.edat.nonbonded_sel_atoms = 1
    sched = autosched.loop.make_for_model(mdl1)
    # Re-bind `s`: select_sphere(15) around the CA atom of residue 275 in
    # chain A, passed through by_residue().
    s = selection(mdl1.atoms['CA:' + "275" + ':' + "A"].select_sphere(15)).by_residue()
    # Unpick all restraints, then pick restraints for the selection `s`.
    mdl1.restraints.unpick_all()
    mdl1.restraints.pick(s)
    s.energy()
    # Randomize the selected coordinates (deviation 4.0) before optimizing.
    s.randomize_xyz(deviation=4.0)
    # Two passes through main(): first with nonbonded_sel_atoms=2, then
    # with nonbonded_sel_atoms=1.
    mdl1.env.edat.nonbonded_sel_atoms = 2
    main(s, sched)
    mdl1.env.edat.nonbonded_sel_atoms = 1
    main(s, sched)
    s.energy()
    # Assess chain A with assess_dope() and write the final model.
    atmsel = selection(mdl1.chains["A"])
    score = atmsel.assess_dope()
    mdl1.write(file="current.pdb")
    # Remove the temporary file written above.
    os.remove("3O26" + "ALA" + "275" + "A" + '.tmp')

ERROR:

randomi_498_> Atoms,selected atoms,random_seed,amplitude:     2302      558        1        4.0000
randomi_496_> Amplitude is > 0; randomization is done.
<Selection of 558 atoms> *************************************************************************
Starting pp with 4 workers
Traceback (most recent call last):
  File "mutate_model.py", line 88, in <module>
    main(s, sched)
  File "m_m.py", line 52, in main
    job_server.submit(optimize,(s,sched,),(refine,),("from modeller.optimizers import molecular_dynamics, conjugate_gradients",))()
  File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/pp.py", line 460, in submit
    sfunc = self.__dumpsfunc((func, ) + depfuncs, modules)
  File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/pp.py", line 638, in __dumpsfunc
    sources = [self.__get_source(func) for func in funcs]
  File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/pp.py", line 705, in __get_source
    sourcelines = inspect.getsourcelines(func)[0]
  File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/inspect.py", line 690, in getsourcelines
    lines, lnum = findsource(object)
  File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/inspect.py", line 526, in findsource
    file = getfile(object)
  File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/inspect.py", line 408, in getfile
    raise TypeError('{!r} is a built-in class'.format(object))
TypeError: <module '__builtin__' (built-in)> is a built-in class

Finally, I solved it myself with a different approach, multiprocessing.Pool, based on the blog posts http://chriskiehl.com/article/parallelism-in-one-line/ and https://pymotw.com/2/multiprocessing/basics.html

Here is my Pseudo CODE:

from multiprocessing import Pool 

def get_mm_script(scripts):
    """Return the mm.py scripts to run.

    Placeholder from the pseudo code: in the real program every mm.py
    script was created from a string template. Here the argument is
    handed back unchanged.
    """
    return scripts

def run(filename):
    """Run one script (placeholder).

    In the real program a system command was used here to execute each
    script. This stub does nothing and returns None.
    """
    return None

if __name__ == '__main__':
  # NOTE(review): `f` is not defined anywhere in this snippet; it stands
  # for whatever input the real get_mm_script() needs.
  scripts=get_mm_script(f)
  # Create a pool of 4 worker processes and call run() once per script.
  pool = Pool(4)
  pool.map(run, scripts)
  # Stop accepting new work, then wait for the worker processes to exit.
  pool.close()
  pool.join()

Please check the pp (Parallel Python) module: parallelpython.com

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM