Parallelizer: smarter assessment of CPU count (ignores hyperthreaded cores on Linux)
This commit is contained in:
parent
0402d08604
commit
92d11cee91
@ -1,4 +1,4 @@
|
|||||||
import os, sys, time, multiprocessing
|
import os, sys, time, multiprocessing, re
|
||||||
from processes import ForkedProcess
|
from processes import ForkedProcess
|
||||||
from remoteproxy import ExitError
|
from remoteproxy import ExitError
|
||||||
|
|
||||||
@ -37,12 +37,13 @@ class Parallelize:
|
|||||||
since it is automatically sent via pipe back to the parent process.
|
since it is automatically sent via pipe back to the parent process.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, tasks, workers=None, block=True, progressDialog=None, randomReseed=True, **kwds):
|
def __init__(self, tasks=None, workers=None, block=True, progressDialog=None, randomReseed=True, **kwds):
|
||||||
"""
|
"""
|
||||||
=============== ===================================================================
|
=============== ===================================================================
|
||||||
Arguments:
|
Arguments:
|
||||||
tasks list of objects to be processed (Parallelize will determine how to
|
tasks list of objects to be processed (Parallelize will determine how to
|
||||||
distribute the tasks)
|
distribute the tasks). If unspecified, then each worker will receive
|
||||||
|
a single task with a unique id number.
|
||||||
workers number of worker processes or None to use number of CPUs in the
|
workers number of worker processes or None to use number of CPUs in the
|
||||||
system
|
system
|
||||||
progressDialog optional dict of arguments for ProgressDialog
|
progressDialog optional dict of arguments for ProgressDialog
|
||||||
@ -70,6 +71,8 @@ class Parallelize:
|
|||||||
if not hasattr(os, 'fork'):
|
if not hasattr(os, 'fork'):
|
||||||
workers = 1
|
workers = 1
|
||||||
self.workers = workers
|
self.workers = workers
|
||||||
|
if tasks is None:
|
||||||
|
tasks = range(workers)
|
||||||
self.tasks = list(tasks)
|
self.tasks = list(tasks)
|
||||||
self.reseed = randomReseed
|
self.reseed = randomReseed
|
||||||
self.kwds = kwds.copy()
|
self.kwds = kwds.copy()
|
||||||
@ -103,7 +106,7 @@ class Parallelize:
|
|||||||
self.progressDlg.__enter__()
|
self.progressDlg.__enter__()
|
||||||
self.progressDlg.setMaximum(len(self.tasks))
|
self.progressDlg.setMaximum(len(self.tasks))
|
||||||
self.progress = {os.getpid(): []}
|
self.progress = {os.getpid(): []}
|
||||||
return Tasker(None, self.tasks, self.kwds)
|
return Tasker(self, None, self.tasks, self.kwds)
|
||||||
|
|
||||||
|
|
||||||
def runParallel(self):
|
def runParallel(self):
|
||||||
@ -121,7 +124,7 @@ class Parallelize:
|
|||||||
proc = ForkedProcess(target=None, preProxy=self.kwds, randomReseed=self.reseed)
|
proc = ForkedProcess(target=None, preProxy=self.kwds, randomReseed=self.reseed)
|
||||||
if not proc.isParent:
|
if not proc.isParent:
|
||||||
self.proc = proc
|
self.proc = proc
|
||||||
return Tasker(proc, chunks[i], proc.forkedProxies)
|
return Tasker(self, proc, chunks[i], proc.forkedProxies)
|
||||||
else:
|
else:
|
||||||
self.childs.append(proc)
|
self.childs.append(proc)
|
||||||
|
|
||||||
@ -197,7 +200,26 @@ class Parallelize:
|
|||||||
|
|
||||||
@staticmethod
def suggestedWorkerCount():
    """
    Return a suggested number of worker processes for this machine.

    On Linux, count *physical* cores by parsing /proc/cpuinfo, because
    multiprocessing.cpu_count() also counts hyperthreaded (SMT) logical
    cores, which add little benefit for CPU-bound workers. On any parse
    failure, or on other platforms, fall back to
    multiprocessing.cpu_count().
    """
    if 'linux' in sys.platform:
        ## cpu_count does not consider that there is little extra benefit
        ## to using hyperthreaded cores; count physical cores instead.
        try:
            cores = {}   ## physical package id -> cores on that package
            pid = None   ## physical id of the package currently being parsed
            ## use a context manager so the file is closed even if parsing fails
            with open('/proc/cpuinfo') as fh:
                for line in fh:
                    m = re.match(r'physical id\s+:\s+(\d+)', line)
                    if m is not None:
                        pid = m.groups()[0]
                    m = re.match(r'cpu cores\s+:\s+(\d+)', line)
                    if m is not None:
                        cores[pid] = int(m.groups()[0])
            ## if parsing matched nothing, 0 workers would be useless;
            ## fall back to cpu_count in that degenerate case.
            return sum(cores.values()) or multiprocessing.cpu_count()
        except Exception:
            ## /proc/cpuinfo may be missing or formatted unexpectedly;
            ## fall back rather than crash. (Was a bare except:, which
            ## would also swallow SystemExit/KeyboardInterrupt.)
            return multiprocessing.cpu_count()
    else:
        return multiprocessing.cpu_count()
|
||||||
|
|
||||||
def _taskStarted(self, pid, i, **kwds):
|
def _taskStarted(self, pid, i, **kwds):
|
||||||
## called remotely by tasker to indicate it has started working on task i
|
## called remotely by tasker to indicate it has started working on task i
|
||||||
@ -212,8 +234,9 @@ class Parallelize:
|
|||||||
|
|
||||||
|
|
||||||
class Tasker:
|
class Tasker:
|
||||||
def __init__(self, proc, tasks, kwds):
|
def __init__(self, parallelizer, process, tasks, kwds):
|
||||||
self.proc = proc
|
self.proc = process
|
||||||
|
self.par = parallelizer
|
||||||
self.tasks = tasks
|
self.tasks = tasks
|
||||||
for k, v in kwds.iteritems():
|
for k, v in kwds.iteritems():
|
||||||
setattr(self, k, v)
|
setattr(self, k, v)
|
||||||
@ -229,7 +252,20 @@ class Tasker:
|
|||||||
#print os.getpid(), 'no more tasks'
|
#print os.getpid(), 'no more tasks'
|
||||||
self.proc.close()
|
self.proc.close()
|
||||||
|
|
||||||
|
def process(self):
|
||||||
|
"""
|
||||||
|
Process requests from parent.
|
||||||
|
Usually it is not necessary to call this unless you would like to
|
||||||
|
receive messages (such as exit requests) during an iteration.
|
||||||
|
"""
|
||||||
|
if self.proc is not None:
|
||||||
|
self.proc.processRequests()
|
||||||
|
|
||||||
|
def numWorkers(self):
|
||||||
|
"""
|
||||||
|
Return the number of parallel workers
|
||||||
|
"""
|
||||||
|
return self.par.workers
|
||||||
|
|
||||||
#class Parallelizer:
|
#class Parallelizer:
|
||||||
#"""
|
#"""
|
||||||
|
Loading…
Reference in New Issue
Block a user