I created ten threads to process the items in a global list, but I do not know why only the first worker does any of the work. Also, the main thread finished before the sub-threads finished, even though I had used thread.join(). Here is the code. I think the problem may be that I use a while loop in myThread.run, but I do not know how to tell these threads to keep working until the global list is empty.
# coding=utf-8
import threading
import numpy as np
# Target identifiers: 'units00000' through 'units00249'.
dfs = ['units' + str(i).zfill(5) for i in range(250)]
# Work queue: starts as a copy of the targets so `dfs` itself is never mutated.
units = dfs.copy()
# Filler entries: decimal strings for 500..21799999, zero-padded to at least
# five digits. They carry no 'units' prefix, so none of them is in `dfs`.
k = [str(i).zfill(5) for i in range(500, 21800000)]
units.extend(k)
# Shuffle the queue in place so the targets are scattered among the filler.
np.random.shuffle(units)
# Result list; working_fun appends the integer suffix of every matching unit.
marker = []
def working_fun(df, unit):
    """Record the numeric suffix of ``unit`` in ``marker`` when it is in ``df``.

    Args:
        df: Collection of identifiers to match against.
        unit: Candidate identifier; everything after its first five
            characters is parsed as an integer when it matches.

    Returns:
        None. A match is recorded by appending to the module-level
        ``marker`` list.
    """
    if unit in df:
        # The lock must be released after the append. Acquiring it without a
        # release leaves it owned by the calling thread, which blocks every
        # other worker forever. ``with`` releases it even if int() raises.
        with threadlock:
            marker.append(int(unit[5:]))
class myThread(threading.Thread):
    """Worker thread that drains the shared ``units`` list.

    Each worker repeatedly pops one unit from the module-level ``units``
    list while holding ``threadlock``, records it in its own ``work_load``,
    and passes it to ``working_fun`` together with ``dfs``. The worker stops
    once ``units`` is empty.

    Attributes:
        work_load: Units this worker has taken from the queue, in pop order.
    """

    def __init__(self, name):
        """Create a worker whose thread name is ``name``."""
        super().__init__()
        self.name = name
        self.work_load = []

    def run(self):
        """Process units until the shared queue is exhausted."""
        print("start thread" + self.name)
        while True:
            # Hold the lock only for the check-and-pop so that two workers
            # cannot take the same unit; ``with`` guarantees the release.
            with threadlock:
                unit = units.pop() if units else None
            if unit is None:
                print('------', self.name, '--finish---', len(self.work_load), '--------')
                break
            self.work_load.append(unit)
            # The matching itself runs outside the queue lock.
            working_fun(dfs, unit)
# Re-entrant lock shared by all workers; it guards the queue and the results.
threadlock = threading.RLock()

# Build the ten workers up front.
thds = [myThread(name='thd' + str(i)) for i in range(10)]

# Start every worker before waiting on any of them. Calling join() right
# after each start() would make the workers run one after another.
for thd in thds:
    thd.start()

# Wait for all workers, not just the last one, before reading the results.
for thd in thds:
    thd.join()

print('output:', marker)
Try it this way:
import numpy
import multiprocessing
# Same data setup as in the question.
# Target identifiers: 'units00000' through 'units00249'.
dfs = ['units' + str(i).zfill(5) for i in range(250)]
# Work list: starts as a copy of the targets so `dfs` itself is never mutated.
units = dfs.copy()
# Filler entries: decimal strings for 500..21799999, zero-padded to at least
# five digits. They carry no 'units' prefix, so none of them is in `dfs`.
k = [str(i).zfill(5) for i in range(500, 21800000)]
units.extend(k)
# Shuffle the work list in place so the targets are scattered among the filler.
numpy.random.shuffle(units)
# Almost the same as before
def working_fun(inp):
    """Return the integer suffix of a unit if it is one of the targets.

    Args:
        inp: A ``(df, unit)`` pair, packed into a single argument so the
            function can be used with ``Pool.map``.

    Returns:
        ``int(unit[5:])`` when ``unit`` is in ``df``, otherwise ``None``.
    """
    candidates, item = inp
    if item not in candidates:
        return None
    return int(item[5:])
# This is needed for multiprocessing/threading
if __name__ == "__main__":
    # One (dfs, unit) task per unit; working_fun takes a single packed argument.
    tasks = [(dfs, unit) for unit in units]

    # Spread the tasks over a pool of ten worker processes.
    with multiprocessing.Pool(10) as pool:
        result = pool.map(working_fun, tasks)

    # working_fun returns None for units that are not in dfs; drop those.
    matches = [r for r in result if r is not None]
    print(matches)
Output:
$ python test.py
[1, 75, 139, 24, 101, 72, 156, 55, 58, 235, 14, 123, 177, 112, 168, 178, 173, 162, 104, 226, 230, 205, 69, 100, 246, 18, 117, 149, 37, 214, 206, 26, 136, 87, 144, 79, 50, 222, 7, 133, 36, 41, 30, 163, 103, 187, 6, 225, 15, 223, 234, 138, 126, 19, 64, 224, 39, 145, 130, 42, 11, 221, 128, 213, 204, 2, 45, 220, 242, 109, 59, 238, 232, 68, 152, 107, 148, 83, 197, 241, 118, 32, 90, 99, 22, 119, 0, 67, 48, 181, 71, 193, 95, 29, 113, 40, 134, 218, 141, 27, 121, 8, 207, 110, 60, 237, 47, 94, 73, 157, 184, 78, 159, 49, 202, 239, 124, 215, 127, 209, 62, 4, 52, 82, 74, 9, 199, 158, 188, 3, 61, 180, 57, 219, 245, 38, 16, 190, 12, 17, 175, 46, 196, 125, 194, 76, 129, 161, 81, 93, 137, 155, 174, 54, 35, 25, 115, 140, 216, 23, 21, 233, 77, 33, 92, 208, 120, 86, 165, 70, 135, 28, 91, 66, 85, 169, 203, 211, 114, 154, 122, 217, 247, 31, 147, 96, 142, 191, 10, 183, 80, 179, 189, 56, 105, 160, 228, 185, 132, 5, 53, 106, 13, 210, 182, 89, 192, 153, 170, 111, 65, 212, 186, 151, 200, 248, 229, 102, 240, 198, 176, 43, 131, 166, 236, 231, 116, 172, 146, 88, 44, 98, 227, 20, 34, 164, 108, 171, 244, 243, 195, 150, 249, 97, 167, 51, 201, 84, 63, 143]
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.