
Multi-threading program in Python: only one sub-thread works and the program does not quit normally even though I use join()

I created ten threads to process the items in a global list, but I do not know why only the first worker does any work. Also, the main thread finished before the sub-threads finished, even though I had used thread.join(). Here is the code. I think the problem may be because I use a while loop in myThread.run, but I do not know how to tell these threads to keep working until the global list is empty.

# coding=utf-8
import threading
import numpy as np

dfs = ['units' + str(i).zfill(5) for i in range(250)]
units = dfs.copy()
k = [str(i).zfill(5) for i in range(500, 21800000)]
units.extend(k)
np.random.shuffle(units)
marker = []


def working_fun(df, unit):
    global marker
    if unit in df:
        threadlock.acquire()
        marker.append(int(unit[5:]))


class myThread(threading.Thread):
    def __init__(self, name):
        threading.Thread.__init__(self)
        self.name = name
        self.work_load = []

    def run(self):
        global dfs
        print("start thread" + self.name)

        while True:
            threadlock.acquire()
            if units != []:
                unit = units.pop()
            else:
                unit = None
            threadlock.release()

            if unit is not None:
                self.work_load.append(unit)
                working_fun(dfs, unit)
            else:
                print('------', self.name, '--finish---', len(self.work_load), '--------')
                break


threadlock = threading.RLock()

thds = []
for i in range(10):
    thd = myThread(name='thd' + str(i))
    thds.append(thd)
for thd in thds:
    thd.start()
    thd.join()

print('output:', marker)
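
For reference, two things in the code above commonly produce exactly these symptoms: working_fun acquires threadlock but never releases it, so the first thread that finds a match keeps holding the RLock and every other thread blocks on it, and calling start() followed immediately by join() inside the same loop runs the threads one after another rather than in parallel. Below is a minimal sketch with both points changed; it is only an illustration of the intended behaviour (with a much smaller stand-in list so it runs quickly), not the original poster's code or the accepted answer.

# Sketch only: same worker pattern, but the lock is always released and all
# threads are started before any of them is joined. The data is a small
# stand-in for the ~21.8 million items used in the question.
import threading

dfs = ['units' + str(i).zfill(5) for i in range(250)]
units = dfs + [str(i).zfill(5) for i in range(500, 5000)]
marker = []
threadlock = threading.Lock()


def working_fun(df, unit):
    if unit in df:
        with threadlock:          # released automatically when the block ends
            marker.append(int(unit[5:]))


def worker(name):
    done = 0
    while True:
        with threadlock:          # pop one item at a time under the lock
            unit = units.pop() if units else None
        if unit is None:
            print('------', name, '--finish---', done, '--------')
            break
        done += 1
        working_fun(dfs, unit)


thds = [threading.Thread(target=worker, args=('thd' + str(i),)) for i in range(10)]
for thd in thds:
    thd.start()               # start every thread first ...
for thd in thds:
    thd.join()                # ... then wait for all of them to finish

print('output:', sorted(marker))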

Try it this way:

import numpy
import multiprocessing

# Same as before
dfs = ['units' + str(i).zfill(5) for i in range(250)]
units = dfs.copy()
k = [str(i).zfill(5) for i in range(500, 21800000)]
units.extend(k)
numpy.random.shuffle(units)


# Almost the same as before
def working_fun(inp):
    df, unit = inp
    if unit in df:
        return int(unit[5:])


# This is needed for multiprocessing/threading
if __name__ == "__main__":

    # Create a pool of workers (10 in this case)
    with multiprocessing.Pool(10) as pool:

        # Map some (global) iterable on the pool of workers
        result = pool.map(working_fun, [(dfs, unit) for unit in units])

        # Show the results (note that the function returns None if the unit is not in df)
        print([r for r in result if r is not None])

Output:

$ python test.py
[1, 75, 139, 24, 101, 72, 156, 55, 58, 235, 14, 123, 177, 112, 168, 178, 173, 162, 104, 226, 230, 205, 69, 100, 246, 18, 117, 149, 37, 214, 206, 26, 136, 87, 144, 79, 50, 222, 7, 133, 36, 41, 30, 163, 103, 187, 6, 225, 15, 223, 234, 138, 126, 19, 64, 224, 39, 145, 130, 42, 11, 221, 128, 213, 204, 2, 45, 220, 242, 109, 59, 238, 232, 68, 152, 107, 148, 83, 197, 241, 118, 32, 90, 99, 22, 119, 0, 67, 48, 181, 71, 193, 95, 29, 113, 40, 134, 218, 141, 27, 121, 8, 207, 110, 60, 237, 47, 94, 73, 157, 184, 78, 159, 49, 202, 239, 124, 215, 127, 209, 62, 4, 52, 82, 74, 9, 199, 158, 188, 3, 61, 180, 57, 219, 245, 38, 16, 190, 12, 17, 175, 46, 196, 125, 194, 76, 129, 161, 81, 93, 137, 155, 174, 54, 35, 25, 115, 140, 216, 23, 21, 233, 77, 33, 92, 208, 120, 86, 165, 70, 135, 28, 91, 66, 85, 169, 203, 211, 114, 154, 122, 217, 247, 31, 147, 96, 142, 191, 10, 183, 80, 179, 189, 56, 105, 160, 228, 185, 132, 5, 53, 106, 13, 210, 182, 89, 192, 153, 170, 111, 65, 212, 186, 151, 200, 248, 229, 102, 240, 198, 176, 43, 131, 166, 236, 231, 116, 172, 146, 88, 44, 98, 227, 20, 34, 164, 108, 171, 244, 243, 195, 150, 249, 97, 167, 51, 201, 84, 63, 143]
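
One note on the design choice above: pool.map first materializes the whole list of (dfs, unit) tuples, roughly 21.8 million entries, before any worker starts. If that memory cost matters, the same idea can be written with functools.partial and imap_unordered plus a chunksize, so the units are streamed to the workers instead. This variant is a sketch of my own (with a smaller stand-in range so it runs quickly), not part of the answer above.

import functools
import multiprocessing

dfs = ['units' + str(i).zfill(5) for i in range(250)]
units = dfs + [str(i).zfill(5) for i in range(500, 5000)]   # stand-in for range(500, 21800000)


def working_fun(df, unit):
    if unit in df:
        return int(unit[5:])


if __name__ == "__main__":
    with multiprocessing.Pool(10) as pool:
        # partial binds the (small) dfs list to working_fun; imap_unordered then
        # streams the units to the workers in chunks of 1000 instead of building
        # one huge list of (dfs, unit) tuples up front
        func = functools.partial(working_fun, dfs)
        result = pool.imap_unordered(func, units, chunksize=1000)
        print([r for r in result if r is not None])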
