简体   繁体   中英

write data to JSON file during multiprocessing using python

I am new to Python. I am writing a Python program that writes to a JSON file when a website is unreachable. The websites to check are stored in the hosts variable, and the check is scheduled to run every 5 seconds. I used Pool from multiprocessing so the websites are checked at the same time, without delay; afterwards I write the results to the JSON file. But currently only one website's data gets written to the JSON file. How can I make it write the data for both websites?

Here's the sample code:

import os
from multiprocessing import Pool
from datetime import datetime
import time
import json

# Sites to monitor; one pool worker is created per host (n workers total).
hosts = ["www.google.com","www.smackcoders.com"]
n = len(hosts)

def write(hosts):
    """Append one "host is down" record to stack.json.

    NOTE(review): despite the plural name, *hosts* is a single hostname
    string here — Pool.map() hands each worker one item of the list.
    Reads the module-global *timestamp* set by the polling loop (the
    workers only see it because the pool forks after it is assigned).
    """
    u = "down"  # status is hard-coded; no actual reachability check is done
    name = "stack.json"
    # Seed the file with an empty JSON object so the seek/truncate below
    # always has a trailing '}' to strip.
    if not os.path.exists(name):
        with open(name, 'w') as f:
            f.write('{}')
    result = [(timestamp, {'monitor.status': u,
                           "monitor.id": "tcp-tcp@"+hosts
                           })]

    # BUG(review): every pooled worker performs this read-modify-write with
    # no locking, so two processes truncate/write concurrently and one
    # entry clobbers the other — which is why only one host shows up.
    with open(name, 'rb+') as f:
        f.seek(-1, os.SEEK_END)  # step back over the closing '}'
        f.truncate()             # drop it so a new entry can be appended
        for entry in result:
            _entry = '"{}":{},\n'.format(entry[0], json.dumps(entry[1]))
            _entry = _entry.encode()
            f.write(_entry)
        f.write('}'.encode('ascii'))  # restore the closing brace

def main(hosts):
    """Run write() for every host concurrently, one pooled worker per host.

    *hosts* is the list of hostnames; the pool size comes from the module
    global *n*.
    """
    p = Pool(processes= n)
    try:
        result = p.map(write, hosts)  # blocks until every host is handled
    finally:
        # Close and join the pool: the original created a fresh Pool every
        # 5-second cycle and never terminated it, leaking worker processes.
        p.close()
        p.join()
# Poll forever: refresh the shared timestamp, fan out the checks, sleep 5s.
while True:
    # Module-global read by write(); the pooled workers inherit it via
    # fork — this would NOT transfer under the spawn start method.
    timestamp = datetime.now().strftime("%B %d %Y, %H:%M:%S")
    main(hosts)
    time.sleep(5)

My output:

""March 13 2019, 10:49:03":{"monitor.id": "tcp-tcp@www.smackcoders.com", "monitor.status": "down"},
}

Required Output:

{"March 13 2019, 10:49:03":{"monitor.id": "tcp-tcp@www.smackcoders.com", "monitor.status": "down"},"March 13 2019, 10:49:03":{"monitor.id": "tcp-tcp@www.google.com", "monitor.status": "down"},
}

I've made some minor changes to your code and implemented a Lock.

import os
from multiprocessing import Pool,RLock
from datetime import datetime
import time
import json

# Lock serialising access to the JSON file across the pooled workers.
# NOTE(review): the workers share this lock only because the pool forks
# after it is created; under the spawn start method each child would get
# its own independent lock.
file_lock=RLock()
hosts = ["www.google.com","www.smackcoders.com"]
n = len(hosts)

def write(hosts):
    """Append one "host is down" record to stack.json under file_lock.

    NOTE(review): *hosts* is a single hostname string — Pool.map() hands
    each worker one item. Reads the module-global *timestamp* set by the
    polling loop (inherited by the workers when the pool forks).
    """
    u = "down"  # status is hard-coded; no actual reachability check is done
    name = "stack.json"
    # Seed the file with '{}' so the seek/truncate below always has a
    # trailing '}' to strip.
    if not os.path.exists(name):
        with open(name, 'w') as f:
            f.write('{}')
    result = [(timestamp, {'monitor.status': u,
                           "monitor.id": "tcp-tcp@"+hosts
                           })]
    # The lock makes the read-modify-write atomic across workers, so both
    # hosts' entries survive (this was the original bug).
    with file_lock:
        with open(name, 'rb+') as f:
            f.seek(-1, os.SEEK_END)  # step back over the closing '}'
            f.truncate()             # drop it so a new entry can be appended
            for entry in result:
                _entry = '"{}":{},\n'.format(entry[0], json.dumps(entry[1]))
                _entry = _entry.encode()
                f.write(_entry)
            f.write('}'.encode('ascii'))  # restore the closing brace


def main(hosts):
    """Run write() for every host concurrently, one pooled worker per host.

    *hosts* is the list of hostnames; the pool size comes from the module
    global *n*.
    """
    p = Pool(processes= n)
    try:
        result = p.map(write, hosts)  # blocks until every host is handled
    finally:
        # Close and join the pool: the original created a fresh Pool every
        # 5-second cycle and never terminated it, leaking worker processes.
        p.close()
        p.join()
# Poll forever: refresh the shared timestamp, fan out the checks, sleep 5s.
while True:
    # Module-global read by write(); the pooled workers inherit it via
    # fork — this would NOT transfer under the spawn start method.
    timestamp = datetime.now().strftime("%B %d %Y, %H:%M:%S")
    main(hosts)
    time.sleep(5)

However, for a long-running process, constantly re-reading and rewriting a file like this is a poor way to log: the code has to load an ever-growing file and completely rewrite it on every cycle. Consider writing the log to a database instead.

Here's a different option that will use Thread over Pool.

Created a class to get the return of join()

# Class that overrides Thread to get the return value of join()
class ThreadWithReturnValue(Thread):
    """A Thread whose join() returns the target's return value.

    The *Verbose* parameter is accepted for backward compatibility with
    the old Python 2 Thread signature but is ignored.
    """

    def __init__(self, group=None, target=None, name=None, args=None, kwargs=None, Verbose=None):
        # Default to fresh containers instead of mutable default arguments
        # that would be shared between instances.
        if args is None:
            args = ()
        if kwargs is None:
            kwargs = {}

        super().__init__(group, target, name, args, kwargs)
        self._return = None  # filled in by run(), read back by join()

    def run(self):
        # Same as Thread.run(), but capture the target's return value.
        # (Removed the stray debug print(type(self._target)) the original
        # left in here.)
        if self._target is not None:
            self._return = self._target(*self._args, **self._kwargs)

    def join(self, *args):
        """Wait for the thread to finish and return the target's result."""
        Thread.join(self, *args)
        return self._return

I have changed the code to get the status of each hosts first, then writing the result to your file. Also fixed the way the JSON file is written.

import json
import os
import subprocess
import time
from datetime import datetime
from threading import Thread

# Hosts to monitor and the JSON log location (in the current working dir).
hosts = ["www.google.com","www.smackcoders.com"]
filepath = os.path.join(os.getcwd(), "stack.json")
n = len(hosts)  # NOTE(review): unused in this version; kept from the original


def perform_ping(host_ip):
    """Send one ICMP echo request to *host_ip* and report reachability.

    Returns 'UP' when the ping succeeds, 'DOWN' otherwise (including when
    no ping binary is available).

    NOTE(review): "-c" is the Unix count flag; Windows ping uses "-n".
    """
    # Use subprocess.run with an argument list (no shell) so a malicious
    # hostname cannot inject shell commands; the original os.system() call
    # built a shell command by string concatenation.
    try:
        response = subprocess.run(["ping", "-c", "1", host_ip]).returncode
    except OSError:
        # ping executable missing/unrunnable: treat the host as unreachable,
        # matching the non-zero exit the os.system() version produced.
        return 'DOWN'
    if response == 0:
        return 'UP'
    else:
        return 'DOWN'


def write_result(timestamp, results):
    """Append one polling snapshot to the JSON log file.

    timestamp: string key for this polling cycle.
    results: iterable of (host, status) pairs as produced by main().

    Loads the existing JSON object from *filepath* (module global), adds
    the new snapshot under *timestamp*, and rewrites the whole file.
    """
    if not os.path.exists(filepath):
        current_file = {}
    else:
        # Load what previous cycles wrote so entries accumulate.
        with open(filepath, 'r') as f_read:
            current_file = json.load(f_read)  # idiomatic: load directly from the file object

    # One record per host for this cycle.
    current_file[timestamp] = [
        {'monitor.status': status,
         "monitor.id": "tcp-tcp@" + host}
        for host, status in results
    ]

    # Rewrite the whole document. Fine for a small log, but consider a
    # database or append-only format if this grows large.
    with open(filepath, 'w') as f_write:
        json.dump(current_file, f_write)


def main():
    """Poll every configured host in parallel forever, logging one JSON
    snapshot per 5-second cycle."""
    while True:
        # One thread per host; each thread is named after its host so the
        # result can be paired back up with the host after join().
        workers = [
            ThreadWithReturnValue(target=perform_ping, name=host, args=(host, ))
            for host in hosts
        ]
        stamp = datetime.now().strftime("%B %d %Y, %H:%M:%S")
        for worker in workers:
            worker.start()
        outcome = [(worker.name, worker.join()) for worker in workers]
        # Pings run in parallel, but the file is written once by a single
        # thread at the end of the cycle — no lock needed, and the file is
        # touched only once no matter how many hosts are checked.
        write_result(stamp, outcome)
        time.sleep(5)


# Standard script entry point: only start the polling loop when run directly.
if __name__ == '__main__':
    main()

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM