简体   繁体   中英

Django bulk_create with large CSV files

Hi all. I'm stuck on a problem with Django: I have to load the contents of CSV files into a relational MySQL database (InnoDB engine). The biggest files I get are about 2.2 MB. I have read about this here and on other pages, and many people say that the bulk functions are good for this kind of task. The problem is that the Django docs on them are of little help when it comes to relational tables and foreign keys. Does anyone have an approach for getting bulk inserts working with an InnoDB engine? This is my code:

def _parse_report_period(header_row):
    """Return ``(year, month)`` taken from the second header row of a report.

    The third column of the row is split on whitespace and its second token
    is read as a ``/``-separated date whose first part is the year and whose
    second part is the month number.  The year is returned as the raw string;
    the month is whatever ``month_dict`` maps the month number to (``None``
    when the number is not a key of ``month_dict``).
    """
    date_report = header_row[2].split()[1]
    date_parts = date_report.split('/')
    year = date_parts[0]
    # int() already accepts a leading zero ("01" -> 1).  Stripping '0' from
    # both ends, as was done before, turned "10" into 1.
    month_key = int(date_parts[1])
    return year, month_dict.get(month_key)


def _assign_categories():
    """Set ``categories`` on designs and customer/design relations.

    Each model is only touched when it still has at least one row whose
    ``categories`` is NULL.  The category id is chosen from the design type
    code using the ranges in ``DesignCatRanges``.  The updates are applied in
    the order listed, so if ranges overlap the later entry wins.
    """
    category_by_range = (
        (DesignCatRanges.adv, 1),
        (DesignCatRanges.basic, 2),
        (DesignCatRanges.bifocal, 3),
        (DesignCatRanges.camber, 4),
        (DesignCatRanges.sv, 6),
        (DesignCatRanges.svlen, 7),
        (DesignCatRanges.slimsv, 8),
        (DesignCatRanges.ultimate, 9),
        (DesignCatRanges.office, 5),
    )

    # exists() asks the database for a yes/no answer instead of loading
    # every uncategorised row just to test the queryset for truthiness.
    if Ffdesign.objects.filter(categories=None).exists():
        for codes, category_id in category_by_range:
            Ffdesign.objects.filter(
                type_id__code__in=codes).update(categories=category_id)

    if CustomersHasFfdesign.objects.filter(categories=None).exists():
        for codes, category_id in category_by_range:
            CustomersHasFfdesign.objects.filter(
                ffdesign_id__type_id__code__in=codes).update(categories=category_id)


def csv_to_db():
    """Load every ``.csv`` file found in ``media_path`` into the database.

    For each file the last line is discarded, the first three rows are
    treated as a header (the report year and month are read from the second
    one) and every remaining row is turned into ``Retails``, ``Vendor``,
    ``Type``, ``Material``, ``Color``, ``Ffdesign`` and
    ``CustomersHasFfdesign`` records through ``get_or_create``.  Once all rows
    of a file are stored, missing ``categories`` values are filled in.

    Side effects: rebinds the module globals ``check_files`` (result of the
    glob over ``media_path + filematch``) and ``get_customer_id`` (customer
    queryset of the last processed row).

    Any exception raised while processing is logged and swallowed, which
    stops the import at the failing row; nothing is returned.
    """
    global check_files, get_customer_id

    check_files = glob.glob(media_path + filematch)

    try:
        logger.info('\n')
        logger.info('========================[ Starting CSV to Database Process ]======================')
        logger.info('\n')
        logger.info('Checking Csv Files in csvfiles folder....')

        if check_files:
            logger.info('%s Found %s', len(check_files), check_files)

            for filename in os.listdir(media_path):
                if not filename.endswith('.csv'):
                    continue

                # The whole file is read up front, so the handle can be
                # released before any database work starts.
                with open(media_path + filename, 'rb') as csv_file:
                    csv_lines = csv_file.readlines()[:-1]  # last line is dropped

                reader = csv.reader(csv_lines)

                # Three header rows; only the second one is used.
                next(reader)
                header_row = next(reader)
                next(reader)

                year, month = _parse_report_period(header_row)

                for description, fee in CategoriesContent.cat.items():
                    Categories.objects.get_or_create(description=description, fee=fee)

                # Every remaining row describes one customer/design relation.
                for data in reader:
                    get_customer_id = Customers.objects.filter(pk=data[26])
                    # Resolve the customer once per row; an unknown customer
                    # raises IndexError here, as it did before.
                    customer_id = get_customer_id[0].id

                    retails = Retails.objects.get_or_create(
                        customers_id=customer_id,
                        code=data[8],
                        name=data[9], phone=data[25],
                        address=data[20], address2=data[21],
                        city=data[22],
                        state=data[23], zip=data[24])[0]

                    ff_vendor = Vendor.objects.get_or_create(
                        code=data[0], description=data[1])[0]
                    ff_type = Type.objects.get_or_create(
                        code=data[2], description=data[3])[0]
                    ff_material = Material.objects.get_or_create(
                        code=data[4], description=data[5])[0]
                    ff_color = Color.objects.get_or_create(
                        code=data[6], description=data[7])[0]

                    ff_product = Ffdesign.objects.get_or_create(
                        customers_id=customer_id,
                        designvendor=ff_vendor, material=ff_material,
                        designcolor=ff_color, type=ff_type)[0]

                    CustomersHasFfdesign.objects.get_or_create(
                        customers_id=customer_id,
                        ffdesign=ff_product,
                        month=month, year=year, docnum=data[10],
                        eye=data[11], lenses=data[12], remake_nc=data[13],
                        remake=data[14], credit=data[15], coupon=data[16],
                        outsourced=data[19], no_charges=data[17],
                        recalcs=data[18], retails=retails)

                # The category assignment depends only on type codes, so it
                # is run once per file rather than after every row.
                _assign_categories()

        logger.info('\n')
        logger.info('========================[ End CSV to Database Process ]======================')
        logger.info('\n')

    except Exception as e:
        logger.error(str(e))
        logger.info('\n')
        logger.info('========================End CSV to Database Process======================')
        logger.info('\n')

Well, I found the problem: there are two bottlenecks in the code. The first one is in this part of the code:

 # Look up (or insert) the Retails row identified by the customer id plus
 # CSV columns 8, 9 and 20-25; every keyword below is part of the lookup.
 # NOTE(review): get_customer_id[0] indexes a queryset, which presumably
 # costs an extra query on each use - confirm.
 retails, created = Retails.objects.get_or_create(customers_id=get_customer_id[0].id,
                                                                             code=data[8],
                                                                             name=data[9], phone=data[25],
                                                                             address=data[20], address2=data[21],
                                                                             city=data[22],
                                                                             state=data[23], zip=data[24])

And the other bottleneck is in this other part of the code:

# Look up (or insert) the customer/design relation; all fields listed here
# are passed as lookup arguments, none of them through `defaults`.
relation, created = CustomersHasFfdesign.objects.get_or_create(
                                customers_id=get_customer_id[0].id,
                                ffdesign=ff_product,
                                month=month, year=year, docnum=data[10],
                                eye=data[11], lenses=data[12], remake_nc=data[13],
                                remake=data[14], credit=data[15], coupon=data[16],
                                outsourced=data[19], no_charges=data[17],
                                recalcs=data[18], retails=retails

                            )

get_or_create needs to be used with care: it may be fine for inserting a small amount of data, but in this case there are a lot of records. A good approach is to use bulk_create. bulk_create has some limitations, which you can read about in the Django docs, but it works fine.

# Bulk variant: builds one unsaved CustomersHasFfdesign per remaining row of
# `reader` and hands the whole list to a single bulk_create call.
# NOTE(review): `checking_results` is not defined in this excerpt, and every
# row reuses the same get_customer_id, ff_product and retails values -
# confirm both against the full function.
if not checking_results.exists():
                                    CustomersHasFfdesign.objects.bulk_create([CustomersHasFfdesign(
                                        customers_id=get_customer_id[0].id,
                                        ffdesign=ff_product,
                                        month=month, year=year, docnum=row[10],
                                        eye=row[11], lenses=row[12], remake_nc=row[13],
                                        remake=row[14], credit=row[15], coupon=row[16],
                                        outsourced=row[19], no_charges=row[17],
                                        recalcs=row[18], retails=retails)for row in reader]

                                    )

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM