简体   繁体   中英

Python re.findall between two strings while excluding strings

My current code is the following:

idk = {"id":30511879634,"title":"3.5y","option1":"3.5y","option2":null,"option3":null,"sku":"","requires_shipping":true,"taxable":true,"featured_image":{"id":18778730002,"product_id":8876555346,"position":1,"created_at":"2017-02-15T15:51:03-05:00","updated_at":"2017-02-15T15:51:37-05:00","src":"https:\/\/cdn.shopify.com\/s\/files\/1\/1527\/4931\/products\/AJ6_HEIRESS_PRODUCT.jpg?v=1487191897","variant_ids":[30511879634,30511879698,30511879762,30511879826,30511879890,30511879954,30511880018,30511880082]},"available":false,"name":"Air Jordan 6 Retro Premium GG 'Heiress' - 3.5y","public_title":"3.5y","options":["3.5y"],"price":16000,"weight":1361,"compare_at_price":null,"inventory_quantity":0,"inventory_management":"shopify","inventory_policy":"deny","barcode":""},{"id":30511879698,"title":"4y","option1":"4y","option2":null,"option3":null,"sku":"","requires_shipping":true,"taxable":true,"featured_image":{"id":18778730002,"product_id":8876555346,"position":1,"created_at":"2017-02-15T15:51:03-05:00","updated_at":"2017-02-15T15:51:37-05:00","src":"https:\/\/cdn.shopify.com\/s\/files\/1\/1527\/4931\/products\/AJ6_HEIRESS_PRODUCT.jpg?v=1487191897","variant_ids":[30511879634,30511879698,30511879762,30511879826,30511879890,30511879954,30511880018,30511880082]},"available":true,"name":"Air Jordan 6 Retro Premium GG 'Heiress' - 4y","public_title":"4y","options":["4y"],"price":16000,"weight":1361,"compare_at_price":null,"inventory_quantity":1,"inventory_management":"shopify","inventory_policy":"deny","barcode":""},
variants = re.findall(r'"id":(.*?),"title"', idk)

which returns ['30511879634', '18778730002,"product_id":8876555346,"position":1,"created_at":"2017-02-15T15:51:03-05:00","updated_at":"2017-02-15T15:51:37-05:00","src":"https:\\\\/\\\\/cdn.shopify.com\\\\/s\\\\/files\\\\/1\\\\/1527\\\\/4931\\\\/products\\\\/AJ6_HEIRESS_PRODUCT.jpg?v=1487191897","variant_ids":[30511879634,30511879698,30511879762,30511879826,30511879890,30511879954,30511880018,30511880082]},"available":false,"name":"Air Jordan 6 Retro Premium GG \\'Heiress\\' - 3.5y","public_title":"3.5y","options":["3.5y"],"price":16000,"weight":1361,"compare_at_price":null,"inventory_quantity":0,"inventory_management":"shopify","inventory_policy":"deny","barcode":""},{"id":30511879698']

but I want it to return ['30511879634', '30511879698']

I know I can do variants = re.findall(r'"id":[^"product_id"].,"title"', idk) but that would return ['"id":30511879634,"title"', '"id":30511879698,"title"']

I've tried variants = re.findall(r'"id":[^"product_id"](.*?),"title"', idk) but that doesn't work. Is there any way I can return just the numbers while making sure the second id (18778730002) is not included in the list, so that I get only 30511879634 and 30511879698?

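You could use the regex below. The lookbehind and lookahead assert the surrounding "id": and ,"title" text without capturing it, and \d+ matches only digits, so the match cannot run across the nested image id into a later ,"title":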

(?<=\"id\":)\d+(?=,\"title\")

see regex demo / explanation

python ( demo )

import re

# Sample payload: two variant objects, each containing a nested "featured_image"
# object that carries its own "id". A raw string keeps the JSON-escaped "\/"
# sequences verbatim without relying on Python's invalid-escape fallback, which
# emits a warning on current interpreters.
idk = r"""{"id":30511879634,"title":"3.5y","option1":"3.5y","option2":null,"option3":null,"sku":"","requires_shipping":true,"taxable":true,"featured_image":{"id":18778730002,"product_id":8876555346,"position":1,"created_at":"2017-02-15T15:51:03-05:00","updated_at":"2017-02-15T15:51:37-05:00","src":"https:\/\/cdn.shopify.com\/s\/files\/1\/1527\/4931\/products\/AJ6_HEIRESS_PRODUCT.jpg?v=1487191897","variant_ids":[30511879634,30511879698,30511879762,30511879826,30511879890,30511879954,30511880018,30511880082]},"available":false,"name":"Air Jordan 6 Retro Premium GG 'Heiress' - 3.5y","public_title":"3.5y","options":["3.5y"],"price":16000,"weight":1361,"compare_at_price":null,"inventory_quantity":0,"inventory_management":"shopify","inventory_policy":"deny","barcode":""},{"id":30511879698,"title":"4y","option1":"4y","option2":null,"option3":null,"sku":"","requires_shipping":true,"taxable":true,"featured_image":{"id":18778730002,"product_id":8876555346,"position":1,"created_at":"2017-02-15T15:51:03-05:00","updated_at":"2017-02-15T15:51:37-05:00","src":"https:\/\/cdn.shopify.com\/s\/files\/1\/1527\/4931\/products\/AJ6_HEIRESS_PRODUCT.jpg?v=1487191897","variant_ids":[30511879634,30511879698,30511879762,30511879826,30511879890,30511879954,30511880018,30511880082]},"available":true,"name":"Air Jordan 6 Retro Premium GG 'Heiress' - 4y","public_title":"4y","options":["4y"],"price":16000,"weight":1361,"compare_at_price":null,"inventory_quantity":1,"inventory_management":"shopify","inventory_policy":"deny","barcode":""}"""
# The lookbehind and lookahead require '"id":' before and ',"title"' after the
# digits without including them in the match, so findall returns bare numbers.
# The nested image id is followed by ',"product_id"', so it never matches.
variants = re.findall(r"(?<=\"id\":)\d+(?=,\"title\")", idk)
print(variants)  # -> ['30511879634', '30511879698']

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM