Applying a function to each array of every couple of rows

Question

I have a dataframe column in which every cell contains an array of image URLs.

>>>df.IMAGES.head()

0    ["https://cf-medias.avendrealouer.fr/image/_87...
1    ["http://photos.ubiflow.net/440414/165474561/p...
2    ["https://v.seloger.com/s/width/965/visuels/0/...
3    ["https://pix.yanport.com/ads/e9e07ed0-812f-11...
4    ["https://v.seloger.com/s/width/966/visuels/0/...

I want to check whether the images from every pair of rows are similar, so I wrote a function that checks whether two images are similar. How can I apply my function to every pair of rows?

from PIL import Image
import imagehash
import requests
from io import BytesIO

def image_similarity(imageAurl, imageB, cutoff=5):
    """Download two images and report whether they look alike.

    Each image is fetched over HTTP and reduced to an average hash; the
    images count as similar when the difference between the two hashes is
    below ``cutoff``.

    Args:
        imageAurl: URL of the first image.
        imageB: URL of the second image.
        cutoff: Exclusive upper bound on the hash difference for the two
            images to be considered similar.

    Returns:
        True if the images are similar, False otherwise. The verdict is
        also printed.

    Raises:
        requests.HTTPError: If either download answers with an error status.
    """
    hashes = []
    for url in (imageAurl, imageB):
        response = requests.get(url)
        # Fail loudly on an HTTP error instead of handing an error page to PIL.
        response.raise_for_status()
        # Image.open accepts a file-like object, so wrap the downloaded bytes.
        with Image.open(BytesIO(response.content)) as img:
            hashes.append(imagehash.average_hash(img))

    similar = bool(hashes[0] - hashes[1] < cutoff)
    if similar:
        print('images are similar')
    else:
        print('images are not similar')
    # Return the verdict so callers such as DataFrame.apply can collect it.
    return similar

Thanks to Kshitij Saxena, I tried:

# Build a 'NextImage' column by looking up 'IMAGES' at every index label minus one.
# NOTE(review): pandas presumably re-aligns the looked-up values on the original
# index when assigning, so this may not pair a row with its neighbour at all --
# confirm; df['IMAGES'].shift(-1) is the usual way to fetch the next row's value.
df['NextImage'] = df['IMAGES'][df['IMAGES'].index - 1]
# Call image_similarity once per row (axis=1) with that row's two cells.
# NOTE(review): judging by the df.IMAGES.head() output, each cell holds a whole
# array of URLs rather than a single URL -- confirm what image_similarity receives.
df['IsSimilar'] = df.apply(lambda x: image_similarity(x['IMAGES'], x['NextImage']), axis=1)

However I get the following error:

---------------------------------------------------------------------------
InvalidSchema                             Traceback (most recent call last)
<ipython-input-25-16b99a7b864a> in <module>
      1 df['NextImage'] = df['IMAGES'][df['IMAGES'].index - 1]
----> 2 df['IsSimilar'] = df.apply(lambda x: image_similarity(x['IMAGES'], x['NextImage']), axis=1)

C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py in apply(self, func, axis, broadcast, raw, reduce, result_type, args, **kwds)
   6012                          args=args,
   6013                          kwds=kwds)
-> 6014         return op.get_result()
   6015 
   6016     def applymap(self, func):

C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\apply.py in get_result(self)
    140             return self.apply_raw()
    141 
--> 142         return self.apply_standard()
    143 
    144     def apply_empty_result(self):

C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\apply.py in apply_standard(self)
    246 
    247         # compute the result using the series generator
--> 248         self.apply_series_generator()
    249 
    250         # wrap results

C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\apply.py in apply_series_generator(self)
    275             try:
    276                 for i, v in enumerate(series_gen):
--> 277                     results[i] = self.f(v)
    278                     keys.append(v.name)
    279             except Exception as e:

<ipython-input-25-16b99a7b864a> in <lambda>(x)
      1 df['NextImage'] = df['IMAGES'][df['IMAGES'].index - 1]
----> 2 df['IsSimilar'] = df.apply(lambda x: image_similarity(x['IMAGES'], x['NextImage']), axis=1)

<ipython-input-21-3acdcb76f890> in image_similarity(imageAurl, imageB)
      7 
      8 def image_similarity(imageAurl,imageB):
----> 9     responseA = requests.get(imageAurl)
     10     imgA = Image.open(BytesIO(response.content))
     11     responseB = requests.get(imageBurl)

~\AppData\Roaming\Python\Python36\site-packages\requests\api.py in get(url, params, **kwargs)
     73 
     74     kwargs.setdefault('allow_redirects', True)
---> 75     return request('get', url, params=params, **kwargs)
     76 
     77 

~\AppData\Roaming\Python\Python36\site-packages\requests\api.py in request(method, url, **kwargs)
     58     # cases, and look like a memory leak in others.
     59     with sessions.Session() as session:
---> 60         return session.request(method=method, url=url, **kwargs)
     61 
     62 

~\AppData\Roaming\Python\Python36\site-packages\requests\sessions.py in request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
    531         }
    532         send_kwargs.update(settings)
--> 533         resp = self.send(prep, **send_kwargs)
    534 
    535         return resp

~\AppData\Roaming\Python\Python36\site-packages\requests\sessions.py in send(self, request, **kwargs)
    638 
    639         # Get the appropriate adapter to use
--> 640         adapter = self.get_adapter(url=request.url)
    641 
    642         # Start time (approximately) of the request

~\AppData\Roaming\Python\Python36\site-packages\requests\sessions.py in get_adapter(self, url)
    729 
    730         # Nothing matches :-/
--> 731         raise InvalidSchema("No connection adapters were found for '%s'" % url)
    732 
    733     def close(self):

InvalidSchema: ('No connection adapters were found for \'["https://cf-medias.avendrealouer.fr/image/_873908158_d.jpg","https://cf-medias.avendrealouer.fr/image/_873908159_d.jpg","https://cf-medias.avendrealouer.fr/image/_873908160_d.jpg","https://cf-medias.avendrealouer.fr/image/_873908161_d.jpg","https://cf-medias.avendrealouer.fr/image/_873908162_d.jpg"]\'', 'occurred at index 0')

The error seems to occur because I pass the whole array of URLs stored in each cell where a single URL is expected.

Answer 1

This should work:

# Pair every row with the row that follows it: shift(-1) moves each value up
# by one position, so row i receives the 'IMAGES' value of row i + 1.
df['NextImage'] = df['IMAGES'].shift(-1)
# The last row has no successor (shift leaves a missing value there), so it is
# skipped and gets None; every other row is compared with its successor.
# NOTE: image_similarity expects a single URL per argument; if a cell holds
# several URLs, select the individual URLs to compare before calling it.
has_next = df['NextImage'].notna()
df['IsSimilar'] = [
    image_similarity(current, following) if paired else None
    for current, following, paired in zip(df['IMAGES'], df['NextImage'], has_next)
]

Make sure your function returns the similarity result rather than just printing it!

Applying a function to each array of every couple of rows

Question

1 answer

solution1
0 2019-09-11 12:17:19

Applying a function to each array of every couple of rows

Question

1 answer

solution1 0 2019-09-11 12:17:19

solution1
0 2019-09-11 12:17:19