For my research, I have to match participants in groups of three according to their age, gender, and sports experience. I am using PuLP to find an optimized match that minimizes the variation in age, gender, and experience within each matched group. However, when I run the code below, I see in my output that some participants are matched to themselves. How can I set the constraints so that all participants in a group are unique and all participants in the original data are assigned to a group?
# Participant characteristics used for the matching.
data = pd.DataFrame({
    'name': ['John', 'Leo', 'Karin', 'Daniel', 'Claire', 'Alex'],
    'gender': [0, 0, 1, 0, 1, 1],
    'age': [24, 60, 38, 42, 28, 55],
    'experience': [5, 4, 1, 4, 2, 2],
})
data.head()

# Optimization problem: minimize the variation in participant characteristics.
prob = LpProblem("Matching_Participants", LpMinimize)
participants = range(len(data))

# Every ordered index triple (i, j, k), in nested-loop order. Repeated indices
# such as (1, 5, 1) are included here.
trios = [(i, j, k) for i in participants for j in participants for k in participants]
# Every ordered index pair (j, k), used to sum over the two "other" slots.
pairs = [(j, k) for j in participants for k in participants]


def trio_spread(i, j, k):
    """Return the mean of the gender, age and experience standard deviations of rows i, j and k."""
    spreads = [
        np.std([data[column][i], data[column][j], data[column][k]])
        for column in ('gender', 'age', 'experience')
    ]
    return np.mean(spreads)


# Binary decision variable y[(i, j, k)]: 1 when participants i, j and k are matched together.
y = LpVariable.dicts("trio", trios, cat='Binary')

# Objective: total mean standard deviation over the selected trios.
prob += lpSum([trio_spread(i, j, k) * y[(i, j, k)] for (i, j, k) in trios])

# Constraints
for i in participants:
    # i sits in the first slot of at most two trios
    prob += lpSum(y[(i, j, k)] for (j, k) in pairs) <= 2
    # i sits in the second slot of at most two trios
    prob += lpSum(y[(j, i, k)] for (j, k) in pairs) <= 2
    # first-slot plus second-slot appearances of i total at most one
    # (the third slot is not part of this sum)
    prob += lpSum(y[(i, j, k)] + y[(j, i, k)] for (j, k) in pairs) <= 1
# exactly two trios are selected in total
prob += lpSum(y[trio] for trio in trios) == 2

# Solve the problem
prob.solve()

# Print the selected trios
print("Finished matching!\n")
for (i, j, k) in trios:
    if y[(i, j, k)].varValue == 1:
        print('{} and {} and {} with a mean std of {}'.format(
            data['name'][i], data['name'][j], data['name'][k], trio_spread(i, j, k)))
Output:
Finished matching!
Leo and Alex and Leo with a mean std of 1.2570787221094177
Karin and Daniel and Karin with a mean std of 1.2570787221094177
Add constraints:
for i in participants:
    # One term per (j, k): the trios that hold participant i in the first,
    # second and third slot respectively.
    memberships = [
        y[(i, j, k)] + y[(j, i, k)] + y[(j, k, i)]
        for j in participants
        for k in participants
    ]
    # Across all slots of all trios, participant i is used at most once; a trio
    # listing i more than once is counted more than once in this sum.
    prob += lpSum(memberships) <= 1
# Participant characteristics used for the matching.
data = pd.DataFrame({
    'name': ['John', 'Leo', 'Karin', 'Daniel', 'Claire', 'Alex'],
    'gender': [0, 0, 1, 0, 1, 1],
    'age': [24, 60, 38, 42, 28, 55],
    'experience': [5, 4, 1, 4, 2, 2],
})
data.head()

#define the optimization problem (minimize variation in participant characteristics)
prob = LpProblem("Matching_Participants", LpMinimize)
participants = range(len(data))
# Number of trios to form, derived from the data instead of being hard-coded.
# If len(data) is not a multiple of 3, constraint (3) below cannot be met and
# the solver reports a non-optimal status.
n_groups = len(participants) // 3


def mean_std(i, j, k):
    """Return the mean of the gender, age and experience standard deviations of rows i, j and k."""
    return np.mean([
        np.std([data[column][i], data[column][j], data[column][k]])
        for column in ('gender', 'age', 'experience')
    ])


#Use LpVariables to create variable y, a decision variable to determine whether or not to match participant i with participant j and participant k.
#(1)
# Only triples of three *distinct* participants get a variable, so a participant
# can never be matched with themselves.
# NOTE: triples are ordered, so the same three people exist under all 6
# orderings; constraint (3) ensures only one of them can be selected.
trios = [
    (i, j, k)
    for i in participants
    for j in participants
    for k in participants
    if i != j and i != k and j != k
]
# Cost of each trio, computed once and reused for the objective and the report.
cost = {trio: mean_std(*trio) for trio in trios}

y = LpVariable.dicts("trio", trios, cat='Binary')
prob += lpSum([cost[trio] * y[trio] for trio in trios])

#define constraints
#(2)
# Each participant occupies a given position (first, second or third) in at
# most one selected trio.
for i in participants:
    for slot in range(3):
        prob += lpSum(y[trio] for trio in trios if trio[slot] == i) <= 1
# The number of selected trios equals the number of groups to form.
prob += lpSum(y[trio] for trio in trios) == n_groups

#only one participant per group and the participant can not be a member of multiple groups
#(3)
# Every participant belongs to exactly one selected trio, whatever the position.
for i in participants:
    prob += lpSum(y[trio] for trio in trios if i in trio) == 1

#solve the problem
prob.solve()

#print matches
print("Finished matching!\n")
if prob.status != 1:  # 1 is PuLP's "Optimal" status code (LpStatusOptimal)
    print("Warning: solver status is {}, the matches below may not be valid".format(prob.status))
for trio in trios:
    chosen = y[trio].varValue
    # Solver output is floating point (e.g. 0.9999999), so compare with a
    # tolerance rather than `== 1`; varValue is None when no value was assigned.
    if chosen is not None and chosen > 0.5:
        i, j, k = trio
        print('{} and {} and {} with a mean std of {}'.format(
            data['name'][i], data['name'][j], data['name'][k], cost[trio]))

# Write the model to disk in LP format for inspection.
prob.writeLP('stacko')
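There are three things that should be addressed, marked (1)–(3) in the code above: (1) create decision variables only for trios of three distinct participants, so nobody can be matched with themselves; (2) allow each participant to appear at most once in each position of a trio; and (3) require every participant to belong to exactly one trio.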
The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint them, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.