How to make Julia Code more Efficient? Currently it is performing even worse than Python

Question

I am new to Julia and have been experimenting with it because I heard that it offers excellent performance. However, I have yet to see that promised performance. I have tried many of the performance techniques described in the book "JULIA HIGH PERFORMANCE", which has made the code a little less readable. Even so, my Python code is much faster than my Julia code, at least 3x faster for the benchmark case. Either I am doing something very wrong in the code (something that must be a sin in Julia), or Julia just can't do it. Please prove me wrong about the latter.

What I am trying to do in the code is assign distinct balls into distinct boxes with a maximum and minimum limit to the capacity of each box. The order in which balls are placed in the box also matters. I need to generate all possible assignments with the given constraints in minimum possible time.

PYTHON CODE:

import itertools
import time

# Largest / smallest number of balls a single box may receive.
max_balls = 5
min_balls = 0

def get_assignments(balls, boxes, assignments=None):
    """Enumerate every ordered assignment of distinct balls to distinct boxes.

    Boxes are filled in the order given. For the current (first) box every
    ordered selection of balls whose size respects the module-level
    ``max_balls`` / ``min_balls`` limits is tried, and the remaining balls are
    distributed recursively over the remaining boxes.

    Args:
        balls: iterable of distinct, hashable ball ids.
        boxes: non-empty sequence of distinct box ids.
        assignments: partial plans to extend; each plan is a list of
            ``(box_id, balls_tuple)`` pairs. Defaults to one empty plan.
            The plans passed in are never mutated.

    Returns:
        A list of plans. A plan stops early (has no entry for the trailing
        boxes) when all balls were used up before those boxes were reached.

    Raises:
        Exception: if ``boxes`` is empty.
    """
    # A fresh default per call instead of a shared mutable default argument.
    if assignments is None:
        assignments = [[]]

    n_boxes = len(boxes)
    if n_boxes == 0:
        raise Exception("No delivery boys found")

    # Materialise once so sets, ranges and generators are all accepted and the
    # input order of the balls is preserved throughout the recursion.
    balls = list(balls)

    upper_ball_limit = min(len(balls), max_balls)
    # The current box must take at least what the other boxes cannot hold.
    lower_ball_limit = max(len(balls) - upper_ball_limit * (n_boxes - 1), min_balls)

    box_id = boxes[0]
    all_assignments = []

    if n_boxes == 1:
        # Last box: it receives an ordering of the balls that are left.
        for strategy in itertools.permutations(balls, upper_ball_limit):
            for subplan in assignments:
                all_assignments.append(subplan + [(box_id, strategy)])
        return all_assignments

    # Independent of the chosen strategy, so computed once.
    remaining_boxes = [box for box in boxes if box != box_id]

    for n_taken in range(lower_ball_limit, upper_ball_limit + 1):
        for strategy in itertools.permutations(balls, n_taken):
            # Extend every incoming plan first, then recurse exactly once.
            # Recursing inside the per-plan loop (as before) re-expanded the
            # earlier plans and produced duplicates when more than one plan
            # was passed in.
            temp_plans = [subplan + [(box_id, strategy)] for subplan in assignments]
            remaining_balls = [ball for ball in balls if ball not in strategy]
            if remaining_balls:
                all_assignments.extend(
                    get_assignments(remaining_balls, remaining_boxes, temp_plans))
            else:
                all_assignments.extend(temp_plans)
    return all_assignments

# Benchmark case: ball ids 1..8 and four box ids.
balls = range(1, 9)
boxes = [1, 2, 3, 4]

# Wall-clock start; the enumeration below is the work being measured.
t = time.time()
all_assignments = get_assignments(balls, boxes)

print('Number of assignments: %s' % len(all_assignments))
# The elapsed time is read after the print above, so it includes that call.
print('Time taken: %s' % (time.time()-t))

And here is my attempt at writing the JULIA CODE for the above.

#!/usr/bin/env julia

using Combinatorics

# Cap applied by plan_assignments to the number of balls tried for one box.
const max_balls=5
# Floor applied by plan_assignments to the per-box lower limit it computes.
const min_balls=0

"""
    plan_assignments(balls, boxes; plans=...)

Enumerate every way of placing the distinct `balls` into the distinct `boxes`,
where the order of the balls inside a box matters and the number of balls per
box is bounded using the constants `max_balls` and `min_balls`.

Boxes are processed in the order given. Each plan is a vector with one slot per
box of the top-level problem; the slot of a processed box holds
`(box_id, ordered_balls)`. The slot to fill is derived from the plan length and
the number of boxes still to process, so no global bookkeeping is needed.

# Arguments
- `balls`: ids of the balls that still have to be placed.
- `boxes`: ids of the boxes that are still empty (at least one).
- `plans`: partial plans to extend. Every plan must have at least
  `length(boxes)` slots. The default is a single blank plan. The plans passed
  in are copied, never mutated.

# Returns
A vector of plans. When the balls run out before the last box is reached, the
slots of the untouched trailing boxes are left unassigned.

# Throws
- `ArgumentError` if `boxes` is empty or if the balls cannot fit into the boxes
  even when every box takes `max_balls` balls.
"""
function plan_assignments(balls::Vector{Int32}, boxes ; plans=[Vector{Tuple{Int32,Array{Int32,1}}}(length(boxes))])
    n_boxes = length(boxes)
    n_balls = length(balls)

    n_boxes == 0 && throw(ArgumentError("Invalid Inputs: no boxes given."))
    if n_boxes * max_balls < n_balls
        # Continuing here would silently produce plans that drop balls.
        throw(ArgumentError("Invalid Inputs: $n_balls balls exceed the total capacity of $n_boxes boxes."))
    end

    all_plans = Vector{Tuple{Int32,Array{Int32,1}}}[]

    upper_box_limit = min(n_balls, max_balls)
    # The current box must take at least what the other boxes cannot hold.
    lower_box_limit = max(n_balls - upper_box_limit * (n_boxes - 1), min_balls)

    box_id = boxes[1]

    if n_boxes == 1
        # Last box: it receives an ordering of the balls that are left.
        for strategy in Combinatorics.permutations(balls, upper_box_limit)
            for subplan in plans
                newplan = copy(subplan)
                # The last n_boxes slots of a plan belong to the boxes still open.
                newplan[length(newplan) - n_boxes + 1] = (box_id, strategy)
                push!(all_plans, newplan)
            end
        end
        return all_plans
    end

    # Independent of the chosen strategy, so computed once.
    remaining_boxes = filter(x -> x != box_id, boxes)

    for n_taken in lower_box_limit:upper_box_limit
        for strategy in Combinatorics.permutations(balls, n_taken)
            # Extend every incoming plan first, then recurse exactly once.
            # Recursing inside the per-plan loop would hand a partially filled
            # plan list to the recursive call and duplicate earlier plans.
            temp_plans = Vector{Tuple{Int32,Array{Int32,1}}}[]
            for subplan in plans
                newplan = copy(subplan)
                newplan[length(newplan) - n_boxes + 1] = (box_id, strategy)
                push!(temp_plans, newplan)
            end

            remaining_balls = filter(x -> !(x in strategy), balls)
            if isempty(remaining_balls)
                append!(all_plans, temp_plans)
            else
                append!(all_plans, plan_assignments(remaining_balls, remaining_boxes, plans=temp_plans))
            end
        end
    end
    return all_plans
end

# Benchmark case: eight ball ids and four box ids, stored as Int32.
balls = Int32[1,2,3,4,5,6,7,8]
boxes = Int32[1,2,3,4]

# Total number of boxes in the top-level problem.
const tn_boxes = length(boxes)

# @timev prints elapsed time together with allocation and GC statistics.
@timev all_plans = plan_assignments(balls, boxes)
print(length(all_plans))

My benchmark timings are as follows:

For Python:

Number of assignments: 5040000
Time taken: 22.5003659725

For Julia: (This is while discounting the compilation time.)

76.986338 seconds (122.94 M allocations: 5.793 GB, 77.01% gc time)
elapsed time (ns): 76986338257
gc time (ns):      59287603693
bytes allocated:   6220111360
pool allocs:       122932049
non-pool GC allocs:10587
malloc() calls:    11
realloc() calls:   18
GC pauses:         270
full collections:  28

Answer 1

This is another version in Julia, a little bit more idiomatic and modified to avoid recursion and some allocations:

using Iterators
using Combinatorics

# Every way of splitting `n` items into `m` ordered, possibly empty groups,
# returned as a vector of count vectors. Each choice of `m-1` separator
# positions out of `1:n+m-1` yields one count vector (gap sizes minus one).
function histograms(n, m)
    total = n + m
    separators = combinations(collect(1:total-1), m - 1)
    return [diff(vcat(0, x, total)) .- 1 for x in separators]
end

# Histograms of `n` items over `m` groups in which every count lies within
# `minval` and `maxval` (both inclusive).
function good_histograms(n, m, minval, maxval)
    withinlimits(h) = all(maxval .>= h .>= minval)
    return [h for h in histograms(n, m) if withinlimits(h)]
end

# Matrix whose entries are 1-d views (`SubArray`) selecting a `UnitRange{Int}`
# of a parent `Vector{Int}`; assignmentplans stores one view per (box, plan).
typealias PlanGrid Matrix{SubArray{Int,1,Array{Int,1},Tuple{UnitRange{Int}},true}}

"""
    assignmentplans(balls, boxes, minballs, maxballs)

Enumerate all ordered assignments of `balls` to boxes, with between `minballs`
and `maxballs` balls per box.

Returns a `PlanGrid` with one row per box and one column per plan; entry
`[j, k]` is a view of the balls placed, in order, in box `j` under plan `k`.
All views point into one shared vector holding every permutation of `balls`
back to back, so no per-plan copies are made.

`boxes` is only used for its length. `balls` must be an indexable collection of
`Int` (the element type is fixed by `PlanGrid`).
"""
function assignmentplans(balls,boxes,minballs,maxballs)
  nballs, nboxes = length(balls),length(boxes)
  nperms = factorial(nballs)
  partslist = good_histograms(nballs,nboxes,minballs,maxballs)
  plans = PlanGrid(nboxes,nperms*length(partslist))
  # Permute the positions 1:nballs and look the ball ids up in `balls`.
  # Permuting `balls` itself and then indexing `balls` with the result treats
  # ball ids as indices, which is only correct when `balls` equals 1:nballs.
  permutationvector = vec([balls[p[i]] for i=1:nballs,p in permutations(1:nballs)])
  i1 = 1   # column of `plans` to fill next
  for parts in partslist
    i2 = 0 # offset of the current permutation inside `permutationvector`
    # Index range each box occupies within one permutation, from cumulative counts.
    breaks = [(x[1]+1:x[2]) for x in partition(cumsum([0;parts]),2,1)]
    for i=1:nperms
      for j=1:nboxes
        plans[j,i1] = view(permutationvector,breaks[j]+i2)
      end
      i1 += 1
      i2 += nballs
    end
  end
  return plans
end

For timing we get:

julia> assignmentplans([1,2],[1,2],0,2)   # a simple example
2×6 Array{SubArray{Int64,1,Array{Int64,1},Tuple{UnitRange{Int64}},true},2}:
 Int64[]  Int64[]  [1]  [2]  [1,2]    [2,1]  
 [1,2]    [2,1]    [2]  [1]  Int64[]  Int64[]

julia> @time plans = assignmentplans([1:8;],[1:4;],0,5);
  8.279623 seconds (82.28 M allocations: 2.618 GB, 14.07% gc time)

julia> size(plans)
(4,5040000)

julia> plans[:,1000000]                    # sample ball configuration
4-element Array{SubArray{Int64,1,Array{Int64,1},Tuple{UnitRange{Int64}},true},1}:
 Int64[]    
 [7,3,8,2,5]
 [4]        
 [6,1]

Timings, of course, vary per setup, but this should be much faster. It is not exactly an apples-to-apples comparison, but it calculates the same stuff. Timings on the poster's (or others') machines are welcome in the comments.

Answer 2

I made a few minor changes to the code of Dan Getz in order to make it type-stable. The main problem was that Combinatorics.permutations and Iterators.partition are not type-stable, so I had to write type-stable versions of these as well (my change to Combinatorics.combinations was actually unnecessary).

import Base: start, next, done, eltype, length, size
import Base: iteratorsize, SizeUnknown, IsInfinite, HasLength

import Combinatorics: factorial, combinations

# Copied from Combinatorics
# Only one small change in the `nextpermutation` method

# Iterator over the size-`t` permutations of the indexable collection `a`.
immutable Permutations{T}
    a::T
    t::Int
end

# Every permutation is produced as a vector of `a`'s elements.
eltype{T}(::Type{Permutations{T}}) = Vector{eltype(T)}

# Number of permutations: `factorial(n, n - t)` (i.e. n!/(n-t)!) when
# 0 <= t <= n, and zero otherwise.
function length(p::Permutations)
    n = length(p.a)
    if 0 <= p.t <= n
        return factorial(n, n - p.t)
    else
        return 0
    end
end

"""
Generate all permutations of an indexable object. Because the number of permutations can be very large, this function returns an iterator object. Use `collect(permutations(array))` to get an array of all permutations.
"""
function permutations(a)
    return Permutations(a, length(a))
end

"""
Generate all size t permutations of an indexable object.
"""
function permutations(a, t::Integer)
    # A negative size is replaced by length(a) + 1, a size for which the
    # iterator reports no permutations at all.
    nitems = t < 0 ? length(a) + 1 : t
    return Permutations(a, nitems)
end

# Initial state: the index vector 1, 2, ..., length(a).
start(p::Permutations) = collect(1:length(p.a))

# One iteration step; the work is done by `nextpermutation`.
function next(p::Permutations, s)
    return nextpermutation(p.a, p.t, s)
end

# Produce the permutation selected by `state` and the state that follows it.
#
# `m` is the collection being permuted, `t` the permutation size and `state` a
# vector of indices into `m`. Returns `(perm, s)`, where `perm` holds the
# elements of `m` at the first `t` indices of `state` and `s` is the successor
# state. `state` itself is never modified.
function nextpermutation(m, t, state)
    s = copy(state)                     # work on a copy (was s = copy(s) a few lines down)
    perm = [m[s[i]] for i in 1:t]       # current permutation: first t indices of the state
    n = length(s)
    if t <= 0
        # Nothing to advance; a first entry of n+1 marks the state as exhausted.
        return(perm, [n+1])
    end
    if t < n
        # Look after position t for the first index larger than s[t].
        j = t + 1
        while j <= n &&  s[t] >= s[j]; j+=1; end
    end
    if t < n && j <= n
        # Found one: swapping it into position t gives the next state.
        s[t], s[j] = s[j], s[t]
    else
        if t < n
            reverse!(s, t+1)            # reverse the part after position t
        end
        # Scan left from t-1 for a position whose index is smaller than its right neighbour.
        i = t - 1
        while i>=1 && s[i] >= s[i+1]; i -= 1; end
        if i > 0
            # Swap s[i] with the right-most larger index, then reverse what follows.
            j = n
            while j>i && s[i] >= s[j]; j -= 1; end
            s[i], s[j] = s[j], s[i]
            reverse!(s, i+1)
        else
            s[1] = n+1                  # no such position: mark the state as exhausted
        end
    end
    return(perm, s)
end

# Iteration is over when the state is empty and a positive size was requested,
# or when the first state entry or the requested size exceeds length(a).
function done(p::Permutations, s)
    if isempty(s)
        return p.t > 0
    end
    return max(s[1], p.t) > length(p.a)
end

# Copied from Iterators
# Returns an `Array` of `Array`s instead of `Tuple`s now

# Iterator that emits vectors of `n` consecutive elements of `xs`.
# NOTE(review): `step` looks like the offset between the starts of two
# consecutive vectors (see its use in `next`) — confirm.
immutable Partition{I}
    xs::I
    step::Int
    n::Int

end

# The number of emitted vectors is not declared up front.
iteratorsize{T<:Partition}(::Type{T}) = SizeUnknown()

# Each emitted item is a Vector of the source's element type.
eltype{I}(::Type{Partition{I}}) = Vector{eltype(I)}

# Build a `Partition` over `xs` that emits vectors of `n` elements, advancing
# by `step` elements between vectors (`step` defaults to `n`).
# Throws an ArgumentError when `step` is smaller than 1; this restores the
# guard that the Iterators.jl function this was copied from is believed to have.
function partition{I}(xs::I, n::Int, step::Int = n)
    if step < 1
        throw(ArgumentError("Partition step must be at least 1."))
    end
    return Partition(xs, step, n)
end

# Initial iteration state `(s, p)`: `s` is the state of the wrapped iterator
# and `p` a buffer of length `n` whose first `n - 1` slots are pre-filled from
# `xs` (fewer if `xs` runs out first). The last slot is filled by `next`.
function start{I}(it::Partition{I})
    N = it.n
    p = Vector{eltype(I)}(N)
    s = start(it.xs)
    for i in 1:(N - 1)
        if done(it.xs, s)
            break
        end
        (p[i], s) = next(it.xs, s)
    end
    (s, p)
end

# Emit the next vector and prepare the buffer for the one after it.
# `state` is `(s, p0)`: the wrapped iterator's state and the buffer that is
# complete except for its last slot. Returns `(ans, (s, p))`.
function next{I}(it::Partition{I}, state)
    N = it.n
    (s, p0) = state
    (x, s) = next(it.xs, s)
    # The incoming buffer is completed in place and becomes the emitted item.
    ans = p0; ans[end] = x

    # Fresh buffer for the following item; carry over the overlapping tail.
    p = similar(p0)
    overlap = max(0, N - it.step)
    for i in 1:overlap
        p[i] = ans[it.step + i]
    end

    # when step > n, skip over some elements
    for i in 1:max(0, it.step - N)
        if done(it.xs, s)
            break
        end
        (x, s) = next(it.xs, s)
    end

    # Fill the remaining slots except the last one, which the next call sets.
    for i in (overlap + 1):(N - 1)
        if done(it.xs, s)
            break
        end

        (x, s) = next(it.xs, s)
        p[i] = x
    end

    (ans, (s, p))
end

# Finished as soon as the wrapped iterator is finished.
done(it::Partition, state) = done(it.xs, state[1])

# Copied from the answer of Dan Getz
# Added types to comprehensions and used Vector{Int} instead of Int in vcat    

# Matrix whose entries are 1-d views (`SubArray`) selecting a `UnitRange{Int}`
# of a parent `Vector{Int}`; assignmentplans stores one view per (box, plan).
typealias PlanGrid Matrix{SubArray{Int,1,Array{Int,1},Tuple{UnitRange{Int}},true}}

# Every way of splitting `n` items into `m` ordered, possibly empty groups,
# returned as a vector of count vectors. Each choice of `m-1` separator
# positions out of `1:n+m-1` yields one count vector (gap sizes minus one).
function histograms(n, m)
    total = n + m
    positions = collect(1:total-1)
    return [diff(vcat([0], x, [total])) .- 1 for x in combinations(positions, m - 1)]
end

# Histograms of `n` items over `m` groups in which every count lies within
# `minval` and `maxval` (both inclusive), collected into a `Vector{Vector{Int}}`.
function good_histograms(n, m, minval, maxval)
    withinlimits(h) = all(maxval .>= h .>= minval)
    return Vector{Int}[h for h in histograms(n, m) if withinlimits(h)]
end

# Global limits. The code shown here passes the limits to assignmentplans as
# arguments of the same names, so these two globals are not read by it.
minballs = 0
maxballs = 5

"""
    assignmentplans(balls, boxes, minballs, maxballs)

Enumerate all ordered assignments of `balls` to boxes, with between `minballs`
and `maxballs` balls per box.

Returns a `PlanGrid` with one row per box and one column per plan; entry
`[j, k]` is a view of the balls placed, in order, in box `j` under plan `k`.
All views point into one shared vector holding every permutation of `balls`
back to back, so no per-plan copies are made.

`boxes` is only used for its length. `balls` must be an indexable collection of
`Int` (the element type is fixed by `PlanGrid`).
"""
function assignmentplans(balls,boxes,minballs,maxballs)
  nballs, nboxes = length(balls),length(boxes)
  nperms = factorial(nballs)
  partslist = good_histograms(nballs,nboxes,minballs,maxballs)
  plans = PlanGrid(nboxes,nperms*length(partslist))
  # Permute the positions 1:nballs and look the ball ids up in `balls`.
  # Permuting `balls` itself and then indexing `balls` with the result treats
  # ball ids as indices, which is only correct when `balls` equals 1:nballs.
  permutationvector = vec([balls[p[i]] for i=1:nballs,p in permutations(1:nballs)])
  i1 = 1   # column of `plans` to fill next
  for parts in partslist
    i2 = 0 # offset of the current permutation inside `permutationvector`
    # Index range each box occupies within one permutation, from cumulative
    # counts. `UnitRange{Int}` matches the index type required by `PlanGrid`.
    breaks = UnitRange{Int}[(x[1]+1:x[2]) for x in partition(cumsum(vcat([0],parts)),2,1)]
    for i=1:nperms
      for j=1:nboxes
        plans[j,i1] = view(permutationvector,breaks[j]+i2)
      end
      i1 += 1
      i2 += nballs
    end
  end
  return plans
end

# Benchmark case: balls 1..8, boxes 1..4, between 0 and 5 balls per box.
@time plans = assignmentplans(1:8, 1:4, 0, 5)

The result of the first run is (the timings vary a lot because of the gc)

  1.589867 seconds (22.02 M allocations: 1.127 GB, 46.50% gc time)
4×5040000 Array{SubArray{Int64,1,Array{Int64,1},Tuple{UnitRange{Int64}},true},2}:
 Int64[]      Int64[]      Int64[]      Int64[]      Int64[]      Int64[]      …  [8,7,6,5,4]  [8,7,6,5,4]  [8,7,6,5,4]  [8,7,6,5,4]  [8,7,6,5,4]
 Int64[]      Int64[]      Int64[]      Int64[]      Int64[]      Int64[]         [1,3,2]      [2,1,3]      [2,3,1]      [3,1,2]      [3,2,1]    
 [1,2,3]      [1,2,3]      [1,2,3]      [1,2,3]      [1,2,3]      [1,2,3]         Int64[]      Int64[]      Int64[]      Int64[]      Int64[]    
 [4,5,6,7,8]  [4,5,6,8,7]  [4,5,7,6,8]  [4,5,7,8,6]  [4,5,8,6,7]  [4,5,8,7,6]     Int64[]      Int64[]      Int64[]      Int64[]      Int64[]

I didn't test the changes thoroughly. Also, I don't understand why s = copy(s) works in combinations but not in permutations. Interestingly, there is only a negligible improvement if I try to make the original version type-stable (still > 40 s with 85% gc time).

How to make Julia Code more Efficient? Currently it is performing even worse than Python

Question

2 answers

solution1
7 ACCEPTED 2016-11-19 06:57:30

solution2
5 2016-11-20 21:03:12

How to make Julia Code more Efficient? Currently it is performing even worse than Python

Question

2 answers

solution1 7 ACCEPTED 2016-11-19 06:57:30

solution2 5 2016-11-20 21:03:12

solution1
7 ACCEPTED 2016-11-19 06:57:30

solution2
5 2016-11-20 21:03:12