I wrote this class to compress and expand number lists to sequence strings, including step values when the step value is greater than 1. The code still feels clunky. Are there libraries that can do something like this? Possibly simpler code?
import re
class Foo(object):
    """Compress a collection of integers into a range string and expand it back.

    Runs of three or more evenly spaced values are written as ``start-end``
    when the step is 1 and as ``start-endxstep`` otherwise; every other value
    is written on its own. Tokens are joined with commas, for example
    ``1,4-6,20-26x2``.
    """

    # One formatted token: "N", "A-B" or "A-BxS". N, A and B may carry a
    # leading minus sign, so the separator is the '-' that follows a digit.
    _TOKEN_RE = re.compile(r'^(-?\d+)(?:-(-?\d+)(?:x(\d+))?)?$')

    def __init__(self, num_list):
        """Store the distinct values of *num_list* as a sorted list of ints.

        Args:
            num_list: iterable of values accepted by ``int()``.
        """
        self.num_list = sorted(set(int(n) for n in num_list))
        # Filled in by gen_seq_data(); None means "not generated yet".
        self.seq_data = None

    def gen_seq_data(self):
        """Group ``self.num_list`` into runs and store them in ``self.seq_data``.

        ``self.seq_data`` becomes a list of ``(step, values)`` tuples. A run
        of at least three evenly spaced numbers is stored with its step and
        all of its members; any other number is stored alone with a step of
        ``None``. Runs are taken greedily from the smallest number upwards.
        """
        nums = self.num_list
        count = len(nums)
        self.seq_data = []
        pos = 0
        while pos < count:
            step = None
            last = pos
            # A run needs three members, i.e. two equal consecutive gaps.
            if (pos + 2 < count
                    and nums[pos + 1] - nums[pos] == nums[pos + 2] - nums[pos + 1]):
                step = nums[pos + 1] - nums[pos]
                last = pos + 2
                while last + 1 < count and nums[last + 1] - nums[last] == step:
                    last += 1
            self.seq_data.append((step, nums[pos:last + 1]))
            pos = last + 1

    def format_elements(self):
        """Return one formatted token per entry of ``self.seq_data``.

        Generates the sequence data first if ``gen_seq_data()`` has not been
        called yet.

        Returns:
            list of str: ``"N"`` for a lone value, ``"A-B"`` for a step of 1,
            ``"A-BxS"`` for any other step.
        """
        if self.seq_data is None:
            self.gen_seq_data()
        tokens = []
        for step, values in self.seq_data:
            if step is None:
                tokens.append('%s' % (values[0],))
            elif step == 1:
                tokens.append('%s-%s' % (values[0], values[-1]))
            else:
                tokens.append('%s-%sx%s' % (values[0], values[-1], step))
        return tokens

    def format_range(self):
        """Return the comma-separated range string for the stored numbers."""
        return ','.join(self.format_elements())

    def expand_range(self):
        """Parse the string from ``format_range()`` back into a list of ints.

        Returns:
            list of int: the expanded numbers, in the order the tokens appear.
        """
        num_list = []
        for r_token in self.format_range().split(','):
            match = self._TOKEN_RE.match(r_token)
            if match is None:
                # Splitting the empty string (no numbers at all) yields one
                # empty token; there is nothing to expand for it.
                continue
            start, end, step = match.groups()
            if end is None:
                num_list.append(int(start))
            else:
                num_list.extend(
                    range(int(start), int(end) + 1, int(step or 1)))
        return num_list
Input/output:
# Build the sequence data for a sample list, then show the input next to its
# compressed form. The single-argument print(...) form prints the same text
# under both the Python 2 statement and the Python 3 function.
data = [1, 4, 5, 6, 10, 15, 16, 17, 18, 20, 22, 24, 26, 27, 28, 30, 35, 40, 45, 50, 56, 63, 66, 69, 72]
foo = Foo(data)
foo.gen_seq_data()
print(data)
print(foo.format_range())
1,4-6,10,15-18,20-26x2,27,28,30-50x5,56,63-72x3
# Expanding the formatted string should reproduce the original numbers.
print(foo.expand_range())
[1, 4, 5, 6, 10, 15, 16, 17, 18, 20, 22, 24, 26, 27, 28, 30, 35, 40, 45, 50, 56, 63, 66, 69, 72]
One. Remove all #END comments. They are monstrously useless. Your indentation speaks for itself. Use it.
Two. Don't make this a class. It isn't a distinct object with distinct responsibilities. It's just an algorithm. Made up of functions. At best it's a class with all static methods.
Three. Never do this
for index, num in enumerate( self.num_list ):
if index - 1 < 0:
backward_step_value = None
# end if
else:
backward_step_value = num - self.num_list[ index - 1 ]
# end else
If the first element is special, then treat it separately.
backward_step_value = self.num_list[0]
for num in self.num_list[1:]:
You rarely need the index for something like this. Indeed, the only reason for having the index appears to be to treat the first element specially.
Finally, this is a "reduction". Use a generator function
def reduce_list(some_list):
    """Yield ``(low, high)`` for each run of consecutive integers.

    The input is de-duplicated and sorted first, so it may be given in any
    order. An empty input yields nothing.

    Args:
        some_list: iterable of integers.

    Yields:
        tuple: the inclusive bounds ``(low, high)`` of each run, in
        ascending order. A lone value ``n`` is reported as ``(n, n)``.
    """
    ordered = sorted(set(some_list))
    if not ordered:
        return
    low = high = ordered[0]
    for v in ordered[1:]:
        if v == high + 1:
            high = v
        else:
            yield low, high
            # Start the next run at the value that broke the current one.
            low = high = v
    yield low, high
That might yield your list of contiguous ranges. You can then format those.
format_elements( reduce_list( some_list ) )
The following solution handles non-contiguous ranges, and also preserves the behavior of ignoring ranges of length 2.
def reduce_list(seq):
    """Yield ``(low, high, step)`` ranges covering the distinct values of *seq*.

    Values are de-duplicated and sorted. A range is only reported with a
    step when it holds at least three evenly spaced values; a range that
    would hold just two values is reported as two single values instead.
    Single values are yielded as ``(n, n, None)``. An empty input yields
    nothing.

    Args:
        seq: iterable of integers.

    Yields:
        tuple: ``(low, high, step)`` in ascending order.
    """
    values = sorted(set(seq))
    if not values:
        return
    low = high = values[0]
    step = None
    for v in values[1:]:
        if step is None or v - high == step:
            # Extend the current range.
            step = v - high
            high = v
        elif high - low == step:
            # The current range only has two values. Yield the
            # first value, and start a new range comprising the
            # second value and the current value.
            yield low, low, None
            step = v - high
            low = high
            high = v
        else:
            # Yield the current range, and start a new one.
            yield low, high, step
            low = high = v
            step = None
    if high - low == step:
        # The final range has only two values. Yield them
        # individually.
        yield low, low, None
        step = None
        low = high
    yield low, high, step
def format_element(low, high, step):
    """Render one ``(low, high, step)`` range as text.

    A step of ``None`` marks a single value and gives ``"low"``; a step of
    1 gives ``"low-high"``; any other step gives ``"low-highxstep"``.
    """
    is_single = step is None
    if is_single:
        # A single value must have identical bounds.
        assert low == high
        return "%s" % (low,)
    if step == 1:
        return "%s-%s" % (low, high)
    return "%s-%sx%s" % (low, high, step)
def format_list(seq):
    """Join the formatted form of every range in *seq* with commas.

    Each item of *seq* is unpacked as the arguments to ``format_element``.
    """
    pieces = []
    for element in seq:
        pieces.append(format_element(*element))
    return ','.join(pieces)
Here's some test code:
def test(*args):
    """Print *args* next to the range string produced for them."""
    # A single pre-formatted string prints the same text whether ``print``
    # is the Python 2 statement or the Python 3 function; ``%r`` matches how
    # the print statement rendered the tuple.
    print("%r == %s" % (args, format_list(reduce_list(args))))
# Exercise the formatter on single values, pairs, unit-step runs, wider
# steps, and inputs that mix them; cases run in the order listed.
for _case in (
    (1,),
    (1, 2),
    (1, 2, 3),
    (0, 10),
    (0, 10, 20),
    (0, 10, 11, 12, 14, 16),
    (0, 2, 4, 8, 16, 32, 64),
    (0, 1, 3, 4, 6, 7, 9, 10),
    (0, 1, 3, 6, 10, 15, 21, 28),
):
    test(*_case)
which outputs:
(1,) == 1
(1, 2) == 1,2
(1, 2, 3) == 1-3
(0, 10) == 0,10
(0, 10, 20) == 0-20x10
(0, 10, 11, 12, 14, 16) == 0,10-12,14,16
(0, 2, 4, 8, 16, 32, 64) == 0-4x2,8,16,32,64
(0, 1, 3, 4, 6, 7, 9, 10) == 0,1,3,4,6,7,9,10
(0, 1, 3, 6, 10, 15, 21, 28) == 0,1,3,6,10,15,21,28
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.