# 算术编码和解码算法PythonArithmetic Encoding and Decoding Algorithm Python

``````import sys
import random
import string

def encode(encode_str, N):
count = dict.fromkeys(string.ascii_lowercase, 1)                                        # probability table
cdf_range = dict.fromkeys(string.ascii_lowercase, 0)
pdf = dict.fromkeys(string.ascii_lowercase, 0)

low = 0
high = float(1)/float(26)

for key, value in sorted(cdf_range.iteritems()):
cdf_range[key] = [low, high]
low = high
high += float(1)/float(26)

for key, value in sorted(pdf.iteritems()):
pdf[key] = float(1)/float(26)

# for key, value in sorted(cdf_range.iteritems()):
#   print key, value

# for key, value in sorted(pdf.iteritems()):
#   print key, value

i = 26

lower_bound = 0                                                                     # upper bound
upper_bound = 1                                                                     # lower bound

u = 0

# go thru every symbol in the string
for sym in encode_str:
i += 1
u += 1
count[sym] += 1

curr_range = upper_bound - lower_bound                                          # current range
upper_bound = lower_bound + (curr_range * cdf_range[sym][1])                    # upper_bound
lower_bound = lower_bound + (curr_range * cdf_range[sym][0])                    # lower bound

# update cdf_range after N symbols have been read
if (u == N):
u = 0

for key, value in sorted(pdf.iteritems()):
pdf[key] = float(count[key])/float(i)

low = 0
for key, value in sorted(cdf_range.iteritems()):
high = pdf[key] + low
cdf_range[key] = [low, high]
low = high

return lower_bound

def decode(encoded, strlen, every):
decoded_str = ""

count = dict.fromkeys(string.ascii_lowercase, 1)                                        # probability table
cdf_range = dict.fromkeys(string.ascii_lowercase, 0)
pdf = dict.fromkeys(string.ascii_lowercase, 0)

low = 0
high = float(1)/float(26)

for key, value in sorted(cdf_range.iteritems()):
cdf_range[key] = [low, high]
low = high
high += float(1)/float(26)

for key, value in sorted(pdf.iteritems()):
pdf[key] = float(1)/float(26)

lower_bound = 0                                                                     # upper bound
upper_bound = 1                                                                     # lower bound

k = 0

while (strlen != len(decoded_str)):
for key, value in sorted(pdf.iteritems()):

curr_range = upper_bound - lower_bound                                      # current range
upper_cand = lower_bound + (curr_range * cdf_range[key][1])                 # upper_bound
lower_cand = lower_bound + (curr_range * cdf_range[key][0])                 # lower bound

if (lower_cand <= encoded < upper_cand):
k += 1
decoded_str += key

if (strlen == len(decoded_str)):
break

upper_bound = upper_cand
lower_bound = lower_cand

count[key] += 1

if (k == every):
k = 0
for key, value in sorted(pdf.iteritems()):
pdf[key] = float(count[key])/float(26+len(decoded_str))

low = 0
for key, value in sorted(cdf_range.iteritems()):
high = pdf[key] + low
cdf_range[key] = [low, high]
low = high

print decoded_str

def main():
count = 10
encode_str = "yyyyuuuuyyyy"
strlen = len(encode_str)
every = 3
encoded = encode(encode_str, every)
decoded = decode(encoded, strlen, every)

if __name__ == '__main__':
main()
``````

## 2 个回复2

### ===============>>#1 票数：1

``````import sys
import random
import string
from bigfloat import *

factor = BigFloat(1)/BigFloat(26)

def encode(encode_str, N):
count = dict.fromkeys(string.ascii_lowercase, 1)                                        # probability table
cdf_range = dict.fromkeys(string.ascii_lowercase, 0)
pdf = dict.fromkeys(string.ascii_lowercase, 0)

with precision(200) + RoundTowardZero:
low = 0
high = factor

for key, value in sorted(cdf_range.iteritems()):
cdf_range[key] = [low, high]
low = high
high += factor

for key, value in sorted(pdf.iteritems()):
pdf[key] = factor

# for key, value in sorted(cdf_range.iteritems()):
#   print key, value

# for key, value in sorted(pdf.iteritems()):
#   print key, value

i = 26

lower_bound = 0                         # upper bound
upper_bound = 1                         # lower bound

u = 0

# go thru every symbol in the string
for sym in encode_str:
i += 1
u += 1
count[sym] += 1

curr_range = upper_bound - lower_bound                                          # current range
upper_bound = lower_bound + (curr_range * cdf_range[sym][1])                    # upper_bound
lower_bound = lower_bound + (curr_range * cdf_range[sym][0])                    # lower bound

# update cdf_range after N symbols have been read
if (u == N):
u = 0

for key, value in sorted(pdf.iteritems()):
pdf[key] = BigFloat(count[key])/BigFloat(i)

low = 0
for key, value in sorted(cdf_range.iteritems()):
high = pdf[key] + low
cdf_range[key] = [low, high]
low = high

return lower_bound

def decode(encoded, strlen, every):
decoded_str = ""

count = dict.fromkeys(string.ascii_lowercase, 1)                                        # probability table
cdf_range = dict.fromkeys(string.ascii_lowercase, 0)
pdf = dict.fromkeys(string.ascii_lowercase, 0)

with precision(200) + RoundTowardZero:
low = 0
high = factor

for key, value in sorted(cdf_range.iteritems()):
cdf_range[key] = [low, high]
low = high
high += factor

for key, value in sorted(pdf.iteritems()):
pdf[key] = factor

lower_bound = BigFloat(0)                           # upper bound
upper_bound = BigFloat(1)                           # lower bound

k = 0

while (strlen != len(decoded_str)):
for key, value in sorted(pdf.iteritems()):

curr_range = upper_bound - lower_bound                                      # current range
upper_cand = lower_bound + (curr_range * cdf_range[key][1])                 # upper_bound
lower_cand = lower_bound + (curr_range * cdf_range[key][0])                 # lower bound

if (lower_cand <= encoded < upper_cand):
k += 1
decoded_str += key

if (strlen == len(decoded_str)):
break

upper_bound = upper_cand
lower_bound = lower_cand

count[key] += 1

if (k == every):
k = 0
for key, value in sorted(pdf.iteritems()):
pdf[key] = BigFloat(count[key])/BigFloat(26+len(decoded_str))

low = 0
for key, value in sorted(cdf_range.iteritems()):
high = pdf[key] + low
cdf_range[key] = [low, high]
low = high

print decoded_str

def main():
count = 10
encode_str = "heloworldheloworld"
strlen = len(encode_str)
every = 3
encoded = encode(encode_str, every)
decoded = decode(encoded, strlen, every)

if __name__ == '__main__':
main()
``````

### ===============>>#2 票数：1

``````import sys
import random
import string

import decimal
from decimal import Decimal

decimal.getcontext().prec=100

def encode(encode_str, N):
count = dict.fromkeys(string.ascii_lowercase, 1)                                        # probability table
cdf_range = dict.fromkeys(string.ascii_lowercase, 0)
pdf = dict.fromkeys(string.ascii_lowercase, 0)

low = 0
high = Decimal(1)/Decimal(26)

for key, value in sorted(cdf_range.iteritems()):
cdf_range[key] = [low, high]
low = high
high += Decimal(1)/Decimal(26)

for key, value in sorted(pdf.iteritems()):
pdf[key] = Decimal(1)/Decimal(26)

# for key, value in sorted(cdf_range.iteritems()):
#   print key, value

# for key, value in sorted(pdf.iteritems()):
#   print key, value

i = 26

lower_bound = 0                                                                     # upper bound
upper_bound = 1                                                                     # lower bound

u = 0

# go thru every symbol in the string
for sym in encode_str:
i += 1
u += 1
count[sym] += 1

curr_range = upper_bound - lower_bound                                          # current range
upper_bound = lower_bound + (curr_range * cdf_range[sym][1])                    # upper_bound
lower_bound = lower_bound + (curr_range * cdf_range[sym][0])                    # lower bound

# update cdf_range after N symbols have been read
if (u == N):
u = 0

for key, value in sorted(pdf.iteritems()):
pdf[key] = Decimal(count[key])/Decimal(i)

low = 0
for key, value in sorted(cdf_range.iteritems()):
high = pdf[key] + low
cdf_range[key] = [low, high]
low = high

return lower_bound

def decode(encoded, strlen, every):
decoded_str = ""

count = dict.fromkeys(string.ascii_lowercase, 1)                                        # probability table
cdf_range = dict.fromkeys(string.ascii_lowercase, 0)
pdf = dict.fromkeys(string.ascii_lowercase, 0)

low = 0
high = Decimal(1)/Decimal(26)

for key, value in sorted(cdf_range.iteritems()):
cdf_range[key] = [low, high]
low = high
high += Decimal(1)/Decimal(26)

for key, value in sorted(pdf.iteritems()):
pdf[key] = Decimal(1)/Decimal(26)

lower_bound = 0                                                                     # upper bound
upper_bound = 1                                                                     # lower bound

k = 0

while (strlen != len(decoded_str)):
for key, value in sorted(pdf.iteritems()):

curr_range = upper_bound - lower_bound                                      # current range
upper_cand = lower_bound + (curr_range * cdf_range[key][1])                 # upper_bound
lower_cand = lower_bound + (curr_range * cdf_range[key][0])                 # lower bound

if (lower_cand <= encoded < upper_cand):
k += 1
decoded_str += key

if (strlen == len(decoded_str)):
break

upper_bound = upper_cand
lower_bound = lower_cand

count[key] += 1

if (k == every):
k = 0
for key, value in sorted(pdf.iteritems()):
pdf[key] = Decimal(count[key])/Decimal(26+len(decoded_str))

low = 0
for key, value in sorted(cdf_range.iteritems()):
high = pdf[key] + low
cdf_range[key] = [low, high]
low = high

print decoded_str

def main():
count = 10
encode_str = "heloworldheloworld"
strlen = len(encode_str)
every = 3
encoded = encode(encode_str, every)
decoded = decode(encoded, strlen, every)

if __name__ == '__main__':
main()
``````

2回复

2回复

3回复

4回复

1回复

1回复

1回复

2回复

4回复

1回复