[英]Create a compress function in Python?
我需要创建一个名为 compress 的函数,它通过用字母和数字替换任何重复的字母来压缩字符串。 我的函数应该返回字符串的缩短版本。 我已经能够计算第一个字符,但不能计算任何其他字符。
前任:
>>> compress("ddaaaff")
'd2a3f2'
def compress(s):
count=0
for i in range(0,len(s)):
if s[i] == s[i-1]:
count += 1
c = s.count(s[i])
return str(s[i]) + str(c)
这是一个压缩函数的简短python实现:
def compress(string):
res = ""
count = 1
#Add in first character
res += string[0]
#Iterate through loop, skipping last one
for i in range(len(string)-1):
if(string[i] == string[i+1]):
count+=1
else:
if(count > 1):
#Ignore if no repeats
res += str(count)
res += string[i+1]
count = 1
#print last one
if(count > 1):
res += str(count)
return res
这里有一些例子:
>>> compress("ddaaaff")
'd2a3f2'
>>> compress("daaaafffyy")
'da4f3y2'
>>> compress("mississippi")
'mis2is2ip2i'
带发电机的简短版本:
from itertools import groupby
import re
def compress(string):
return re.sub(r'(?<![0-9])[1](?![0-9])', '', ''.join('%s%s' % (char, sum(1 for _ in group)) for char, group in groupby(string)))
(1) 用groupby(string)
按字符分组
(2) 用sum(1 for _ in group)
) 计算组的长度(因为没有len
on group 是可能的)
(3) 加入适当的格式
(4)删除1
个字符为单个物品时,有一个无位之前和之后1
这不起作用有几个原因。 您确实需要先尝试自己调试。 放入一些打印语句来跟踪执行。 例如:
def compress(s):
count=0
for i in range(0, len(s)):
print "Checking character", i, s[i]
if s[i] == s[i-1]:
count += 1
c = s.count(s[i])
print "Found", s[i], c, "times"
return str(s[i]) + str(c)
print compress("ddaaaff")
这是输出:
Checking character 0 d
Found d 2 times
Checking character 1 d
Found d 2 times
Checking character 2 a
Found a 3 times
Checking character 3 a
Found a 3 times
Checking character 4 a
Found a 3 times
Checking character 5 f
Found f 2 times
Checking character 6 f
Found f 2 times
f2
Process finished with exit code 0
(1) 除了最后一个字母的搜索结果之外,您丢弃了所有结果。 (2) 您计算所有出现的次数,而不仅仅是连续出现的次数。 (3) 你把一个字符串转换成一个字符串——多余的。
尝试用铅笔和纸完成这个例子。 写下您作为人类解析字符串所使用的步骤。 致力于将这些翻译成 Python。
x="mississippi"
res = ""
count = 0
while (len(x) > 0):
count = 1
res= ""
for j in range(1, len(x)):
if x[0]==x[j]:
count= count + 1
else:
res = res + x[j]
print(x[0], count, end=" ")
x=res
执行此操作的另一种最简单方法:
def compress(str1):
output = ''
initial = str1[0]
output = output + initial
count = 1
for item in str1[1:]:
if item == initial:
count = count + 1
else:
if count == 1:
count = ''
output = output + str(count)
count = 1
initial = item
output = output + item
print (output)
根据需要给出输出,示例:
>> compress("aaaaaaaccddddeehhyiiiuuo")
a7c2d4e2h2yi3u2o
>> compress("lllhhjuuuirrdtt")
l3h2ju3ir2dt
>> compress("mississippi")
mis2is2ip2i
from collections import Counter
def string_compression(string):
counter = Counter(string)
result = ''
for k, v in counter.items():
result = result + k + str(v)
print(result)
input = "mississippi"
count = 1
for i in range(1, len(input) + 1):
if i == len(input):
print(input[i - 1] + str(count), end="")
break
else:
if input[i - 1] == input[i]:
count += 1
else:
print(input[i - 1] + str(count), end="")
count = 1
输出:m1i1s2i1s2i1p2i1
s=input("Enter the string:")
temp={}
result=" "
for x in s:
if x in temp:
temp[x]=temp[x]+1
else:
temp[x]=1
for key,value in temp.items():
result+=str(key)+str(value)
打印(结果)
这是我写的东西。
def stringCompression(str1):
counter=0
prevChar = str1[0]
str2=""
charChanged = False
loopCounter = 0
for char in str1:
if(char==prevChar):
counter+=1
charChanged = False
else:
str2 += prevChar + str(counter)
counter=1
prevChar = char
if(loopCounter == len(str1) - 1):
str2 += prevChar + str(counter)
charChanged = True
loopCounter+=1
if(not charChanged):
str2+= prevChar + str(counter)
return str2
我猜不是最好的代码。 但效果很好。
a -> a1
aaabbbccc -> a3b3c3
这是解决问题的方法。 但请记住,此方法仅在有大量重复时才有效,特别是在连续字符重复时。 否则,只会使情况更加恶化。
例如,
AABCD --> A2B1C1D1
BcDG ---> B1c1D1G1
def compress_string(s):
result = [""] * len(s)
visited = None
index = 0
count = 1
for c in s:
if c == visited:
count += 1
result[index] = f"{c}{count}"
else:
count = 1
index += 1
result[index] = f"{c}{count}"
visited = c
return "".join(result)
您可以简单地通过以下方式实现:
gstr="aaabbccccdddee"
last=gstr[0]
count=0
rstr=""
for i in gstr:
if i==last:
count=count+1
elif i!=last:
rstr=rstr+last+str(count)
count=1
last=i
rstr=rstr+last+str(count)
print ("Required string for given string {} after conversion is {}.".format(gstr,rstr))
这是一个压缩函数的简短python实现:
#d=compress('xxcccdex')
#print(d)
def compress(word):
list1=[]
for i in range(len(word)):
list1.append(word[i].lower())
num=0
dict1={}
for i in range(len(list1)):
if(list1[i] in list(dict1.keys())):
dict1[list1[i]]=dict1[list1[i]]+1
else:
dict1[list1[i]]=1
s=list(dict1.keys())
v=list(dict1.values())
word=''
for i in range(len(s)):
word=word+s[i]+str(v[i])
return word
下面的逻辑将起作用,而不管
字符重复,如果不是连续的
def fstrComp_1(stng): sRes = "" cont = 1 for i in range(len(stng)): if not stng[i] in sRes: stng = stng.lower() n = stng.count(stng[i]) if n > 1: cont = n sRes += stng[i] + str(cont) else: sRes += stng[i] print(sRes) fstrComp_1("aB*b?cC&")
我想通过对字符串进行分区来做到这一点。 所以 aabbcc 会变成: ['aa', 'bb', 'cc']
我是这样做的:
def compression(string):
# Creating a partitioned list
alist = list(string)
master = []
n = len(alist)
for i in range(n):
if alist[i] == alist[i-1]:
master[-1] += alist[i]
else:
master += alist[i]
# Adding the partitions together in a new string
newString = ""
for i in master:
newString += i[0] + str(len(i))
# If the newString is longer than the old string, return old string (you've not
# compressed it in length)
if len(newString) > n:
return string
return newString
string = 'aabbcc'
print(compression(string))
字符串 = 'aabccccd' 输出 = '2a3b4c4d'
new_string = " "
count = 1
for i in range(len(string)-1):
if string[i] == string[i+1]:
count = count + 1
else:
new_string = new_string + str(count) + string[i]
count = 1
new_string = new_string + str(count) + string[i+1]
print(new_string)
对于编码面试,它是关于算法的,而不是关于我对 Python 的了解、它的数据结构的内部表示或诸如字符串连接之类的操作的时间复杂度:
def compress(message: str) -> str:
output = ""
length = 0
previous: str = None
for char in message:
if previous is None or char == previous:
length += 1
else:
output += previous
if length > 1:
output += str(length)
length = 1
previous = char
if previous is not None:
output += previous
if length > 1:
output += str(length)
return output
对于我实际在生产中使用的代码,不重新发明任何轮子,更易于测试,使用迭代器直到空间效率的最后一步,并使用join()
而不是字符串连接以提高时间效率:
from itertools import groupby
from typing import Iterator
def compressed_groups(message: str) -> Iterator[str]:
for char, group in groupby(message):
length = sum(1 for _ in group)
yield char + (str(length) if length > 1 else "")
def compress(message: str) -> str:
return "".join(compressed_groups(message))
更进一步,以获得更多的可测试性:
from itertools import groupby
from typing import Iterator
from collections import namedtuple
class Segment(namedtuple('Segment', ['char', 'length'])):
def __str__(self) -> str:
return self.char + (str(self.length) if self.length > 1 else "")
def segments(message: str) -> Iterator[Segment]:
for char, group in groupby(message):
yield Segment(char, sum(1 for _ in group))
def compress(message: str) -> str:
return "".join(str(s) for s in segments(message))
全力以赴并提供一个值对象CompressedString
:
from itertools import groupby
from typing import Iterator
from collections import namedtuple
class Segment(namedtuple('Segment', ['char', 'length'])):
def __str__(self) -> str:
return self.char + (str(self.length) if self.length > 1 else "")
class CompressedString(str):
@classmethod
def compress(cls, message: str) -> "CompressedString":
return cls("".join(str(s) for s in cls._segments(message)))
@staticmethod
def _segments(message: str) -> Iterator[Segment]:
for char, group in groupby(message):
yield Segment(char, sum(1 for _ in group))
def compress(message: str) -> str:
return CompressedString.compress(message)
def compress(val):
print(len(val))
end=0
count=1
result=""
for i in range(0,len(val)-1):
#print(val[i],val[i+1])
if val[i]==val[i+1]:
count=count+1
#print(count,val[i])
elif val[i]!=val[i+1]:
#print(end,i)
result=result+val[end]+str(count)
end=i+1
count=1
result=result+val[-1]+str(count)
return result
res=compress("I need to create a function called compress that compresses a string by replacing any repeated letters with a letter and number. My function should return the shortened version of the string. I've been able to count the first character but not any others.")
print(len(res))
使用 python 的标准库re
。
def compress(string):
import re
p=r'(\w+?)\1+' # non greedy, group1 1
sub_str=string
for m in re.finditer(p,string):
num=m[0].count(m[1])
sub_str=re.sub(m[0],f'{m[1]}{num}',sub_str)
return sub_str
string='aaaaaaaabbbbbbbbbcccccccckkkkkkkkkkkppp'
string2='ababcdcd'
string3='abcdabcd'
string4='ababcdabcdefabcdcd'
print(compress(string))
print(compress(string2))
print(compress(string3))
print(compress(string4))
结果:
a8b9c8k11p3
ab2cd2
abcd2
ab2cdabcdefabcd2
这是Patrick Yu 代码的修改。 他的代码在以下测试用例中失败。
样本输入:
C
aaaaaaaaaabcdefgh
预期产出:
c1
a10b1c1d1e1f1g1h1
帕特里克代码的输出:
C
a10bcdefgh
以下是他的代码的修改:
def Compress(S):
Ans = S[0]
count = 1
for i in range(len(S)-1):
if S[i] == S[i+1]:
count += 1
else:
if count >= 1:
Ans += str(count)
Ans += S[i+1]
count = 1
if count>=1:
Ans += str(count)
return Ans
只是在将Count与1进行比较时,条件必须从Greater(">")更改为 Greater Than Equal To(">=") 。
使用生成器:
input = "aaaddddffwwqqaattttttteeeeeee"
from itertools import groupby
print(''.join(([char+str(len(list(group))) for char, group in groupby(input)])))
def compress(string):
# taking out unique characters from the string
unique_chars = []
for c in string:
if not c in unique_chars:
unique_chars.append(c)
# Now count the characters
res = ""
for i in range(len(unique_chars)):
count = string.count(unique_chars[i])
res += unique_chars[i]+str(count)
return res
string = 'aabccccd'
compress(string)
from collections import Counter
def char_count(input_str):
my_dict = Counter(input_str)
print(my_dict)
output_str = ""
for i in input_str:
if i not in output_str:
output_str += i
output_str += str(my_dict[i])
return output_str
result = char_count("zddaaaffccc")
print(result)
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.