This is an in-place algorithm for compressing characters, e.g.
AABBCCCDDXYYZ --> A2B2C3D2XY2Z
The reason why the output isn't A2B2C3D2X1Y2Z1 is because adding 1 for only any non-repeated contagious character can make the compressed data bigger than the actual data, e.g.
ABCD --> A1B1C1D1 (A1B1C1D1 is larger than ABCD)
#!/usr/bin/env python
def compress(l):
if len(l) == 0: return []
counter = 1
tmp = l[0]
i = 1
while i < len(l):
if l[i] != tmp:
new_index = update(i, counter, l)
i = new_index + 1
tmp = l[i]
counter = 1
else:
counter += 1
i += 1
update(i, counter, l)
return l
def update(current_index, counter, l):
if counter == 1: return current_index - 1
new_index = current_index - (counter - 1)
l[new_index] = str(counter)
shift(current_index, new_index+1, l)
return new_index
def shift(source_index, destination_index, l):
last_index = len(l)
i = 0
while (source_index + i) < last_index:
l[destination_index+i] = l[source_index+i]
i += 1
last_index = destination_index + i
del l[last_index:]
if __name__ == "__main__":
assert (['A', '4',
'B', '2',
'C',
'D', '3',
'E', '2',
'A', '2',
'X',
'Y', '5',
'Z']
==
compress(["A", "A", "A", "A",
"B", "B",
"C",
"D", "D", "D",
"E", "E",
"A", "A",
"X",
"Y", "Y", "Y", "Y", "Y",
"Z"]))
No comments:
Post a Comment