I have a huge file with a problematic character at line 9073245. I want to replace or remove that character on that specific line while keeping the rest of the file intact. I found the following solution here:
from tempfile import mkstemp
from shutil import move, copymode
from os import fdopen, remove
def replace(file_path, pattern, subst):
    """Replace every occurrence of ``pattern`` with ``subst`` in a text file.

    The file is streamed line by line into a temporary file which then
    atomically takes the place of the original, so the whole file is never
    held in memory and the original is never missing or half-written.

    Args:
        file_path: Path of the text file to rewrite.
        pattern: Substring to search for on every line.
        subst: Text that replaces each occurrence of ``pattern``.

    Raises:
        OSError: If the file cannot be read or the replacement cannot be
            written; the original file is left untouched in that case.
    """
    import os

    # Create the temp file next to the target: os.replace() is only atomic
    # when source and destination live on the same filesystem.
    fh, abs_path = mkstemp(dir=os.path.dirname(os.path.abspath(file_path)))
    try:
        # newline='' disables newline translation in both directions so the
        # line endings of untouched lines are preserved byte-for-byte.
        with fdopen(fh, 'w', newline='') as new_file, \
                open(file_path, newline='') as old_file:
            for line in old_file:
                new_file.write(line.replace(pattern, subst))
        # Copy the file permissions from the old file to the new file
        copymode(file_path, abs_path)
        # Swap the new file in without a window where file_path is missing
        os.replace(abs_path, file_path)
    except BaseException:
        # Do not leave a stray temp file behind when anything goes wrong.
        if os.path.exists(abs_path):
            remove(abs_path)
        raise
But instead of reading line by line, I just want to replace line number 9073245 and be done with it. I thought `getline` from `linecache` might work:
import linecache
def lineInFileReplacer(file_path, line_nr, pattern, subst):
    """Replace ``pattern`` with ``subst`` on one specific line of a text file.

    Every line is copied to a temporary file unchanged except line
    ``line_nr``, where the substitution is applied; the temporary file then
    atomically takes the place of the original. The file is streamed, so it
    is never loaded into memory as a whole.

    Args:
        file_path: Path of the text file to rewrite.
        line_nr: 1-based number of the line to modify. If the file has no
            such line, its content is left unchanged.
        pattern: Substring to search for on the selected line.
        subst: Text that replaces each occurrence of ``pattern``.

    Raises:
        OSError: If the file cannot be read or the replacement cannot be
            written; the original file is left untouched in that case.
    """
    import os

    # Create the temp file next to the target: os.replace() is only atomic
    # when source and destination live on the same filesystem.
    fh, abs_path = mkstemp(dir=os.path.dirname(os.path.abspath(file_path)))
    try:
        # newline='' disables newline translation in both directions so the
        # line endings of untouched lines are preserved byte-for-byte.
        with fdopen(fh, 'w', newline='') as new_file, \
                open(file_path, newline='') as old_file:
            for current_nr, line in enumerate(old_file, start=1):
                if current_nr == line_nr:
                    line = line.replace(pattern, subst)
                # All other lines must be copied too, otherwise the new
                # file would contain nothing but the patched line.
                new_file.write(line)
        # Copy the file permissions from the old file to the new file
        copymode(file_path, abs_path)
        # Swap the new file in without a window where file_path is missing
        os.replace(abs_path, file_path)
    except BaseException:
        # Do not leave a stray temp file behind when anything goes wrong.
        if os.path.exists(abs_path):
            remove(abs_path)
        raise
but `new_file.write()` does not seem to include the replacement for `bad_line`.
How can I replace a line at a specific line number without looping through every line in the file?
Well, I have a solution that doesn't require an explicit loop (note that it still reads the whole file into memory). I just don't know if this fits your requirements. If you want to remove the line altogether, you would obviously not use "\n" but "".
# 1-based number of the line that the benchmark variants overwrite.
LINE_NUMBER = 1001

# Replacement values, one per variant:
#   NEW_LINE   - for test1, which keeps line terminators (readlines)
#   NEW_LINE_2 - for test2, which splits the text on "\n"
#   NEW_LINE_3 - for test3, which splits the raw bytes on b"\n"
NEW_LINE, NEW_LINE_2, NEW_LINE_3 = "\n", "", b""
def init():
    """(Re)create ``temp.txt`` as the fixture for the benchmark variants.

    The file holds 1000 "Foo" lines, one "REPLACE ME!" line and then
    1000 "Bar" lines, so the line to replace is line 1001.
    """
    sections = ("Foo\n" * 1000, "REPLACE ME!\n", "Bar\n" * 1000)
    with open("temp.txt", "w") as fixture:
        for section in sections:
            fixture.write(section)
    # Debugging aid, disabled: pause so the file can be inspected by hand.
    # input("[PRESS ENTER TO OVERWRITE THE FILE]")
def test1():
    """Variant 1: replace the target line via ``readlines``/``writelines``.

    Rebuilds the fixture, loads all lines (terminators included), swaps in
    ``NEW_LINE`` at ``LINE_NUMBER`` and writes everything back in place.
    """
    init()
    with open("temp.txt", "r+") as handle:
        all_lines = handle.readlines()
        all_lines[LINE_NUMBER - 1] = NEW_LINE
        # Rewind and overwrite from the start of the file ...
        handle.seek(0)
        handle.writelines(all_lines)
        # ... then cut off whatever is left over from the longer old content.
        handle.truncate()
def test2():
    """Variant 2: replace the target line via ``read``/``split``/``join``.

    Rebuilds the fixture, reads the whole text at once, splits it on "\\n",
    swaps in ``NEW_LINE_2`` at ``LINE_NUMBER`` and writes the re-joined
    text back in place.
    """
    init()
    with open("temp.txt", "r+") as handle:
        whole_text = handle.read()
        pieces = whole_text.split("\n")
        pieces[LINE_NUMBER - 1] = NEW_LINE_2
        # Rewind and overwrite from the start of the file ...
        handle.seek(0)
        rebuilt = "\n".join(pieces)
        handle.write(rebuilt)
        # ... then cut off whatever is left over from the longer old content.
        handle.truncate()
def test3():
    """Variant 3: same as ``test2`` but operating on raw bytes.

    Rebuilds the fixture, reads the file in binary mode, splits it on
    b"\\n", swaps in ``NEW_LINE_3`` at ``LINE_NUMBER`` and writes the
    re-joined bytes back in place.
    """
    init()
    with open("temp.txt", "rb+") as handle:
        raw = handle.read()
        pieces = raw.split(b"\n")
        pieces[LINE_NUMBER - 1] = NEW_LINE_3
        # Rewind and overwrite from the start of the file ...
        handle.seek(0)
        rebuilt = b"\n".join(pieces)
        handle.write(rebuilt)
        # ... then cut off whatever is left over from the longer old content.
        handle.truncate()
from timeit import repeat

# Each variant is timed `loops` times, `count` call(s) per timing.
loops = 300
count = 1

# Print, for each variant in turn, its fastest single timing scaled by `loops`.
for statement in ("test1()", "test2()", "test3()"):
    timings = repeat(statement, globals=globals(), repeat=loops, number=count)
    print(loops * min(timings))
0.9838907746598125
0.15912508824840188 # < Recommended
0.18560938769951463
After init:
<Foo> * 1000
REPLACE ME!
<Bar> * 1000
After test 1/2:
<Foo> * 1000
<Bar> * 1000
I have no idea why reading bytes is slower.
`linecache` internally reads your file line by line, so in terms of speed you gain nothing by using it (`linecache` gives you a performance advantage only if you access many lines from many files repeatedly, which is not your use case). Therefore you will have to go line by line to achieve what you want.
The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint them, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.