Extract values between two strings in a text file using Python

In case your text file contains multiple "Start"/"End" pairs, this copies the lines of every block into the output file, excluding the "Start" and "End" marker lines themselves.

# Copy every line that sits between a "Start" line and the following "End"
# line into the output file; the marker lines themselves are never written.
with open('path/to/input') as infile, open('path/to/output', 'w') as outfile:
    inside_block = False
    for raw_line in infile:
        marker = raw_line.strip()
        if marker in ("Start", "End"):
            # A marker line only toggles the state: "Start" opens a block,
            # "End" closes it.
            inside_block = marker == "Start"
        elif inside_block:
            outfile.write(raw_line)

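If the text file is small enough to fit in memory, you can read its whole content and then use a regular expression. Note that this captures only the first block and keeps the "Start" and "End" markers in the result: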

import re
# Read the whole input into memory so one regular expression can span lines.
with open('data.txt') as myfile:
    content = myfile.read()

# Non-greedy match from the first "Start\n" up to the nearest following "End";
# re.DOTALL lets "." cross newlines. The matched text includes both markers.
match = re.search(r'Start\n.*?End', content, re.DOTALL)
# re.search returns None when the input has no Start...End span; write an
# empty result in that case instead of failing with AttributeError on .group().
text = match.group() if match else ''
with open("result.txt", "w") as myfile2:
    myfile2.write(text)

Using itertools.dropwhile, itertools.takewhile, itertools.islice:

import itertools

# Copy only the first block: the lines after the first "Start" line up to
# (not including) the next "End" line.
with open('data.txt') as f, open('result.txt', 'w') as fout:
    lines = iter(f)
    # Phase 1: consume lines up to and including the first "Start" marker.
    for line in lines:
        if line.strip() == 'Start':
            break
    # Phase 2: keep going on the same iterator, copying until "End" shows up
    # (or the file runs out).
    for line in lines:
        if line.strip() == 'End':
            break
        fout.write(line)

UPDATE: As inspectorG4dget commented, the above code copies only the first block. To copy multiple blocks, use the following:

import itertools

# Copy the contents of every Start...End block; marker lines that open or
# close a block are not written.
with open('data.txt', 'r') as f, open('result.txt', 'w') as fout:
    lines = iter(f)
    for line in lines:
        if line.strip() != 'Start':
            # Still outside a block: skip until a "Start" marker appears.
            continue
        # Inside a block: the inner loop shares the iterator with the outer
        # one, so the outer loop resumes right after the closing "End".
        for body_line in lines:
            if body_line.strip() == 'End':
                break
            fout.write(body_line)

I'm not a Python expert, but this code should do the job.

# Copy the lines found between a line beginning with "Start" and the next
# line beginning with "End". Markers are matched by prefix (startswith), so
# e.g. "Start of data" also opens a block.
# The with-statement guarantees both files are closed even if reading or
# writing raises part-way through.
with open("data.txt") as in_file, open("result.txt", "w") as out_file:
    keep_current_set = False
    for line in in_file:
        # Check "End" before writing so the closing marker is not copied.
        if line.startswith("End"):
            keep_current_set = False

        if keep_current_set:
            out_file.write(line)

        # Check "Start" after writing so the opening marker is not copied.
        if line.startswith("Start"):
            keep_current_set = True

Tags:

Python