Spring Batch process an encoded zipped file

if the gzipped file is a simple txt file, you only need a custum BufferedReaderFactory, the linemaper then gets the String of the current line

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.List;
import java.util.zip.GZIPInputStream;
import org.springframework.batch.item.file.BufferedReaderFactory;
import org.springframework.core.io.Resource;

public class GZipBufferedReaderFactory implements BufferedReaderFactory {

    /** Default value for gzip suffixes. */
    private List<String> gzipSuffixes = new ArrayList<String>() {

        {
            add(".gz");
            add(".gzip");
        }
    };

    /**
     * Creates Bufferedreader for gzip Resource, handles normal resources
     * too.
     * 
     * @param resource
     * @param encoding
     * @return
     * @throws UnsupportedEncodingException
     * @throws IOException 
     */
    @Override
    public BufferedReader create(Resource resource, String encoding)
            throws UnsupportedEncodingException, IOException {
        for (String suffix : gzipSuffixes) {
            // test for filename and description, description is used when 
            // handling itemStreamResources
            if (resource.getFilename().endsWith(suffix)
                    || resource.getDescription().endsWith(suffix)) {
                return new BufferedReader(new InputStreamReader(new GZIPInputStream(resource.getInputStream()), encoding));
            }
        }
        return new BufferedReader(new InputStreamReader(resource.getInputStream(), encoding));
    }

    public List<String> getGzipSuffixes() {
        return gzipSuffixes;
    }

    public void setGzipSuffixes(List<String> gzipSuffixes) {
        this.gzipSuffixes = gzipSuffixes;
    }
}

simple itemreader configuration:

 <bean id="itemReader" class="org.springframework.batch.item.file.FlatFileItemReader" scope="step">
  <property name="resource" value="#{jobParameters['input.file']}" />
  <property name="lineMapper">
    <bean class="org.springframework.batch.item.file.mapping.PassThroughLineMapper" />
  </property>
  <property name="strict" value="true" />
  <property name="bufferedReaderFactory">
    <bean class="your.custom.GZipBufferedReaderFactory" />
  </property>
</bean>

From the feature request ticket to spring batch (https://jira.spring.io/browse/BATCH-1750):

public class GZIPResource extends InputStreamResource implements Resource {

    public GZIPResource(Resource delegate) throws IOException {
        super(new GZIPInputStream(delegate.getInputStream()));
    }
}

The custom GZipBufferedReaderFactory won't work with other than FlatFileItemReader.

Edit: lazy version. This doesn't try to open the file until getInputStream is called. This avoids exceptions due to that the file doesn't exist if you create the Resource at the program initialization (e.g. with autowiring).

public class GzipLazyResource extends FileSystemResource implements Resource {

    public GzipLazyResource(File file) {
        super(file);
    }

    public GzipLazyResource(String path) {
        super(path);
    }

    @Override
    public InputStream getInputStream() throws IOException {
        return new GZIPInputStream(super.getInputStream());
    }
}

Edit2: this only works for input Resources

Adding another similar method getOutputStream won't work because spring uses the FileSystemResource.getFile, not the FileSystemResource.getOutputStream.

Tags:

Spring Batch