Replacing a text in Apache POI XWPF

my task was to replace texts of the format ${key} with values of a map within a word docx document. The above solutions were a good starting point but did not take into account all cases: ${key} can be spread not only across multiple runs but also across multiple texts within a run. I therefore ended up with the following code:

    private void replace(String inFile, Map<String, String> data, OutputStream out) throws Exception, IOException {
    XWPFDocument doc = new XWPFDocument(OPCPackage.open(inFile));
    for (XWPFParagraph p : doc.getParagraphs()) {
        replace2(p, data);
    }
    for (XWPFTable tbl : doc.getTables()) {
        for (XWPFTableRow row : tbl.getRows()) {
            for (XWPFTableCell cell : row.getTableCells()) {
                for (XWPFParagraph p : cell.getParagraphs()) {
                    replace2(p, data);
                }
            }
        }
    }
    doc.write(out);
}

private void replace2(XWPFParagraph p, Map<String, String> data) {
    String pText = p.getText(); // complete paragraph as string
    if (pText.contains("${")) { // if paragraph does not include our pattern, ignore
        TreeMap<Integer, XWPFRun> posRuns = getPosToRuns(p);
        Pattern pat = Pattern.compile("\\$\\{(.+?)\\}");
        Matcher m = pat.matcher(pText);
        while (m.find()) { // for all patterns in the paragraph
            String g = m.group(1);  // extract key start and end pos
            int s = m.start(1);
            int e = m.end(1);
            String key = g;
            String x = data.get(key);
            if (x == null)
                x = "";
            SortedMap<Integer, XWPFRun> range = posRuns.subMap(s - 2, true, e + 1, true); // get runs which contain the pattern
            boolean found1 = false; // found $
            boolean found2 = false; // found {
            boolean found3 = false; // found }
            XWPFRun prevRun = null; // previous run handled in the loop
            XWPFRun found2Run = null; // run in which { was found
            int found2Pos = -1; // pos of { within above run
            for (XWPFRun r : range.values())
            {
                if (r == prevRun)
                    continue; // this run has already been handled
                if (found3)
                    break; // done working on current key pattern
                prevRun = r;
                for (int k = 0;; k++) { // iterate over texts of run r
                    if (found3)
                        break;
                    String txt = null;
                    try {
                        txt = r.getText(k); // note: should return null, but throws exception if the text does not exist
                    } catch (Exception ex) {

                    }
                    if (txt == null)
                        break; // no more texts in the run, exit loop
                    if (txt.contains("$") && !found1) {  // found $, replace it with value from data map
                        txt = txt.replaceFirst("\\$", x);
                        found1 = true;
                    }
                    if (txt.contains("{") && !found2 && found1) {
                        found2Run = r; // found { replace it with empty string and remember location
                        found2Pos = txt.indexOf('{');
                        txt = txt.replaceFirst("\\{", "");
                        found2 = true;
                    }
                    if (found1 && found2 && !found3) { // find } and set all chars between { and } to blank
                        if (txt.contains("}"))
                        {
                            if (r == found2Run)
                            { // complete pattern was within a single run
                                txt = txt.substring(0, found2Pos)+txt.substring(txt.indexOf('}'));
                            }
                            else // pattern spread across multiple runs
                                txt = txt.substring(txt.indexOf('}'));
                        }
                        else if (r == found2Run) // same run as { but no }, remove all text starting at {
                            txt = txt.substring(0,  found2Pos);
                        else
                            txt = ""; // run between { and }, set text to blank
                    }
                    if (txt.contains("}") && !found3) {
                        txt = txt.replaceFirst("\\}", "");
                        found3 = true;
                    }
                    r.setText(txt, k);
                }
            }
        }
        System.out.println(p.getText());

    }

}

private TreeMap<Integer, XWPFRun> getPosToRuns(XWPFParagraph paragraph) {
    int pos = 0;
    TreeMap<Integer, XWPFRun> map = new TreeMap<Integer, XWPFRun>();
    for (XWPFRun run : paragraph.getRuns()) {
        String runText = run.text();
        if (runText != null && runText.length() > 0) {
            for (int i = 0; i < runText.length(); i++) {
                map.put(pos + i, run);
            }
            pos += runText.length();
        }

    }
    return map;
}

Here is what we did for text replacement using Apache POI. We found that it was not worth the hassle and simpler to replace the text of an entire XWPFParagraph instead of a run. A run can be randomly split in the middle of a word as Microsoft Word is in charge of where runs are created within the paragraph of a document. Therefore the text you might be searching for could be half in one run and half in another. Using the full text of a paragraph, removing its existing runs, and adding a new run with the adjusted text seems to solve the problem of text replacement.

However there is a cost of doing the replacement at the paragraph level; you lose the formatting of the runs in that paragraph. For example if in the middle of your paragraph you had bolded the word "bits", and then when parsing the file you replaced the word "bits" with "bytes", the word "bytes" would no longer be bolded. Because the bolding was stored with a run that was removed when the paragraph's entire body of text was replaced. The attached code has a commented out section that was working for replacement of text at the run level if you need it.

It should also be noted that the below works if the text you are inserting contains \n return characters. We could not find a way to insert returns without creating a run for each section prior to the return and marking the run addCarriageReturn(). Cheers

    package com.healthpartners.hcss.client.external.word.replacement;

import java.util.List;

import org.apache.commons.lang.StringUtils;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.poi.xwpf.usermodel.XWPFRun;

public class TextReplacer {
    private String searchValue;
    private String replacement;

    public TextReplacer(String searchValue, String replacement) {
        this.searchValue = searchValue;
        this.replacement = replacement;
    }

    public void replace(XWPFDocument document) {
        List<XWPFParagraph> paragraphs = document.getParagraphs();

    for (XWPFParagraph xwpfParagraph : paragraphs) {
        replace(xwpfParagraph);
    }
}

private void replace(XWPFParagraph paragraph) {
    if (hasReplaceableItem(paragraph.getText())) {
        String replacedText = StringUtils.replace(paragraph.getText(), searchValue, replacement);

        removeAllRuns(paragraph);

        insertReplacementRuns(paragraph, replacedText);
    }
}

private void insertReplacementRuns(XWPFParagraph paragraph, String replacedText) {
    String[] replacementTextSplitOnCarriageReturn = StringUtils.split(replacedText, "\n");

    for (int j = 0; j < replacementTextSplitOnCarriageReturn.length; j++) {
        String part = replacementTextSplitOnCarriageReturn[j];

        XWPFRun newRun = paragraph.insertNewRun(j);
        newRun.setText(part);

        if (j+1 < replacementTextSplitOnCarriageReturn.length) {
            newRun.addCarriageReturn();
        }
    }       
}

private void removeAllRuns(XWPFParagraph paragraph) {
    int size = paragraph.getRuns().size();
    for (int i = 0; i < size; i++) {
        paragraph.removeRun(0);
    }
}

private boolean hasReplaceableItem(String runText) {
    return StringUtils.contains(runText, searchValue);
}

//REVISIT The below can be removed if Michele tests and approved the above less versatile replacement version

//  private void replace(XWPFParagraph paragraph) {
//      for (int i = 0; i < paragraph.getRuns().size()  ; i++) {
//          i = replace(paragraph, i);
//      }
//  }

//  private int replace(XWPFParagraph paragraph, int i) {
//      XWPFRun run = paragraph.getRuns().get(i);
//      
//      String runText = run.getText(0);
//      
//      if (hasReplaceableItem(runText)) {
//          return replace(paragraph, i, run);
//      }
//      
//      return i;
//  }

//  private int replace(XWPFParagraph paragraph, int i, XWPFRun run) {
//      String runText = run.getCTR().getTArray(0).getStringValue();
//      
//      String beforeSuperLong = StringUtils.substring(runText, 0, runText.indexOf(searchValue));
//      
//      String[] replacementTextSplitOnCarriageReturn = StringUtils.split(replacement, "\n");
//      
//      String afterSuperLong = StringUtils.substring(runText, runText.indexOf(searchValue) + searchValue.length());
//      
//      Counter counter = new Counter(i);
//      
//      insertNewRun(paragraph, run, counter, beforeSuperLong);
//      
//      for (int j = 0; j < replacementTextSplitOnCarriageReturn.length; j++) {
//          String part = replacementTextSplitOnCarriageReturn[j];
//
//          XWPFRun newRun = insertNewRun(paragraph, run, counter, part);
//          
//          if (j+1 < replacementTextSplitOnCarriageReturn.length) {
//              newRun.addCarriageReturn();
//          }
//      }
//      
//      insertNewRun(paragraph, run, counter, afterSuperLong);
//      
//      paragraph.removeRun(counter.getCount());
//      
//      return counter.getCount();
//  }

//  private class Counter {
//      private int i;
//      
//      public Counter(int i) {
//          this.i = i;
//      }
//      
//      public void increment() {
//          i++;
//      }
//      
//      public int getCount() {
//          return i;
//      }
//  }

//  private XWPFRun insertNewRun(XWPFParagraph xwpfParagraph, XWPFRun run, Counter counter, String newText) {
//      XWPFRun newRun = xwpfParagraph.insertNewRun(counter.i);
//      newRun.getCTR().set(run.getCTR());
//      newRun.getCTR().getTArray(0).setStringValue(newText);
//      
//      counter.increment();
//      
//      return newRun;
//  }

The method you need is XWPFRun.setText(String). Simply work your way through the file until you find the XWPFRun of interest, work out what you want the new text to be, and replace it. (A run is a sequence of text with the same formatting)

You should be able to do something like:

XWPFDocument doc = new XWPFDocument(OPCPackage.open("input.docx"));
for (XWPFParagraph p : doc.getParagraphs()) {
    List<XWPFRun> runs = p.getRuns();
    if (runs != null) {
        for (XWPFRun r : runs) {
            String text = r.getText(0);
            if (text != null && text.contains("needle")) {
                text = text.replace("needle", "haystack");
                r.setText(text, 0);
            }
        }
    }
}
for (XWPFTable tbl : doc.getTables()) {
   for (XWPFTableRow row : tbl.getRows()) {
      for (XWPFTableCell cell : row.getTableCells()) {
         for (XWPFParagraph p : cell.getParagraphs()) {
            for (XWPFRun r : p.getRuns()) {
              String text = r.getText(0);
              if (text != null && text.contains("needle")) {
                text = text.replace("needle", "haystack");
                r.setText(text,0);
              }
            }
         }
      }
   }
}
doc.write(new FileOutputStream("output.docx"));