OdfWhitespaceProcessor.java

  1. /**
  2.  * **********************************************************************
  3.  *
  4.  * <p>DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER
  5.  *
  6.  * <p>Copyright 2008, 2010 Oracle and/or its affiliates. All rights reserved.
  7.  *
  8.  * <p>Use is subject to license terms.
  9.  *
  10.  * <p>Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
  11.  * except in compliance with the License. You may obtain a copy of the License at
  12.  * http://www.apache.org/licenses/LICENSE-2.0. You can also obtain a copy of the License at
  13.  * http://odftoolkit.org/docs/license.txt
  14.  *
  15.  * <p>Unless required by applicable law or agreed to in writing, software distributed under the
  16.  * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
  17.  * express or implied.
  18.  *
  19.  * <p>See the License for the specific language governing permissions and limitations under the
  20.  * License.
  21.  *
  22.  * <p>**********************************************************************
  23.  */
  24. package org.odftoolkit.odfdom.incubator.doc.text;

  25. import org.odftoolkit.odfdom.dom.OdfDocumentNamespace;
  26. import org.odftoolkit.odfdom.dom.element.text.TextLineBreakElement;
  27. import org.odftoolkit.odfdom.dom.element.text.TextSElement;
  28. import org.odftoolkit.odfdom.dom.element.text.TextTabElement;
  29. import org.odftoolkit.odfdom.pkg.OdfFileDom;
  30. import org.w3c.dom.Element;
  31. import org.w3c.dom.Node;

  32. /**
  33.  * Utility class that converts between plain text and the ODF whitespace elements.
  34.  *
  35.  * @author J David Eisenberg
  36.  */
  37. public class OdfWhitespaceProcessor {

  38.   public OdfWhitespaceProcessor() {}

  39.   /**
  40.    * Add given text content to an element, handling multiple blanks, tabs, and newlines properly.
  41.    *
  42.    * @param element the element to which content is being added
  43.    * @param content text content including whitespace
  44.    */
  45.   public void append(Element element, String content) {
  46.     char ch;
  47.     StringBuilder partial = new StringBuilder();
  48.     int spaces = 0;
  49.     OdfFileDom owner = (OdfFileDom) element.getOwnerDocument();
  50.     for (int i = 0; i < content.length(); i++) {
  51.       ch = content.charAt(i);
  52.       if (ch == ' ') {
  53.         if (spaces == 0) {
  54.           partial.append(' ');
  55.         }
  56.         spaces++;
  57.       } else if (ch == '\n') {
  58.         emitPartial(element, partial, spaces, owner);
  59.         spaces = 0;
  60.         element.appendChild(new TextLineBreakElement(owner));
  61.       } else if (ch == '\t') {
  62.         emitPartial(element, partial, spaces, owner);
  63.         spaces = 0;
  64.         element.appendChild(new TextTabElement(owner));
  65.       } else if (ch != '\r') // ignore DOS half of CR-LF
  66.       {
  67.         if (spaces > 1) {
  68.           emitPartial(element, partial, spaces, owner);
  69.         }
  70.         partial.append(ch);
  71.         spaces = 0;
  72.       }
  73.     }
  74.     emitPartial(element, partial, spaces, owner);
  75.   }

  76.   /*
  77.    * Send out any information that has been buffered
  78.    */
  79.   private void emitPartial(Element element, StringBuilder partial, int spaces, OdfFileDom owner) {
  80.     /*
  81.      * send out any partial text
  82.      */
  83.     if (partial.length() != 0) {
  84.       element.appendChild(owner.createTextNode(partial.toString()));
  85.     }
  86.     /*
  87.      * and any spaces if necessary
  88.      */
  89.     if (spaces > 1) {
  90.       TextSElement spaceElement = new TextSElement(owner);
  91.       spaceElement.setTextCAttribute(new Integer(spaces - 1));
  92.       element.appendChild(spaceElement);
  93.     }
  94.     /*
  95.      * and reset all the counters
  96.      */
  97.     partial.delete(0, partial.length());
  98.   }

  99.   /**
  100.    * Retrieve the text content of an element. Recursively retrieves all the text nodes, expanding
  101.    * whitespace where necessary. Ignores any elements except <code>&lt;text:s&gt;</code>, <code>
  102.    * &lt;text:line-break&gt;</code> and <code>&lt;text:tab&gt</code>.
  103.    *
  104.    * @param element an element whose text you want to retrieve
  105.    * @return the element's text content, with whitespace expanded
  106.    */
  107.   public String getText(Node element) {
  108.     String result = "";
  109.     int spaceCount;
  110.     Node node = element.getFirstChild();
  111.     while (node != null) {
  112.       if (node.getNodeType() == Node.TEXT_NODE) {
  113.         result += node.getNodeValue();
  114.       } else if (node.getNodeType() == Node.ELEMENT_NODE) {
  115.         if (node.getLocalName().equals("s")) // text:s
  116.         {
  117.           try {
  118.             spaceCount =
  119.                 Integer.parseInt(
  120.                     ((Element) node).getAttributeNS(OdfDocumentNamespace.TEXT.getUri(), "c"));
  121.           } catch (Exception e) {
  122.             spaceCount = 1;
  123.           }
  124.           for (int i = 0; i < spaceCount; i++) {
  125.             result += " ";
  126.           }
  127.         } else if (node.getLocalName().equals("line-break")) {
  128.           result += "\n";
  129.         } else if (node.getLocalName().equals("tab")) {
  130.           result += "\t";
  131.         } else {
  132.           result = result + getText(node);
  133.         }
  134.       }
  135.       node = node.getNextSibling();
  136.     }
  137.     return result;
  138.   }

  139.   /**
  140.    * Append text content to a given element, handling whitespace properly. This is a static method
  141.    * that creates its own OdfWhitespaceProcessor, so that you don't have to.
  142.    *
  143.    * @param element the element to which content is being added
  144.    * @param content text content including whitespace
  145.    */
  146.   public static void appendText(Element element, String content) {
  147.     OdfWhitespaceProcessor processor = new OdfWhitespaceProcessor();
  148.     processor.append(element, content);
  149.   }
  150. }