OdfWhitespaceProcessor.java
- /**
- * **********************************************************************
- *
- * <p>DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER
- *
- * <p>Copyright 2008, 2010 Oracle and/or its affiliates. All rights reserved.
- *
- * <p>Use is subject to license terms.
- *
- * <p>Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
- * except in compliance with the License. You may obtain a copy of the License at
- * http://www.apache.org/licenses/LICENSE-2.0. You can also obtain a copy of the License at
- * http://odftoolkit.org/docs/license.txt
- *
- * <p>Unless required by applicable law or agreed to in writing, software distributed under the
- * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
- * express or implied.
- *
- * <p>See the License for the specific language governing permissions and limitations under the
- * License.
- *
- * <p>**********************************************************************
- */
- package org.odftoolkit.odfdom.incubator.doc.text;
- import org.odftoolkit.odfdom.dom.OdfDocumentNamespace;
- import org.odftoolkit.odfdom.dom.element.text.TextLineBreakElement;
- import org.odftoolkit.odfdom.dom.element.text.TextSElement;
- import org.odftoolkit.odfdom.dom.element.text.TextTabElement;
- import org.odftoolkit.odfdom.pkg.OdfFileDom;
- import org.w3c.dom.Element;
- import org.w3c.dom.Node;
- /**
- * It's a tool class to help process white space.
- *
- * @author J David Eisenberg
- */
- public class OdfWhitespaceProcessor {
- public OdfWhitespaceProcessor() {}
- /**
- * Add given text content to an element, handling multiple blanks, tabs, and newlines properly.
- *
- * @param element the element to which content is being added
- * @param content text content including whitespace
- */
- public void append(Element element, String content) {
- char ch;
- StringBuilder partial = new StringBuilder();
- int spaces = 0;
- OdfFileDom owner = (OdfFileDom) element.getOwnerDocument();
- for (int i = 0; i < content.length(); i++) {
- ch = content.charAt(i);
- if (ch == ' ') {
- if (spaces == 0) {
- partial.append(' ');
- }
- spaces++;
- } else if (ch == '\n') {
- emitPartial(element, partial, spaces, owner);
- spaces = 0;
- element.appendChild(new TextLineBreakElement(owner));
- } else if (ch == '\t') {
- emitPartial(element, partial, spaces, owner);
- spaces = 0;
- element.appendChild(new TextTabElement(owner));
- } else if (ch != '\r') // ignore DOS half of CR-LF
- {
- if (spaces > 1) {
- emitPartial(element, partial, spaces, owner);
- }
- partial.append(ch);
- spaces = 0;
- }
- }
- emitPartial(element, partial, spaces, owner);
- }
- /*
- * Send out any information that has been buffered
- */
- private void emitPartial(Element element, StringBuilder partial, int spaces, OdfFileDom owner) {
- /*
- * send out any partial text
- */
- if (partial.length() != 0) {
- element.appendChild(owner.createTextNode(partial.toString()));
- }
- /*
- * and any spaces if necessary
- */
- if (spaces > 1) {
- TextSElement spaceElement = new TextSElement(owner);
- spaceElement.setTextCAttribute(new Integer(spaces - 1));
- element.appendChild(spaceElement);
- }
- /*
- * and reset all the counters
- */
- partial.delete(0, partial.length());
- }
- /**
- * Retrieve the text content of an element. Recursively retrieves all the text nodes, expanding
- * whitespace where necessary. Ignores any elements except <code><text:s></code>, <code>
- * <text:line-break></code> and <code><text:tab></code>.
- *
- * @param element an element whose text you want to retrieve
- * @return the element's text content, with whitespace expanded
- */
- public String getText(Node element) {
- String result = "";
- int spaceCount;
- Node node = element.getFirstChild();
- while (node != null) {
- if (node.getNodeType() == Node.TEXT_NODE) {
- result += node.getNodeValue();
- } else if (node.getNodeType() == Node.ELEMENT_NODE) {
- if (node.getLocalName().equals("s")) // text:s
- {
- try {
- spaceCount =
- Integer.parseInt(
- ((Element) node).getAttributeNS(OdfDocumentNamespace.TEXT.getUri(), "c"));
- } catch (Exception e) {
- spaceCount = 1;
- }
- for (int i = 0; i < spaceCount; i++) {
- result += " ";
- }
- } else if (node.getLocalName().equals("line-break")) {
- result += "\n";
- } else if (node.getLocalName().equals("tab")) {
- result += "\t";
- } else {
- result = result + getText(node);
- }
- }
- node = node.getNextSibling();
- }
- return result;
- }
- /**
- * Append text content to a given element, handling whitespace properly. This is a static method
- * that creates its own OdfWhitespaceProcessor, so that you don't have to.
- *
- * @param element the element to which content is being added
- * @param content text content including whitespace
- */
- public static void appendText(Element element, String content) {
- OdfWhitespaceProcessor processor = new OdfWhitespaceProcessor();
- processor.append(element, content);
- }
- }