BookmarkRDFMetadataExtractor.java

/**
 * **********************************************************************
 *
 * <p>DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER
 *
 * <p>Copyright 2008, 2010 Oracle and/or its affiliates. All rights reserved.
 *
 * <p>Use is subject to license terms.
 *
 * <p>Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy of the License at
 * http://www.apache.org/licenses/LICENSE-2.0. You can also obtain a copy of the License at
 * http://odftoolkit.org/docs/license.txt
 *
 * <p>Unless required by applicable law or agreed to in writing, software distributed under the
 * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied.
 *
 * <p>See the License for the specific language governing permissions and limitations under the
 * License.
 *
 * <p>**********************************************************************
 */
package org.odftoolkit.odfdom.dom.rdfa;

import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import javax.xml.stream.XMLEventFactory;
import javax.xml.stream.events.Attribute;
import javax.xml.stream.events.StartElement;
import org.apache.jena.rdf.model.Model;
import org.apache.jena.rdf.model.ModelFactory;
import org.apache.jena.rdf.model.Property;
import org.apache.jena.rdf.model.Resource;
import org.odftoolkit.odfdom.dom.DefaultElementVisitor;
import org.odftoolkit.odfdom.dom.OdfDocumentNamespace;
import org.odftoolkit.odfdom.dom.element.text.TextBookmarkEndElement;
import org.odftoolkit.odfdom.dom.element.text.TextBookmarkStartElement;
import org.odftoolkit.odfdom.pkg.OdfElement;
import org.odftoolkit.odfdom.pkg.OdfFileDom;
import org.odftoolkit.odfdom.pkg.rdfa.DOMAttributes;
import org.odftoolkit.odfdom.pkg.rdfa.JenaSink;
import org.w3c.dom.Node;
import org.xml.sax.Attributes;

/**
 * This is a sub class of <code>DefaultElementVisitor</code>, which is used to extract metadata from
 * {@odf.element text:bookmark-start} to {@odf.element text:bookmark-end} pair.
 *
 * <p>The extractor walks the DOM starting at the root element. Every text node encountered while a
 * bookmark is "open" (its start element was visited, its end element was not yet reached) is
 * appended to that bookmark's text buffer. When the matching end element is reached the collected
 * text is stored, and finally turned into RDF statements using the <code>xhtml:about</code>, <code>
 * xhtml:property</code> and <code>xhtml:content</code> attributes of the start element.
 *
 * <p>An instance keeps the state of the current extraction in its fields; this state is reset at
 * the beginning of every <code>getBookmarkRDFMetadata</code> call.
 */
public class BookmarkRDFMetadataExtractor extends DefaultElementVisitor {

  protected static final char NewLineChar = '\n';
  protected static final char TabChar = '\t';

  /** The single bookmark to extract, or <code>null</code> to extract all bookmarks. */
  private TextBookmarkStartElement bookmarkstart;

  /** Set once the end of {@link #bookmarkstart} was reached (single bookmark mode only). */
  private boolean found;

  /** Text buffers of the bookmarks that are currently open. */
  protected final Map<TextBookmarkStartElement, ExtractorStringBuilder> builderMap;

  /** Collected text of the bookmarks whose end element has been reached. */
  protected final Map<TextBookmarkStartElement, String> stringMap;

  private final XMLEventFactory eventFactory = XMLEventFactory.newInstance();

  private JenaSink sink;

  /**
   * This class is used to provide the string builder functions to extractor. It will automatically
   * process the last NewLineChar.
   *
   * @since 0.3.5
   */
  protected static class ExtractorStringBuilder {
    private final StringBuilder mBuilder;

    /** <code>true</code> only while the last appended character stems from appendLine(). */
    private boolean lastAppendNewLine;

    ExtractorStringBuilder() {
      mBuilder = new StringBuilder();
      lastAppendNewLine = false;
    }

    /**
     * Append a string
     *
     * @param str - the string
     */
    public void append(String str) {
      int lengthBefore = mBuilder.length();
      mBuilder.append(str);
      // Only content that really was added hides a trailing new line written by appendLine().
      if (mBuilder.length() != lengthBefore) {
        lastAppendNewLine = false;
      }
    }

    /**
     * Append a character
     *
     * @param ch - the character
     */
    public void append(char ch) {
      mBuilder.append(ch);
      lastAppendNewLine = false;
    }

    /** Append a new line character at the end */
    public void appendLine() {
      mBuilder.append(NewLineChar);
      lastAppendNewLine = true;
    }

    /**
     * Return the string value.
     *
     * <p>If the last character is a new line character and is appended with appendLine(), the last
     * new line character is left out of the returned string. The buffer itself is not modified, so
     * calling this method repeatedly yields the same result.
     *
     * @return the collected text without a trailing new line written by appendLine()
     */
    @Override
    public String toString() {
      int length = mBuilder.length();
      if (lastAppendNewLine && length > 0) {
        return mBuilder.substring(0, length - 1);
      }
      return mBuilder.toString();
    }
  }

  /**
   * Create a BookmarkRDFMetadataExtractor instance, which RDF metadata content of bookmarks can be
   * extracted by <code>getBookmarkRDFMetadata()</code>.
   *
   * @return an instance of BookmarkRDFMetadataExtractor
   */
  public static BookmarkRDFMetadataExtractor newBookmarkTextExtractor() {
    return new BookmarkRDFMetadataExtractor();
  }

  /**
   * Return the RDF metadata of all bookmarks found below the root element of the given DOM as a
   * Jena Model.
   *
   * @param dom the DOM whose bookmarks are inspected
   * @return a new Jena Model holding the statements of the bookmarks
   */
  public Model getBookmarkRDFMetadata(OdfFileDom dom) {
    reset(null);
    this.sink = dom.getSink();
    visit(dom.getRootElement());
    return getModel();
  }

  /**
   * Return the RDF metadata of the given bookmark as a Jena Model.
   *
   * <p>The owner document of the bookmark is traversed from its root element; text is collected
   * from the given start element up to the matching {@odf.element text:bookmark-end}.
   *
   * @param bookmarkstart the start element of the bookmark to extract
   * @return a new Jena Model holding the statements of the bookmark
   */
  public Model getBookmarkRDFMetadata(TextBookmarkStartElement bookmarkstart) {
    reset(bookmarkstart);
    OdfFileDom dom = (OdfFileDom) bookmarkstart.getOwnerDocument();
    this.sink = dom.getSink();
    visit(dom.getRootElement());
    return getModel();
  }

  /**
   * Prepare this extractor for a new traversal, dropping everything collected by a previous one so
   * that results of earlier calls do not show up in the next model.
   *
   * @param target the only bookmark to extract, or <code>null</code> to extract all bookmarks
   */
  private void reset(TextBookmarkStartElement target) {
    this.bookmarkstart = target;
    this.found = false;
    builderMap.clear();
    stringMap.clear();
  }

  /**
   * Build a Jena Model from the collected bookmark texts.
   *
   * <p>Only bookmarks having both an <code>xhtml:about</code> and an <code>xhtml:property</code>
   * attribute contribute a statement. The literal is the <code>xhtml:content</code> attribute if
   * present, otherwise the text collected between bookmark start and end.
   */
  private Model getModel() {
    Model model = ModelFactory.createDefaultModel();
    for (Entry<TextBookmarkStartElement, String> entry : stringMap.entrySet()) {
      TextBookmarkStartElement start = entry.getKey();
      String xhtmlAbout = start.getXhtmlAboutAttribute();
      String xhtmlProperty = start.getXhtmlPropertyAttribute();
      if (xhtmlAbout == null || xhtmlProperty == null) {
        continue;
      }
      String xhtmlContent = start.getXhtmlContentAttribute();

      String qname = start.getNodeName();
      int colon = qname.indexOf(':');
      String prefix = (colon == -1) ? "" : qname.substring(0, colon);

      // The CURIE expansion of the sink's extractor works on StAX events, therefore the DOM
      // element is mirrored as a StartElement carrying the same name and attributes.
      StartElement event =
          eventFactory.createStartElement(
              prefix,
              start.getNamespaceURI(),
              start.getLocalName(),
              fromAttributes(new DOMAttributes(start.getAttributes())),
              null,
              sink.getContext());

      xhtmlAbout = sink.getExtractor().expandSafeCURIE(event, xhtmlAbout, sink.getContext());
      xhtmlProperty = sink.getExtractor().expandCURIE(event, xhtmlProperty, sink.getContext());
      Resource subject = model.createResource(xhtmlAbout);
      Property predicate = model.createProperty(xhtmlProperty);
      if (xhtmlContent != null) {
        subject.addLiteral(predicate, xhtmlContent);
      } else {
        subject.addLiteral(predicate, entry.getValue());
      }
    }
    return model;
  }

  /**
   * Convert SAX attributes into StAX attribute events, leaving out the namespace declarations
   * (<code>xmlns</code> and <code>xmlns:*</code>).
   *
   * @param attributes the SAX attributes to convert
   * @return an iterator over the created attribute events
   */
  private Iterator<Attribute> fromAttributes(Attributes attributes) {
    List<Attribute> converted = new LinkedList<>();

    for (int i = 0; i < attributes.getLength(); i++) {
      String qname = attributes.getQName(i);
      if (qname.equals("xmlns") || qname.startsWith("xmlns:")) {
        continue;
      }
      int colon = qname.indexOf(':');
      String prefix = (colon == -1) ? "" : qname.substring(0, colon);
      converted.add(
          eventFactory.createAttribute(
              prefix, attributes.getURI(i), attributes.getLocalName(i), attributes.getValue(i)));
    }

    return converted.iterator();
  }

  /** Private constructor, instances are created by {@link #newBookmarkTextExtractor()}. */
  private BookmarkRDFMetadataExtractor() {
    builderMap = new HashMap<>();
    stringMap = new HashMap<>();
  }

  /**
   * The end users needn't to care of this method, if you don't want to override the text content
   * handling strategy of <code>OdfElement</code>.
   *
   * <p>Opens a text buffer if the element is a bookmark start that has to be extracted and then
   * descends into the children of the element.
   *
   * @see
   *     org.odftoolkit.odfdom.dom.DefaultElementVisitor#visit(org.odftoolkit.odfdom.pkg.OdfElement)
   */
  @Override
  public void visit(OdfElement element) {
    if (bookmarkstart != null && found) {
      // Single bookmark mode and the bookmark is complete: nothing left to collect.
      return;
    }
    boolean opensBookmark =
        (bookmarkstart == null)
            ? element instanceof TextBookmarkStartElement
            : element == bookmarkstart;
    if (opensBookmark) {
      builderMap.put((TextBookmarkStartElement) element, new ExtractorStringBuilder());
    }
    appendElementText(element);
  }

  /**
   * Append the text content of this element to the buffers of all open bookmarks and visit its
   * child elements.
   *
   * @param ele the ODF element whose text will be appended.
   */
  private void appendElementText(OdfElement ele) {
    Node node = ele.getFirstChild();
    while (node != null) {
      if (node.getNodeType() == Node.TEXT_NODE) {
        textBuilderAppend(node.getNodeValue());
      } else if (node.getNodeType() == Node.ELEMENT_NODE) {
        if (node instanceof TextBookmarkEndElement) {
          endBookmark((TextBookmarkEndElement) node);
        }

        OdfElement element = (OdfElement) node;
        element.accept(this);
      }
      node = node.getNextSibling();
    }
  }

  /** Append the string to the buffer of every bookmark that is currently open. */
  private void textBuilderAppend(String str) {
    for (ExtractorStringBuilder builder : builderMap.values()) {
      builder.append(str);
    }
  }

  /**
   * Close the open bookmark whose <code>text:name</code> equals the one of the given end element
   * and store its collected text. Does nothing if no open bookmark matches.
   *
   * @param end the bookmark end element that was reached
   */
  private void endBookmark(TextBookmarkEndElement end) {
    String endName = end.getTextNameAttribute();
    TextBookmarkStartElement start = null;
    for (TextBookmarkStartElement candidate : builderMap.keySet()) {
      String startName = candidate.getTextNameAttribute();
      // A start element without a name can never be matched by an end element.
      if (startName != null && startName.equals(endName)) {
        start = candidate;
        break;
      }
    }
    if (start != null) {
      stringMap.put(start, builderMap.remove(start).toString());
      if (bookmarkstart != null) {
        found = true;
      }
    }
  }
}