RDFaParser.java
/**
* **********************************************************************
*
* <p>DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER
*
* <p>Copyright 2008, 2010 Oracle and/or its affiliates. All rights reserved.
*
* <p>Use is subject to license terms.
*
* <p>Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
* except in compliance with the License. You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0. You can also obtain a copy of the License at
* http://odftoolkit.org/docs/license.txt
*
* <p>Unless required by applicable law or agreed to in writing, software distributed under the
* License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied.
*
* <p>See the License for the specific language governing permissions and limitations under the
* License.
*
* <p>**********************************************************************
*/
package org.odftoolkit.odfdom.pkg.rdfa;
import java.util.EnumSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import javax.xml.namespace.QName;
import javax.xml.stream.XMLEventFactory;
import javax.xml.stream.XMLOutputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.events.Attribute;
import javax.xml.stream.events.StartElement;
import javax.xml.stream.events.XMLEvent;
import net.rootdev.javardfa.Constants;
import net.rootdev.javardfa.Setting;
import net.rootdev.javardfa.literal.LiteralCollector;
import net.rootdev.javardfa.uri.IRIResolver;
import net.rootdev.javardfa.uri.URIExtractor10;
import org.xml.sax.Attributes;
import org.xml.sax.Locator;
/** An RDFa parser adapted from net.rootdev.javardfa.Parser. */
class RDFaParser extends net.rootdev.javardfa.Parser {
/**
 * While {@code true}, {@link #writeCharacters(String)} discards text. Set when a
 * "bookmark-start" element begins and cleared when it ends.
 */
boolean ignore = false;
/** Factory used to build the StAX events (start/end element, attribute, characters). */
protected XMLEventFactory eventFactory;
/** Receiver of the base URI and prefix declarations found while parsing. */
protected JenaSink sink;
/** Active parser settings; initialised to an empty set by the constructor. */
protected Set<Setting> settings;
/** Collects literal content; events are forwarded to it while it reports that it is collecting. */
protected LiteralCollector literalCollector;
/** Extractor supplied to the constructor, used to resolve URIs and CURIEs from attributes. */
protected URIExtractor extractor;
/**
 * Source of the system id used as base when no context exists yet.
 * NOTE(review): never assigned anywhere in this class - confirm who is expected to set it.
 */
protected Locator locator;
/** Evaluation context of the element currently being processed; {@code null} until a base is set. */
protected EvalContext context;
/**
 * Creates a parser that reports to {@code sink}.
 *
 * <p>The superclass is always given a fresh {@code URIExtractor10}; the {@code extractor} passed
 * in here is kept in this class and receives this parser's (initially empty) settings.
 *
 * @param sink receiver of the parse results
 * @param outputFactory output factory handed to the literal collector; it is switched to
 *     namespace-repairing mode as a side effect
 * @param eventFactory factory for the StAX events created by this parser
 * @param extractor URI extractor used by this class
 */
protected RDFaParser(
    JenaSink sink,
    XMLOutputFactory outputFactory,
    XMLEventFactory eventFactory,
    URIExtractor extractor) {
  super(sink, outputFactory, eventFactory, new URIExtractor10(new IRIResolver()));
  this.settings = EnumSet.noneOf(Setting.class);
  this.sink = sink;
  this.extractor = extractor;
  this.eventFactory = eventFactory;
  this.literalCollector = new LiteralCollector(this, eventFactory, outputFactory);
  this.extractor.setSettings(this.settings);
  // Namespace repairing is required, even though it takes some control away from the caller.
  outputFactory.setProperty(XMLOutputFactory.IS_REPAIRING_NAMESPACES, Boolean.TRUE);
}
/**
 * Handles the start of an element by turning it into a StAX {@link StartElement}, forwarding it
 * to the literal collector when one is active, and evaluating it with
 * {@link #parse(EvalContext, StartElement)} unless an XML literal is being collected.
 *
 * <p>A "bookmark-start" element is not processed; it only switches on {@link #ignore}.
 *
 * @param arg0 namespace URI of the element
 * @param localname local name of the element
 * @param qname qualified name of the element; the text before the first ':' is used as prefix
 * @param arg3 attributes of the element
 * @throws RuntimeException wrapping any {@link XMLStreamException} raised during processing
 */
protected void beginRDFaElement(String arg0, String localname, String qname, Attributes arg3) {
  boolean bookmarkStart = localname.equals("bookmark-start");
  if (bookmarkStart) {
    ignore = true;
    return;
  }
  try {
    // The base can become available very late in some HTML5 cases (not even by document
    // start), so the initial context is created lazily here.
    if (context == null) {
      setBase(locator.getSystemId());
    }

    // The prefix is derived from the colon in the qualified name; comparing localname with
    // qname does not match what XMLEventFactory expects.
    final int colonAt = qname.indexOf(':');
    final String prefix;
    if (colonAt == -1) {
      prefix = "";
    } else {
      prefix = qname.substring(0, colonAt);
    }

    String namespaceUri = arg0;
    String elementName = localname;
    if (settings.contains(Setting.ManualNamespaces)) {
      getNamespaces(arg3);
      if (prefix.length() > 0) {
        namespaceUri = context.getNamespaceURI(prefix);
        elementName = localname.substring(prefix.length() + 1);
      }
    }

    StartElement startEvent =
        eventFactory.createStartElement(
            prefix, namespaceUri, elementName, fromAttributes(arg3), null, context);
    if (literalCollector.isCollecting()) {
      literalCollector.handleEvent(startEvent);
    }
    // While XML is being gathered, RDFa evaluation is suspended.
    if (!literalCollector.isCollectingXML()) {
      context = parse(context, startEvent);
    }
  } catch (XMLStreamException ex) {
    throw new RuntimeException("Streaming issue", ex);
  }
}
/**
 * Handles the end of an element: forwards an end-element event to the literal collector when one
 * is active, and pops the evaluation context unless an XML literal is being collected.
 *
 * <p>A "bookmark-start" element is not processed; it only switches off {@link #ignore}.
 *
 * @param arg0 namespace URI of the element
 * @param localname local name of the element
 * @param qname qualified name of the element
 */
protected void endRDFaElement(String arg0, String localname, String qname) {
  if (localname.equals("bookmark-start")) {
    ignore = false;
    return;
  }
  if (literalCollector.isCollecting()) {
    // A qualified name without a colon has no prefix, even when it differs from the local
    // name (for example an empty local name); taking substring(0, -1) there would throw.
    int colonAt = qname.indexOf(':');
    String prefix = (colonAt < 0 || localname.equals(qname)) ? "" : qname.substring(0, colonAt);
    XMLEvent e = eventFactory.createEndElement(prefix, arg0, localname);
    literalCollector.handleEvent(e);
  }
  // If we aren't collecting an XML literal keep parsing
  if (!literalCollector.isCollectingXML()) {
    context = context.parent;
  }
}
/**
 * Forwards character data to the literal collector.
 *
 * <p>Nothing happens while {@link #ignore} is set or while the collector is not collecting.
 *
 * @param value the text to forward
 */
protected void writeCharacters(String value) {
  if (ignore || !literalCollector.isCollecting()) {
    return;
  }
  XMLEvent textEvent = eventFactory.createCharacters(value);
  literalCollector.handleEvent(textEvent);
}
/**
 * Sets the base URI of the DOM by starting a new root evaluation context for it and passing the
 * context's base on to the sink.
 *
 * @param base the base URI
 */
public void setBase(String base) {
  EvalContext rootContext = new EvalContext(base);
  this.context = rootContext;
  sink.setBase(rootContext.getBase());
}
/**
 * Evaluates the RDFa attributes of one start element and returns the context for its children.
 *
 * <p>In order, this method:
 *
 * <ol>
 *   <li>applies {@code vocab} and {@code prefix} when {@code Setting.OnePointOne} is set;
 *   <li>determines the current language from the language attribute matching the namespace mode;
 *   <li>updates the base when the element is the {@code base} element carrying an {@code href};
 *   <li>establishes the new subject (and, when {@code rel}/{@code rev} is present, the current
 *       object);
 *   <li>emits type triples, {@code rel}/{@code rev} triples (or records them as pending
 *       properties for descendants), and literal triples for {@code property};
 *   <li>completes the parent's pending forward and backward properties against the new subject.
 * </ol>
 *
 * @param context evaluation context of the parent element; its vocab, prefixes and base may be
 *     modified
 * @param element the start element to evaluate
 * @return a new context, child of {@code context}, to be used for the element's content
 * @throws XMLStreamException declared for callers; propagated from event handling
 */
protected EvalContext parse(EvalContext context, StartElement element) throws XMLStreamException {
  boolean skipElement = false;
  String newSubject = null;
  String currentObject = null;
  List<String> forwardProperties = new LinkedList<String>();
  List<String> backwardProperties = new LinkedList<String>();
  String currentLanguage = context.language;

  if (settings.contains(Setting.OnePointOne)) {
    Attribute vocabAttr = getAttributeByName(element, Constants.vocab);
    if (vocabAttr != null) {
      context.vocab = vocabAttr.getValue().trim();
    }
    Attribute prefixAttr = getAttributeByName(element, Constants.prefix);
    if (prefixAttr != null) {
      parsePrefixes(prefixAttr.getValue(), context);
    }
  }

  // The xml / html namespace matching is a bit ropey: with manual namespaces the un-namespaced
  // xml:lang wins over lang, otherwise only the namespaced xml:lang is considered.
  Attribute langAttr =
      settings.contains(Setting.ManualNamespaces)
          ? findAttribute(element, Constants.xmllang, Constants.lang)
          : getAttributeByName(element, Constants.xmllangNS);
  if (langAttr != null) {
    currentLanguage = langAttr.getValue();
    // An empty language attribute resets the language.
    if (currentLanguage.length() == 0) {
      currentLanguage = null;
    }
  }

  if (Constants.base.equals(element.getName())) {
    Attribute baseHref = getAttributeByName(element, Constants.href);
    if (baseHref != null) {
      context.setBase(baseHref.getValue());
      sink.setBase(context.getBase());
    }
  }

  // Each lookup scans the element's attributes, so the ones consulted repeatedly are fetched
  // once up front.
  Attribute relAttr = getAttributeByName(element, Constants.rel);
  Attribute revAttr = getAttributeByName(element, Constants.rev);
  Attribute typeofAttr = getAttributeByName(element, Constants.typeof);
  Attribute propertyAttr = getAttributeByName(element, Constants.property);
  boolean headOrBody =
      Constants.head.equals(element.getName()) || Constants.body.equals(element.getName());

  if (revAttr == null && relAttr == null) {
    Attribute nSubj = findAttribute(element, Constants.about);
    if (nSubj != null) {
      newSubject = extractor.getURI(element, nSubj, context);
    }
    if (newSubject == null) {
      if (headOrBody) {
        newSubject = context.base;
      } else if (typeofAttr != null) {
        newSubject = createBNode();
      } else {
        if (context.parentObject != null) {
          newSubject = context.parentObject;
        }
        // Nothing on this element contributes a subject or a literal.
        if (propertyAttr == null) {
          skipElement = true;
        }
      }
    }
  } else {
    Attribute nSubj = findAttribute(element, Constants.about, Constants.src);
    if (nSubj != null) {
      newSubject = extractor.getURI(element, nSubj, context);
    }
    if (newSubject == null) {
      // if element is head or body assume about=""
      if (headOrBody) {
        newSubject = context.base;
      } else if (typeofAttr != null) {
        newSubject = createBNode();
      } else if (context.parentObject != null) {
        newSubject = context.parentObject;
      }
    }
    Attribute cObj = findAttribute(element, Constants.resource, Constants.href);
    if (cObj != null) {
      currentObject = extractor.getURI(element, cObj, context);
    }
  }

  if (newSubject != null && typeofAttr != null) {
    List<String> types = extractor.getURIs(element, typeofAttr, context);
    for (String type : types) {
      emitTriples(newSubject, Constants.rdfType, type);
    }
  }

  if (currentObject != null) {
    if (relAttr != null) {
      emitTriples(newSubject, extractor.getURIs(element, relAttr, context), currentObject);
    }
    if (revAttr != null) {
      emitTriples(currentObject, extractor.getURIs(element, revAttr, context), newSubject);
    }
  } else {
    // No object yet: remember the predicates so descendants can complete the triples.
    if (relAttr != null) {
      forwardProperties.addAll(extractor.getURIs(element, relAttr, context));
    }
    if (revAttr != null) {
      backwardProperties.addAll(extractor.getURIs(element, revAttr, context));
    }
    if (!forwardProperties.isEmpty() || !backwardProperties.isEmpty()) {
      // if predicate present
      currentObject = createBNode();
    }
  }

  // Getting literal values. Complicated!
  if (propertyAttr != null) {
    List<String> props = extractor.getURIs(element, propertyAttr, context);
    String dt = getDatatype(element);
    Attribute contentAttr = getAttributeByName(element, Constants.content);
    if (contentAttr != null) {
      // The easy bit: the literal is given directly by the content attribute.
      String lex = contentAttr.getValue();
      if (dt == null || dt.length() == 0) {
        emitTriplesPlainLiteral(newSubject, props, lex, currentLanguage);
      } else {
        emitTriplesDatatypeLiteral(newSubject, props, lex, dt);
      }
    } else {
      // The literal is the element content; hand over to the collector.
      literalCollector.collect(newSubject, props, dt, currentLanguage);
    }
  }

  if (!skipElement && newSubject != null) {
    emitTriples(context.parentSubject, context.forwardProperties, newSubject);
    emitTriples(newSubject, context.backwardProperties, context.parentSubject);
  }

  EvalContext ec = new EvalContext(context);
  ec.language = currentLanguage;
  if (!skipElement) {
    ec.parentSubject = (newSubject != null) ? newSubject : context.parentSubject;
    if (currentObject != null) {
      ec.parentObject = currentObject;
    } else if (newSubject != null) {
      ec.parentObject = newSubject;
    } else {
      ec.parentObject = context.parentSubject;
    }
    ec.forwardProperties = forwardProperties;
    ec.backwardProperties = backwardProperties;
  }
  return ec;
}
/**
 * Registers every {@code xmlns:<prefix>} declaration among the attributes with the current
 * context, the extractor and the sink.
 *
 * <p>Outside manual-namespace mode, declared prefixes containing an underscore are skipped.
 *
 * @param attrs attributes of the element being started
 */
private void getNamespaces(Attributes attrs) {
  for (int idx = 0; idx < attrs.getLength(); idx++) {
    String attrQName = attrs.getQName(idx);
    String attrPrefix = getPrefix(attrQName);
    if (!"xmlns".equals(attrPrefix)) {
      continue;
    }
    String declaredPrefix = getLocal(attrPrefix, attrQName);
    String namespaceUri = attrs.getValue(idx);
    boolean manualNamespaces = settings.contains(Setting.ManualNamespaces);
    if (!manualNamespaces && declaredPrefix.contains("_")) {
      continue; // not permitted
    }
    context.setNamespaceURI(declaredPrefix, namespaceUri);
    extractor.setNamespaceURI(declaredPrefix, namespaceUri);
    sink.addPrefix(declaredPrefix, namespaceUri);
  }
}
/**
 * Returns the part of a qualified name before its first colon.
 *
 * @param qname the qualified name
 * @return the prefix, or the empty string when {@code qname} contains no colon
 */
private String getPrefix(String qname) {
  int colonAt = qname.indexOf(":");
  return (colonAt < 0) ? "" : qname.substring(0, colonAt);
}
/**
 * Returns the local part of a qualified name given its prefix.
 *
 * @param prefix the prefix of {@code qname}, empty when there is none
 * @param qname the qualified name
 * @return {@code qname} itself for an empty prefix, otherwise the text after the prefix and its
 *     separator character
 */
private String getLocal(String prefix, String qname) {
  return prefix.isEmpty() ? qname : qname.substring(prefix.length() + 1);
}
/**
 * Converts SAX attributes into StAX {@link Attribute} events, leaving out namespace declarations
 * ({@code xmlns} and {@code xmlns:*}).
 *
 * @param attributes the SAX attributes of an element
 * @return an iterator over the converted attributes, in their original order
 */
private Iterator<Attribute> fromAttributes(Attributes attributes) {
  List<Attribute> converted = new LinkedList<Attribute>();
  for (int i = 0; i < attributes.getLength(); i++) {
    String qname = attributes.getQName(i);
    // Namespace declarations are not attributes of the StAX element; skip them before
    // building an event that would only be thrown away.
    if (qname.equals("xmlns") || qname.startsWith("xmlns:")) {
      continue;
    }
    int colonAt = qname.indexOf(':');
    String prefix = (colonAt < 0) ? "" : qname.substring(0, colonAt);
    converted.add(
        eventFactory.createAttribute(
            prefix, attributes.getURI(i), attributes.getLocalName(i), attributes.getValue(i)));
  }
  return converted.iterator();
}
/**
 * Looks up the first attribute of {@code element} that matches one of the given names, trying
 * the names in the order supplied.
 *
 * @param element the element to search
 * @param names candidate attribute names, highest priority first
 * @return the first matching attribute, or {@code null} when none of the names is present
 */
private Attribute findAttribute(StartElement element, QName... names) {
  Attribute match = null;
  for (int i = 0; i < names.length && match == null; i++) {
    match = getAttributeByName(element, names[i]);
  }
  return match;
}
/**
 * Parses the value of a {@code prefix} attribute, a whitespace-separated sequence of
 * {@code name: uri} pairs, and registers each mapping with the context and the sink.
 *
 * <p>Tokens are read two at a time; a pair is registered only when its first token ends with a
 * colon and a second token exists.
 *
 * @param value the raw attribute value
 * @param context the evaluation context that receives the prefix mappings
 */
private void parsePrefixes(String value, EvalContext context) {
  // Trim first: split() keeps a leading empty token when the value starts with whitespace,
  // which would shift the pair stride by one and silently drop every mapping.
  String[] parts = value.trim().split("\\s+");
  for (int i = 0; i + 1 < parts.length; i += 2) {
    String prefix = parts[i];
    if (prefix.endsWith(":")) {
      String prefixFix = prefix.substring(0, prefix.length() - 1);
      String uri = parts[i + 1];
      context.setPrefix(prefixFix, uri);
      sink.addPrefix(prefixFix, uri);
    }
  }
}
/**
 * Finds an attribute of {@code element} by name, comparing names with
 * {@code Util.qNameEquals} rather than relying on the element's own lookup.
 *
 * @param element the element to search; may be {@code null}
 * @param name the attribute name to look for; may be {@code null}
 * @return the first attribute whose name matches, or {@code null} when there is none or when
 *     either argument is {@code null}
 */
private Attribute getAttributeByName(StartElement element, QName name) {
  if (element == null || name == null) {
    return null;
  }
  for (Iterator<?> attrs = element.getAttributes(); attrs.hasNext(); ) {
    Attribute candidate = (Attribute) attrs.next();
    if (Util.qNameEquals(candidate.getName(), name)) {
      return candidate;
    }
  }
  return null;
}
/** Number used for the next blank node label; incremented on every {@link #createBNode()} call. */
int bnodeId = 0;

/**
 * Creates a new blank node label of the form {@code _:node<N>}.
 *
 * <p>TODO probably broken? Can you write bnodes in rdfa directly?
 *
 * @return the new label
 */
private String createBNode() {
  int id = bnodeId;
  bnodeId = id + 1;
  return "_:node" + id;
}
/**
 * Determines the datatype declared on an element.
 *
 * @param element the element to inspect
 * @return {@code null} when there is no datatype attribute, the empty string when the attribute
 *     is empty, otherwise the attribute value expanded as a CURIE by the extractor
 */
private String getDatatype(StartElement element) {
  Attribute datatypeAttr = getAttributeByName(element, Constants.datatype);
  if (datatypeAttr == null) {
    return null;
  }
  String raw = datatypeAttr.getValue();
  return raw.isEmpty() ? raw : extractor.expandCURIE(element, raw, context);
}
}