Habe in diesem Forum diesbezüglich nichts gefunden, was mir weiterhelfen würde. Hat jemand von euch eine Idee, wie das am besten zu bewerkstelligen wäre?
package pack;
import java.awt.HeadlessException;
import java.awt.Toolkit;
import java.awt.datatransfer.DataFlavor;
import java.awt.datatransfer.UnsupportedFlavorException;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
/**
 * Reads text from the system clipboard, parses it as an XML document and prints every node
 * that matches a fixed XPath expression.
 */
public class XPathExample {

    /**
     * Fetches the clipboard text, builds a DOM from its UTF-8 bytes and prints each node selected
     * by the hard-coded XPath expression to standard output.
     *
     * @param args command-line arguments (unused)
     * @throws SAXException if the clipboard text cannot be parsed as XML
     * @throws IOException if the clipboard data or the input stream cannot be read
     * @throws HeadlessException if no system clipboard is available
     * @throws UnsupportedFlavorException if the clipboard content cannot be delivered as a string
     * @throws ParserConfigurationException if no document builder can be created
     * @throws XPathExpressionException if the XPath expression cannot be evaluated
     */
    public static void main(String[] args) throws SAXException, IOException, HeadlessException, UnsupportedFlavorException, ParserConfigurationException, XPathExpressionException {
        // The markup to query is whatever string currently sits on the system clipboard.
        Object clipboardData = Toolkit.getDefaultToolkit()
                .getSystemClipboard()
                .getContents(null)
                .getTransferData(DataFlavor.stringFlavor);
        String markup = (String) clipboardData;

        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        DocumentBuilder parser = factory.newDocumentBuilder();

        // The parser works on a byte stream, so hand it the text encoded as UTF-8.
        byte[] utf8Bytes = markup.getBytes("UTF-8");
        InputStream in = new ByteArrayInputStream(utf8Bytes);
        Document dom = parser.parse(in);

        XPathFactory xpathFactory = XPathFactory.newInstance();
        XPath evaluator = xpathFactory.newXPath();

        // The "Datenschutz" link at the very bottom of the Google page.
        String linkPath = "/html/body/span[@id='main']/center/span[@id='footer']/center/p/font/a";
        NodeList matches = (NodeList) evaluator.evaluate(linkPath, dom, XPathConstants.NODESET);

        int index = 0;
        while (index < matches.getLength()) {
            System.out.println(matches.item(index));
            index++;
        }
    }
}
[Fatal Error] :1:3: The markup in the document preceding the root element must be well-formed.
Exception in thread "main" org.xml.sax.SAXParseException: The markup in the document preceding the root element must be well-formed.
at com.sun.org.apache.xerces.internal.parsers.DOMParser.parse(Unknown Source)
at com.sun.org.apache.xerces.internal.jaxp.DocumentBuilderImpl.parse(Unknown Source)
at javax.xml.parsers.DocumentBuilder.parse(Unknown Source)
at pack.XPathExample.main(XPathExample.java:31)
[Fatal Error] :1:357: The entity name must immediately follow the '&' in the entity reference.
Exception in thread "main" org.xml.sax.SAXParseException: The entity name must immediately follow the '&' in the entity reference.
at com.sun.org.apache.xerces.internal.parsers.DOMParser.parse(Unknown Source)
at com.sun.org.apache.xerces.internal.jaxp.DocumentBuilderImpl.parse(Unknown Source)
at javax.xml.parsers.DocumentBuilder.parse(Unknown Source)
at pack.XPathExample.main(XPathExample.java:31)
package pack;
import java.io.ByteArrayInputStream;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.w3c.dom.Document;
import org.w3c.tidy.Tidy;
/**
 * Runs an HTML source text through JTidy and evaluates an XPath expression against the DOM that
 * JTidy produces.
 */
public class XPathExample {

    /**
     * Parses {@code source} with JTidy and evaluates {@code xpathex} on the resulting document.
     * Both values are empty placeholders to be filled in by the user of this example.
     *
     * @param args command-line arguments (unused)
     * @throws XPathExpressionException if the XPath expression cannot be evaluated
     */
    public static void main(String[] args) throws XPathExpressionException {
        String source = "";// HTML source text
        String xpathex = "";// XPath expression

        // The expression only matches once "/tbody" steps are stripped.
        // NOTE(review): presumably because XPath expressions copied from a browser include the
        // tbody elements the browser inserts into tables, while the tidied source has none - confirm.
        xpathex = xpathex.replace("/tbody", "");

        Tidy tidy = new Tidy();// JTidy - http://jtidy.sourceforge.net/
        tidy.setOnlyErrors(true);// not important
        tidy.setXmlOut(true);

        // Encode the text explicitly as UTF-8 and tell JTidy to decode it the same way, so that
        // non-ASCII characters do not depend on the platform default charset.
        tidy.setInputEncoding("UTF-8");
        byte[] sourceBytes = source.getBytes(java.nio.charset.StandardCharsets.UTF_8);
        Document doc = tidy.parseDOM(new ByteArrayInputStream(sourceBytes), System.out);

        XPath xpath = XPathFactory.newInstance().newXPath();
        // String value of the first node matched by the expression; placeholder for further use.
        String sreturn = xpath.evaluate(xpathex, doc);
    }
}