mirror of
https://github.com/itplr-kosit/validator.git
synced 2026-05-25 16:55:39 +00:00
(chore) refactoring der html extraktion
This commit is contained in:
parent
6a3d33b97c
commit
0a28e41d98
4 changed files with 46 additions and 33 deletions
|
|
@@ -25,7 +25,7 @@ import lombok.RequiredArgsConstructor;
|
||||||
import lombok.extern.slf4j.Slf4j;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
|
||||||
import de.kosit.validationtool.impl.ContentRepository;
|
import de.kosit.validationtool.impl.ContentRepository;
|
||||||
import de.kosit.validationtool.impl.HtmlExtraction;
|
import de.kosit.validationtool.impl.HtmlExtractor;
|
||||||
import de.kosit.validationtool.impl.tasks.CheckAction;
|
import de.kosit.validationtool.impl.tasks.CheckAction;
|
||||||
|
|
||||||
import net.sf.saxon.s9api.QName;
|
import net.sf.saxon.s9api.QName;
|
||||||
|
|
@@ -47,13 +47,13 @@ class ExtractHtmlContentAction implements CheckAction {
|
||||||
|
|
||||||
private final Path outputDirectory;
|
private final Path outputDirectory;
|
||||||
|
|
||||||
private HtmlExtraction htmlExtraction;
|
private HtmlExtractor htmlExtraction;
|
||||||
|
|
||||||
private ContentRepository repository;
|
private ContentRepository repository;
|
||||||
|
|
||||||
public ExtractHtmlContentAction(final ContentRepository repository, final Path outputDirectory) {
|
public ExtractHtmlContentAction(final ContentRepository repository, final Path outputDirectory) {
|
||||||
this.outputDirectory = outputDirectory;
|
this.outputDirectory = outputDirectory;
|
||||||
this.htmlExtraction = new HtmlExtraction(repository);
|
this.htmlExtraction = new HtmlExtractor(repository);
|
||||||
this.repository = repository;
|
this.repository = repository;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@@ -130,7 +130,7 @@ public class DefaultCheck implements Check {
|
||||||
}
|
}
|
||||||
|
|
||||||
private Result createResult(final Bag t) {
|
private Result createResult(final Bag t) {
|
||||||
final DefaultResult result = new DefaultResult(t.getReport(), t.getAcceptStatus(), this.contentRepository);
|
final DefaultResult result = new DefaultResult(t.getReport(), t.getAcceptStatus(), new HtmlExtractor(this.contentRepository));
|
||||||
result.setReportInput(t.getReportInput());
|
result.setReportInput(t.getReportInput());
|
||||||
if (t.getSchemaValidationResult() != null) {
|
if (t.getSchemaValidationResult() != null) {
|
||||||
result.setSchemaViolations(convertErrors(t.getSchemaValidationResult().getErrors()));
|
result.setSchemaViolations(convertErrors(t.getSchemaValidationResult().getErrors()));
|
||||||
|
|
|
||||||
|
|
@@ -1,13 +1,12 @@
|
||||||
package de.kosit.validationtool.impl;
|
package de.kosit.validationtool.impl;
|
||||||
|
|
||||||
import java.io.StringWriter;
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import org.oclc.purl.dsdl.svrl.FailedAssert;
|
||||||
import org.oclc.purl.dsdl.svrl.SchematronOutput;
|
import org.oclc.purl.dsdl.svrl.SchematronOutput;
|
||||||
import org.w3c.dom.Document;
|
import org.w3c.dom.Document;
|
||||||
import org.w3c.dom.Element;
|
|
||||||
|
|
||||||
import lombok.AccessLevel;
|
import lombok.AccessLevel;
|
||||||
import lombok.Getter;
|
import lombok.Getter;
|
||||||
|
|
@@ -19,8 +18,6 @@ import de.kosit.validationtool.api.XmlError;
|
||||||
import de.kosit.validationtool.model.reportInput.CreateReportInput;
|
import de.kosit.validationtool.model.reportInput.CreateReportInput;
|
||||||
|
|
||||||
import net.sf.saxon.dom.NodeOverNodeInfo;
|
import net.sf.saxon.dom.NodeOverNodeInfo;
|
||||||
import net.sf.saxon.s9api.SaxonApiException;
|
|
||||||
import net.sf.saxon.s9api.Serializer;
|
|
||||||
import net.sf.saxon.s9api.XdmNode;
|
import net.sf.saxon.s9api.XdmNode;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@@ -43,14 +40,14 @@ public class DefaultResult implements Result {
|
||||||
@Getter
|
@Getter
|
||||||
private final AcceptRecommendation acceptRecommendation;
|
private final AcceptRecommendation acceptRecommendation;
|
||||||
|
|
||||||
private final HtmlExtraction htmlExtraction;
|
private final HtmlExtractor htmlExtraction;
|
||||||
|
|
||||||
@Setter(AccessLevel.PACKAGE)
|
@Setter(AccessLevel.PACKAGE)
|
||||||
@Getter
|
@Getter
|
||||||
private List<XmlError> schemaViolations = new ArrayList<>();
|
private List<XmlError> schemaViolations = new ArrayList<>();
|
||||||
|
|
||||||
@Getter
|
@Getter
|
||||||
private List<String> processingErrors = new ArrayList<>();
|
private final List<String> processingErrors = new ArrayList<>();
|
||||||
|
|
||||||
@Getter
|
@Getter
|
||||||
@Setter(AccessLevel.PACKAGE)
|
@Setter(AccessLevel.PACKAGE)
|
||||||
|
|
@@ -60,10 +57,10 @@ public class DefaultResult implements Result {
|
||||||
@Setter
|
@Setter
|
||||||
private boolean processingSuccessful;
|
private boolean processingSuccessful;
|
||||||
|
|
||||||
public DefaultResult(final XdmNode report, final AcceptRecommendation recommendation, final ContentRepository repository) {
|
public DefaultResult(final XdmNode report, final AcceptRecommendation recommendation, final HtmlExtractor htmlExtractor) {
|
||||||
this.report = report;
|
this.report = report;
|
||||||
this.acceptRecommendation = recommendation;
|
this.acceptRecommendation = recommendation;
|
||||||
this.htmlExtraction = new HtmlExtraction(repository);
|
this.htmlExtraction = htmlExtractor;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@@ -87,30 +84,23 @@ public class DefaultResult implements Result {
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<String> extractHtmlAsString() {
|
public List<String> extractHtmlAsString() {
|
||||||
return extractHtml().stream().map(DefaultResult::convertToString).collect(Collectors.toList());
|
return this.htmlExtraction.extractAsString(getReport());
|
||||||
}
|
|
||||||
|
|
||||||
private static String convertToString(final XdmNode element) {
|
|
||||||
try {
|
|
||||||
final StringWriter writer = new StringWriter();
|
|
||||||
final Serializer serializer = ObjectFactory.createProcessor().newSerializer(writer);
|
|
||||||
serializer.serializeNode(element);
|
|
||||||
return writer.toString();
|
|
||||||
} catch (final SaxonApiException e) {
|
|
||||||
throw new IllegalStateException("Can not convert to string", e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public List<Element> extractHtmlAsElement() {
|
|
||||||
return extractHtml().stream().map(DefaultResult::convertToElement).collect(Collectors.toList());
|
|
||||||
}
|
|
||||||
|
|
||||||
private static Element convertToElement(final XdmNode xdmItem) {
|
|
||||||
return (Element) NodeOverNodeInfo.wrap(xdmItem.getUnderlyingNode());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<XdmNode> extractHtml() {
|
public List<XdmNode> extractHtml() {
|
||||||
return this.htmlExtraction.extract(getReport());
|
return this.htmlExtraction.extract(getReport());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gibt alle Schematron-Ergebnisse vom Typ {@link FailedAssert} zurück.
|
||||||
|
*
|
||||||
|
* @return die {@link FailedAssert}
|
||||||
|
*/
|
||||||
|
public List<FailedAssert> getFailedAsserts() {
|
||||||
|
return filterSchematronResult(FailedAssert.class);
|
||||||
|
}
|
||||||
|
|
||||||
|
private <T> List<T> filterSchematronResult(final Class<T> type) {
|
||||||
|
return getSchematronResult().stream().filter(type::isInstance).map(type::cast).collect(Collectors.toList());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@@ -1,5 +1,6 @@
|
||||||
package de.kosit.validationtool.impl;
|
package de.kosit.validationtool.impl;
|
||||||
|
|
||||||
|
import java.io.StringWriter;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
@@ -8,6 +9,7 @@ import java.util.stream.Collectors;
|
||||||
import lombok.RequiredArgsConstructor;
|
import lombok.RequiredArgsConstructor;
|
||||||
|
|
||||||
import net.sf.saxon.s9api.SaxonApiException;
|
import net.sf.saxon.s9api.SaxonApiException;
|
||||||
|
import net.sf.saxon.s9api.Serializer;
|
||||||
import net.sf.saxon.s9api.XPathExecutable;
|
import net.sf.saxon.s9api.XPathExecutable;
|
||||||
import net.sf.saxon.s9api.XPathSelector;
|
import net.sf.saxon.s9api.XPathSelector;
|
||||||
import net.sf.saxon.s9api.XdmItem;
|
import net.sf.saxon.s9api.XdmItem;
|
||||||
|
|
@@ -19,7 +21,7 @@ import net.sf.saxon.s9api.XdmNode;
|
||||||
* @author Andreas Penski
|
* @author Andreas Penski
|
||||||
*/
|
*/
|
||||||
@RequiredArgsConstructor
|
@RequiredArgsConstructor
|
||||||
public class HtmlExtraction {
|
public class HtmlExtractor {
|
||||||
|
|
||||||
private final ContentRepository repository;
|
private final ContentRepository repository;
|
||||||
|
|
||||||
|
|
@@ -48,4 +50,25 @@ public class HtmlExtraction {
|
||||||
}
|
}
|
||||||
return executable.load();
|
return executable.load();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static String convertToString(final XdmNode element) {
|
||||||
|
try {
|
||||||
|
final StringWriter writer = new StringWriter();
|
||||||
|
final Serializer serializer = ObjectFactory.createProcessor().newSerializer(writer);
|
||||||
|
serializer.serializeNode(element);
|
||||||
|
return writer.toString();
|
||||||
|
} catch (final SaxonApiException e) {
|
||||||
|
throw new IllegalStateException("Can not convert to string", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Extrahiert evtl. vorhandene HTML-Knoten als String.
|
||||||
|
*
|
||||||
|
* @param node der root knoten
|
||||||
|
* @return HTML-Fragment als String
|
||||||
|
*/
|
||||||
|
public List<String> extractAsString(final XdmNode node) {
|
||||||
|
return extract(node).stream().map(HtmlExtractor::convertToString).collect(Collectors.toList());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
Loading…
Add table
Add a link
Reference in a new issue