(chore) refactoring der html extraktion

This commit is contained in:
Andreas Penski (init) 2019-06-28 14:35:00 +02:00
parent 6a3d33b97c
commit 0a28e41d98
4 changed files with 46 additions and 33 deletions

View file

@ -25,7 +25,7 @@ import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import de.kosit.validationtool.impl.ContentRepository;
import de.kosit.validationtool.impl.HtmlExtraction;
import de.kosit.validationtool.impl.HtmlExtractor;
import de.kosit.validationtool.impl.tasks.CheckAction;
import net.sf.saxon.s9api.QName;
@ -47,13 +47,13 @@ class ExtractHtmlContentAction implements CheckAction {
private final Path outputDirectory;
private HtmlExtraction htmlExtraction;
private HtmlExtractor htmlExtraction;
private ContentRepository repository;
/**
 * Creates the action and stores the given output directory and content repository in the
 * corresponding fields.
 *
 * @param repository the content repository; it is stored and also passed to the HTML extraction helper
 * @param outputDirectory the path stored in {@code outputDirectory}
 */
public ExtractHtmlContentAction(final ContentRepository repository, final Path outputDirectory) {
this.outputDirectory = outputDirectory;
// NOTE(review): the next two statements assign the same field with two different types; this looks
// like the removed and the added line of one diff hunk shown together - confirm which one the file holds.
this.htmlExtraction = new HtmlExtraction(repository);
this.htmlExtraction = new HtmlExtractor(repository);
this.repository = repository;
}

View file

@ -130,7 +130,7 @@ public class DefaultCheck implements Check {
}
private Result createResult(final Bag t) {
final DefaultResult result = new DefaultResult(t.getReport(), t.getAcceptStatus(), this.contentRepository);
final DefaultResult result = new DefaultResult(t.getReport(), t.getAcceptStatus(), new HtmlExtractor(this.contentRepository));
result.setReportInput(t.getReportInput());
if (t.getSchemaValidationResult() != null) {
result.setSchemaViolations(convertErrors(t.getSchemaValidationResult().getErrors()));

View file

@ -1,13 +1,12 @@
package de.kosit.validationtool.impl;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;
import org.oclc.purl.dsdl.svrl.FailedAssert;
import org.oclc.purl.dsdl.svrl.SchematronOutput;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import lombok.AccessLevel;
import lombok.Getter;
@ -19,8 +18,6 @@ import de.kosit.validationtool.api.XmlError;
import de.kosit.validationtool.model.reportInput.CreateReportInput;
import net.sf.saxon.dom.NodeOverNodeInfo;
import net.sf.saxon.s9api.SaxonApiException;
import net.sf.saxon.s9api.Serializer;
import net.sf.saxon.s9api.XdmNode;
/**
@ -43,14 +40,14 @@ public class DefaultResult implements Result {
@Getter
private final AcceptRecommendation acceptRecommendation;
private final HtmlExtraction htmlExtraction;
private final HtmlExtractor htmlExtraction;
@Setter(AccessLevel.PACKAGE)
@Getter
private List<XmlError> schemaViolations = new ArrayList<>();
@Getter
private List<String> processingErrors = new ArrayList<>();
private final List<String> processingErrors = new ArrayList<>();
@Getter
@Setter(AccessLevel.PACKAGE)
@ -60,10 +57,10 @@ public class DefaultResult implements Result {
@Setter
private boolean processingSuccessful;
public DefaultResult(final XdmNode report, final AcceptRecommendation recommendation, final ContentRepository repository) {
public DefaultResult(final XdmNode report, final AcceptRecommendation recommendation, final HtmlExtractor htmlExtractor) {
this.report = report;
this.acceptRecommendation = recommendation;
this.htmlExtraction = new HtmlExtraction(repository);
this.htmlExtraction = htmlExtractor;
}
/**
@ -87,30 +84,23 @@ public class DefaultResult implements Result {
}
/**
 * Converts every node returned by {@link #extractHtml()} into a string.
 *
 * @return one string per extracted node, in the order the nodes were returned
 */
public List<String> extractHtmlAsString() {
    final List<XdmNode> htmlNodes = extractHtml();
    return htmlNodes.stream()
            .map(htmlNode -> convertToString(htmlNode))
            .collect(Collectors.toList());
}
/**
 * Serializes a single node into a string using a Saxon {@link Serializer}.
 *
 * @param element the node to serialize
 * @return the text the serializer wrote for the node
 * @throws IllegalStateException if serialization fails with a {@link SaxonApiException}
 */
private static String convertToString(final XdmNode element) {
    final StringWriter buffer = new StringWriter();
    try {
        // The serializer writes into the buffer; its content is read back once serialization is done.
        final Serializer nodeSerializer = ObjectFactory.createProcessor().newSerializer(buffer);
        nodeSerializer.serializeNode(element);
    } catch (final SaxonApiException failure) {
        throw new IllegalStateException("Can not convert to string", failure);
    }
    return buffer.toString();
}
/**
 * Converts every node returned by {@link #extractHtml()} into a DOM {@link Element}.
 *
 * @return one element per extracted node, in the order the nodes were returned
 */
public List<Element> extractHtmlAsElement() {
    final List<XdmNode> htmlNodes = extractHtml();
    return htmlNodes.stream()
            .map(htmlNode -> convertToElement(htmlNode))
            .collect(Collectors.toList());
}
private static Element convertToElement(final XdmNode xdmItem) {
return (Element) NodeOverNodeInfo.wrap(xdmItem.getUnderlyingNode());
return this.htmlExtraction.extractAsString(getReport());
}
/**
 * Runs the HTML extraction helper on the report of this result.
 *
 * @return the nodes the extraction helper returns for the report
 */
public List<XdmNode> extractHtml() {
    final XdmNode currentReport = getReport();
    return this.htmlExtraction.extract(currentReport);
}
/**
 * Returns all Schematron results of type {@link FailedAssert}.
 *
 * @return the {@link FailedAssert} entries
 */
public List<FailedAssert> getFailedAsserts() {
    final List<FailedAssert> failedAsserts = filterSchematronResult(FailedAssert.class);
    return failedAsserts;
}
/**
 * Selects the entries of {@code getSchematronResult()} that are instances of the given type.
 *
 * @param type the class to filter by
 * @param <T> the element type of the returned list
 * @return the matching entries, cast to {@code T}
 */
private <T> List<T> filterSchematronResult(final Class<T> type) {
    return getSchematronResult().stream()
            .filter(candidate -> type.isInstance(candidate))
            .map(candidate -> type.cast(candidate))
            .collect(Collectors.toList());
}
}

View file

@ -1,5 +1,6 @@
package de.kosit.validationtool.impl;
import java.io.StringWriter;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@ -8,6 +9,7 @@ import java.util.stream.Collectors;
import lombok.RequiredArgsConstructor;
import net.sf.saxon.s9api.SaxonApiException;
import net.sf.saxon.s9api.Serializer;
import net.sf.saxon.s9api.XPathExecutable;
import net.sf.saxon.s9api.XPathSelector;
import net.sf.saxon.s9api.XdmItem;
@ -19,7 +21,7 @@ import net.sf.saxon.s9api.XdmNode;
* @author Andreas Penski
*/
@RequiredArgsConstructor
public class HtmlExtraction {
public class HtmlExtractor {
private final ContentRepository repository;
@ -48,4 +50,25 @@ public class HtmlExtraction {
}
return executable.load();
}
/**
 * Writes the given node through a Saxon {@link Serializer} and returns the produced text.
 *
 * @param element the node to serialize
 * @return the serialized form of the node
 * @throws IllegalStateException if Saxon raises a {@link SaxonApiException} while serializing
 */
private static String convertToString(final XdmNode element) {
    final StringWriter output = new StringWriter();
    try {
        ObjectFactory.createProcessor().newSerializer(output).serializeNode(element);
    } catch (final SaxonApiException cause) {
        throw new IllegalStateException("Can not convert to string", cause);
    }
    return output.toString();
}
/**
 * Extracts any HTML nodes that may be present and returns them as strings.
 *
 * @param node the root node
 * @return the HTML fragments as strings
 */
public List<String> extractAsString(final XdmNode node) {
    final List<XdmNode> extracted = extract(node);
    return extracted.stream()
            .map(htmlNode -> convertToString(htmlNode))
            .collect(Collectors.toList());
}
}