#39 The supplied Input is no longer unnecessarily read into memory

This commit is contained in:
Andreas Penski 2019-12-18 15:57:44 +01:00
parent d7ee019194
commit efd4fd5fff
63 changed files with 1111 additions and 18196 deletions

View file

@ -19,24 +19,46 @@
package de.kosit.validationtool.api;
import lombok.*;
import java.io.IOException;
import java.io.InputStream;
import javax.xml.transform.Source;
/**
* Eine Datei in eingelesener Form.
* An input for the validator.
*
* @author apenski
*/
@Getter
@RequiredArgsConstructor (access = AccessLevel.PACKAGE)
@AllArgsConstructor (access = AccessLevel.PACKAGE)
public class Input {
private final byte[] content;
public interface Input {
private final String name;
/**
* The name of the input for document identification
*
* @return the name
*/
String getName();
private byte[] hashCode;
/**
* The hashcode for document identification
*
* @return the computed hashcode
*/
byte[] getHashCode();
private String digestAlgorithm;
/**
* The digest algorithm used for computing the {@link #getHashCode()}
*
* @return the name of the digest algorithm
*/
String getDigestAlgorithm();
/**
* Provides a {@link Source} for this input which carries the actual data, usually backed by a newly opened
* {@link InputStream}
*
* @return a {@link Source} giving access to the data of this input
* @throws IOException on I/O errors while opening the underlying stream
*/
Source getSource() throws IOException;
}

View file

@ -22,7 +22,6 @@ package de.kosit.validationtool.api;
import static org.apache.commons.lang3.StringUtils.isNotEmpty;
import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
@ -30,6 +29,7 @@ import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URL;
import java.net.URLConnection;
import java.nio.file.Files;
import java.nio.file.Path;
import java.security.DigestInputStream;
@ -37,12 +37,19 @@ import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import javax.xml.bind.DatatypeConverter;
import javax.xml.transform.Source;
import javax.xml.transform.stream.StreamSource;
import org.apache.commons.lang3.StringUtils;
import lombok.Getter;
import lombok.extern.slf4j.Slf4j;
import de.kosit.validationtool.impl.input.ByteArrayInput;
import de.kosit.validationtool.impl.input.ResourceInput;
import de.kosit.validationtool.impl.input.SourceInput;
import de.kosit.validationtool.impl.input.StreamHelper;
/**
* Service zum Einlesen des Test-Objekts in den Speicher. Beim Einlesen wird gleichzeitig eine Prüfsumme ermittelt und
* mit dem Ergebnis mitgeführt.
@ -69,7 +76,8 @@ public class InputFactory {
InputFactory(final String specifiedAlgorithm) {
this.algorithm = isNotEmpty(specifiedAlgorithm) ? specifiedAlgorithm : DEFAULT_ALGORITH;
createDigest();
// check validity
StreamHelper.createDigest(this.algorithm);
}
/**
@ -140,11 +148,48 @@ public class InputFactory {
*/
public static Input read(final URL url, final String digestAlgorithm) {
checkNull(url);
checkNotEmpty(url.getFile());
try {
return read(url.openStream(), url.getFile(), digestAlgorithm);
} catch (final IOException e) {
final URLConnection urlConnection = url.openConnection();
urlConnection.connect();
} catch (IOException e) {
throw new IllegalArgumentException(MESSAGE_OPEN_STREAM_ERROR + url, e);
}
return new ResourceInput(url, url.getFile(), digestAlgorithm);
}
/**
* Reads a test document from a {@link StreamSource} using the default digest algorithm. Delegates to
* {@link #read(StreamSource, String)}.
*
* @param source the source providing the document
* @return an {@link Input}
*/
public static Input read(final StreamSource source) {
return read(source, DEFAULT_ALGORITH);
}
/**
* Reads a test document from a {@link StreamSource} using a specified digest algorithm. No precomputed hashcode is
* handed over ({@code null} is passed on).
*
* @param source the source providing the document
* @param digestAlgorithm the digest algorithm
* @return an {@link Input}
*/
public static Input read(final StreamSource source, final String digestAlgorithm) {
return read(source, digestAlgorithm, null);
}
/**
* Reads a test document from a {@link Source} using a specified digest algorithm and an optional, already known
* hashcode. The system id of the source is used as the name of the resulting {@link Input}.
*
* @param source the source providing the document
* @param digestAlgorithm the digest algorithm
* @param hashcode a precomputed hashcode of the document; the overloads without this parameter pass {@code null}
* @return an {@link Input}
*/
public static Input read(final Source source, final String digestAlgorithm, final byte[] hashcode) {
checkNull(source);
// the system id of the source doubles as the input's name
return new SourceInput(source, source.getSystemId(), digestAlgorithm, hashcode);
}
/**
@ -162,6 +207,7 @@ public class InputFactory {
} catch (final IOException e) {
throw new IllegalArgumentException(MESSAGE_OPEN_STREAM_ERROR + file, e);
}
}
/**
@ -186,7 +232,14 @@ public class InputFactory {
*/
public static Input read(final byte[] input, final String name, final String digestAlgorithm) {
checkNull(input);
return read(new ByteArrayInputStream(input), name, digestAlgorithm);
checkNotEmpty(name);
return new ByteArrayInput(input, name, digestAlgorithm);
}
/**
 * Ensures that a usable input name is given.
 *
 * @param name the name to check
 * @throws IllegalArgumentException if the name is {@code null}, empty or consists of whitespace only
 */
private static void checkNotEmpty(final String name) {
    if (StringUtils.isBlank(name)) {
        // isBlank also rejects "" and whitespace-only names, so the message must not claim 'null' only
        throw new IllegalArgumentException("Input name can not be null or blank");
    }
}
private static void checkNull(final Object input) {
@ -221,7 +274,7 @@ public class InputFactory {
private Input readStream(final InputStream inputStream, final String name) {
if (StringUtils.isNotBlank(name)) {
log.debug("Generating hashcode for {} using {} algorithm", name, getAlgorithm());
final MessageDigest digest = createDigest();
final MessageDigest digest = StreamHelper.createDigest(getAlgorithm());
final byte[] buffer = new byte[DEFAULT_BUFFER_SIZE];
try ( final BufferedInputStream bis = new BufferedInputStream(inputStream);
final DigestInputStream dis = new DigestInputStream(bis, digest);
@ -236,7 +289,9 @@ public class InputFactory {
final byte[] hash = digest.digest();
log.debug("Generated hashcode for {} is {}", name, DatatypeConverter.printHexBinary(hash));
out.flush();
return new Input(out.toByteArray(), name, hash, digest.getAlgorithm());
final ByteArrayInput input = new ByteArrayInput(out.toByteArray(), name, digest.getAlgorithm());
input.setHashCode(hash);
return input;
} catch (final IOException e) {
throw new IllegalArgumentException(MESSAGE_OPEN_STREAM_ERROR + name, e);
}
@ -245,15 +300,4 @@ public class InputFactory {
}
}
private MessageDigest createDigest() {
try {
final MessageDigest digest;
digest = MessageDigest.getInstance(getAlgorithm());
return digest;
} catch (final NoSuchAlgorithmException e) {
// should not happen
throw new IllegalStateException(String.format("Specified method %s is not available", getAlgorithm()), e);
}
}
}

View file

@ -240,6 +240,7 @@ public class CommandLineApplication {
return result ? 0 : 1;
} catch (final Exception e) {
e.printStackTrace();
if (cmd.hasOption(DEBUG.getOpt())) {
log.error(e.getMessage(), e);
} else {

View file

@ -27,10 +27,10 @@ import lombok.extern.slf4j.Slf4j;
import de.kosit.validationtool.api.Check;
import de.kosit.validationtool.api.CheckConfiguration;
import de.kosit.validationtool.api.Input;
import de.kosit.validationtool.api.InputFactory;
import de.kosit.validationtool.impl.DefaultCheck;
import de.kosit.validationtool.impl.ObjectFactory;
import de.kosit.validationtool.impl.input.ByteArrayInput;
import de.kosit.validationtool.model.scenarios.Scenarios;
/**
@ -55,7 +55,7 @@ class Daemon {
private final Check implemenation;
HttpServerHandler(Check check) {
HttpServerHandler(final Check check) {
this.implemenation = check;
}
@ -66,24 +66,25 @@ class Daemon {
* soll.
*/
@Override
public void handle(HttpExchange httpExchange) throws IOException {
public void handle(final HttpExchange httpExchange) throws IOException {
try {
log.debug("Incoming request");
String requestMethod = httpExchange.getRequestMethod();
final String requestMethod = httpExchange.getRequestMethod();
if (requestMethod.equals("POST")) {
InputStream inputStream = httpExchange.getRequestBody();
Input serverInput = InputFactory.read(inputStream, "Prüfling" + counter.incrementAndGet());
final InputStream inputStream = httpExchange.getRequestBody();
final ByteArrayInput serverInput = (ByteArrayInput) InputFactory.read(inputStream,
"Prüfling" + counter.incrementAndGet());
int contentLength = serverInput.getContent().length;
if (contentLength != 0) {
writeOutputstreamArray(httpExchange, implemenation.check(serverInput));
if (serverInput.getLength() > 0) {
writeOutputstreamArray(httpExchange, this.implemenation.check(serverInput));
} else {
writeError(httpExchange, 400, "XML-Inhalt erforderlich!");
}
} else {
writeError(httpExchange, 405, "Es ist nur die POST-Methode erlaubt!");
}
} catch (Exception e) {
} catch (final Exception e) {
writeError(httpExchange, 500, "Interner Fehler bei der Verarbeitung des Requests: " + e.getMessage());
log.error("Es ist ein Fehler aufgetreten. Das Dokument kann nicht geprüft werden", e);
}
@ -100,17 +101,17 @@ class Daemon {
private final Scenarios scenarios;
HealthHandler(Scenarios scenarios) {
HealthHandler(final Scenarios scenarios) {
this.scenarios = scenarios;
}
@Override
public void handle(HttpExchange httpExchange) throws IOException {
Health health = new Health(scenarios);
Document doc = health.writeHealthXml();
public void handle(final HttpExchange httpExchange) throws IOException {
final Health health = new Health(this.scenarios);
final Document doc = health.writeHealthXml();
try {
writeOutputstreamArray(httpExchange, doc);
} catch (TransformerException e) {
} catch (final TransformerException e) {
writeError(httpExchange, 500, e.getMessage());
log.error("Fehler beim Erzeugen der Status-Information", e);
}
@ -134,9 +135,9 @@ class Daemon {
* @param rCode der Code-Status
* @param response die String antwort, die ich anzeigen möchte
*/
private static void writeError(HttpExchange httpExchange, int rCode, String response) throws IOException {
private static void writeError(final HttpExchange httpExchange, final int rCode, final String response) throws IOException {
httpExchange.sendResponseHeaders(rCode, response.length());
OutputStream os = httpExchange.getResponseBody();
final OutputStream os = httpExchange.getResponseBody();
os.write(response.getBytes());
os.close();
}
@ -147,9 +148,10 @@ class Daemon {
* @param httpExchange um den Antwort Body zu erhalten
* @param doc der Report
*/
private static void writeOutputstreamArray(HttpExchange httpExchange, Document doc) throws IOException, TransformerException {
private static void writeOutputstreamArray(final HttpExchange httpExchange, final Document doc)
throws IOException, TransformerException {
final byte[] bytes = serialize(doc);
OutputStream os = httpExchange.getResponseBody();
final OutputStream os = httpExchange.getResponseBody();
httpExchange.getResponseHeaders().add("Content-Type", "application/xml");
httpExchange.sendResponseHeaders(200, bytes.length);
os.write(bytes);
@ -162,15 +164,15 @@ class Daemon {
*
* @param report Vom Typ Dokument, aka Report .
*/
private static byte[] serialize(Document report) throws TransformerException {
private static byte[] serialize(final Document report) throws TransformerException {
try ( ByteArrayOutputStream bArrayOS = new ByteArrayOutputStream() ) {
DOMSource source = new DOMSource(report);
StreamResult streamResult = new StreamResult(bArrayOS);
Transformer transformer = ObjectFactory.createTransformer(true);
try ( final ByteArrayOutputStream bArrayOS = new ByteArrayOutputStream() ) {
final DOMSource source = new DOMSource(report);
final StreamResult streamResult = new StreamResult(bArrayOS);
final Transformer transformer = ObjectFactory.createTransformer(true);
transformer.transform(source, streamResult);
return bArrayOS.toByteArray();
} catch (IOException e) {
} catch (final IOException e) {
log.error("Report {}", e.getMessage(), e);
throw new IllegalStateException(e);
}
@ -180,19 +182,19 @@ class Daemon {
* Methode zum Starten des Servers
*/
void startServer() {
CheckConfiguration config = new CheckConfiguration(scenarioDefinition);
config.setScenarioRepository(repository);
final CheckConfiguration config = new CheckConfiguration(this.scenarioDefinition);
config.setScenarioRepository(this.repository);
HttpServer server = null;
try {
server = HttpServer.create(new InetSocketAddress(hostName, port), 0);
DefaultCheck check = new DefaultCheck(config);
server = HttpServer.create(new InetSocketAddress(this.hostName, this.port), 0);
final DefaultCheck check = new DefaultCheck(config);
server.createContext("/", new HttpServerHandler(check));
server.createContext("/health", new HealthHandler(check.getRepository().getScenarios()));
server.setExecutor(Executors.newFixedThreadPool(threadCount));
server.setExecutor(Executors.newFixedThreadPool(this.threadCount));
server.start();
log.info("Server unter Port {} ist erfolgreich gestartet", port);
} catch (IOException e) {
log.error("Fehler beim HttpServer erstellen!", e.getMessage(), e);
log.info("Server unter Port {} ist erfolgreich gestartet", this.port);
} catch (final IOException e) {
log.error("Fehler beim HttpServer erstellen: {}", e.getMessage(), e);
}
}
}

View file

@ -81,8 +81,8 @@ public class DefaultCheck implements Check {
this.repository = new ScenarioRepository(this.contentRepository);
this.repository.initialize(configuration);
this.checkSteps = new ArrayList<>();
this.checkSteps.add(new CreateDocumentIdentificationAction());
this.checkSteps.add(new DocumentParseAction());
this.checkSteps.add(new CreateDocumentIdentificationAction());
this.checkSteps.add(new ScenarioSelectionAction(this.repository));
this.checkSteps.add(new SchemaValidationAction());
this.checkSteps.add(new SchematronValidationAction(this.contentRepository, this.conversionService));

View file

@ -0,0 +1,57 @@
package de.kosit.validationtool.impl.input;
import java.io.InputStream;
import lombok.Getter;
import lombok.Setter;
import lombok.extern.slf4j.Slf4j;
import de.kosit.validationtool.api.Input;
/**
 * Common base for the {@link Input} implementations of this package. It stores the hashcode and the length of an
 * input and offers {@link #wrap(InputStream)} to decorate a stream with the helpers of {@link StreamHelper}, which
 * report both values back to this object.
 *
 * @author Andreas Penski
 */
@Slf4j
public abstract class AbstractInput implements Input, LazyReadInput {

    private byte[] hashCode;

    @Getter
    @Setter
    private long length;

    /**
     * Returns the hashcode of this input.
     *
     * @return the hashcode
     * @throws IllegalStateException if no hashcode has been set so far
     */
    @Override
    public byte[] getHashCode() {
        if (this.hashCode != null) {
            return this.hashCode;
        }
        throw new IllegalStateException("Hashcode is not computed yet");
    }

    /**
     * Decorates the given stream: with a digesting wrapper as long as no hashcode is known and with a counting
     * wrapper as long as the length is reported as 0.
     *
     * @param stream the stream to decorate
     * @return the decorated stream, or the given stream if no decoration is necessary
     */
    protected InputStream wrap(final InputStream stream) {
        final InputStream digesting = isHashcodeComputed() ? stream
                : StreamHelper.wrapDigesting(this, stream, getDigestAlgorithm());
        return getLength() == 0 ? StreamHelper.wrapCount(this, digesting) : digesting;
    }

    @Override
    public boolean isHashcodeComputed() {
        return this.hashCode != null;
    }

    @Override
    public void setHashCode(final byte[] digest) {
        this.hashCode = digest;
    }

    /**
     * Tells whether the source of this input can be obtained and read more than once.
     *
     * @return {@code true} for this base class; subclasses may override
     */
    public boolean supportsMultipleReads() {
        return true;
    }
}

View file

@ -0,0 +1,39 @@
package de.kosit.validationtool.impl.input;
import java.io.ByteArrayInputStream;
import java.io.InputStream;
import javax.xml.transform.Source;
import javax.xml.transform.stream.StreamSource;
import lombok.AllArgsConstructor;
import lombok.Getter;
/**
 * Classical in-memory {@link de.kosit.validationtool.api.Input} holding the complete document as a byte array.
 * Reading the whole file into memory prior to validating is not memory efficient. Consider using the
 * {@link ResourceInput} instead.
 *
 * @author Andreas Penski
 */
@Getter
@AllArgsConstructor
public class ByteArrayInput extends AbstractInput {

    private final byte[] content;

    private final String name;

    private final String digestAlgorithm;

    /**
     * @return the number of bytes held by this input, 0 if there is no content
     */
    @Override
    public long getLength() {
        if (this.content == null) {
            return 0;
        }
        return this.content.length;
    }

    /**
     * Creates a new stream over the in-memory content, decorated via {@link #wrap(InputStream)}.
     *
     * @return a {@link StreamSource} using the name of this input as system id
     */
    @Override
    public Source getSource() {
        final InputStream data = new ByteArrayInputStream(this.content);
        return new StreamSource(wrap(data), getName());
    }
}

View file

@ -0,0 +1,36 @@
package de.kosit.validationtool.impl.input;
import java.io.InputStream;
import de.kosit.validationtool.api.Input;
/**
* Internal callback interface used for lazy generation of the hashcode for document identification and for
* determining the length of an {@link Input}. The stream wrappers created by {@link StreamHelper} report their
* results through this interface when the wrapped stream is closed.
*
* @see StreamHelper#wrapDigesting(LazyReadInput, InputStream, String) for details
* @author Andreas Penski
*/
interface LazyReadInput {
/**
* Sets the hashcode.
*
* @param digest the digest; the digesting wrapper of {@link StreamHelper} passes the digest of the data read
*/
void setHashCode(byte[] digest);
/**
* Determines whether a hashcode has been computed yet.
*
* @return true when computed
*/
boolean isHashcodeComputed();
/**
* Sets the length of the {@link Input}.
*
* @param length the length; the counting wrapper of {@link StreamHelper} passes the number of bytes read
*/
void setLength(long length);
}

View file

@ -0,0 +1,43 @@
package de.kosit.validationtool.impl.input;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import javax.xml.transform.Source;
import javax.xml.transform.stream.StreamSource;
import lombok.Getter;
import lombok.RequiredArgsConstructor;
import de.kosit.validationtool.api.Input;
/**
 * An {@link Input} carrying a {@link URL}, which can be used for all 'locatable' inputs such as {@link File},
 * {@link java.nio.file.Path} and any other {@link URL}.
 *
 * The data is NOT read into memory, so this implementation is memory efficient. The validation process MAY read
 * the stream more than once. Make sure that the {@link URL} points to fast I/O devices.
 *
 * @author Andreas Penski
 */
@Getter
@RequiredArgsConstructor
public class ResourceInput extends AbstractInput {

    private final URL url;

    private final String name;

    private final String digestAlgorithm;

    /**
     * Opens a new stream to the {@link URL} of this input. The stream is decorated via
     * {@link #wrap(InputStream)}, the same way as for the other inputs, so that the hashcode and the length get
     * determined while the data is read.
     *
     * @return a {@link StreamSource} using the name of this input as system id
     * @throws IOException if the stream can not be opened
     */
    @Override
    public Source getSource() throws IOException {
        final InputStream raw = this.url.openStream();
        try {
            return new StreamSource(wrap(raw), this.name);
        } catch (final RuntimeException e) {
            // wrapping may fail (e.g. unknown digest algorithm); do not leak the already opened stream
            try {
                raw.close();
            } catch (final IOException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
    }
}

View file

@ -0,0 +1,108 @@
package de.kosit.validationtool.impl.input;
import java.io.IOException;
import java.nio.charset.Charset;
import javax.xml.transform.Source;
import javax.xml.transform.stream.StreamSource;
import org.apache.commons.io.input.ReaderInputStream;
import org.apache.commons.lang3.NotImplementedException;
import lombok.Getter;
import lombok.extern.slf4j.Slf4j;
/**
 * A validator {@link de.kosit.validationtool.api.Input} based on a {@link Source}. Currently only
 * {@link StreamSource} instances are accepted. As long as no hashcode is known, the underlying stream (or the
 * reader, converted using the system default charset) is wrapped when the source is requested.
 *
 * @author Andreas Penski
 */
@Getter
@Slf4j
public class SourceInput extends AbstractInput {

    private final Source source;

    private final String name;

    private final String digestAlgorithm;

    /**
     * Creates an input without a precomputed hashcode.
     *
     * @param source the source
     * @param name the name of the input
     * @param digestAlgorithm the digest algorithm
     */
    public SourceInput(final StreamSource source, final String name, final String digestAlgorithm) {
        this(source, name, digestAlgorithm, null);
    }

    /**
     * Creates an input with an optional precomputed hashcode.
     *
     * @param source the source
     * @param name the name of the input
     * @param digestAlgorithm the digest algorithm
     * @param hashCode the hashcode, may be {@code null}
     * @throws IllegalStateException if the source is not a {@link StreamSource}
     */
    public SourceInput(final Source source, final String name, final String digestAlgorithm, final byte[] hashCode) {
        this.source = source;
        this.name = name;
        this.digestAlgorithm = digestAlgorithm;
        setHashCode(hashCode);
        validate();
    }

    /** Rejects unsupported sources and warns when a reader has to be wrapped for hashing. */
    private void validate() {
        if (!isStreamSource()) {
            throw new IllegalStateException("Unsupported source. Only StreamSource supported yet");
        }
        final boolean withoutStream = ((StreamSource) this.source).getInputStream() == null;
        if (withoutStream && !isHashcodeComputed()) {
            log.warn("No hashcode supplied, will wrap the reader using system default charset");
        }
    }

    /**
     * Returns the source of this input. Without a known hashcode a wrapped variant is returned, otherwise the
     * original source as long as it does not appear to be consumed.
     *
     * @return the source
     * @throws IOException declared by the interface
     * @throws IllegalStateException if the source is unsupported or has already been read
     */
    @Override
    public Source getSource() throws IOException {
        if (!isStreamSource()) {
            throw new IllegalStateException("Unsupported source. Only InputStream-based StreamSource supported yet");
        }
        if (!isHashcodeComputed()) {
            return wrap();
        }
        if (isConsumed()) {
            throw new IllegalStateException("A StreamSource can only read once");
        }
        return this.source;
    }

    /** Probes stream or reader of the source; an I/O error while probing counts as consumed. */
    private boolean isConsumed() throws IOException {
        if (!isStreamSource()) {
            throw new NotImplementedException("Supports only StreamSource yet");
        }
        final StreamSource streamSource = (StreamSource) this.source;
        try {
            if (streamSource.getInputStream() != null && streamSource.getInputStream().available() == 0) {
                return true;
            }
            return streamSource.getReader() != null && !streamSource.getReader().ready();
        } catch (final IOException e) {
            return true;
        }
    }

    private boolean isStreamSource() {
        return this.source instanceof StreamSource;
    }

    /** Creates a new source around the wrapped stream or reader; falls back to the original source. */
    private Source wrap() {
        if (!isStreamSource()) {
            return this.source;
        }
        final StreamSource original = (StreamSource) this.source;
        if (original.getInputStream() != null) {
            return new StreamSource(wrap(original.getInputStream()), this.source.getSystemId());
        }
        if (original.getReader() != null) {
            return new StreamSource(wrap(new ReaderInputStream(original.getReader(), Charset.defaultCharset())),
                    this.source.getSystemId());
        }
        return this.source;
    }

    /**
     * @return always {@code false} for this implementation
     */
    @Override
    public boolean supportsMultipleReads() {
        return false;
    }
}

View file

@ -0,0 +1,95 @@
package de.kosit.validationtool.impl.input;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.security.DigestInputStream;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import org.apache.commons.io.input.CountingInputStream;
/**
 * Helper for stream handling.
 *
 * @author Andreas Penski
 */
public class StreamHelper {

    /**
     * Helper class which generates the hashcode while reading the stream, e.g. for parsing the document. This allows
     * generating the hashcode without an additional reading step. The hashcode is handed over to the
     * {@link LazyReadInput} when the stream is closed.
     */
    private static class DigestingInputStream extends FilterInputStream {

        private final MessageDigest digest;

        private final LazyReadInput reference;

        /** Guards against publishing the digest more than once. */
        private boolean digestPublished;

        DigestingInputStream(final LazyReadInput input, final InputStream in, final MessageDigest digest) {
            super(new DigestInputStream(in, digest));
            this.digest = digest;
            this.reference = input;
        }

        @Override
        public void close() throws IOException {
            super.close();
            // MessageDigest#digest() resets the digest. Streams are frequently closed more than once (parser and
            // try-with-resources), a second call would replace the hashcode with the hash of empty input.
            if (!this.digestPublished) {
                this.digestPublished = true;
                this.reference.setHashCode(this.digest.digest());
            }
        }
    }

    /**
     * Helper class which counts the bytes read and hands the count over to the {@link LazyReadInput} when the stream
     * is closed.
     */
    private static class CountInputStream extends FilterInputStream {

        private final LazyReadInput reference;

        public CountInputStream(final LazyReadInput input, final InputStream stream) {
            super(new CountingInputStream(stream));
            this.reference = input;
        }

        @Override
        public void close() throws IOException {
            super.close();
            this.reference.setLength(((CountingInputStream) this.in).getByteCount());
        }
    }

    private StreamHelper() {
        // hide
    }

    /**
     * Creates a {@link MessageDigest} for the given algorithm.
     *
     * @param algorithm the name of the digest algorithm
     * @return a new {@link MessageDigest}
     * @throws IllegalArgumentException if the algorithm is not available
     */
    public static MessageDigest createDigest(final String algorithm) {
        try {
            return MessageDigest.getInstance(algorithm);
        } catch (final NoSuchAlgorithmException e) {
            throw new IllegalArgumentException(String.format("Specified method %s is not available", algorithm), e);
        }
    }

    /**
     * Wraps the {@link InputStream} with a length counting implementation.
     *
     * @param input the {@link LazyReadInput input} receiving the length
     * @param stream the stream
     * @return a wrapped stream
     */
    public static InputStream wrapCount(final LazyReadInput input, final InputStream stream) {
        return new CountInputStream(input, stream);
    }

    /**
     * Wraps the {@link InputStream} with an implementation that generates a hash sum over the stream data.
     *
     * @param input the {@link LazyReadInput input} receiving the hashcode
     * @param stream the stream
     * @param digestAlgorithm the name of the digest algorithm
     * @return a wrapped stream
     * @throws IllegalArgumentException if the digest algorithm is not available
     */
    public static InputStream wrapDigesting(final LazyReadInput input, final InputStream stream, final String digestAlgorithm) {
        return new DigestingInputStream(input, stream, createDigest(digestAlgorithm));
    }
}

View file

@ -19,14 +19,10 @@
package de.kosit.validationtool.impl.tasks;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Collections;
import java.util.stream.Collectors;
import javax.xml.transform.stream.StreamSource;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
@ -63,10 +59,10 @@ public class DocumentParseAction implements CheckAction {
throw new IllegalArgumentException("Input may not be null");
}
Result<XdmNode, XMLSyntaxError> result;
try ( final InputStream input = new ByteArrayInputStream(content.getContent()) ) {
try {
final DocumentBuilder builder = ObjectFactory.createProcessor().newDocumentBuilder();
builder.setLineNumbering(true);
final XdmNode doc = builder.build(new StreamSource(input));
final XdmNode doc = builder.build(content.getSource());
result = new Result<>(doc, Collections.emptyList());
} catch (final SaxonApiException | IOException e) {
log.debug("Exception while parsing {}", content.getName(), e);

View file

@ -20,67 +20,201 @@
package de.kosit.validationtool.impl.tasks;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import javax.xml.transform.Source;
import javax.xml.transform.stream.StreamSource;
import javax.xml.validation.Validator;
import org.apache.commons.io.FileUtils;
import org.xml.sax.SAXException;
import lombok.AccessLevel;
import lombok.Getter;
import lombok.Setter;
import lombok.extern.slf4j.Slf4j;
import de.kosit.validationtool.api.Input;
import de.kosit.validationtool.impl.CollectingErrorEventHandler;
import de.kosit.validationtool.impl.ObjectFactory;
import de.kosit.validationtool.impl.input.AbstractInput;
import de.kosit.validationtool.impl.model.Result;
import de.kosit.validationtool.model.reportInput.CreateReportInput;
import de.kosit.validationtool.model.reportInput.ValidationResultsXmlSchema;
import de.kosit.validationtool.model.reportInput.XMLSyntaxError;
import de.kosit.validationtool.model.scenarios.ScenarioType;
import net.sf.saxon.s9api.SaxonApiException;
import net.sf.saxon.s9api.Serializer;
import net.sf.saxon.s9api.XdmNode;
/**
* Schema-Validierung der Eingabe-Datei mittels Schema-Definition aus dem identifizierten Szenario.
* Schema validation of the {@link Input} with the schema of the supplied scenario. This implementation is based on JDK
* functionality and therefore needs a {@link Source} to do the actual validation. Since we base the validator on Saxon
* HE functionality, we have no support for schema in Saxon (e.g. the in memory version of the document is not
* schema-aware) and need to re-read the actual source.
*
* Since the actual {@link Input} implementation may not be readable twice, we must serialize the previously read
* document in that case. This implementation tries to do the validation in an efficient manner. If possible the source
* is read a second time to validate. If not, the source is serialized to the heap upon re-read/validation up to a
* configurable file size. The document is serialized to a temporary file otherwise.
*
* @author Andreas Penski
*/
@Slf4j
public class SchemaValidationAction implements CheckAction {
private Result<Boolean, XMLSyntaxError> validate(byte[] document, ScenarioType scenarioType) {
/**
 * {@link SerializedDocument} keeping the serialized document as a byte array on the heap.
 */
private static class ByteArraySerializedDocument implements SerializedDocument {

    private byte[] bytes;

    @Override
    public InputStream openStream() {
        return new ByteArrayInputStream(this.bytes);
    }

    @Override
    public void serialize(final XdmNode node) throws SaxonApiException, IOException {
        try ( final ByteArrayOutputStream buffer = new ByteArrayOutputStream() ) {
            final Serializer writer = ObjectFactory.createProcessor().newSerializer();
            writer.setOutputStream(buffer);
            writer.serializeNode(node);
            writer.close();
            // keep a copy of what has been written
            this.bytes = buffer.toByteArray();
        }
    }

    @Override
    public void close() {
        // nothing do do
    }
}
/**
 * {@link SerializedDocument} writing the serialized document to a temporary file, which is deleted on
 * {@link #close()}.
 */
private static class FileSerializedDocument implements SerializedDocument {

    private final Path file;

    FileSerializedDocument() throws IOException {
        this.file = Files.createTempFile("validator", ".xml");
    }

    @Override
    public InputStream openStream() throws IOException {
        return Files.newInputStream(this.file);
    }

    @Override
    public void serialize(final XdmNode node) throws SaxonApiException, IOException {
        try ( final OutputStream target = Files.newOutputStream(this.file) ) {
            final Serializer writer = ObjectFactory.createProcessor().newSerializer();
            writer.setOutputStream(target);
            writer.serializeNode(node);
            writer.close();
        }
    }

    @Override
    public void close() throws IOException {
        // remove the temporary file, if it still exists
        Files.deleteIfExists(this.file);
    }
}
private static final Long BA_LIMIT = 10L;
private static final String LIMIT_PARAMETER = "schema.validation.inmem.limit";
@Setter(AccessLevel.PACKAGE)
@Getter
private long inMemoryLimit = Long.parseLong(System.getProperty(LIMIT_PARAMETER, BA_LIMIT.toString())) * FileUtils.ONE_MB;
private Result<Boolean, XMLSyntaxError> validate(final Bag results, final ScenarioType scenarioType) {
log.debug("Validating document using scenario {}", scenarioType.getName());
final CollectingErrorEventHandler errorHandler = new CollectingErrorEventHandler();
try ( InputStream input = new ByteArrayInputStream(document) ) {
try ( final SourceProvider validateInput = resolveSource(results) ) {
final Validator validator = ObjectFactory.createValidator(scenarioType.getSchema());
validator.setErrorHandler(errorHandler);
validator.validate(new StreamSource(input));
validator.validate(validateInput.getSource());
return new Result<>(!errorHandler.hasErrors(), errorHandler.getErrors());
} catch (SAXException | IOException e) {
} catch (final SAXException | SaxonApiException | IOException e) {
throw new IllegalStateException("Error validating document", e);
}
}
@Override
public void check(Bag results) {
public void check(final Bag results) {
final CreateReportInput report = results.getReportInput();
final ScenarioType scenario = results.getScenarioSelectionResult().getObject();
final Result<Boolean, XMLSyntaxError> validateResult = validate(results.getInput().getContent(), scenario);
final Result<Boolean, XMLSyntaxError> validateResult = validate(results, scenario);
results.setSchemaValidationResult(validateResult);
ValidationResultsXmlSchema result = new ValidationResultsXmlSchema();
final ValidationResultsXmlSchema result = new ValidationResultsXmlSchema();
report.setValidationResultsXmlSchema(result);
result.getResource().addAll(scenario.getValidateWithXmlSchema().getResource());
if (!validateResult.isValid()) {
result.getXmlSyntaxError().addAll(validateResult.getErrors());
}
}
/**
 * Determines where the document to validate is taken from: directly from the {@link Input} if it supports multiple
 * reads, otherwise from a serialized copy of the already parsed document.
 *
 * @param results the bag carrying the input and the parser result
 * @return the provider of the source to validate
 * @throws IOException on I/O errors while serializing
 * @throws SaxonApiException on errors while serializing
 */
private SourceProvider resolveSource(final Bag results) throws IOException, SaxonApiException {
    final boolean rereadable = results.getInput() instanceof AbstractInput
            && ((AbstractInput) results.getInput()).supportsMultipleReads();
    if (!rereadable) {
        return serialize(results.getInput(), results.getParserResult().getObject());
    }
    return () -> results.getInput().getSource();
}
/**
 * Serializes the parsed document for a second read. Inputs known to be smaller than the in-memory limit are
 * serialized to the heap, all others to a temporary file.
 *
 * @param input the input the document was read from
 * @param object the parsed document
 * @return the serialized document; the caller is responsible for closing it
 * @throws IOException on I/O errors while serializing
 * @throws SaxonApiException on errors while serializing
 */
private SerializedDocument serialize(final Input input, final XdmNode object) throws IOException, SaxonApiException {
    final SerializedDocument doc;
    if (input instanceof AbstractInput && ((AbstractInput) input).getLength() < getInMemoryLimit()) {
        doc = new ByteArraySerializedDocument();
    } else {
        doc = new FileSerializedDocument();
    }
    try {
        doc.serialize(object);
    } catch (final IOException | SaxonApiException | RuntimeException e) {
        // the caller never gets hold of the document, so release it (e.g. the temporary file) here
        try {
            doc.close();
        } catch (final Exception closeFailure) {
            e.addSuppressed(closeFailure);
        }
        throw e;
    }
    return doc;
}
@Override
public boolean isSkipped(Bag results) {
public boolean isSkipped(final Bag results) {
return hasNoScenario(results);
}
private static boolean hasNoScenario(Bag results) {
private static boolean hasNoScenario(final Bag results) {
return results.getScenarioSelectionResult() == null || results.getScenarioSelectionResult().isInvalid();
}
/**
* Supplies the {@link Source} used for schema validation. It is {@link AutoCloseable}, so it can be used in a
* try-with-resources statement; the default {@link #close()} does nothing.
*/
private interface SourceProvider extends AutoCloseable {
/**
* Provides the source to validate.
*
* @return the source
* @throws IOException on I/O errors while creating the source
*/
Source getSource() throws IOException;
@Override
default void close() throws IOException {
// nothing
}
}
/**
* A {@link SourceProvider} backed by a serialized copy of an already parsed document. The {@link Source} is created
* from the stream returned by {@link #openStream()}.
*/
private interface SerializedDocument extends AutoCloseable, SourceProvider {
/**
* Serializes the given node.
*
* @param node the node to serialize
* @throws SaxonApiException on errors while serializing
* @throws IOException on I/O errors while writing
*/
void serialize(XdmNode node) throws SaxonApiException, IOException;
/**
* Opens a stream over the serialized data.
*
* @return an open {@link InputStream}
* @throws IOException on I/O errors while opening the stream
*/
InputStream openStream() throws IOException;
@Override
default Source getSource() throws IOException {
return new StreamSource(openStream());
}
}
}