#41 fix validation of garbage e.g. non xml files

This commit is contained in:
Andreas Penski (init) 2019-08-14 11:25:56 +02:00
parent 2d85fccd95
commit fa5966d464
17 changed files with 134 additions and 62 deletions

View file

@ -4,6 +4,12 @@ All notable changes to the Schematron Rules and this project will be documented
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## Upcoming version
### Added
- Convenience method for accessing information about well-formedness in Result
- Convenience method for accessing information about schema validation result in Result
### Fixed
- NPE when validating non-XML files
## 1.1.0

View file

@ -19,18 +19,23 @@
package de.kosit.validationtool.api;
import static org.apache.commons.lang3.StringUtils.isNotEmpty;
import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Path;
import java.security.DigestInputStream;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import javax.xml.bind.DatatypeConverter;
import org.apache.commons.lang3.StringUtils;
@ -38,8 +43,6 @@ import org.apache.commons.lang3.StringUtils;
import lombok.Getter;
import lombok.extern.slf4j.Slf4j;
import static org.apache.commons.lang3.StringUtils.isNotEmpty;
/**
* Service zum Einlesen des Test-Objekts in den Speicher. Beim Einlesen wird gleichzeitig eine Prüfsumme ermittelt und
* mit dem Ergebnis mitgeführt.
@ -64,7 +67,7 @@ public class InputFactory {
this(null);
}
InputFactory(String specifiedAlgorithm) {
InputFactory(final String specifiedAlgorithm) {
this.algorithm = isNotEmpty(specifiedAlgorithm) ? specifiedAlgorithm : DEFAULT_ALGORITH;
createDigest();
}
@ -76,7 +79,7 @@ public class InputFactory {
* @param path der Prüflings
* @return ein Prüf-Eingabe-Objekt
*/
public static Input read(Path path) {
public static Input read(final Path path) {
return read(path, DEFAULT_ALGORITH);
}
@ -88,11 +91,11 @@ public class InputFactory {
* @param digestAlgorithm der Prüfsummenalgorithmus
* @return ein Prüf-Eingabe-Objekt
*/
public static Input read(Path path, String digestAlgorithm) {
public static Input read(final Path path, final String digestAlgorithm) {
checkNull(path);
try ( InputStream stream = Files.newInputStream(path) ) {
try ( final InputStream stream = Files.newInputStream(path) ) {
return read(stream, path.toString(), digestAlgorithm);
} catch (IOException e) {
} catch (final IOException e) {
throw new IllegalArgumentException(MESSAGE_OPEN_STREAM_ERROR + path, e);
}
}
@ -104,7 +107,7 @@ public class InputFactory {
* @param file der Prüflings
* @return ein Prüf-Eingabe-Objekt
*/
public static Input read(File file) {
public static Input read(final File file) {
return read(file, DEFAULT_ALGORITH);
}
@ -115,10 +118,18 @@ public class InputFactory {
* @param url URL des Prüflings
* @return ein Prüf-Eingabe-Objekt
*/
public static Input read(URL url) {
public static Input read(final URL url) {
return read(url, DEFAULT_ALGORITH);
}
/**
 * Reads a test object from the given URI. The default digest algorithm is used for computing the checksum.
 *
 * @param uri location of the test object, must not be null
 * @return the input object read from the location
 * @throws IllegalArgumentException if the uri is null or can not be converted into a URL
 */
public static Input read(final URI uri) {
    // reject null up front with the same exception type and message the other read(...) variants produce
    if (uri == null) {
        throw new IllegalArgumentException("Input can not be null");
    }
    try {
        return read(uri.toURL(), DEFAULT_ALGORITH);
    } catch (final MalformedURLException e) {
        // hand over the original exception as cause, otherwise the reason for the rejection is lost
        throw new IllegalArgumentException(String.format("Can not read from uri %s Not a valid uri supplied", uri), e);
    }
}
/**
* Liest einen Prüfling von der übergebenen URL. Es wird ein definierter Algorithmus zur Ermittlung der Prüfsumme
* genutzt.
@ -127,11 +138,11 @@ public class InputFactory {
* @param digestAlgorithm der Prüfsummenalgorithmus
* @return ein Prüf-Eingabe-Objekt
*/
public static Input read(URL url, String digestAlgorithm) {
public static Input read(final URL url, final String digestAlgorithm) {
checkNull(url);
try {
return read(url.openStream(), url.getFile(), digestAlgorithm);
} catch (IOException e) {
} catch (final IOException e) {
throw new IllegalArgumentException(MESSAGE_OPEN_STREAM_ERROR + url, e);
}
}
@ -144,11 +155,11 @@ public class InputFactory {
* @param digestAlgorithm der Prüfsummenalgorithmus
* @return ein Prüf-Eingabe-Objekt
*/
public static Input read(File file, String digestAlgorithm) {
public static Input read(final File file, final String digestAlgorithm) {
checkNull(file);
try {
return read(file.toURI().toURL(), digestAlgorithm);
} catch (IOException e) {
} catch (final IOException e) {
throw new IllegalArgumentException(MESSAGE_OPEN_STREAM_ERROR + file, e);
}
}
@ -160,7 +171,7 @@ public class InputFactory {
* @param input URL des Prüflings
* @return ein Prüf-Eingabe-Objekt
*/
public static Input read(byte[] input, String name) {
public static Input read(final byte[] input, final String name) {
checkNull(input);
return read(input, name, DEFAULT_ALGORITH);
}
@ -173,12 +184,12 @@ public class InputFactory {
* @param digestAlgorithm der Prüfsummenalgorithmus
* @return ein Prüf-Eingabe-Objekt
*/
public static Input read(byte[] input, String name, String digestAlgorithm) {
public static Input read(final byte[] input, final String name, final String digestAlgorithm) {
checkNull(input);
return read(new ByteArrayInputStream(input), name, digestAlgorithm);
}
private static void checkNull(Object input) {
private static void checkNull(final Object input) {
if (input == null) {
throw new IllegalArgumentException("Input can not be null");
}
@ -191,7 +202,7 @@ public class InputFactory {
* @param name der Name/Bezeichner des Prüflings
* @return einen Prüfling in eingelesener Form
*/
public static Input read(InputStream inputStream, String name) {
public static Input read(final InputStream inputStream, final String name) {
return read(inputStream, name, DEFAULT_ALGORITH);
}
@ -203,18 +214,18 @@ public class InputFactory {
* @param digestAlgorithm der Prüfsummenalgorithmus
* @return einen Prüfling in eingelesener Form
*/
public static Input read(InputStream inputStream, String name, String digestAlgorithm) {
public static Input read(final InputStream inputStream, final String name, final String digestAlgorithm) {
return new InputFactory(digestAlgorithm).readStream(inputStream, name);
}
private Input readStream(InputStream inputStream, String name) {
private Input readStream(final InputStream inputStream, final String name) {
if (StringUtils.isNotBlank(name)) {
log.debug("Generating hashcode for {} using {} algorithm", name, getAlgorithm());
MessageDigest digest = createDigest();
byte[] buffer = new byte[DEFAULT_BUFFER_SIZE];
try ( BufferedInputStream bis = new BufferedInputStream(inputStream);
DigestInputStream dis = new DigestInputStream(bis, digest);
ByteArrayOutputStream out = new ByteArrayOutputStream() ) {
final MessageDigest digest = createDigest();
final byte[] buffer = new byte[DEFAULT_BUFFER_SIZE];
try ( final BufferedInputStream bis = new BufferedInputStream(inputStream);
final DigestInputStream dis = new DigestInputStream(bis, digest);
final ByteArrayOutputStream out = new ByteArrayOutputStream() ) {
// read the file and update the hash calculation
int n;
@ -222,11 +233,11 @@ public class InputFactory {
out.write(buffer, 0, n);
}
// get the hash value as byte array
byte[] hash = digest.digest();
final byte[] hash = digest.digest();
log.debug("Generated hashcode for {} is {}", name, DatatypeConverter.printHexBinary(hash));
out.flush();
return new Input(out.toByteArray(), name, hash, digest.getAlgorithm());
} catch (IOException e) {
} catch (final IOException e) {
throw new IllegalArgumentException(MESSAGE_OPEN_STREAM_ERROR + name, e);
}
} else {
@ -236,10 +247,10 @@ public class InputFactory {
private MessageDigest createDigest() {
try {
MessageDigest digest;
final MessageDigest digest;
digest = MessageDigest.getInstance(getAlgorithm());
return digest;
} catch (NoSuchAlgorithmException e) {
} catch (final NoSuchAlgorithmException e) {
// should not happen
throw new IllegalStateException(String.format("Specified method %s is not available", getAlgorithm()), e);
}

View file

@ -67,4 +67,17 @@ public interface Result {
*/
List<SchematronOutput> getSchematronResult();
/**
* Liefert ein true, wenn keine Schema-Violations vorhanden sind.
*
* @return true wenn Schema-valide
*/
boolean isSchemaValid();
/**
* Liefert ein true, wenn der Prüfling eine well-formed XML-Datei ist.
*
* @return true wenn well-formed
*/
boolean isWellformed();
}

View file

@ -96,7 +96,7 @@ public class ContentRepository {
final CollectingErrorEventHandler listener = new CollectingErrorEventHandler();
try {
xsltCompiler.setErrorListener(listener);
xsltCompiler.setURIResolver(new RelativeUriResolver(this.repository));
xsltCompiler.setURIResolver(createResolver());
return xsltCompiler.compile(resolve(uri));
} catch (final SaxonApiException e) {
@ -202,4 +202,12 @@ public class ContentRepository {
return StringUtils.startsWithIgnoreCase(path, "jar:") && path.split("!").length == 2;
}
/**
 * Creates a resolver for this repository which is only able to resolve relative references.
 *
 * @return a new resolver instance
 */
public RelativeUriResolver createResolver() {
    final RelativeUriResolver resolver = new RelativeUriResolver(this.repository);
    return resolver;
}
}

View file

@ -86,9 +86,9 @@ public class DefaultCheck implements Check {
this.checkSteps.add(new DocumentParseAction());
this.checkSteps.add(new ScenarioSelectionAction(this.repository));
this.checkSteps.add(new SchemaValidationAction());
this.checkSteps.add(new SchematronValidationAction(configuration.getScenarioRepository(), this.conversionService));
this.checkSteps.add(new SchematronValidationAction(this.contentRepository, this.conversionService));
this.checkSteps.add(new ValidateReportInputAction(this.conversionService, this.contentRepository.getReportInputSchema()));
this.checkSteps.add(new CreateReportAction(processor, this.conversionService, configuration.getScenarioRepository()));
this.checkSteps.add(new CreateReportAction(processor, this.conversionService, this.repository, this.contentRepository));
this.checkSteps.add(new ComputeAcceptanceAction());
}
@ -131,14 +131,12 @@ public class DefaultCheck implements Check {
private Result createResult(final Bag t) {
final DefaultResult result = new DefaultResult(t.getReport(), t.getAcceptStatus(), new HtmlExtractor(this.contentRepository));
result.setWellformed(t.getParserResult().isValid());
result.setReportInput(t.getReportInput());
if (t.getSchemaValidationResult() != null) {
result.setSchemaViolations(convertErrors(t.getSchemaValidationResult().getErrors()));
}
result.setProcessingSuccessful(!t.isStopped() && t.isFinished());
if (t.getReportInput().getProcessingError() != null) {
result.getProcessingErrors().addAll(t.getReportInput().getProcessingError().getError());
}
result.setSchematronResult(t.getReportInput().getValidationResultsSchematron().stream()
.map(e -> e.getResults().getSchematronOutput()).collect(Collectors.toList()));
return result;

View file

@ -1,6 +1,6 @@
package de.kosit.validationtool.impl;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
@ -45,10 +45,7 @@ public class DefaultResult implements Result {
@Setter(AccessLevel.PACKAGE)
@Getter
private List<XmlError> schemaViolations = new ArrayList<>();
@Getter
private final List<String> processingErrors = new ArrayList<>();
private List<XmlError> schemaViolations;
@Getter
@Setter(AccessLevel.PACKAGE)
@ -58,12 +55,21 @@ public class DefaultResult implements Result {
@Setter
private boolean processingSuccessful;
@Getter
@Setter
private boolean wellformed;
public DefaultResult(final XdmNode report, final AcceptRecommendation recommendation, final HtmlExtractor htmlExtractor) {
this.report = report;
this.acceptRecommendation = recommendation;
this.htmlExtraction = htmlExtractor;
}
/**
 * Returns the processing errors recorded in the report input.
 *
 * @return the recorded error messages or an empty list, if the report input carries no processing error
 */
@Override
public List<String> getProcessingErrors() {
    if (getReportInput().getProcessingError() == null) {
        return Collections.emptyList();
    }
    return getReportInput().getProcessingError().getError();
}
/**
* Gibt den Report als W3C-{@link Document} zurück.
*
@ -84,6 +90,11 @@ public class DefaultResult implements Result {
return isProcessingSuccessful() && AcceptRecommendation.ACCEPTABLE.equals(this.acceptRecommendation);
}
/**
 * Tells whether the schema validation produced no violations.
 *
 * @return true only if a list of schema violations is present and that list is empty
 */
@Override
public boolean isSchemaValid() {
    if (getSchemaViolations() == null) {
        // no list of violations available, so validity is not reported
        return false;
    }
    return getSchemaViolations().isEmpty();
}
/**
* Extrahiert evtl. im Report vorhandene HTML-Fragmente als String.
*

View file

@ -28,6 +28,7 @@ import javax.xml.transform.Source;
import javax.xml.transform.URIResolver;
import javax.xml.transform.stream.StreamSource;
import lombok.AccessLevel;
import lombok.RequiredArgsConstructor;
import net.sf.saxon.Configuration;
@ -40,7 +41,7 @@ import net.sf.saxon.trans.XPathException;
*
* @author Andreas Penski
*/
@RequiredArgsConstructor
@RequiredArgsConstructor(access = AccessLevel.PACKAGE)
public class RelativeUriResolver implements URIResolver, UnparsedTextURIResolver {
/** the base uri */

View file

@ -62,7 +62,7 @@ public class ScenarioRepository {
private static final String SUPPORTED_MAJOR_VERSION_SCHEMA = "http://www.xoev.de/de/validator/framework/1/scenarios";
@Getter(value = AccessLevel.PRIVATE)
@Getter(value = AccessLevel.PACKAGE)
private final ContentRepository repository;
@Getter

View file

@ -19,6 +19,8 @@
package de.kosit.validationtool.impl.tasks;
import java.util.Collection;
import java.util.Collections;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@ -30,6 +32,7 @@ import de.kosit.validationtool.api.AcceptRecommendation;
import de.kosit.validationtool.api.Input;
import de.kosit.validationtool.impl.model.Result;
import de.kosit.validationtool.model.reportInput.CreateReportInput;
import de.kosit.validationtool.model.reportInput.ProcessingError;
import de.kosit.validationtool.model.reportInput.XMLSyntaxError;
import de.kosit.validationtool.model.scenarios.ScenarioType;
@ -84,12 +87,19 @@ public interface CheckAction {
this.reportInput = reportInput;
}
/**
* Signalisiert einen vorzeitigen Stop der Verarbeitung.
*/
public void stopProcessing() {
public void stopProcessing(final String error) {
stopProcessing(Collections.singleton(error));
}
public void stopProcessing(final Collection<String> errors) {
this.stopped = true;
if (this.reportInput.getProcessingError() == null) {
this.reportInput.setProcessingError(new ProcessingError());
}
this.reportInput.getProcessingError().getError().addAll(errors);
}
/**

View file

@ -19,8 +19,6 @@
package de.kosit.validationtool.impl.tasks;
import java.net.URI;
import javax.xml.transform.dom.DOMSource;
import org.w3c.dom.Document;
@ -28,6 +26,7 @@ import org.w3c.dom.Document;
import lombok.RequiredArgsConstructor;
import de.kosit.validationtool.impl.CollectingErrorEventHandler;
import de.kosit.validationtool.impl.ContentRepository;
import de.kosit.validationtool.impl.ConversionService;
import de.kosit.validationtool.impl.ObjectFactory;
import de.kosit.validationtool.impl.RelativeUriResolver;
@ -57,7 +56,9 @@ public class CreateReportAction implements CheckAction {
private final ConversionService conversionService;
private final URI contentRepository;
private final ScenarioRepository scenarioRepository;
private final ContentRepository contentRepository;
private static XsltExecutable loadFromScenario(final ScenarioType object) {
return object.getReportTransformation().getExecutable();
@ -76,7 +77,7 @@ public class CreateReportAction implements CheckAction {
final XsltTransformer transformer = getTransformation(results).load();
transformer.setInitialContextNode(root);
final CollectingErrorEventHandler e = new CollectingErrorEventHandler();
final RelativeUriResolver resolver = new RelativeUriResolver(this.contentRepository);
final RelativeUriResolver resolver = this.contentRepository.createResolver();
transformer.setMessageListener(e);
transformer.setURIResolver(resolver);
transformer.getUnderlyingController().setUnparsedTextURIResolver(resolver);
@ -91,9 +92,10 @@ public class CreateReportAction implements CheckAction {
}
}
private static XsltExecutable getTransformation(final Bag results) {
private XsltExecutable getTransformation(final Bag results) {
final Result<ScenarioType, String> scenario = results.getScenarioSelectionResult();
return loadFromScenario(scenario.getObject());
final ScenarioType reportScenario = scenario.isValid() ? scenario.getObject() : this.scenarioRepository.getFallbackScenario();
return loadFromScenario(reportScenario);
}
}

View file

@ -23,6 +23,7 @@ import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Collections;
import java.util.stream.Collectors;
import javax.xml.transform.stream.StreamSource;
@ -51,7 +52,8 @@ public class DocumentParseAction implements CheckAction {
/**
* Parsed und überprüft ein übergebenes Dokument darauf ob es well-formed ist. Dies stellt den ersten
* Verarbeitungsschritt des Prüf-Tools dar. Diese Funktion verzichtet explizit auf die Validierung gegenüber einem Schema.
* Verarbeitungsschritt des Prüf-Tools dar. Diese Funktion verzichtet explizit auf die Validierung gegenüber einem
* Schema.
*
* @param content ein Dokument
* @return Ergebnis des Parsings inklusive etwaiger Fehler
@ -86,6 +88,7 @@ public class DocumentParseAction implements CheckAction {
results.getReportInput().setValidationResultsWellformedness(v);
if (parserResult.isInvalid()) {
log.info("Parsing war nicht erfolgreich: {} -> {}", parserResult.getObject(), parserResult.getErrors());
results.stopProcessing(parserResult.getErrors().stream().map(XMLSyntaxError::getMessage).collect(Collectors.toList()));
}
}

View file

@ -24,7 +24,6 @@ import lombok.RequiredArgsConstructor;
import de.kosit.validationtool.impl.ScenarioRepository;
import de.kosit.validationtool.impl.model.Result;
import de.kosit.validationtool.model.reportInput.CreateReportInput;
import de.kosit.validationtool.model.reportInput.ProcessingError;
import de.kosit.validationtool.model.scenarios.ScenarioType;
/**
@ -38,23 +37,20 @@ public class ScenarioSelectionAction implements CheckAction {
private final ScenarioRepository repository;
@Override
public void check(Bag results) {
public void check(final Bag results) {
final CreateReportInput report = results.getReportInput();
final Result<ScenarioType, String> scenarioTypeResult = repository.selectScenario(results.getParserResult().getObject());
final Result<ScenarioType, String> scenarioTypeResult = this.repository.selectScenario(results.getParserResult().getObject());
results.setScenarioSelectionResult(scenarioTypeResult);
if (scenarioTypeResult.isValid()) {
final ScenarioType scenario = scenarioTypeResult.getObject();
report.setScenario(scenario);
} else {
if (report.getProcessingError() == null) {
report.setProcessingError(new ProcessingError());
}
report.getProcessingError().getError().addAll(scenarioTypeResult.getErrors());
results.stopProcessing(scenarioTypeResult.getErrors());
}
}
@Override
public boolean isSkipped(Bag results) {
public boolean isSkipped(final Bag results) {
return results.getParserResult().isInvalid();
}
}

View file

@ -19,7 +19,6 @@
package de.kosit.validationtool.impl.tasks;
import java.net.URI;
import java.util.List;
import java.util.stream.Collectors;
@ -31,6 +30,7 @@ import org.w3c.dom.Document;
import lombok.RequiredArgsConstructor;
import de.kosit.validationtool.impl.CollectingErrorEventHandler;
import de.kosit.validationtool.impl.ContentRepository;
import de.kosit.validationtool.impl.ConversionService;
import de.kosit.validationtool.impl.ObjectFactory;
import de.kosit.validationtool.impl.RelativeUriResolver;
@ -52,7 +52,7 @@ import net.sf.saxon.s9api.XsltTransformer;
@RequiredArgsConstructor
public class SchematronValidationAction implements CheckAction {
private final URI repository;
private final ContentRepository repository;
private final ConversionService conversionService;
@ -64,7 +64,7 @@ public class SchematronValidationAction implements CheckAction {
try {
final XsltTransformer transformer = validation.getExecutable().load();
// resolving nur relative zum Repository
final RelativeUriResolver resolver = new RelativeUriResolver(this.repository);
final RelativeUriResolver resolver = this.repository.createResolver();
transformer.setURIResolver(resolver);
final CollectingErrorEventHandler e = new CollectingErrorEventHandler();
transformer.setMessageListener(e);

View file

@ -49,7 +49,7 @@ public class ValidateReportInputAction implements CheckAction {
final Result<Boolean, XMLSyntaxError> results = validate(bag.getReportInput());
if (!results.isValid()) {
log.error("Report input has errors {}", results.getErrors());
bag.stopProcessing();
bag.stopProcessing(String.format("Report input has errors %s", results.getErrors()));
}
}

View file

@ -20,6 +20,7 @@
package de.kosit.validationtool.impl;
import static de.kosit.validationtool.api.InputFactory.read;
import static de.kosit.validationtool.impl.Helper.Simple.GARBAGE;
import static org.assertj.core.api.Assertions.assertThat;
import java.io.File;
@ -103,4 +104,13 @@ public class DefaultCheckTest {
assertThat(doc.extractHtml()).isNotEmpty();
}
/**
 * Checks an input that is not XML: a result object must be returned and it must be flagged as not well-formed, not
 * schema-valid and not successfully processed.
 */
@Test
public void testGarbage() {
// GARBAGE is the statically imported Helper.Simple.GARBAGE fixture
final Result result = this.implementation.checkInput(read(GARBAGE));
assertThat(result).isNotNull();
assertThat(result.isWellformed()).isFalse();
assertThat(result.isSchemaValid()).isFalse();
assertThat(result.isProcessingSuccessful()).isFalse();
}
}

View file

@ -54,6 +54,8 @@ public class Helper {
public static final URI INVALID = ROOT.resolve("input/simple-invalid.xml");
public static final URI UNKNOWN = ROOT.resolve("input/unknown.xml");
public static final URI GARBAGE = ROOT.resolve("input/no-xml.file");
}
public static final URI SOURCE_ROOT = Paths.get("src/main/resources").toUri();

View file

@ -0,0 +1 @@
some binary stuff