#39 The supplied Input is no longer unnecessarily written into memory

This commit is contained in:
Andreas Penski 2019-12-18 15:57:44 +01:00
parent d7ee019194
commit efd4fd5fff
63 changed files with 1111 additions and 18196 deletions

View file

@ -0,0 +1,57 @@
package de.kosit.validationtool.impl.input;
import java.io.InputStream;
import lombok.Getter;
import lombok.Setter;
import lombok.extern.slf4j.Slf4j;
import de.kosit.validationtool.api.Input;
/**
 * Common skeleton shared by the {@link Input Inputs} of this package. It keeps the hashcode and the length of the
 * input and offers {@link #wrap(InputStream)} to decorate a stream with the wrappers created by {@link StreamHelper},
 * which report both values back to this instance.
 *
 * @author Andreas Penski
 */
@Slf4j
public abstract class AbstractInput implements Input, LazyReadInput {

    /** The hashcode of the input; stays {@code null} until {@link #setHashCode(byte[])} supplies a value. */
    private byte[] hashCode;

    /** The length of the input; accessors are generated by lombok. */
    @Getter
    @Setter
    private long length;

    /**
     * Returns the hashcode stored via {@link #setHashCode(byte[])}.
     *
     * @return the stored hashcode
     * @throws IllegalStateException when no hashcode has been stored so far
     */
    @Override
    public byte[] getHashCode() {
        if (this.hashCode != null) {
            return this.hashCode;
        }
        throw new IllegalStateException("Hashcode is not computed yet");
    }

    /**
     * Decorates the given stream. A digesting wrapper is added while no hashcode is known, a counting wrapper is added
     * (around the former) while the length is still {@code 0}. When neither applies the stream is returned untouched.
     *
     * @param stream the stream to decorate
     * @return the decorated stream or the given stream itself
     */
    protected InputStream wrap(final InputStream stream) {
        final InputStream digested = isHashcodeComputed()
                ? stream
                : StreamHelper.wrapDigesting(this, stream, getDigestAlgorithm());
        return getLength() == 0 ? StreamHelper.wrapCount(this, digested) : digested;
    }

    /**
     * Tells whether a hashcode has been stored.
     *
     * @return {@code true} when a non-null hashcode is present
     */
    @Override
    public boolean isHashcodeComputed() {
        return this.hashCode != null;
    }

    /**
     * Stores the hashcode of this input.
     *
     * @param digest the hashcode; {@code null} leaves the input in the 'not computed' state
     */
    @Override
    public void setHashCode(final byte[] digest) {
        this.hashCode = digest;
    }

    /**
     * This base implementation always answers {@code true}.
     *
     * @return {@code true}
     */
    public boolean supportsMultipleReads() {
        return true;
    }
}

View file

@ -0,0 +1,39 @@
package de.kosit.validationtool.impl.input;
import java.io.ByteArrayInputStream;
import java.io.InputStream;
import javax.xml.transform.Source;
import javax.xml.transform.stream.StreamSource;
import lombok.AllArgsConstructor;
import lombok.Getter;
/**
 * Classical in-memory {@link de.kosit.validationtool.api.Input} backed by a byte array. Reading the whole file into
 * memory prior to validating is not memory efficient; consider using the {@link ResourceInput} instead.
 *
 * @author Andreas Penski
 */
@Getter
@AllArgsConstructor
public class ByteArrayInput extends AbstractInput {

    private final byte[] content;

    private final String name;

    private final String digestAlgorithm;

    /**
     * The length is derived directly from the array.
     *
     * @return the number of bytes held, {@code 0} when there is no content array
     */
    @Override
    public long getLength() {
        if (this.content == null) {
            return 0;
        }
        return this.content.length;
    }

    /**
     * Creates a new stream over the content, decorated via {@link #wrap(InputStream)}.
     *
     * @return a {@link StreamSource} using the name of this input as system id
     */
    @Override
    public Source getSource() {
        final InputStream decorated = wrap(new ByteArrayInputStream(this.content));
        return new StreamSource(decorated, getName());
    }
}

View file

@ -0,0 +1,36 @@
package de.kosit.validationtool.impl.input;
import java.io.InputStream;
import de.kosit.validationtool.api.Input;
/**
* Internal callback interface used for lazy generation of the hashcode (and the length) of an {@link Input}. The stream
* wrappers created by {@link StreamHelper} hold a reference to an implementation and report their results to it when
* the wrapped stream is closed.
*
* @see StreamHelper#wrapDigesting(LazyReadInput, InputStream, String) for details
* @author Andreas Penski
*/
interface LazyReadInput {
/**
* Sets the hashcode. The digesting stream wrapper calls this with the computed digest when the stream is closed.
*
* @param digest the digest
*/
void setHashCode(byte[] digest);
/**
* Determines whether a hashcode has been computed yet.
*
* @return true when computed
*/
boolean isHashcodeComputed();
/**
* Sets the length of the {@link Input}. The counting stream wrapper calls this with the number of bytes counted when
* the stream is closed.
*
* @param length the length
*/
void setLength(long length);
}

View file

@ -0,0 +1,43 @@
package de.kosit.validationtool.impl.input;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import javax.xml.transform.Source;
import javax.xml.transform.stream.StreamSource;
import lombok.Getter;
import lombok.RequiredArgsConstructor;
import de.kosit.validationtool.api.Input;
/**
 * An {@link Input} carrying a {@link URL}, which can be used for all 'locatable' inputs such as {@link File},
 * {@link java.nio.file.Path} and any other {@link URL}.
 *
 * The stream is NOT read into memory, so this implementation has good memory efficiency. The validation process MAY
 * read the stream more than once. Make sure that the {@link URL} points to fast I/O devices.
 *
 * @author Andreas Penski
 */
@Getter
@RequiredArgsConstructor
public class ResourceInput extends AbstractInput {

    private final URL url;

    private final String name;

    private final String digestAlgorithm;

    /**
     * Opens a fresh stream on the {@link URL} for every call and decorates it via {@link #wrap(InputStream)}, so that
     * both the hashcode and the length of this input are reported back once the returned stream is closed. The caller
     * is responsible for closing the stream of the returned source.
     *
     * @return a {@link StreamSource} using the name of this input as system id
     * @throws IOException when the stream can not be opened
     */
    @Override
    public Source getSource() throws IOException {
        final InputStream raw = this.url.openStream();
        try {
            // use the shared wrapping logic: adds the digesting AND the counting wrapper where required
            return new StreamSource(wrap(raw), this.name);
        } catch (final RuntimeException e) {
            // wrapping failed (e.g. unknown digest algorithm); do not leak the already opened stream
            closeAfterFailure(raw, e);
            throw e;
        }
    }

    /** Closes the stream and attaches a failure to close to the exception that is about to be rethrown. */
    private static void closeAfterFailure(final InputStream stream, final RuntimeException cause) {
        try {
            stream.close();
        } catch (final IOException closeError) {
            cause.addSuppressed(closeError);
        }
    }
}

View file

@ -0,0 +1,108 @@
package de.kosit.validationtool.impl.input;
import java.io.IOException;
import java.nio.charset.Charset;
import javax.xml.transform.Source;
import javax.xml.transform.stream.StreamSource;
import org.apache.commons.io.input.ReaderInputStream;
import org.apache.commons.lang3.NotImplementedException;
import lombok.Getter;
import lombok.extern.slf4j.Slf4j;
/**
 * A validator {@link de.kosit.validationtool.api.Input} based on a {@link Source}. Currently only {@link StreamSource}
 * instances are accepted. As long as no hashcode is known, the underlying stream (or reader) is decorated so that
 * the hashcode gets computed while reading.
 *
 * @author Andreas Penski
 */
@Getter
@Slf4j
public class SourceInput extends AbstractInput {

    private final Source source;

    private final String name;

    private final String digestAlgorithm;

    /**
     * Creates an input without a precomputed hashcode.
     *
     * @param source the source to read from
     * @param name the name of the input
     * @param digestAlgorithm the algorithm used for computing the hashcode
     */
    public SourceInput(final StreamSource source, final String name, final String digestAlgorithm) {
        this(source, name, digestAlgorithm, null);
    }

    /**
     * Creates an input with an optional precomputed hashcode.
     *
     * @param source the source to read from; must be a {@link StreamSource}
     * @param name the name of the input
     * @param digestAlgorithm the algorithm used for computing the hashcode
     * @param hashCode a precomputed hashcode or {@code null}
     * @throws IllegalStateException when the source is not a {@link StreamSource}
     */
    public SourceInput(final Source source, final String name, final String digestAlgorithm, final byte[] hashCode) {
        this.source = source;
        this.name = name;
        this.digestAlgorithm = digestAlgorithm;
        setHashCode(hashCode);
        validate();
    }

    /** Rejects unsupported sources and warns about reader-only sources that come without a hashcode. */
    private void validate() {
        if (!isStreamSource()) {
            throw new IllegalStateException("Unsupported source. Only StreamSource supported yet");
        }
        final boolean withoutStream = ((StreamSource) this.source).getInputStream() == null;
        if (withoutStream && !isHashcodeComputed()) {
            log.warn("No hashcode supplied, will wrap the reader using system default charset");
        }
    }

    /**
     * Returns the source to read from. While no hashcode is known a decorated copy of the source is handed out,
     * afterwards the original source is returned as long as it does not appear to be consumed.
     *
     * @return the source
     * @throws IllegalStateException when the source is unsupported or has been consumed already
     */
    @Override
    public Source getSource() throws IOException {
        if (!isStreamSource()) {
            throw new IllegalStateException("Unsupported source. Only InputStream-based StreamSource supported yet");
        }
        if (!isHashcodeComputed()) {
            return decorate();
        }
        if (isConsumed()) {
            throw new IllegalStateException("A StreamSource can only read once");
        }
        return this.source;
    }

    /**
     * Checks whether the stream or reader of the source has nothing left to offer. An {@link IOException} raised while
     * probing is interpreted as 'consumed'.
     */
    private boolean isConsumed() {
        if (!isStreamSource()) {
            throw new NotImplementedException("Supports only StreamSource yet");
        }
        final StreamSource streamSource = (StreamSource) this.source;
        try {
            if (streamSource.getInputStream() != null && streamSource.getInputStream().available() == 0) {
                return true;
            }
            return streamSource.getReader() != null && !streamSource.getReader().ready();
        } catch (final IOException e) {
            return true;
        }
    }

    private boolean isStreamSource() {
        return this.source instanceof StreamSource;
    }

    /**
     * Builds a new {@link StreamSource} around the decorated stream. A reader is converted into a stream using the
     * system default charset. Sources offering neither stream nor reader are returned as they are.
     */
    private Source decorate() {
        if (!isStreamSource()) {
            return this.source;
        }
        final StreamSource streamSource = (StreamSource) this.source;
        if (streamSource.getInputStream() != null) {
            return new StreamSource(wrap(streamSource.getInputStream()), this.source.getSystemId());
        }
        if (streamSource.getReader() != null) {
            return new StreamSource(wrap(new ReaderInputStream(streamSource.getReader(), Charset.defaultCharset())),
                    this.source.getSystemId());
        }
        return this.source;
    }

    /**
     * This input answers {@code false}.
     *
     * @return {@code false}
     */
    @Override
    public boolean supportsMultipleReads() {
        return false;
    }
}

View file

@ -0,0 +1,95 @@
package de.kosit.validationtool.impl.input;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.security.DigestInputStream;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import org.apache.commons.io.input.CountingInputStream;
/**
 * Helper for stream handling.
 *
 * @author Andreas Penski
 */
public class StreamHelper {

    /**
     * Helper class, which generates the hashcode while reading the stream e.g. for parsing the document. This allows
     * generating the hashcode without an additional reading step. The hashcode is handed to the
     * {@link LazyReadInput} when the stream is closed; it covers only the bytes that were actually read until then.
     */
    private static class DigestingInputStream extends FilterInputStream {

        private final MessageDigest digest;

        private final LazyReadInput reference;

        /** Guards against finalizing the digest more than once. */
        private boolean digestReported;

        DigestingInputStream(final LazyReadInput input, final InputStream in, final MessageDigest digest) {
            super(new DigestInputStream(in, digest));
            this.digest = digest;
            this.reference = input;
        }

        /**
         * Closes the underlying stream and reports the hashcode once. Subsequent calls only close the underlying
         * stream again: {@link MessageDigest#digest()} resets the digest, so finalizing it a second time would
         * overwrite the correct hashcode with the hash of an empty input.
         */
        @Override
        public void close() throws IOException {
            super.close();
            if (!this.digestReported) {
                this.digestReported = true;
                this.reference.setHashCode(this.digest.digest());
            }
        }
    }

    /**
     * Helper class, which counts the bytes passing through and hands the total to the {@link LazyReadInput} when the
     * stream is closed.
     */
    private static class CountInputStream extends FilterInputStream {

        private final LazyReadInput reference;

        public CountInputStream(final LazyReadInput input, final InputStream stream) {
            super(new CountingInputStream(stream));
            this.reference = input;
        }

        @Override
        public void close() throws IOException {
            super.close();
            // 'in' is always the CountingInputStream created in the constructor
            this.reference.setLength(((CountingInputStream) this.in).getByteCount());
        }
    }

    private StreamHelper() {
        // hide
    }

    /**
     * Creates a {@link MessageDigest} for the given algorithm.
     *
     * @param algorithm the name of the digest algorithm
     * @return a new digest instance
     * @throws IllegalArgumentException when the algorithm is not available
     */
    public static MessageDigest createDigest(final String algorithm) {
        try {
            return MessageDigest.getInstance(algorithm);
        } catch (final NoSuchAlgorithmException e) {
            // should not happen
            throw new IllegalArgumentException(String.format("Specified method %s is not available", algorithm), e);
        }
    }

    /**
     * Wraps the {@link InputStream} with a counting length implementation.
     *
     * @param input the {@link LazyReadInput input} receiving the length on close
     * @param stream the stream
     * @return a wrapped stream
     */
    public static InputStream wrapCount(final LazyReadInput input, final InputStream stream) {
        return new CountInputStream(input, stream);
    }

    /**
     * Wraps the {@link InputStream} with an implementation that generates a hash sum over the stream data.
     *
     * @param input the {@link LazyReadInput input} receiving the hashcode on close
     * @param stream the stream
     * @param digestAlgorithm the name of the digest algorithm to use
     * @return a wrapped stream
     * @throws IllegalArgumentException when the digest algorithm is not available
     */
    public static InputStream wrapDigesting(final LazyReadInput input, final InputStream stream, final String digestAlgorithm) {
        return new DigestingInputStream(input, stream, createDigest(digestAlgorithm));
    }
}