mirror of
https://github.com/itplr-kosit/validator.git
synced 2026-05-26 01:05:38 +00:00
#39 The supplied Input is no longer unnecessarily written into memory
This commit is contained in:
parent
d7ee019194
commit
efd4fd5fff
63 changed files with 1111 additions and 18196 deletions
|
|
@ -0,0 +1,57 @@
|
|||
package de.kosit.validationtool.impl.input;
|
||||
|
||||
import java.io.InputStream;
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
import de.kosit.validationtool.api.Input;
|
||||
|
||||
/**
|
||||
* Base class for all {@link Input Inputs}.
|
||||
*
|
||||
* @author Andreas Penski
|
||||
*/
|
||||
@Slf4j
|
||||
public abstract class AbstractInput implements Input, LazyReadInput {
|
||||
|
||||
private byte[] hashCode;
|
||||
|
||||
@Getter
|
||||
@Setter
|
||||
private long length;
|
||||
|
||||
@Override
|
||||
public byte[] getHashCode() {
|
||||
if (this.hashCode == null) {
|
||||
throw new IllegalStateException("Hashcode is not computed yet");
|
||||
}
|
||||
return this.hashCode;
|
||||
}
|
||||
|
||||
protected InputStream wrap(final InputStream stream) {
|
||||
InputStream result = stream;
|
||||
if (!isHashcodeComputed()) {
|
||||
result = StreamHelper.wrapDigesting(this, result, getDigestAlgorithm());
|
||||
}
|
||||
if (getLength() == 0) {
|
||||
result = StreamHelper.wrapCount(this, result);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isHashcodeComputed() {
|
||||
return this.hashCode != null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setHashCode(final byte[] digest) {
|
||||
this.hashCode = digest;
|
||||
}
|
||||
|
||||
public boolean supportsMultipleReads() {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,39 @@
|
|||
package de.kosit.validationtool.impl.input;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.InputStream;
|
||||
|
||||
import javax.xml.transform.Source;
|
||||
import javax.xml.transform.stream.StreamSource;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Getter;
|
||||
|
||||
/**
|
||||
* Classical in-memory {@link de.kosit.validationtool.api.Input}. It is not memory efficient to read the whole file into
|
||||
* memory prio validating. Consider using the {@link ResourceInput}.
|
||||
*
|
||||
* @author Andreas Penski
|
||||
*/
|
||||
@Getter
|
||||
@AllArgsConstructor
|
||||
public class ByteArrayInput extends AbstractInput {
|
||||
|
||||
private final byte[] content;
|
||||
|
||||
private final String name;
|
||||
|
||||
private final String digestAlgorithm;
|
||||
|
||||
@Override
|
||||
public long getLength() {
|
||||
return this.content != null ? this.content.length : 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Source getSource() {
|
||||
final InputStream stream = wrap(new ByteArrayInputStream(this.content));
|
||||
return new StreamSource(stream, getName());
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,36 @@
|
|||
package de.kosit.validationtool.impl.input;
|
||||
|
||||
import java.io.InputStream;
|
||||
|
||||
import de.kosit.validationtool.api.Input;
|
||||
|
||||
/**
 * Internal callback interface which allows the hash code used for document identification and the length of an
 * {@link Input} to be supplied lazily.
 *
 * @see StreamHelper#wrapDigesting(LazyReadInput, InputStream, String) for details
 * @author Andreas Penski
 */
interface LazyReadInput {

    /**
     * Tells whether a hash code is already available.
     *
     * @return {@code true} when the hash code has been computed
     */
    boolean isHashcodeComputed();

    /**
     * Supplies the hash code.
     *
     * @param digest the digest to use as hash code
     */
    void setHashCode(byte[] digest);

    /**
     * Supplies the length of the {@link Input}.
     *
     * @param length the length
     */
    void setLength(long length);

}
|
||||
|
|
@ -0,0 +1,43 @@
|
|||
package de.kosit.validationtool.impl.input;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.net.URL;
|
||||
|
||||
import javax.xml.transform.Source;
|
||||
import javax.xml.transform.stream.StreamSource;
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
|
||||
import de.kosit.validationtool.api.Input;
|
||||
|
||||
/**
|
||||
* An {@link Input} carries an {@link URL} which can be used for all 'locatable' inputs such as {@link File},
|
||||
* {@link java.nio.file.Path} and any other {@link URL}.
|
||||
*
|
||||
* This stream is NOT read into memory. So this implementation has good in memory efficieny. The validation process MAY
|
||||
* read the stream more than once. Make sure, that the {@link URL} points to fast I/O devices
|
||||
*
|
||||
* @author Andreas Penski
|
||||
*/
|
||||
@Getter
|
||||
@RequiredArgsConstructor
|
||||
public class ResourceInput extends AbstractInput {
|
||||
|
||||
private final URL url;
|
||||
|
||||
private final String name;
|
||||
|
||||
private final String digestAlgorithm;
|
||||
|
||||
@Override
|
||||
public Source getSource() throws IOException {
|
||||
InputStream stream = this.url.openStream();
|
||||
if (!isHashcodeComputed()) {
|
||||
stream = StreamHelper.wrapDigesting(this, stream, getDigestAlgorithm());
|
||||
}
|
||||
return new StreamSource(stream, this.name);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,108 @@
|
|||
package de.kosit.validationtool.impl.input;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.Charset;
|
||||
|
||||
import javax.xml.transform.Source;
|
||||
import javax.xml.transform.stream.StreamSource;
|
||||
|
||||
import org.apache.commons.io.input.ReaderInputStream;
|
||||
import org.apache.commons.lang3.NotImplementedException;
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
/**
|
||||
* A validator {@link de.kosit.validationtool.api.Input} based an on a {@link Source}.
|
||||
*
|
||||
* @author Andreas Penski
|
||||
*/
|
||||
@Getter
|
||||
@Slf4j
|
||||
public class SourceInput extends AbstractInput {
|
||||
|
||||
private final Source source;
|
||||
|
||||
private final String name;
|
||||
|
||||
private final String digestAlgorithm;
|
||||
|
||||
public SourceInput(final StreamSource source, final String name, final String digestAlgorithm) {
|
||||
this(source, name, digestAlgorithm, null);
|
||||
}
|
||||
|
||||
public SourceInput(final Source source, final String name, final String digestAlgorithm, final byte[] hashCode) {
|
||||
this.source = source;
|
||||
this.name = name;
|
||||
this.digestAlgorithm = digestAlgorithm;
|
||||
setHashCode(hashCode);
|
||||
validate();
|
||||
}
|
||||
|
||||
private void validate() {
|
||||
if (!isSupported()) {
|
||||
throw new IllegalStateException("Unsupported source. Only StreamSource supported yet");
|
||||
}
|
||||
if (((StreamSource) this.source).getInputStream() == null && !isHashcodeComputed()) {
|
||||
log.warn("No hashcode supplied, will wrap the reader using system default charset");
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Source getSource() throws IOException {
|
||||
if (!isSupported()) {
|
||||
throw new IllegalStateException("Unsupported source. Only InputStream-based StreamSource supported yet");
|
||||
}
|
||||
if (isWrappingRequired()) {
|
||||
return wrap();
|
||||
}
|
||||
if (isConsumed()) {
|
||||
throw new IllegalStateException("A StreamSource can only read once");
|
||||
}
|
||||
return this.source;
|
||||
}
|
||||
|
||||
private boolean isSupported() {
|
||||
return isStreamSource();
|
||||
}
|
||||
|
||||
private boolean isConsumed() throws IOException {
|
||||
if (!isStreamSource()) {
|
||||
throw new NotImplementedException("Supports only StreamSource yet");
|
||||
}
|
||||
final StreamSource ss = (StreamSource) this.source;
|
||||
try {
|
||||
return (ss.getInputStream() != null && ss.getInputStream().available() == 0)
|
||||
|| (ss.getReader() != null && !ss.getReader().ready());
|
||||
} catch (final IOException e) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
private boolean isStreamSource() {
|
||||
return this.source instanceof StreamSource;
|
||||
}
|
||||
|
||||
private Source wrap() {
|
||||
Source result = this.source;
|
||||
if (isStreamSource()) {
|
||||
final StreamSource ss = (StreamSource) this.source;
|
||||
if (ss.getInputStream() != null) {
|
||||
result = new StreamSource(wrap(ss.getInputStream()), this.source.getSystemId());
|
||||
} else if (ss.getReader() != null) {
|
||||
result = new StreamSource(wrap(new ReaderInputStream(ss.getReader(), Charset.defaultCharset())), this.source.getSystemId());
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
private boolean isWrappingRequired() {
|
||||
return !isHashcodeComputed();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean supportsMultipleReads() {
|
||||
return false;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,95 @@
|
|||
package de.kosit.validationtool.impl.input;
|
||||
|
||||
import java.io.FilterInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.security.DigestInputStream;
|
||||
import java.security.MessageDigest;
|
||||
import java.security.NoSuchAlgorithmException;
|
||||
|
||||
import org.apache.commons.io.input.CountingInputStream;
|
||||
|
||||
/**
|
||||
* Helper for stream handling.
|
||||
*
|
||||
* @author Andreas Penski
|
||||
*/
|
||||
public class StreamHelper {
|
||||
|
||||
/**
|
||||
* Helper class, which generates the hashcode while reading the stream e.g. for parsing the document. This allows
|
||||
* generating the hashcode without an aditional reading step.
|
||||
*/
|
||||
private static class DigestingInputStream extends FilterInputStream {
|
||||
|
||||
private final MessageDigest digest;
|
||||
|
||||
private final LazyReadInput reference;
|
||||
|
||||
DigestingInputStream(final LazyReadInput input, final InputStream in, final MessageDigest digest) {
|
||||
super(new DigestInputStream(in, digest));
|
||||
this.digest = digest;
|
||||
this.reference = input;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
super.close();
|
||||
this.reference.setHashCode(this.digest.digest());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private static class CountInputStream extends FilterInputStream {
|
||||
|
||||
private final LazyReadInput reference;
|
||||
|
||||
public CountInputStream(final LazyReadInput input, final InputStream stream) {
|
||||
super(new org.apache.commons.io.input.CountingInputStream(stream));
|
||||
this.reference = input;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
super.close();
|
||||
this.reference.setLength(((CountingInputStream) this.in).getByteCount());
|
||||
}
|
||||
}
|
||||
|
||||
private StreamHelper() {
|
||||
// hide
|
||||
}
|
||||
|
||||
public static MessageDigest createDigest(final String algorithm) {
|
||||
try {
|
||||
final MessageDigest digest;
|
||||
digest = MessageDigest.getInstance(algorithm);
|
||||
return digest;
|
||||
} catch (final NoSuchAlgorithmException e) {
|
||||
// should not happen
|
||||
throw new IllegalArgumentException(String.format("Specified method %s is not available", algorithm), e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Wraps the {@link InputStream} with a counting length implementation.
|
||||
*
|
||||
* @param input the {@link LazyReadInput input}
|
||||
* @param stream the stream
|
||||
* @return a wrapped stream
|
||||
*/
|
||||
public static InputStream wrapCount(final LazyReadInput input, final InputStream stream) {
|
||||
return new CountInputStream(input, stream);
|
||||
}
|
||||
|
||||
/**
|
||||
* Wraps the {@link InputStream} with an implementation the generates a hash sum over the stream data.
|
||||
*
|
||||
* @param input the {@link LazyReadInput input}
|
||||
* @param stream the stream
|
||||
* @return a wrapped stream
|
||||
*/
|
||||
public static InputStream wrapDigesting(final LazyReadInput input, final InputStream stream, final String digestAlgorithm) {
|
||||
return new DigestingInputStream(input, stream, createDigest(digestAlgorithm));
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue