Document.java
/*
* Licensed to the Apache Software Foundation (ASF) under one *
* or more contributor license agreements. See the NOTICE file *
* distributed with this work for additional information *
* regarding copyright ownership. The ASF licenses this file *
* to you under the Apache License, Version 2.0 (the *
* "License"); you may not use this file except in compliance *
* with the License. You may obtain a copy of the License at *
* *
* http://www.apache.org/licenses/LICENSE-2.0 *
* *
* Unless required by applicable law or agreed to in writing, *
* software distributed under the License is distributed on an *
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
* KIND, either express or implied. See the License for the *
* specific language governing permissions and limitations *
* under the License. *
*/
package org.apache.rat.api;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.util.SortedSet;
import org.apache.rat.analysis.TikaProcessor;
import org.apache.rat.document.DocumentName;
import org.apache.rat.document.DocumentNameMatcher;
import org.apache.tika.parser.txt.CharsetDetector;
/**
* The representation of a document being scanned.
*/
public abstract class Document implements Comparable<Document> {
/**
* An enumeration of document types.
*/
public enum Type {
/** An archive type document. */
ARCHIVE,
/** A binary file. */
BINARY,
/** A generated document that is ignored. */
IGNORED,
/** A notice document (e.g. LICENSE file). */
NOTICE,
/** A standard document. */
STANDARD,
/** An unknown document type. */
UNKNOWN
}
/** The path matcher used by this document */
protected final DocumentNameMatcher nameMatcher;
/** The metadata for this document */
private final MetaData metaData;
/** The fully qualified name of this document */
protected final DocumentName name;
/**
* Creates an instance.
* @param name the native NameSet of the resource.
* @param nameMatcher the document name matcher to filter directories/files.
*/
protected Document(final DocumentName name, final DocumentNameMatcher nameMatcher) {
this.name = name;
this.nameMatcher = nameMatcher;
this.metaData = new MetaData();
}
/**
* Gets the name of the current document.
* @return the name of the current document.
*/
public final DocumentName getName() {
return name;
}
/**
* Gets the file filter this document was created with.
* @return the file filter this document was created with.
*/
public final DocumentNameMatcher getNameMatcher() {
return nameMatcher;
}
@Override
public int compareTo(final Document doc) {
return name.compareTo(doc.name);
}
@Override
public int hashCode() {
return name.hashCode();
}
@Override
public boolean equals(final Object obj) {
if (!(obj instanceof Document)) {
return false;
}
return name.equals(((Document) obj).name);
}
/**
* Reads the contents of this document.
* @return <code>Reader</code> not null
* @throws IOException if this document cannot be read.
*/
public Reader reader() throws IOException {
final int bytesForCharsetDetection = 256;
CharsetDetector charsetDetector = new CharsetDetector(bytesForCharsetDetection);
// RAT-494: Tika's CharsetDetector.getReader() may return null if the read can not be constructed due to I/O or encoding errors
Reader result = charsetDetector.getReader(TikaProcessor.markSupportedInputStream(inputStream()), getMetaData().getCharset().name());
if (result == null) {
throw new IOException(String.format("Can not read document `%s`", getName()));
}
return result;
}
/**
* Streams the document's contents.
* @return a non-null input stream of the document.
* @throws IOException when stream could not be opened.
*/
public abstract InputStream inputStream() throws IOException;
/**
* Gets data describing this resource.
* @return a non-null MetaData object.
*/
public final MetaData getMetaData() {
return metaData;
}
/**
* Checks if document is ignored or not.
* @return {@code true} if the document is of type {@code IGNORED}.
*/
public final boolean isIgnored() {
return Type.IGNORED == metaData.getDocumentType();
}
/**
* Representations suitable for logging.
* @return a <code>String</code> representation
* of this object.
*/
@Override
public String toString() {
return String.format("%s( name = %s metaData = %s )", this.getClass().getSimpleName(), getName().localized(), getMetaData());
}
/**
* Determines if this document is a directory type.
* @return {@code true} if this is a directory.
*/
public abstract boolean isDirectory();
/**
* Gets a sorted set of documents that are children of this document.
* @return A sorted set of child Documents. May be empty.
*/
public abstract SortedSet<Document> listChildren();
}