HeaderCheckWorker.java
package org.apache.rat.analysis;
/*
* Licensed to the Apache Software Foundation (ASF) under one *
* or more contributor license agreements. See the NOTICE file *
* distributed with this work for additional information *
* regarding copyright ownership. The ASF licenses this file *
* to you under the Apache License, Version 2.0 (the *
* "License"); you may not use this file except in compliance *
* with the License. You may obtain a copy of the License at *
* *
* http://www.apache.org/licenses/LICENSE-2.0 *
* *
* Unless required by applicable law or agreed to in writing, *
* software distributed under the License is distributed on an *
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
* KIND, either express or implied. See the License for the *
* specific language governing permissions and limitations *
* under the License. *
*/
import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.util.Collection;
import java.util.Locale;
import java.util.Objects;
import org.apache.rat.ConfigurationException;
import org.apache.rat.analysis.matchers.FullTextMatcher;
import org.apache.rat.api.Document;
import org.apache.rat.license.ILicense;
/**
* Reads from a stream to check license.
* <p>
* <strong>Note</strong> that this class is not thread safe.
* </p>
*/
public class HeaderCheckWorker {

    /*
     * TODO revisit this class. It is only used in one place and can be moved inline
     * as the DocumentHeaderAnalyser states. However, it may also be possible to
     * make the entire set thread safe so that multiple files can be checked
     * simultaneously.
     */

    /**
     * The default number of header lines to read while looking for the license
     * information.
     */
    public static final int DEFAULT_NUMBER_OF_RETAINED_HEADER_LINES = 50;

    /** The number of header lines to retain for processing */
    private final int numberOfRetainedHeaderLines;
    /** The BufferedReader used to read the lines */
    private final BufferedReader reader;
    /** The licenses to check for match */
    private final Collection<ILicense> licenses;
    /** The document being processed */
    private final Document document;
    /** The matcher for generated headers */
    private final IHeaderMatcher generatedMatcher;

    /**
     * Reads the header lines from the input and packages them for matching.
     * <p>
     * At most {@code numberOfLines} lines are consumed from the top of the
     * reader; reading stops earlier if the end of the input is reached. Each
     * line that is read is terminated with {@link System#lineSeparator()} in the
     * raw text. The pruned text is the raw text passed through
     * {@link FullTextMatcher#prune(String)} and then lower-cased using
     * {@link Locale#ENGLISH}.
     * </p>
     *
     * @param reader The reader for the document.
     * @param numberOfLines the maximum number of lines to read from the header.
     * A value of zero or less reads nothing.
     * @return The IHeaders instance for the header.
     * @throws IOException on input failure
     */
    public static IHeaders readHeader(final BufferedReader reader, final int numberOfLines) throws IOException {
        final StringBuilder headers = new StringBuilder();
        int headerLinesRead = 0;
        String line;
        // The line count is tested first so that no line beyond the limit is consumed.
        while (headerLinesRead < numberOfLines && (line = reader.readLine()) != null) {
            headers.append(line).append(System.lineSeparator());
            // Without this increment the limit is never reached and the whole
            // document is read rather than just its header.
            headerLinesRead++;
        }
        final String raw = headers.toString();
        final String pruned = FullTextMatcher.prune(raw).toLowerCase(Locale.ENGLISH);
        return new IHeaders() {
            @Override
            public String raw() {
                return raw;
            }

            @Override
            public String pruned() {
                return pruned;
            }

            @Override
            public String toString() {
                // NOTE(review): Class.getSimpleName() yields an empty string for an
                // anonymous class such as this one - confirm that is intended.
                return this.getClass().getSimpleName();
            }
        };
    }

    /**
     * Convenience constructor that reads up to
     * {@link #DEFAULT_NUMBER_OF_RETAINED_HEADER_LINES} header lines. The given
     * <code>Reader</code> is wrapped in a <code>BufferedReader</code> unless it
     * already is one.
     *
     * @param generatedMatcher The matcher for generated headers.
     * @param reader The reader on the document. Not null.
     * @param licenses The licenses to check against. Not null.
     * @param name The document that is being checked. Not validated here, but
     * {@link #read()} dereferences it and will fail if it is null.
     */
    public HeaderCheckWorker(final IHeaderMatcher generatedMatcher, final Reader reader,
            final Collection<ILicense> licenses, final Document name) {
        this(generatedMatcher, reader, DEFAULT_NUMBER_OF_RETAINED_HEADER_LINES, licenses, name);
    }

    /**
     * Constructs a check worker for the license against the specified document.
     *
     * @param generatedMatcher The matcher for generated headers.
     * @param reader The reader on the document. Not null.
     * @param numberOfRetainedHeaderLine the maximum number of lines to read to find
     * the license information. May not be negative.
     * @param licenses The licenses to check against. Not null.
     * @param document The document that is being checked. Not validated here, but
     * {@link #read()} dereferences it and will fail if it is null.
     * @throws NullPointerException if {@code reader} or {@code licenses} is null.
     * @throws ConfigurationException if {@code numberOfRetainedHeaderLine} is
     * less than zero.
     */
    public HeaderCheckWorker(final IHeaderMatcher generatedMatcher, final Reader reader,
            final int numberOfRetainedHeaderLine, final Collection<ILicense> licenses,
            final Document document) {
        Objects.requireNonNull(reader, "Reader may not be null");
        Objects.requireNonNull(licenses, "Licenses may not be null");
        if (numberOfRetainedHeaderLine < 0) {
            throw new ConfigurationException("numberOfRetainedHeaderLine may not be less than zero");
        }
        // Reuse an existing BufferedReader rather than double-buffering it.
        this.reader = reader instanceof BufferedReader ? (BufferedReader) reader : new BufferedReader(reader);
        this.numberOfRetainedHeaderLines = numberOfRetainedHeaderLine;
        this.licenses = licenses;
        this.document = document;
        this.generatedMatcher = generatedMatcher;
    }

    /**
     * Read the input and perform the header check.
     * <p>
     * If the generated-header matcher matches, the document type is set to
     * {@code Document.Type.IGNORED}. Otherwise every license that matches the
     * header is reported on the document metadata, and
     * {@code UnknownLicense.INSTANCE} is reported when no license was detected.
     * All licenses are reset afterwards, whether or not the check succeeded.
     * </p>
     *
     * @throws RatHeaderAnalysisException on IO exception.
     */
    public void read() throws RatHeaderAnalysisException {
        try {
            final IHeaders headers = readHeader(reader, numberOfRetainedHeaderLines);
            if (generatedMatcher.matches(headers)) {
                document.getMetaData().setDocumentType(Document.Type.IGNORED);
            } else {
                licenses.stream().filter(lic -> lic.matches(headers)).forEach(document.getMetaData()::reportOnLicense);
                if (!document.getMetaData().detectedLicense()) {
                    document.getMetaData().reportOnLicense(UnknownLicense.INSTANCE);
                }
            }
        } catch (IOException e) {
            throw new RatHeaderAnalysisException("Cannot read header for " + document, e);
        } finally {
            // Reset on every exit path so the licenses can be used for the next document.
            licenses.forEach(ILicense::reset);
        }
    }
}