View Javadoc
1   package org.apache.rat.analysis;
2   /*
3    * Licensed to the Apache Software Foundation (ASF) under one   *
4    * or more contributor license agreements.  See the NOTICE file *
5    * distributed with this work for additional information        *
6    * regarding copyright ownership.  The ASF licenses this file   *
7    * to you under the Apache License, Version 2.0 (the            *
8    * "License"); you may not use this file except in compliance   *
9    * with the License.  You may obtain a copy of the License at   *
10   *                                                              *
11   *   http://www.apache.org/licenses/LICENSE-2.0                 *
12   *                                                              *
13   * Unless required by applicable law or agreed to in writing,   *
14   * software distributed under the License is distributed on an  *
15   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
16   * KIND, either express or implied.  See the License for the    *
17   * specific language governing permissions and limitations      *
18   * under the License.                                           *
19   */
20  
21  import java.io.BufferedReader;
22  import java.io.IOException;
23  import java.io.Reader;
24  import java.util.Collection;
25  import java.util.Locale;
26  import java.util.Objects;
27  
28  import org.apache.rat.ConfigurationException;
29  import org.apache.rat.analysis.matchers.FullTextMatcher;
30  import org.apache.rat.api.Document;
31  import org.apache.rat.license.ILicense;
32  
33  /**
34   * Reads from a stream to check license.
35   * <p>
36   * <strong>Note</strong> that this class is not thread safe.
37   * </p>
38   */
39  public class HeaderCheckWorker {
40  
41      /*
42       * TODO revisit this class. It is only used in one place and can be moved inline
43       * as the DocumentHeaderAnalyser states. However, it may also be possible to
44       * make the entire set thread safe so that multiple files can be checked
45       * simultaneously.
46       */
47      /**
48       * The default number of header lines to read while looking for the license
49       * information.
50       */
51      public static final int DEFAULT_NUMBER_OF_RETAINED_HEADER_LINES = 50;
52      /** The number of header lines to retain for processing */
53      private final int numberOfRetainedHeaderLines;
54      /** The BufferedReader used to read the lines */
55      private final BufferedReader reader;
56      /** The licenses to check for match */
57      private final Collection<ILicense> licenses;
58      /** The document being processed */
59      private final Document document;
60      /**  The matcher for generated headers */
61      private final IHeaderMatcher generatedMatcher;
62  
63  
64      /**
65       * Read the input and perform the header check.
66       * <p>
67       * The number of lines indicates how many lines from the top of the file will be read for processing
68       *
69       * @param reader The reader for the document.
70       * @param numberOfLines the number of lines to read from the header.
71       * @return The IHeaders instance for the header.
72       * @throws IOException on input failure
73       */
74      public static IHeaders readHeader(final BufferedReader reader, final int numberOfLines) throws IOException {
75          final StringBuilder headers = new StringBuilder();
76          int headerLinesRead = 0;
77          String line;
78  
79          while (headerLinesRead < numberOfLines && (line = reader.readLine()) != null) {
80              headers.append(line).append(System.lineSeparator());
81          }
82          final String raw = headers.toString();
83          final String pruned = FullTextMatcher.prune(raw).toLowerCase(Locale.ENGLISH);
84          return new IHeaders() {
85              @Override
86              public String raw() {
87                  return raw;
88              }
89  
90              @Override
91              public String pruned() {
92                  return pruned;
93              }
94  
95              @Override
96              public String toString() {
97                  return this.getClass().getSimpleName();
98              }
99          };
100     }
101 
102     /**
103      * Convenience constructor wraps given <code>Reader</code> in a
104      * <code>BufferedReader</code>.
105      *
106      * @param generatedMatcher The matcher for generated headers.
107      * @param reader The reader on the document. Not null.
108      * @param licenses The licenses to check against. Not null.
109      * @param name The document that is being checked. Possibly null.
110      */
111     public HeaderCheckWorker(final IHeaderMatcher generatedMatcher, final Reader reader, final Collection<ILicense> licenses, final Document name) {
112         this(generatedMatcher, reader, DEFAULT_NUMBER_OF_RETAINED_HEADER_LINES, licenses, name);
113     }
114 
115     /**
116      * Constructs a check worker for the license against the specified document.
117      *
118      * @param generatedMatcher The matcher for generated headers.
119      * @param reader The reader on the document. Not null.
120      * @param numberOfRetainedHeaderLine the maximum number of lines to read to find
121      * the license information.
122      * @param licenses The licenses to check against. Not null.
123      * @param document The document that is being checked. Possibly null.
124      */
125     public HeaderCheckWorker(final IHeaderMatcher generatedMatcher, final Reader reader,
126                              final int numberOfRetainedHeaderLine, final Collection<ILicense> licenses,
127                              final Document document) {
128         Objects.requireNonNull(reader, "Reader may not be null");
129         Objects.requireNonNull(licenses, "Licenses may not be null");
130         if (numberOfRetainedHeaderLine < 0) {
131             throw new ConfigurationException("numberOfRetainedHeaderLine may not be less than zero");
132         }
133         this.reader = reader instanceof BufferedReader ? (BufferedReader) reader : new BufferedReader(reader);
134         this.numberOfRetainedHeaderLines = numberOfRetainedHeaderLine;
135         this.licenses = licenses;
136         this.document = document;
137         this.generatedMatcher = generatedMatcher;
138     }
139 
140     /**
141      * Read the input and perform the header check.
142      *
143      * @throws RatHeaderAnalysisException on IO exception.
144      */
145     public void read() throws RatHeaderAnalysisException {
146         try {
147             final IHeaders headers = readHeader(reader, numberOfRetainedHeaderLines);
148             if (generatedMatcher.matches(headers)) {
149                 document.getMetaData().setDocumentType(Document.Type.IGNORED);
150             } else {
151                 licenses.stream().filter(lic -> lic.matches(headers)).forEach(document.getMetaData()::reportOnLicense);
152                 if (!document.getMetaData().detectedLicense()) {
153                     document.getMetaData().reportOnLicense(UnknownLicense.INSTANCE);
154                 }
155             }
156         } catch (IOException e) {
157             throw new RatHeaderAnalysisException("Cannot read header for " + document, e);
158         } finally {
159             licenses.forEach(ILicense::reset);
160         }
161     }
162 }