View Javadoc
1   package org.apache.rat.analysis;
2   /*
3    * Licensed to the Apache Software Foundation (ASF) under one   *
4    * or more contributor license agreements.  See the NOTICE file *
5    * distributed with this work for additional information        *
6    * regarding copyright ownership.  The ASF licenses this file   *
7    * to you under the Apache License, Version 2.0 (the            *
8    * "License"); you may not use this file except in compliance   *
9    * with the License.  You may obtain a copy of the License at   *
10   *                                                              *
11   *   http://www.apache.org/licenses/LICENSE-2.0                 *
12   *                                                              *
13   * Unless required by applicable law or agreed to in writing,   *
14   * software distributed under the License is distributed on an  *
15   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
16   * KIND, either express or implied.  See the License for the    *
17   * specific language governing permissions and limitations      *
18   * under the License.                                           *
19   */
20  
21  import java.io.BufferedReader;
22  import java.io.IOException;
23  import java.io.Reader;
24  import java.util.Collection;
25  import java.util.Locale;
26  import java.util.Objects;
27  
28  import org.apache.rat.ConfigurationException;
29  import org.apache.rat.analysis.matchers.FullTextMatcher;
30  import org.apache.rat.api.Document;
31  import org.apache.rat.license.ILicense;
32  import org.apache.rat.license.ILicenseFamily;
33  
34  /**
35   * Reads from a stream to check license.
36   * <p>
37   * <strong>Note</strong> that this class is not thread safe.
38   * </p>
39   */
40  public class HeaderCheckWorker {
41  
42      /*
43       * TODO revisit this class. It is only used in one place and can be moved inline
44       * as the DocumentHeaderAnalyser states. However, it may also be possible to
45       * make the entire set threadsafe so that multiple files can be checked
46       * simultaneously.
47       */
48      /**
49       * The default number of header lines to read while looking for the license
50       * information.
51       */
52      public static final int DEFAULT_NUMBER_OF_RETAINED_HEADER_LINES = 50;
53  
54      private final int numberOfRetainedHeaderLines;
55      private final BufferedReader reader;
56      private final Collection<ILicense> licenses;
57      private final Document document;
58  
59      /**
60       * Read the input and perform the header check.
61       *
62       * The number of lines indicates how many lines from the top of the file will be read for processing
63       * 
64       * @param reader The reader for the document.
65       * @param numberOfLines the number of lines to read from the header.
66       * @return The IHeaders instance for the header.  
67       * @throws IOException on input failure
68       */
69      public static IHeaders readHeader(BufferedReader reader, int numberOfLines) throws IOException {
70          final StringBuilder headers = new StringBuilder();
71          int headerLinesRead = 0;
72          String line;
73  
74          while (headerLinesRead < numberOfLines && (line = reader.readLine()) != null) {
75              headers.append(line).append(System.lineSeparator());
76          }
77          final String raw = headers.toString();
78          final String pruned = FullTextMatcher.prune(raw).toLowerCase(Locale.ENGLISH);
79          return new IHeaders() {
80              @Override
81              public String raw() {
82                  return raw;
83              }
84  
85              @Override
86              public String pruned() {
87                  return pruned;
88              }
89  
90              @Override
91              public String toString() {
92                  return this.getClass().getSimpleName();
93              }
94          };
95      }
96  
97      /**
98       * Convenience constructor wraps given <code>Reader</code> in a
99       * <code>BufferedReader</code>.
100      *
101      * @param reader The reader on the document. not null.
102      * @param licenses The licenses to check against. not null.
103      * @param name The document that is being checked. possibly null
104      */
105     public HeaderCheckWorker(Reader reader, final Collection<ILicense> licenses, final Document name) {
106         this(reader, DEFAULT_NUMBER_OF_RETAINED_HEADER_LINES, licenses, name);
107     }
108 
109     /**
110      * Constructs a check worker for the license against the specified document.
111      *
112      * @param reader The reader on the document. not null.
113      * @param numberOfRetainedHeaderLine the maximum number of lines to read to find
114      * the license information.
115      * @param licenses The licenses to check against. not null.
116      * @param document The document that is being checked. possibly null
117      */
118     public HeaderCheckWorker(Reader reader, int numberOfRetainedHeaderLine, final Collection<ILicense> licenses,
119             final Document document) {
120         Objects.requireNonNull(reader, "Reader may not be null");
121         Objects.requireNonNull(licenses, "Licenses may not be null");
122         if (numberOfRetainedHeaderLine < 0) {
123             throw new ConfigurationException("numberOfRetainedHeaderLine may not be less than zero");
124         }
125         this.reader = reader instanceof BufferedReader ? (BufferedReader) reader : new BufferedReader(reader);
126         this.numberOfRetainedHeaderLines = numberOfRetainedHeaderLine;
127         this.licenses = licenses;
128         this.document = document;
129     }
130 
131     /**
132      * Read the input and perform the header check.
133      *
134      * @throws RatHeaderAnalysisException on IO Exception.
135      */
136     public void read() throws RatHeaderAnalysisException {
137         try {
138             final IHeaders headers = readHeader(reader, numberOfRetainedHeaderLines);
139             licenses.stream().filter(lic -> lic.matches(headers)).forEach(document.getMetaData()::reportOnLicense);
140             if (document.getMetaData().detectedLicense()) {
141                 if (document.getMetaData().licenses().anyMatch(
142                         lic -> ILicenseFamily.GENTERATED_CATEGORY.equals(lic.getLicenseFamily().getFamilyCategory()))) {
143                     document.getMetaData().setDocumentType(Document.Type.GENERATED);
144                 }
145             } else {
146                 document.getMetaData().reportOnLicense(UnknownLicense.INSTANCE);
147                 document.getMetaData().setSampleHeader(headers.raw());
148             }
149         } catch (IOException e) {
150             throw new RatHeaderAnalysisException("Cannot read header for " + document, e);
151         } finally {
152             licenses.forEach(ILicense::reset);
153         }
154     }
155 }