View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one   *
3    * or more contributor license agreements.  See the NOTICE file *
4    * distributed with this work for additional information        *
5    * regarding copyright ownership.  The ASF licenses this file   *
6    * to you under the Apache License, Version 2.0 (the            *
7    * "License"); you may not use this file except in compliance   *
8    * with the License.  You may obtain a copy of the License at   *
9    *                                                              *
10   *   http://www.apache.org/licenses/LICENSE-2.0                 *
11   *                                                              *
12   * Unless required by applicable law or agreed to in writing,   *
13   * software distributed under the License is distributed on an  *
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
15   * KIND, either express or implied.  See the License for the    *
16   * specific language governing permissions and limitations      *
17   * under the License.                                           *
18   */
19  package org.apache.rat.analysis.matchers;
20  
21  import java.util.Locale;
22  import java.util.Objects;
23  
24  /**
25   * Accumulates all letters and numbers contained inside the header and compares
26   * it to the full text of a given license (after reducing it to letters and
27   * numbers as well).
28   *
29   * <p>
30   * The text comparison is case insensitive but assumes only characters in the
31   * US-ASCII charset are being matched.
32   * </p>
33   */
34  public class FullTextMatcher extends AbstractSimpleMatcher {
35  
36      // Number of match characters assumed to be present on first line
37      private static final int DEFAULT_INITIAL_LINE_LENGTH = 20;
38  
39      private final String fullText;
40  
41      private final String firstLine;
42  
43      private boolean seenFirstLine;
44  
45      private final StringBuilder buffer = new StringBuilder();
46  
47      /**
48       * Constructs the full text matcher with a unique random id and the specified text to match.
49       * @param fullText the text to match
50       */
51      public FullTextMatcher(String fullText) {
52          this(null, fullText);
53      }
54  
55      /**
56       * Constructs the full text matcher for the specified text.
57       * @param id the id for the matcher
58       * @param fullText the text to match
59       */
60      public FullTextMatcher(String id, String fullText) {
61          super(id);
62          Objects.requireNonNull(fullText, "fullText may not be null");
63          int offset = fullText.indexOf('\n');
64          if (offset == -1) {
65              offset = Math.min(DEFAULT_INITIAL_LINE_LENGTH, fullText.length());
66          }
67          firstLine = prune(fullText.substring(0, offset)).toLowerCase(Locale.ENGLISH);
68          this.fullText = prune(fullText).toLowerCase(Locale.ENGLISH);
69          buffer.setLength(0);
70          seenFirstLine = false;
71      }
72  
73      /**
74       * Removes everything except letter or digit from text.
75       * 
76       * @param text The text to remove extra chars from.
77       * @return the pruned text.
78       */
79      public static String prune(String text) {
80          final int length = text.length();
81          final StringBuilder buffer = new StringBuilder(length);
82          for (int i = 0; i < length; i++) {
83              char at = text.charAt(i);
84              if (Character.isLetterOrDigit(at)) {
85                  buffer.append(at);
86              }
87          }
88          return buffer.toString();
89      }
90  
91      @Override
92      public boolean doMatch(String line) {
93          final String inputToMatch = prune(line).toLowerCase(Locale.ENGLISH);
94          if (seenFirstLine) { // Accumulate more input
95              buffer.append(inputToMatch);
96          } else {
97              int offset = inputToMatch.indexOf(firstLine);
98              if (offset >= 0) {
99                  // we have a match, save the text starting with the match
100                 buffer.append(inputToMatch.substring(offset));
101                 seenFirstLine = true;
102                 // Drop out to check whether full text is matched
103             } else {
104                 // we assume that the first line must appear in a single line
105                 return false; // no more to do here
106             }
107         }
108 
109         if (buffer.length() >= fullText.length()) { // we have enough data to match
110             if (buffer.toString().contains(fullText)) {
111                 return true;
112             }
113             // buffer contains first line but does not contain full text
114             // It's possible that the buffer contains the first line again
115             int offset = buffer.substring(1).indexOf(firstLine);
116             if (offset >= 0) { // first line found again
117                 buffer.delete(0, offset); // reset buffer to the new start
118             } else { // buffer does not even contain first line, so cannot be used to match full text
119                 reset();
120             }
121         }
122         return false;
123     }
124 
125     @Override
126     public void reset() {
127         super.reset();
128         buffer.setLength(0);
129         seenFirstLine = false;
130     }
131 
132 }