1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one *
3 * or more contributor license agreements. See the NOTICE file *
4 * distributed with this work for additional information *
5 * regarding copyright ownership. The ASF licenses this file *
6 * to you under the Apache License, Version 2.0 (the *
7 * "License"); you may not use this file except in compliance *
8 * with the License. You may obtain a copy of the License at *
9 * *
10 * http://www.apache.org/licenses/LICENSE-2.0 *
11 * *
12 * Unless required by applicable law or agreed to in writing, *
13 * software distributed under the License is distributed on an *
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
15 * KIND, either express or implied. See the License for the *
16 * specific language governing permissions and limitations *
17 * under the License. *
18 */
19 package org.apache.rat.analysis.matchers;
20
21 import java.util.Locale;
22 import java.util.Objects;
23
24 /**
25 * Accumulates all letters and numbers contained inside the header and compares
26 * it to the full text of a given license (after reducing it to letters and
27 * numbers as well).
28 *
29 * <p>
30 * The text comparison is case insensitive but assumes only characters in the
31 * US-ASCII charset are being matched.
32 * </p>
33 */
34 public class FullTextMatcher extends AbstractSimpleMatcher {
35
36 // Number of match characters assumed to be present on first line
37 private static final int DEFAULT_INITIAL_LINE_LENGTH = 20;
38
39 private final String fullText;
40
41 private final String firstLine;
42
43 private boolean seenFirstLine;
44
45 private final StringBuilder buffer = new StringBuilder();
46
47 /**
48 * Constructs the full text matcher with a unique random id and the specified text to match.
49 * @param fullText the text to match
50 */
51 public FullTextMatcher(String fullText) {
52 this(null, fullText);
53 }
54
55 /**
56 * Constructs the full text matcher for the specified text.
57 * @param id the id for the matcher
58 * @param fullText the text to match
59 */
60 public FullTextMatcher(String id, String fullText) {
61 super(id);
62 Objects.requireNonNull(fullText, "fullText may not be null");
63 int offset = fullText.indexOf('\n');
64 if (offset == -1) {
65 offset = Math.min(DEFAULT_INITIAL_LINE_LENGTH, fullText.length());
66 }
67 firstLine = prune(fullText.substring(0, offset)).toLowerCase(Locale.ENGLISH);
68 this.fullText = prune(fullText).toLowerCase(Locale.ENGLISH);
69 buffer.setLength(0);
70 seenFirstLine = false;
71 }
72
73 /**
74 * Removes everything except letter or digit from text.
75 *
76 * @param text The text to remove extra chars from.
77 * @return the pruned text.
78 */
79 public static String prune(String text) {
80 final int length = text.length();
81 final StringBuilder buffer = new StringBuilder(length);
82 for (int i = 0; i < length; i++) {
83 char at = text.charAt(i);
84 if (Character.isLetterOrDigit(at)) {
85 buffer.append(at);
86 }
87 }
88 return buffer.toString();
89 }
90
91 @Override
92 public boolean doMatch(String line) {
93 final String inputToMatch = prune(line).toLowerCase(Locale.ENGLISH);
94 if (seenFirstLine) { // Accumulate more input
95 buffer.append(inputToMatch);
96 } else {
97 int offset = inputToMatch.indexOf(firstLine);
98 if (offset >= 0) {
99 // we have a match, save the text starting with the match
100 buffer.append(inputToMatch.substring(offset));
101 seenFirstLine = true;
102 // Drop out to check whether full text is matched
103 } else {
104 // we assume that the first line must appear in a single line
105 return false; // no more to do here
106 }
107 }
108
109 if (buffer.length() >= fullText.length()) { // we have enough data to match
110 if (buffer.toString().contains(fullText)) {
111 return true;
112 }
113 // buffer contains first line but does not contain full text
114 // It's possible that the buffer contains the first line again
115 int offset = buffer.substring(1).indexOf(firstLine);
116 if (offset >= 0) { // first line found again
117 buffer.delete(0, offset); // reset buffer to the new start
118 } else { // buffer does not even contain first line, so cannot be used to match full text
119 reset();
120 }
121 }
122 return false;
123 }
124
125 @Override
126 public void reset() {
127 super.reset();
128 buffer.setLength(0);
129 seenFirstLine = false;
130 }
131
132 }