1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one *
3 * or more contributor license agreements. See the NOTICE file *
4 * distributed with this work for additional information *
5 * regarding copyright ownership. The ASF licenses this file *
6 * to you under the Apache License, Version 2.0 (the *
7 * "License"); you may not use this file except in compliance *
8 * with the License. You may obtain a copy of the License at *
9 * *
10 * http://www.apache.org/licenses/LICENSE-2.0 *
11 * *
12 * Unless required by applicable law or agreed to in writing, *
13 * software distributed under the License is distributed on an *
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
15 * KIND, either express or implied. See the License for the *
16 * specific language governing permissions and limitations *
17 * under the License. *
18 */
19 package org.apache.rat.analysis.matchers;
20
21 import java.util.HashMap;
22 import java.util.HashSet;
23 import java.util.Map;
24 import java.util.Objects;
25 import java.util.Set;
26 import java.util.regex.Matcher;
27 import java.util.regex.Pattern;
28
29 import org.apache.commons.lang3.StringUtils;
30 import org.apache.rat.ConfigurationException;
31 import org.apache.rat.analysis.IHeaders;
32 import org.apache.rat.config.parameters.ComponentType;
33 import org.apache.rat.config.parameters.ConfigComponent;
34
35 /**
36 * Defines a factory to produce matchers for an SPDX tag. SPDX tag is of the
37 * format {@code SPDX-License-Identifier: short-name} where {@code short-name}
38 * matches the regex pattern [A-Za-z0-9\.\-]+
39 * <p>
40 * SPDX identifiers are specified by the Software Package Data Exchange(R) also
41 * known as SPDX(R) project from the Linux foundation.
42 * </p>
43 *
44 * @see <a href="https://spdx.dev/ids/">List of Ids at spdx.dev</a>
45 */
46 public final class SPDXMatcherFactory {
47
48 /**
49 * The collection of all matchers produced by this factory.
50 */
51 private static final Map<String, SPDXMatcherFactory.Match> MATCHER_MAP = new HashMap<>();
52
53 /**
54 * The instance of this factory.
55 */
56 public static final SPDXMatcherFactory INSTANCE = new SPDXMatcherFactory();
57
58 /**
59 * The text for the group selector.
60 */
61 static final String LICENSE_IDENTIFIER = "SPDX-License-Identifier:";
62
63 /**
64 * The regular expression to locate the SPDX license identifier in the text
65 * stream.
66 */
67 private static final Pattern GROUP_SELECTOR = Pattern.compile(".*" + LICENSE_IDENTIFIER + "\\s([A-Za-z0-9\\.\\-]+)");
68
69 /**
70 * The set of SPDX Ids that matched the last text.
71 */
72 private final Set<String> lastMatch;
73
74 /**
75 * Flag to indicate this document has been checked for SPDX tags.
76 */
77 private boolean checked;
78
79 /**
80 * Constructor.
81 */
82 private SPDXMatcherFactory() {
83 lastMatch = new HashSet<>();
84 }
85
86 /**
87 * Reset the matching for the next document.
88 */
89 private void reset() {
90 lastMatch.clear();
91 checked = false;
92 }
93
94 /**
95 * Creates the SPDX matcher.
96 *
97 * @param spdxId the SPDX name to match.
98 * @return a SPDX matcher.
99 */
100 public Match create(final String spdxId) {
101 if (StringUtils.isBlank(spdxId)) {
102 throw new ConfigurationException("'SPDX' type matcher requires a name");
103 }
104 Match matcher = MATCHER_MAP.get(spdxId);
105 if (matcher == null) {
106 matcher = new Match(spdxId);
107 MATCHER_MAP.put(spdxId, matcher);
108 }
109 return matcher;
110 }
111
112 /**
113 * Each matcher calls this method to present the documentText it is working on.
114 *
115 * @param documentText The documentText the caller is looking at.
116 * @param caller the Match that is calling this method.
117 * @return true if the caller matches the text.
118 */
119 private boolean check(final String documentText, final Match caller) {
120 /*
121 If the documentText has not been seen yet see if we can extract the SPDX id from the documentText.
122 If so then see for each match extract and add the name to lastMatch.
123 */
124 if (!checked) {
125 checked = true;
126 if (documentText.contains(LICENSE_IDENTIFIER)) {
127 Matcher matcher = GROUP_SELECTOR.matcher(documentText);
128 while (matcher.find()) {
129 lastMatch.add(matcher.group(1));
130 }
131 }
132 }
133 // see if the caller is in the lastMatch.
134 return lastMatch.contains(caller.spdxId);
135 }
136
137 /**
138 * Matches an SPDX identifier.
139 */
140 @ConfigComponent(type = ComponentType.MATCHER, name = "spdx",
141 desc = "A matcher that matches SPDX tags. SPDX tags have the form: \"SPDX-License-Identifier: short-name\", " +
142 "where short-name matches the regex pattern \"[A-Za-z0-9\\.-]+\". " +
143 "The SPDX matcher takes the short name as an argument.")
144 public class Match extends AbstractHeaderMatcher {
145 /**
146 * The SPDX identifier.
147 */
148 @ConfigComponent(type = ComponentType.PARAMETER, name = "name", desc = "The SPDX identifier string")
149 private final String spdxId;
150
151 /**
152 * Gets the name of this matcher. Same as the SPDX identifier.
153 * @return name of this matcher, that equals the SPDX identifier.
154 */
155 public String getName() {
156 return spdxId;
157 }
158
159 /**
160 * Constructor.
161 *
162 * @param spdxId A regular expression that matches the @{short-name} of the SPDX
163 * Identifier.
164 */
165 Match(final String spdxId) {
166 super("SPDX:" + spdxId);
167 Objects.requireNonNull(spdxId, "SpdxId is required");
168 this.spdxId = spdxId;
169 }
170
171 @Override
172 public boolean matches(final IHeaders headers) {
173 return SPDXMatcherFactory.this.check(headers.raw(), this);
174 }
175
176 @Override
177 public void reset() {
178 super.reset();
179 SPDXMatcherFactory.this.reset();
180 }
181 }
182 }