1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one   *
3    * or more contributor license agreements.  See the NOTICE file *
4    * distributed with this work for additional information        *
5    * regarding copyright ownership.  The ASF licenses this file   *
6    * to you under the Apache License, Version 2.0 (the            *
7    * "License"); you may not use this file except in compliance   *
8    * with the License.  You may obtain a copy of the License at   *
9    *                                                              *
10   *   http://www.apache.org/licenses/LICENSE-2.0                 *
11   *                                                              *
12   * Unless required by applicable law or agreed to in writing,   *
13   * software distributed under the License is distributed on an  *
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
15   * KIND, either express or implied.  See the License for the    *
16   * specific language governing permissions and limitations      *
17   * under the License.                                           *
18   */
19  package org.apache.rat.analysis.matchers;
20  
21  import java.util.HashMap;
22  import java.util.HashSet;
23  import java.util.Map;
24  import java.util.Objects;
25  import java.util.Set;
26  import java.util.regex.Matcher;
27  import java.util.regex.Pattern;
28  
29  import org.apache.commons.lang3.StringUtils;
30  import org.apache.rat.ConfigurationException;
31  import org.apache.rat.analysis.IHeaders;
32  import org.apache.rat.config.parameters.ComponentType;
33  import org.apache.rat.config.parameters.ConfigComponent;
34  
35  /**
36   * Defines a factory to produce matchers for an SPDX tag. SPDX tag is of the
37   * format {@code SPDX-License-Identifier: short-name} where {@code short-name}
38   * matches the regex pattern [A-Za-z0-9\.\-]+
39   * <p>
40   * SPDX identifiers are specified by the Software Package Data Exchange(R) also
41   * known as SPDX(R) project from the Linux foundation.
42   * </p>
43   *
44   * @see <a href="https://spdx.dev/ids/">List of Ids at spdx.dev</a>
45   */
46  public final class SPDXMatcherFactory {
47  
48      /**
49       * The collection of all matchers produced by this factory.
50       */
51      private static final Map<String, SPDXMatcherFactory.Match> MATCHER_MAP = new HashMap<>();
52  
53      /**
54       * The instance of this factory.
55       */
56      public static final SPDXMatcherFactory INSTANCE = new SPDXMatcherFactory();
57  
58      /**
59       * The text for the group selector.
60       */
61      static final String LICENSE_IDENTIFIER = "SPDX-License-Identifier:";
62  
63      /**
64       * The regular expression to locate the SPDX license identifier in the text
65       * stream.
66       */
67      private static final Pattern GROUP_SELECTOR = Pattern.compile(".*" + LICENSE_IDENTIFIER + "\\s([A-Za-z0-9\\.\\-]+)");
68  
69      /**
70       * The set of SPDX Ids that matched the last text.
71       */
72      private final Set<String> lastMatch;
73  
74      /**
75       * Flag to indicate this document has been checked for SPDX tags.
76       */
77      private boolean checked;
78  
79      /**
80       * Constructor.
81       */
82      private SPDXMatcherFactory() {
83          lastMatch = new HashSet<>();
84      }
85  
86      /**
87       * Reset the matching for the next document.
88       */
89      private void reset() {
90          lastMatch.clear();
91          checked = false;
92      }
93  
94      /**
95       * Creates the SPDX matcher.
96       *
97       * @param spdxId the SPDX name to match.
98       * @return a SPDX matcher.
99       */
100     public Match create(final String spdxId) {
101         if (StringUtils.isBlank(spdxId)) {
102             throw new ConfigurationException("'SPDX' type matcher requires a name");
103         }
104         Match matcher = MATCHER_MAP.get(spdxId);
105         if (matcher == null) {
106             matcher = new Match(spdxId);
107             MATCHER_MAP.put(spdxId, matcher);
108         }
109         return matcher;
110     }
111 
112     /**
113      * Each matcher calls this method to present the documentText it is working on.
114      *
115      * @param documentText The documentText the caller is looking at.
116      * @param caller the Match that is calling this method.
117      * @return true if the caller matches the text.
118      */
119     private boolean check(final String documentText, final Match caller) {
120         /*
121         If the documentText has not been seen yet see if we can extract the SPDX id from the documentText.
122         If so then see for each match extract and add the name to lastMatch.
123         */
124         if (!checked) {
125             checked = true;
126             if (documentText.contains(LICENSE_IDENTIFIER)) {
127                 Matcher matcher = GROUP_SELECTOR.matcher(documentText);
128                 while (matcher.find()) {
129                     lastMatch.add(matcher.group(1));
130                 }
131             }
132         }
133         // see if the caller is in the lastMatch.
134         return lastMatch.contains(caller.spdxId);
135     }
136 
137     /**
138      * Matches an SPDX identifier.
139      */
140     @ConfigComponent(type = ComponentType.MATCHER, name = "spdx",
141             desc = "A matcher that matches SPDX tags. SPDX tags have the form: \"SPDX-License-Identifier: short-name\", " +
142                     "where short-name matches the regex pattern \"[A-Za-z0-9\\.-]+\". " +
143                     "The SPDX matcher takes the short name as an argument.")
144     public class Match extends AbstractHeaderMatcher {
145         /**
146          * The SPDX identifier.
147          */
148         @ConfigComponent(type = ComponentType.PARAMETER, name = "name", desc = "The SPDX identifier string")
149         private final String spdxId;
150 
151         /**
152          * Gets the name of this matcher. Same as the SPDX identifier.
153          * @return name of this matcher, that equals the SPDX identifier.
154          */
155         public String getName() {
156             return spdxId;
157         }
158 
159         /**
160          * Constructor.
161          *
162          * @param spdxId A regular expression that matches the @{short-name} of the SPDX
163          * Identifier.
164          */
165         Match(final String spdxId) {
166             super("SPDX:" + spdxId);
167             Objects.requireNonNull(spdxId, "SpdxId is required");
168             this.spdxId = spdxId;
169         }
170 
171         @Override
172         public boolean matches(final IHeaders headers) {
173             return SPDXMatcherFactory.this.check(headers.raw(), this);
174         }
175 
176         @Override
177         public void reset() {
178             super.reset();
179             SPDXMatcherFactory.this.reset();
180         }
181     }
182 }