View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one   *
3    * or more contributor license agreements.  See the NOTICE file *
4    * distributed with this work for additional information        *
5    * regarding copyright ownership.  The ASF licenses this file   *
6    * to you under the Apache License, Version 2.0 (the            *
7    * "License"); you may not use this file except in compliance   *
8    * with the License.  You may obtain a copy of the License at   *
9    *                                                              *
10   *   http://www.apache.org/licenses/LICENSE-2.0                 *
11   *                                                              *
12   * Unless required by applicable law or agreed to in writing,   *
13   * software distributed under the License is distributed on an  *
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
15   * KIND, either express or implied.  See the License for the    *
16   * specific language governing permissions and limitations      *
17   * under the License.                                           *
18   */
19  package org.apache.rat.analysis.matchers;
20  
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang3.StringUtils;
import org.apache.rat.ConfigurationException;
import org.apache.rat.analysis.IHeaders;
import org.apache.rat.config.parameters.ComponentType;
import org.apache.rat.config.parameters.ConfigComponent;
29  
30  /**
31   * Matches a typical Copyright header line only based on a regex pattern which
32   * allows for one (starting) year or year range, and a configurable copyright
33   * owner. <br>
34   * <br>
35   * The matching is done case insensitive<br>
36   * <br>
37   * Example supported Copyright header lines, using copyright owner
38   * &quot;FooBar&quot;
39   * <ul>
40   * <li>Copyright 2010 FooBar.</li>
41   * <li>Copyright 2010-2012 FooBar.</li>
42   * <li>copyright 2012 foobar</li>
43   * </ul>
44   * <p>
45   * Note also that the copyright owner is appended to the regex pattern and so
46   * can support additional regex but also requires escaping where needed,<br>
47   * e.g. use &quot;FooBar \\(www\\.foobar\\.com\\)&quot; or &quot;FooBar
48   * \\Q(www.foobar.com)\\E&quot; to match &quot;FooBar (www.foobar.com)&quot;
49   * </p>
50   * <p>
51   * The matcher also accepts "(C)", "(c)", and "©" in place of (or in addition
52   * to) the "Copyright" or "copyright" keyword
53   * </p>
54   */
55  @ConfigComponent(type = ComponentType.MATCHER, name = "copyright", desc = "Matches copyright statements.")
56  public class CopyrightMatcher extends AbstractHeaderMatcher {
57  
58      private static final String COPYRIGHT_SYMBOL_DEFN = "\\([Cc]\\)|©|\\&[Cc][Oo][Pp][Yy]\\;";
59      private static final String COPYRIGHT_PATTERN_DEFN = "(\\b)?" + COPYRIGHT_SYMBOL_DEFN + "|Copyright\\b";
60      private static final Pattern COPYRIGHT_PATTERN = Pattern.compile(COPYRIGHT_PATTERN_DEFN);
61      private static final String ONE_PART = "\\s+((" + COPYRIGHT_SYMBOL_DEFN + ")\\s+)?%s";
62      private static final String TWO_PART = "\\s+((" + COPYRIGHT_SYMBOL_DEFN + ")\\s+)?%s,?\\s+%s";
63  
64      private final Pattern dateOwnerPattern;
65      private final Pattern ownerDatePattern;
66      @ConfigComponent(type = ComponentType.PARAMETER, desc = "The initial date of the copyright")
67      private final String start;
68      @ConfigComponent(type = ComponentType.PARAMETER, desc = "The last date the copyright was modifed")
69      private final String end;
70      @ConfigComponent(type = ComponentType.PARAMETER, desc = "The owner of the copyright")
71      private final String owner;
72  
73      /**
74       * Constructs the CopyrightMatcher with the specified start, stop and owner
75       * strings and a unique random id..
76       *
77       * @param start the start date for the copyright may be null.
78       * @param end the stop date for the copyright, may be null. May not be
79       * specified if start is not specified.
80       * @param owner the owner of the copyright. may be null.
81       */
82      public CopyrightMatcher(String start, String end, String owner) {
83          this(null, start, end, owner);
84      }
85  
86      private static void assertNumber(String label, String value) {
87          try {
88              if (StringUtils.isNotEmpty(value)) {
89                  Integer.parseInt(value);
90              }
91          } catch (NumberFormatException e) {
92              throw new ConfigurationException(String.format("'%s' must be numeric (value provided: '%s')", label, value));
93          }
94      }
95      /**
96       * Constructs the CopyrightMatcher with the specified start, stop and owner
97       * strings.
98       *
99       * @param id the id for the matcher.
100      * @param start the start date for the copyright may be null.
101      * @param end the end date for the copyright, may be null. May not be
102      * specified if start is not specified.
103      * @param owner the owner of the copyright. may be null.
104      */
105     public CopyrightMatcher(String id, String start, String end, String owner) {
106         super(id);
107         if (StringUtils.isBlank(start) && !StringUtils.isBlank(end)) {
108             throw new ConfigurationException("'end' may not be set if 'start' is not set.");
109         }
110         assertNumber("start", start);
111         assertNumber("end", end);
112         this.start = start;
113         this.end = end;
114         this.owner = owner;
115         String dateDefn = "";
116         if (StringUtils.isNotEmpty(start)) {
117             if (StringUtils.isNotEmpty(end)) {
118                 dateDefn = String.format("%s\\s*-\\s*%s", this.start, this.end);
119             } else {
120                 dateDefn = this.start;
121             }
122         }
123         if (StringUtils.isEmpty(owner)) {
124             // no owner
125             if (StringUtils.isEmpty(dateDefn)) {
126                 dateDefn = "[0-9]{4}";
127             }
128             dateOwnerPattern = Pattern.compile(String.format(ONE_PART, dateDefn));
129             ownerDatePattern = null;
130         } else {
131             if (StringUtils.isEmpty(dateDefn)) {
132                 // no date
133                 dateOwnerPattern = Pattern.compile(String.format(ONE_PART, owner));
134                 ownerDatePattern = null;
135             } else {
136                 dateOwnerPattern = Pattern.compile(String.format(TWO_PART, dateDefn, owner));
137                 ownerDatePattern = Pattern.compile(String.format(TWO_PART, owner, dateDefn));
138             }
139         }
140     }
141 
142     /**
143      * Gets the start date of the copyright.
144      * @return the start date of the copyright or {@code null} if not set
145      */
146     public String getStart() {
147         return start;
148     }
149 
150     /**
151      * Gets the end date of the copyright.
152      * @return the end date of the copyright or {@code null} if not set
153      */
154     public String getEnd() {
155         return end;
156     }
157 
158     /**
159      * Gets the owner of the copyright.
160      * @return the owner of the copyright or {@code null} if not set
161      */
162     public String getOwner() {
163         return owner;
164     }
165 
166     @Override
167     public boolean matches(IHeaders headers) {
168         String lowerLine = headers.raw().toLowerCase();
169         if (lowerLine.contains("copyright") || lowerLine.contains("(c)") || lowerLine.contains("©") ||
170                 lowerLine.contains("&copy;")) {
171             Matcher matcher = COPYRIGHT_PATTERN.matcher(headers.raw());
172             if (matcher.find()) {
173                 String buffer = headers.raw().substring(matcher.end());
174                 matcher = dateOwnerPattern.matcher(buffer);
175                 if (matcher.find() && matcher.start() == 0) {
176                     return true;
177                 }
178                 if (ownerDatePattern != null) {
179                     matcher = ownerDatePattern.matcher(buffer);
180                     return matcher.find() && matcher.start() == 0;
181                 }
182             }
183         }
184         return false;
185     }
186 }