View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one   *
3    * or more contributor license agreements.  See the NOTICE file *
4    * distributed with this work for additional information        *
5    * regarding copyright ownership.  The ASF licenses this file   *
6    * to you under the Apache License, Version 2.0 (the            *
7    * "License"); you may not use this file except in compliance   *
8    * with the License.  You may obtain a copy of the License at   *
9    *                                                              *
10   *   http://www.apache.org/licenses/LICENSE-2.0                 *
11   *                                                              *
12   * Unless required by applicable law or agreed to in writing,   *
13   * software distributed under the License is distributed on an  *
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
15   * KIND, either express or implied.  See the License for the    *
16   * specific language governing permissions and limitations      *
17   * under the License.                                           *
18   */
19  package org.apache.rat.analysis.matchers;
20  
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang3.StringUtils;
import org.apache.rat.ConfigurationException;
import org.apache.rat.analysis.IHeaders;
import org.apache.rat.config.parameters.ComponentType;
import org.apache.rat.config.parameters.ConfigComponent;
29  
30  /**
31   * Matches a typical Copyright header line only based on a regex pattern which
32   * allows for one (starting) year or year range, and a configurable copyright
33   * owner. <br>
34   * <br>
35   * The matching is done case insensitive<br>
36   * <br>
37   * Example supported Copyright header lines, using copyright owner
38   * &quot;FooBar&quot;
39   * <ul>
40   * <li>Copyright 2010 FooBar.</li>
41   * <li>Copyright 2010-2012 FooBar.</li>
42   * <li>copyright 2012 foobar</li>
43   * </ul>
44   * <p>
45   * Note also that the copyright owner is appended to the regex pattern and so
46   * can support additional regex but also requires escaping where needed,<br>
47   * e.g. use &quot;FooBar \\(www\\.foobar\\.com\\)&quot; or &quot;FooBar
48   * \\Q(www.foobar.com)\\E&quot; to match &quot;FooBar (www.foobar.com)&quot;
49   * </p>
50   * <p>
51   * The matcher also accepts "(C)", "(c)", and "©" in place of (or in addition
52   * to) the "Copyright" or "copyright" keyword
53   * </p>
54   */
55  @ConfigComponent(type = ComponentType.MATCHER, name = "copyright", desc = "Matches copyright statements.")
56  public class CopyrightMatcher extends AbstractHeaderMatcher {
57      /** String to build a pattern to match the various recognized copyright symbols */
58      private static final String COPYRIGHT_SYMBOL_DEFN = "\\([Cc]\\)|©|\\&[Cc][Oo][Pp][Yy]\\;";
59      /** String to build a pattern to match symbols or word "copyright" */
60      private static final String COPYRIGHT_PATTERN_DEFN = "(\\b)?" + COPYRIGHT_SYMBOL_DEFN + "|Copyright\\b";
61      /** The compiled pattern from {@link #COPYRIGHT_PATTERN_DEFN} */
62      private static final Pattern COPYRIGHT_PATTERN = Pattern.compile(COPYRIGHT_PATTERN_DEFN);
63      /** The string to build a Pattern to match a copyright with a single part (date or name) */
64      private static final String ONE_PART = "\\s+((" + COPYRIGHT_SYMBOL_DEFN + ")\\s+)?%s";
65      /** The string to build a Pattern to match a copyright iwth both name and date */
66      private static final String TWO_PART = "\\s+((" + COPYRIGHT_SYMBOL_DEFN + ")\\s+)?%s,?\\s+%s";
67      /** Format string to build a pattern to match two dates */
68      private static final String DOUBLE_DATE_FMT = "%s\\s*-\\s*%s";
69      /** String to build pattern to match an arbitrary date (year) */
70      private static final String ARBITRARY_DATE = "[0-9]{4}";
71      /** The built Pattern for matching "Copyright date owner" */
72      private final Pattern dateOwnerPattern;
73      /** The built pattern for matching "Copyright owner date" */
74      private final Pattern ownerDatePattern;
75      /** The start date of the copyright. May be null. */
76      @ConfigComponent(type = ComponentType.PARAMETER, desc = "The initial date of the copyright")
77      private final String start;
78      /** The end date of the copyright. May be null. */
79      @ConfigComponent(type = ComponentType.PARAMETER, desc = "The last date the copyright was modifed")
80      private final String end;
81      /** The owner of the copyright. May be null */
82      @ConfigComponent(type = ComponentType.PARAMETER, desc = "The owner of the copyright")
83      private final String owner;
84  
85      /**
86       * Constructs the CopyrightMatcher with the specified start, stop and owner
87       * strings and a unique random id.
88       *
89       * @param start the start date for the copyright, may be null.
90       * @param end the stop date for the copyright, may be null. May not be
91       * specified if start is not specified.
92       * @param owner the owner of the copyright, may be null.
93       */
94      public CopyrightMatcher(final String start, final String end, final String owner) {
95          this(null, start, end, owner);
96      }
97  
98      private static void assertNumber(final String label, final String value) {
99          try {
100             if (StringUtils.isNotEmpty(value)) {
101                 Integer.parseInt(value);
102             }
103         } catch (NumberFormatException e) {
104             throw new ConfigurationException(String.format("'%s' must be numeric (value provided: '%s')", label, value));
105         }
106     }
107     /**
108      * Constructs the CopyrightMatcher with the specified start, stop and owner
109      * strings.
110      *
111      * @param id the id for the matcher.
112      * @param start the start date for the copyright, may be null.
113      * @param end the end date for the copyright, may be null. May not be
114      * specified if start is not specified.
115      * @param owner the owner of the copyright. may be null.
116      */
117     public CopyrightMatcher(final String id, final String start, final String end, final String owner) {
118         super(id);
119         if (StringUtils.isBlank(start) && !StringUtils.isBlank(end)) {
120             throw new ConfigurationException("'end' may not be set if 'start' is not set.");
121         }
122         assertNumber("start", start);
123         assertNumber("end", end);
124         this.start = start;
125         this.end = end;
126         this.owner = owner;
127         String dateDefn = "";
128         if (StringUtils.isNotEmpty(start)) {
129             if (StringUtils.isNotEmpty(end)) {
130                 dateDefn = String.format(DOUBLE_DATE_FMT, this.start, this.end);
131             } else {
132                 dateDefn = this.start;
133             }
134         }
135         if (StringUtils.isEmpty(owner)) {
136             // no owner
137             if (StringUtils.isEmpty(dateDefn)) {
138                 dateDefn = ARBITRARY_DATE;
139             }
140             dateOwnerPattern = Pattern.compile(String.format(ONE_PART, dateDefn));
141             ownerDatePattern = null;
142         } else {
143             if (StringUtils.isEmpty(dateDefn)) {
144                 dateDefn = String.format(DOUBLE_DATE_FMT, "(((" + ARBITRARY_DATE, ")?" + ARBITRARY_DATE + "))?");
145                 dateOwnerPattern = Pattern.compile(String.format(TWO_PART, dateDefn, owner));
146                 ownerDatePattern = Pattern.compile(String.format(ONE_PART, owner));
147             } else {
148                 dateOwnerPattern = Pattern.compile(String.format(TWO_PART, dateDefn, owner));
149                 ownerDatePattern = Pattern.compile(String.format(TWO_PART, owner, dateDefn));
150             }
151         }
152     }
153 
154     /**
155      * Gets the start date of the copyright.
156      * @return the start date of the copyright or {@code null} if not set
157      */
158     public String getStart() {
159         return start;
160     }
161 
162     /**
163      * Gets the end date of the copyright.
164      * @return the end date of the copyright or {@code null} if not set
165      */
166     public String getEnd() {
167         return end;
168     }
169 
170     /**
171      * Gets the owner of the copyright.
172      * @return the owner of the copyright or {@code null} if not set
173      */
174     public String getOwner() {
175         return owner;
176     }
177 
178     @Override
179     public boolean matches(final IHeaders headers) {
180         String lowerLine = headers.raw().toLowerCase();
181         if (lowerLine.contains("copyright") || lowerLine.contains("(c)") || lowerLine.contains("©") ||
182                 lowerLine.contains("&copy;")) {
183             Matcher matcher = COPYRIGHT_PATTERN.matcher(headers.raw());
184             if (matcher.find()) {
185                 String buffer = headers.raw().substring(matcher.end());
186                 matcher = dateOwnerPattern.matcher(buffer);
187                 if (matcher.find() && matcher.start() == 0) {
188                     return true;
189                 }
190                 if (ownerDatePattern != null) {
191                     matcher = ownerDatePattern.matcher(buffer);
192                     return matcher.find() && matcher.start() == 0;
193                 }
194             }
195         }
196         return false;
197     }
198 }