View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one   *
3    * or more contributor license agreements.  See the NOTICE file *
4    * distributed with this work for additional information        *
5    * regarding copyright ownership.  The ASF licenses this file   *
6    * to you under the Apache License, Version 2.0 (the            *
7    * "License"); you may not use this file except in compliance   *
8    * with the License.  You may obtain a copy of the License at   *
9    *                                                              *
10   *   http://www.apache.org/licenses/LICENSE-2.0                 *
11   *                                                              *
12   * Unless required by applicable law or agreed to in writing,   *
13   * software distributed under the License is distributed on an  *
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
15   * KIND, either express or implied.  See the License for the    *
16   * specific language governing permissions and limitations      *
17   * under the License.                                           *
18   */
19  package org.apache.rat.analysis.matchers;
20  
21  import java.util.regex.Matcher;
22  import java.util.regex.Pattern;
23  
24  import org.apache.commons.lang3.StringUtils;
25  
26  /**
27   * Matches a typical Copyright header line only based on a regex pattern which
28   * allows for one (starting) year or year range, and a configurable copyright
29   * owner. <br>
30   * <br>
31   * The matching is done case insensitive<br>
32   * <br>
33   * Example supported Copyright header lines, using copyright owner
34   * &quot;FooBar&quot;
35   * <ul>
36   * <li>Copyright 2010 FooBar.</li>
37   * <li>Copyright 2010-2012 FooBar.</li>
38   * <li>copyright 2012 foobar</li>
39   * </ul>
40   * <p>
41   * Note also that the copyright owner is appended to the regex pattern and so can
42   * support additional regex but also requires escaping where needed,<br>
43   * e.g. use &quot;FooBar \\(www\\.foobar\\.com\\)&quot; or 
44   * &quot;FooBar \\Q(www.foobar.com)\\E&quot; to match &quot;FooBar
45   * (www.foobar.com)&quot;
46   * </p>
47   * <p>The matcher also accepts "(C)", "(c)", and "©" in place of (or in addition to) the "Copyright" or "copyright" 
48   * keyword</p>
49   */
50  public class CopyrightMatcher extends AbstractSimpleMatcher {
51  
52      private static final String COPYRIGHT_SYMBOL_DEFN = "\\([Cc]\\)|©";
53      private static final String COPYRIGHT_PATTERN_DEFN = "(\\b)?" + COPYRIGHT_SYMBOL_DEFN + "|Copyright\\b";
54      private static final Pattern COPYRIGHT_PATTERN = Pattern.compile(COPYRIGHT_PATTERN_DEFN);
55      private static final String ONE_PART = "\\s+((" + COPYRIGHT_SYMBOL_DEFN + ")\\s+)?%s";
56      private static final String TWO_PART = "\\s+((" + COPYRIGHT_SYMBOL_DEFN + ")\\s+)?%s,?\\s+%s";
57  
58      private final Pattern dateOwnerPattern;
59      private final Pattern ownerDatePattern;
60  
61      /**
62       * Constructs the CopyrightMatcher with the specified start, stop and owner strings and a unique random id..
63       * @param start the start date for the copyright may be null.
64       * @param stop the stop date for the copyright, may be null.  May not be specified if start is not specified.
65       * @param owner the owner of the copyright. may be null.
66       */
67      public CopyrightMatcher(String start, String stop, String owner) {
68          this(null, start, stop, owner);
69      }
70  
71      /**
72       * Constructs the CopyrightMatcher with the specified start, stop and owner strings.
73       * @param id the id for the matcher.
74       * @param start the start date for the copyright may be null.
75       * @param stop the stop date for the copyright, may be null.  May not be specified if start is not specified.
76       * @param owner the owner of the copyright. may be null.
77       */
78      public CopyrightMatcher(String id, String start, String stop, String owner) {
79          super(id);
80          String dateDefn = "";
81          if (StringUtils.isNotEmpty(start)) {
82              if (StringUtils.isNotEmpty(stop)) {
83                  dateDefn = String.format("%s\\s*-\\s*%s", start.trim(), stop.trim());
84              } else {
85                  dateDefn = start.trim();
86              }
87          }
88          if (StringUtils.isEmpty(owner)) {
89              // no owner
90              if (StringUtils.isEmpty(dateDefn)) {
91                  dateDefn = "[0-9]{4}";
92              }
93              dateOwnerPattern = Pattern.compile(String.format(ONE_PART, dateDefn));
94              ownerDatePattern = null;
95          } else {
96              if (StringUtils.isEmpty(dateDefn)) {
97                  // no date
98                  dateOwnerPattern = Pattern.compile(String.format(ONE_PART, owner));
99                  ownerDatePattern = null;
100             } else {
101                 dateOwnerPattern = Pattern.compile(String.format(TWO_PART, dateDefn, owner));
102                 ownerDatePattern = Pattern.compile(String.format(TWO_PART, owner, dateDefn));
103             }
104         }
105     }
106 
107     @Override
108     protected boolean doMatch(String line) {
109         String lowerLine = line.toLowerCase();
110         if (lowerLine.contains("copyright") || lowerLine.contains("(c)") || line.contains("©")) {
111             Matcher matcher = COPYRIGHT_PATTERN.matcher(line);
112             if (matcher.find()) {
113                 String buffer = line.substring(matcher.end());
114                 matcher = dateOwnerPattern.matcher(buffer);
115                 if (matcher.find() && matcher.start() == 0) {
116                     return true;
117                 }
118                 if (ownerDatePattern != null) {
119                     matcher = ownerDatePattern.matcher(buffer);
120                     return matcher.find() && matcher.start() == 0;
121                 }
122             }
123         }
124         return false;
125     }
126 }