View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one   *
3    * or more contributor license agreements.  See the NOTICE file *
4    * distributed with this work for additional information        *
5    * regarding copyright ownership.  The ASF licenses this file   *
6    * to you under the Apache License, Version 2.0 (the            *
7    * "License"); you may not use this file except in compliance   *
8    * with the License.  You may obtain a copy of the License at   *
9    *                                                              *
10   *   http://www.apache.org/licenses/LICENSE-2.0                 *
11   *                                                              *
12   * Unless required by applicable law or agreed to in writing,   *
13   * software distributed under the License is distributed on an  *
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
15   * KIND, either express or implied.  See the License for the    *
16   * specific language governing permissions and limitations      *
17   * under the License.                                           *
18   */
19  package org.apache.rat.analysis.matchers;
20  
21  import java.util.regex.Matcher;
22  import java.util.regex.Pattern;
23  
24  import org.apache.commons.lang3.StringUtils;
25  import org.apache.rat.ConfigurationException;
26  import org.apache.rat.analysis.IHeaders;
27  import org.apache.rat.config.parameters.ComponentType;
28  import org.apache.rat.config.parameters.ConfigComponent;
29  
30  /**
31   * Matches a typical Copyright header line only based on a regex pattern which
32   * allows for one (starting) year or year range, and a configurable copyright
33   * owner.<br>
34   * <br>
35   * The matching is done case-insensitive<br>
36   * <br>
37   * Example supported Copyright header lines, using copyright owner
38   * &quot;FooBar&quot;
39   * <ul>
40   * <li>Copyright 2010 FooBar.</li>
41   * <li>Copyright 2010-2012 FooBar.</li>
42   * <li>copyright 2012 foobar</li>
43   * </ul>
44   * <p>
45   * Note also that the copyright owner is appended to the regex pattern and so
46   * can support additional regex but also requires escaping where needed,<br>
47   * e.g. use &quot;FooBar \\(www\\.foobar\\.com\\)&quot; or &quot;FooBar
48   * \\Q(www.foobar.com)\\E&quot; to match &quot;FooBar (www.foobar.com)&quot;
49   * </p>
50   * <p>
51   * The matcher also accepts "(C)", "(c)", and "©" in place of (or in addition
52   * to) the "Copyright" or "copyright" keyword
53   * </p>
54   */
55  @ConfigComponent(type = ComponentType.MATCHER, name = "copyright",
56          desc = "A matcher that matches Copyright text. " +
57                  "Uses regular expressions and so should only be used when looking for copyrights with specific " +
58                  "patterns that are not caught by a standard text matcher. This matcher will match \"(C)\", \"copyright\", " +
59                  "or \"©\". (text is not case sensitive). It will also match things like Copyright (c) joe 1995 as well " +
60                  "as Copyright (C) 1995 joe and Copyright (C) joe 1995.")
61  public class CopyrightMatcher extends AbstractHeaderMatcher {
62      /** String to build a pattern to match the various recognized copyright symbols */
63      private static final String COPYRIGHT_SYMBOL_DEFN = "\\([Cc]\\)|©|\\&[Cc][Oo][Pp][Yy]\\;";
64      /** String to build a pattern to match symbols or word "copyright" */
65      private static final String COPYRIGHT_PATTERN_DEFN = "(\\b)?" + COPYRIGHT_SYMBOL_DEFN + "|Copyright\\b";
66      /** The compiled pattern from {@link #COPYRIGHT_PATTERN_DEFN} */
67      private static final Pattern COPYRIGHT_PATTERN = Pattern.compile(COPYRIGHT_PATTERN_DEFN);
68      /** The string to build a Pattern to match a copyright with a single part (date or name) */
69      private static final String ONE_PART = "\\s+((" + COPYRIGHT_SYMBOL_DEFN + ")\\s+)?%s";
70      /** The string to build a Pattern to match a copyright iwth both name and date */
71      private static final String TWO_PART = "\\s+((" + COPYRIGHT_SYMBOL_DEFN + ")\\s+)?%s,?\\s+%s";
72      /** Format string to build a pattern to match two dates */
73      private static final String DOUBLE_DATE_FMT = "%s\\s*-\\s*%s";
74      /** String to build a pattern to match an arbitrary date (year) */
75      private static final String ARBITRARY_DATE = "[0-9]{4}";
76      /** The built Pattern for matching "Copyright date owner" */
77      private final Pattern dateOwnerPattern;
78      /** The built pattern for matching "Copyright owner date" */
79      private final Pattern ownerDatePattern;
80      /** The start date of the copyright. May be null. */
81      @ConfigComponent(type = ComponentType.PARAMETER, desc = "The initial year of the copyright if any")
82      private final String start;
83      /** The end date of the copyright. May be null. */
84      @ConfigComponent(type = ComponentType.PARAMETER, desc = "The last year the copyright. Only valid with 'start'")
85      private final String end;
86      /** The owner of the copyright. May be null */
87      @ConfigComponent(type = ComponentType.PARAMETER, desc = "The owner of the copyright")
88      private final String owner;
89  
90      /**
91       * Constructs the CopyrightMatcher with the specified start, stop and owner
92       * strings and a unique random id.
93       *
94       * @param start the start date for the copyright, may be null.
95       * @param end the stop date for the copyright, may be null. May not be
96       * specified if start is not specified.
97       * @param owner the owner of the copyright, may be null.
98       */
99      public CopyrightMatcher(final String start, final String end, final String owner) {
100         this(null, start, end, owner);
101     }
102 
103     private static void assertNumber(final String label, final String value) {
104         try {
105             if (StringUtils.isNotEmpty(value)) {
106                 Integer.parseInt(value);
107             }
108         } catch (NumberFormatException e) {
109             throw new ConfigurationException(String.format("'%s' must be numeric (value provided: '%s')", label, value));
110         }
111     }
112 
113     /**
114      * Constructs the CopyrightMatcher with the specified start, stop and owner
115      * strings.
116      *
117      * @param id the id for the matcher.
118      * @param start the start date for the copyright, may be null.
119      * @param end the end date for the copyright, may be null. May not be
120      * specified if start is not specified.
121      * @param owner the owner of the copyright. May be null.
122      */
123     public CopyrightMatcher(final String id, final String start, final String end, final String owner) {
124         super(id);
125         if (StringUtils.isBlank(start) && !StringUtils.isBlank(end)) {
126             throw new ConfigurationException("'end' may not be set if 'start' is not set.");
127         }
128         assertNumber("start", start);
129         assertNumber("end", end);
130         this.start = start;
131         this.end = end;
132         this.owner = owner;
133         String dateDefn = "";
134         if (StringUtils.isNotEmpty(start)) {
135             if (StringUtils.isNotEmpty(end)) {
136                 dateDefn = String.format(DOUBLE_DATE_FMT, this.start, this.end);
137             } else {
138                 dateDefn = this.start;
139             }
140         }
141         if (StringUtils.isEmpty(owner)) {
142             // no owner
143             if (StringUtils.isEmpty(dateDefn)) {
144                 dateDefn = ARBITRARY_DATE;
145             }
146             dateOwnerPattern = Pattern.compile(String.format(ONE_PART, dateDefn));
147             ownerDatePattern = null;
148         } else {
149             if (StringUtils.isEmpty(dateDefn)) {
150                 dateDefn = String.format(DOUBLE_DATE_FMT, "(((" + ARBITRARY_DATE, ")?" + ARBITRARY_DATE + "))?");
151                 dateOwnerPattern = Pattern.compile(String.format(TWO_PART, dateDefn, owner));
152                 ownerDatePattern = Pattern.compile(String.format(ONE_PART, owner));
153             } else {
154                 dateOwnerPattern = Pattern.compile(String.format(TWO_PART, dateDefn, owner));
155                 ownerDatePattern = Pattern.compile(String.format(TWO_PART, owner, dateDefn));
156             }
157         }
158     }
159 
160     /**
161      * Gets the start date of the copyright.
162      * @return the start date of the copyright or {@code null} if not set
163      */
164     public String getStart() {
165         return start;
166     }
167 
168     /**
169      * Gets the end date of the copyright.
170      * @return the end date of the copyright or {@code null} if not set
171      */
172     public String getEnd() {
173         return end;
174     }
175 
176     /**
177      * Gets the owner of the copyright.
178      * @return the owner of the copyright or {@code null} if not set
179      */
180     public String getOwner() {
181         return owner;
182     }
183 
184     @Override
185     public boolean matches(final IHeaders headers) {
186         String lowerLine = headers.raw().toLowerCase();
187         if (lowerLine.contains("copyright") || lowerLine.contains("(c)") || lowerLine.contains("©") ||
188                 lowerLine.contains("&copy;")) {
189             Matcher matcher = COPYRIGHT_PATTERN.matcher(headers.raw());
190             if (matcher.find()) {
191                 String buffer = headers.raw().substring(matcher.end());
192                 matcher = dateOwnerPattern.matcher(buffer);
193                 if (matcher.find() && matcher.start() == 0) {
194                     return true;
195                 }
196                 if (ownerDatePattern != null) {
197                     matcher = ownerDatePattern.matcher(buffer);
198                     return matcher.find() && matcher.start() == 0;
199                 }
200             }
201         }
202         return false;
203     }
204 }