1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.rat.analysis.matchers;
20
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang3.StringUtils;
import org.apache.rat.ConfigurationException;
import org.apache.rat.analysis.IHeaders;
import org.apache.rat.config.parameters.ComponentType;
import org.apache.rat.config.parameters.ConfigComponent;
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55 @ConfigComponent(type = ComponentType.MATCHER, name = "copyright",
56 desc = "A matcher that matches Copyright text. " +
57 "Uses regular expressions and so should only be used when looking for copyrights with specific " +
58 "patterns that are not caught by a standard text matcher. This matcher will match \"(C)\", \"copyright\", " +
59 "or \"©\". (text is not case sensitive). It will also match things like Copyright (c) joe 1995 as well " +
60 "as Copyright (C) 1995 joe and Copyright (C) joe 1995.")
61 public class CopyrightMatcher extends AbstractHeaderMatcher {
62
63 private static final String COPYRIGHT_SYMBOL_DEFN = "\\([Cc]\\)|©|\\&[Cc][Oo][Pp][Yy]\\;";
64
65 private static final String COPYRIGHT_PATTERN_DEFN = "(\\b)?" + COPYRIGHT_SYMBOL_DEFN + "|Copyright\\b";
66
67 private static final Pattern COPYRIGHT_PATTERN = Pattern.compile(COPYRIGHT_PATTERN_DEFN);
68
69 private static final String ONE_PART = "\\s+((" + COPYRIGHT_SYMBOL_DEFN + ")\\s+)?%s";
70
71 private static final String TWO_PART = "\\s+((" + COPYRIGHT_SYMBOL_DEFN + ")\\s+)?%s,?\\s+%s";
72
73 private static final String DOUBLE_DATE_FMT = "%s\\s*-\\s*%s";
74
75 private static final String ARBITRARY_DATE = "[0-9]{4}";
76
77 private final Pattern dateOwnerPattern;
78
79 private final Pattern ownerDatePattern;
80
81 @ConfigComponent(type = ComponentType.PARAMETER, desc = "The initial year of the copyright if any")
82 private final String start;
83
84 @ConfigComponent(type = ComponentType.PARAMETER, desc = "The last year the copyright. Only valid with 'start'")
85 private final String end;
86
87 @ConfigComponent(type = ComponentType.PARAMETER, desc = "The owner of the copyright")
88 private final String owner;
89
90
91
92
93
94
95
96
97
98
99 public CopyrightMatcher(final String start, final String end, final String owner) {
100 this(null, start, end, owner);
101 }
102
103 private static void assertNumber(final String label, final String value) {
104 try {
105 if (StringUtils.isNotEmpty(value)) {
106 Integer.parseInt(value);
107 }
108 } catch (NumberFormatException e) {
109 throw new ConfigurationException(String.format("'%s' must be numeric (value provided: '%s')", label, value));
110 }
111 }
112
113
114
115
116
117
118
119
120
121
122
123 public CopyrightMatcher(final String id, final String start, final String end, final String owner) {
124 super(id);
125 if (StringUtils.isBlank(start) && !StringUtils.isBlank(end)) {
126 throw new ConfigurationException("'end' may not be set if 'start' is not set.");
127 }
128 assertNumber("start", start);
129 assertNumber("end", end);
130 this.start = start;
131 this.end = end;
132 this.owner = owner;
133 String dateDefn = "";
134 if (StringUtils.isNotEmpty(start)) {
135 if (StringUtils.isNotEmpty(end)) {
136 dateDefn = String.format(DOUBLE_DATE_FMT, this.start, this.end);
137 } else {
138 dateDefn = this.start;
139 }
140 }
141 if (StringUtils.isEmpty(owner)) {
142
143 if (StringUtils.isEmpty(dateDefn)) {
144 dateDefn = ARBITRARY_DATE;
145 }
146 dateOwnerPattern = Pattern.compile(String.format(ONE_PART, dateDefn));
147 ownerDatePattern = null;
148 } else {
149 if (StringUtils.isEmpty(dateDefn)) {
150 dateDefn = String.format(DOUBLE_DATE_FMT, "(((" + ARBITRARY_DATE, ")?" + ARBITRARY_DATE + "))?");
151 dateOwnerPattern = Pattern.compile(String.format(TWO_PART, dateDefn, owner));
152 ownerDatePattern = Pattern.compile(String.format(ONE_PART, owner));
153 } else {
154 dateOwnerPattern = Pattern.compile(String.format(TWO_PART, dateDefn, owner));
155 ownerDatePattern = Pattern.compile(String.format(TWO_PART, owner, dateDefn));
156 }
157 }
158 }
159
160
161
162
163
164 public String getStart() {
165 return start;
166 }
167
168
169
170
171
172 public String getEnd() {
173 return end;
174 }
175
176
177
178
179
180 public String getOwner() {
181 return owner;
182 }
183
184 @Override
185 public boolean matches(final IHeaders headers) {
186 String lowerLine = headers.raw().toLowerCase();
187 if (lowerLine.contains("copyright") || lowerLine.contains("(c)") || lowerLine.contains("©") ||
188 lowerLine.contains("©")) {
189 Matcher matcher = COPYRIGHT_PATTERN.matcher(headers.raw());
190 if (matcher.find()) {
191 String buffer = headers.raw().substring(matcher.end());
192 matcher = dateOwnerPattern.matcher(buffer);
193 if (matcher.find() && matcher.start() == 0) {
194 return true;
195 }
196 if (ownerDatePattern != null) {
197 matcher = ownerDatePattern.matcher(buffer);
198 return matcher.find() && matcher.start() == 0;
199 }
200 }
201 }
202 return false;
203 }
204 }