1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.rat.analysis.matchers;
20
import java.util.Arrays;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import org.apache.commons.lang3.StringUtils;
import org.apache.rat.ConfigurationException;
import org.apache.rat.analysis.IHeaders;
import org.apache.rat.config.parameters.ComponentType;
import org.apache.rat.config.parameters.ConfigComponent;
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57 @ConfigComponent(type = ComponentType.MATCHER, name = "copyright",
58 desc = "A matcher that matches Copyright text. " +
59 "Uses regular expressions and so should only be used when looking for copyrights with specific " +
60 "patterns that are not caught by a standard text matcher. This matcher will match \"(C)\", \"copyright\", " +
61 "or \"©\". (text is not case sensitive). It will also match things like Copyright (c) joe 1995 as well " +
62 "as Copyright (C) 1995 joe and Copyright (C) joe 1995.")
63 public class CopyrightMatcher extends AbstractHeaderMatcher {
64
65 private static final String COPYRIGHT_SYMBOL_DEFN = "\\([Cc]\\)|©|\\&[Cc][Oo][Pp][Yy]\\;";
66
67 private static final String COPYRIGHT_PATTERN_DEFN = "(\\b)?" + COPYRIGHT_SYMBOL_DEFN + "|Copyright\\b";
68
69 private static final Pattern COPYRIGHT_PATTERN = Pattern.compile(COPYRIGHT_PATTERN_DEFN);
70
71 private static final String ONE_PART = "\\W+((" + COPYRIGHT_SYMBOL_DEFN + ")\\W+)?%s";
72
73 private static final String TWO_PART = "\\W+((" + COPYRIGHT_SYMBOL_DEFN + ")\\W+)?%s,?\\W+%s";
74
75 private static final String DOUBLE_DATE_FMT = "%s\\W*-\\W*%s";
76
77 private static final String ARBITRARY_DATE = "[0-9]{4}";
78
79 private final Pattern dateOwnerPattern;
80
81 private final Pattern ownerDatePattern;
82
83 @ConfigComponent(type = ComponentType.PARAMETER, desc = "The initial year of the copyright if any")
84 private final String start;
85
86 @ConfigComponent(type = ComponentType.PARAMETER, desc = "The last year the copyright. Only valid with 'start'")
87 private final String end;
88
89 @ConfigComponent(type = ComponentType.PARAMETER, desc = "The owner of the copyright")
90 private final String owner;
91
92
93
94
95
96
97
98
99
100
101 public CopyrightMatcher(final String start, final String end, final String owner) {
102 this(null, start, end, owner);
103 }
104
105 private static void assertNumber(final String label, final String value) {
106 try {
107 if (StringUtils.isNotEmpty(value)) {
108 Integer.parseInt(value);
109 }
110 } catch (NumberFormatException e) {
111 throw new ConfigurationException(String.format("'%s' must be numeric (value provided: '%s')", label, value));
112 }
113 }
114
115
116
117
118
119
120
121
122 private String parseOwner(final String owner) {
123 return Arrays.stream(owner.split("\\s+")).map(s -> "\\Q" + s + "\\E").collect(Collectors.joining("\\W+"));
124 }
125
126
127
128
129
130
131
132
133
134
135
136 public CopyrightMatcher(final String id, final String start, final String end, final String owner) {
137 super(id);
138 if (StringUtils.isBlank(start) && !StringUtils.isBlank(end)) {
139 throw new ConfigurationException("'end' may not be set if 'start' is not set.");
140 }
141 assertNumber("start", start);
142 assertNumber("end", end);
143 this.start = start;
144 this.end = end;
145 this.owner = owner;
146 String dateDefn = "";
147 if (StringUtils.isNotEmpty(start)) {
148 if (StringUtils.isNotEmpty(end)) {
149 dateDefn = String.format(DOUBLE_DATE_FMT, this.start, this.end);
150 } else {
151 dateDefn = this.start;
152 }
153 }
154 if (StringUtils.isEmpty(owner)) {
155
156 if (StringUtils.isEmpty(dateDefn)) {
157 dateDefn = ARBITRARY_DATE;
158 }
159 dateOwnerPattern = Pattern.compile(String.format(ONE_PART, dateDefn));
160 ownerDatePattern = null;
161 } else {
162 String formattedOwner = parseOwner(owner);
163 if (StringUtils.isEmpty(dateDefn)) {
164 dateDefn = String.format(DOUBLE_DATE_FMT, "(((" + ARBITRARY_DATE, ")?" + ARBITRARY_DATE + "))?");
165 dateOwnerPattern = Pattern.compile(String.format(TWO_PART, dateDefn, formattedOwner));
166 ownerDatePattern = Pattern.compile(String.format(ONE_PART, formattedOwner));
167 } else {
168 dateOwnerPattern = Pattern.compile(String.format(TWO_PART, dateDefn, formattedOwner));
169 ownerDatePattern = Pattern.compile(String.format(TWO_PART, formattedOwner, dateDefn));
170 }
171 }
172 }
173
174
175
176
177
178 public String getStart() {
179 return start;
180 }
181
182
183
184
185
186 public String getEnd() {
187 return end;
188 }
189
190
191
192
193
194 public String getOwner() {
195 return owner;
196 }
197
198 @Override
199 public boolean matches(final IHeaders headers) {
200 String lowerLine = headers.raw().toLowerCase();
201 if (lowerLine.contains("copyright") || lowerLine.contains("(c)") || lowerLine.contains("©") ||
202 lowerLine.contains("©")) {
203 Matcher matcher = COPYRIGHT_PATTERN.matcher(headers.raw());
204 if (matcher.find()) {
205 String buffer = headers.raw().substring(matcher.end());
206 matcher = dateOwnerPattern.matcher(buffer);
207 if (matcher.find() && matcher.start() == 0) {
208 return true;
209 }
210 if (ownerDatePattern != null) {
211 matcher = ownerDatePattern.matcher(buffer);
212 return matcher.find() && matcher.start() == 0;
213 }
214 }
215 }
216 return false;
217 }
218 }