View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *   http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.rat.document;
20  
21  import java.io.File;
22  import java.io.IOException;
23  import java.nio.file.FileSystem;
24  import java.nio.file.FileSystems;
25  import java.nio.file.Files;
26  import java.nio.file.Path;
27  import java.nio.file.Paths;
28  import java.util.ArrayList;
29  import java.util.Arrays;
30  import java.util.List;
31  import java.util.Map;
32  import java.util.Objects;
33  import java.util.Optional;
34  import java.util.concurrent.ConcurrentHashMap;
35  import java.util.stream.Collectors;
36  
37  import org.apache.commons.lang3.StringUtils;
38  import org.apache.commons.lang3.builder.CompareToBuilder;
39  import org.apache.commons.lang3.builder.EqualsBuilder;
40  import org.apache.commons.lang3.builder.HashCodeBuilder;
41  import org.apache.commons.lang3.tuple.ImmutablePair;
42  import org.apache.commons.lang3.tuple.Pair;
43  
44  /**
45   * The name for a document. The {@code DocumentName} is an immutable structure that handles all the intricacies of file
46   * naming on various operating systems. DocumentNames have several components:
47   * <ul>
48   *     <li>{@code root} - where in the file system the name starts (e.g. C: on Windows). May be empty but not null.</li>
49   *     <li>{@code dirSeparator} - the separator between name segments (e.g. "\\" on windows, "/" on linux). May not be
50   *     empty or null.</li>
51   *     <li>{@code name} - the name of the file relative to the {@code root}. May not be null. Does NOT begin with a {@code dirSeparator}</li>
52   *     <li>{@code baseName} - the name of a directory or file from which this file is reported. A DocumentName with a
53   *     {@code name} of "foo/bar/baz.txt" and a {@code baseName} of "foo" will be reported as "bar/baz.txt". May not be null.</li>
54   *     <li>{@code isCaseSensitive} - identifies if the underlying file system is case-sensitive.</li>
55   * </ul>
56   * <p>
57   *     {@code DocumentName}s are generally used to represent files on the file system. However, they are also used to represent files
58   *     within an archive. When representing a file in an archive the baseName is the name of the enclosing archive document.
59   * </p>
60   */
61  public class DocumentName implements Comparable<DocumentName> {
62      /** The full name for the document. */
63      private final String name;
64      /** The name of the base directory for the document. */
65      private final DocumentName baseName;
66      /** The file system info for this document. */
67      private final FSInfo fsInfo;
68      /** The root for the DocumentName. May be empty but not null. */
69      private final String root;
70  
71      /**
72       * Creates a Builder with the default file system info.
73       * @return the builder.
74       * @see FSInfo
75       */
76      public static Builder builder() {
77          return new Builder(FSInfo.getDefault());
78      }
79  
80      /**
81       * Creates a builder with the specified FSInfo instance.
82       * @param fsInfo the FSInfo to use for the builder.
83       * @return a new builder.
84       */
85      public static Builder builder(final FSInfo fsInfo) {
86          return new Builder(fsInfo);
87      }
88  
89      /**
90       * Creates a builder for the specified file system.
91       * @param fileSystem the file system to create the builder on.
92       * @return a new builder.
93       */
94      public static Builder builder(final FileSystem fileSystem) {
95          return new Builder(fileSystem);
96      }
97  
98      /**
99       * Creates a builder from a File. The {@link #baseName} is set to the file name if it is a directory otherwise
100      * it is set to the directory containing the file.
101      * @param file The file to set defaults from.
102      * @return the builder.
103      */
104     public static Builder builder(final File file) {
105         return new Builder(file);
106     }
107 
108     /**
109      * Creates a builder from a document name. The builder will be configured to create a clone of the DocumentName.
110      * @param documentName the document name to set the defaults from.
111      * @return the builder.
112      */
113     public static Builder builder(final DocumentName documentName) {
114         return new Builder(documentName);
115     }
116 
117     /**
118      * Builds the DocumentName from the builder.
119      * @param builder the builder to provide the values.
120      */
121     DocumentName(final Builder builder) {
122         this.name = builder.name;
123         this.fsInfo = builder.fsInfo;
124         this.root = builder.root;
125         this.baseName = builder.sameNameFlag ? this : builder.baseName;
126     }
127 
128     /**
129      * Creates a file from the document name.
130      * @return a new File object.
131      */
132     public File asFile() {
133         return new File(getName());
134     }
135 
136     /**
137      * Creates a path from the document name.
138      * @return a new Path object.
139      */
140     public Path asPath() {
141         return Paths.get(name);
142     }
143 
144     /**
145      * Creates a new DocumentName by adding the child to the current name.
146      * Resulting documentName will have the same base name.
147      * @param child the child to add (must use directory separator from this document name).
148      * @return the new document name with the same {@link #baseName}, directory sensitivity and case sensitivity as
149      * this one.
150      */
151     public DocumentName resolve(final String child) {
152         if (StringUtils.isBlank(child)) {
153             return this;
154         }
155         String separator = getDirectorySeparator();
156         String pattern = separator.equals("/") ? child.replace('\\', '/') :
157                 child.replace('/', '\\');
158 
159         if (!pattern.startsWith(separator)) {
160              pattern = name + separator + pattern;
161         }
162 
163         return new Builder(this).setName(fsInfo.normalize(pattern)).build();
164     }
165 
166     /**
167      * Gets the fully qualified name of the document.
168      * @return the fully qualified name of the document.
169      */
170     public String getName() {
171         return root + fsInfo.dirSeparator() + name;
172     }
173 
174     /**
175      * Gets the fully qualified basename of the document.
176      * @return the fully qualified basename of the document.
177      */
178     public String getBaseName() {
179         return baseName.getName();
180     }
181 
182     /**
183      * Gets the root for this document.
184      * @return the root for this document.
185      */
186     public String getRoot() {
187         return root;
188     }
189 
190     /**
191      * Gets the DocumentName for the basename of this DocumentName.
192      * @return the DocumentName for the basename of this document name.
193      */
194     public DocumentName getBaseDocumentName() {
195         return baseName;
196     }
197 
198     /**
199      * Returns the directory separator.
200      * @return the directory separator.
201      */
202     public String getDirectorySeparator() {
203         return fsInfo.dirSeparator();
204     }
205 
206     /**
207      * Determines if the candidate starts with the root or separator strings.
208      * @param candidate the candidate to check. If blank method will return {@code false}.
209      * @param root the root to check. If blank the root check is skipped.
210      * @param separator the separator to check. If blank the check is skipped.
211      * @return true if either the root or separator check returned {@code true}.
212      */
213     boolean startsWithRootOrSeparator(final String candidate, final String root, final String separator) {
214         if (StringUtils.isBlank(candidate)) {
215             return false;
216         }
217         boolean result = !StringUtils.isBlank(root) && candidate.startsWith(root);
218         if (!result) {
219             result = !StringUtils.isBlank(separator) && candidate.startsWith(separator);
220         }
221         return result;
222     }
223 
224     /**
225      * Gets the portion of the name that is not part of the base name.
226      * The resulting name will always start with the directory separator.
227      * @return the portion of the name that is not part of the base name.
228      */
229     public String localized() {
230         String result = getName();
231         String baseNameStr = baseName.getName();
232         if (result.startsWith(baseNameStr)) {
233             result = result.substring(baseNameStr.length());
234         }
235         if (!startsWithRootOrSeparator(result, getRoot(), fsInfo.dirSeparator())) {
236             result = fsInfo.dirSeparator() + result;
237         }
238         return result;
239     }
240 
241     /**
242      * Gets the portion of the name that is not part of the base name.
243      * The resulting name will always start with the directory separator.
244      * @param dirSeparator The character(s) to use to separate directories in the result.
245      * @return the portion of the name that is not part of the base name.
246      */
247     public String localized(final String dirSeparator) {
248         String[] tokens = fsInfo.tokenize(localized());
249         if (tokens.length == 0) {
250             return dirSeparator;
251         }
252         if (tokens.length == 1) {
253             return dirSeparator + tokens[0];
254         }
255 
256         String modifiedRoot =  dirSeparator.equals("/") ? root.replace('\\', '/') :
257                 root.replace('/', '\\');
258         String result = String.join(dirSeparator, tokens);
259         return startsWithRootOrSeparator(result, modifiedRoot, dirSeparator) ? result : dirSeparator + result;
260     }
261 
262     /**
263      * Gets the last segment of the name. This is the part after the last directory separator.
264      * @return the last segment of the name.
265      */
266     public String getShortName() {
267         int pos = name.lastIndexOf(fsInfo.dirSeparator());
268         return pos == -1 ? name : name.substring(pos + 1);
269     }
270 
271     /**
272      * Gets the case sensitivity flag.
273      * @return {@code true} if the name is case-sensitive.
274      */
275     public boolean isCaseSensitive() {
276         return fsInfo.isCaseSensitive();
277     }
278 
279     /**
280      * Returns the localized file name.
281      * @return the localized file name.
282      */
283     @Override
284     public String toString() {
285         return localized();
286     }
287 
288     @Override
289     public int compareTo(final DocumentName other) {
290         return CompareToBuilder.reflectionCompare(this, other);
291     }
292 
293     @Override
294     public boolean equals(final Object other) {
295         return EqualsBuilder.reflectionEquals(this, other);
296     }
297 
298     @Override
299     public int hashCode() {
300         return HashCodeBuilder.reflectionHashCode(this);
301     }
302 
303     private static final class FSInfoData {
304         /** The case sensitivity flag */
305         private final boolean isCaseSensitive;
306         /** The list of roots for the file system. */
307         private final List<String> roots;
308         /** The separator between directory names. */
309         private final String separator;
310 
311         /**
312          * Constructor for known properties.
313          * @param separator the directory separator character(s).
314          * @param isCaseSensitive {@code true} if the file system is cases sensitive.
315          * @param roots THe list of roots for the file system.
316          */
317         FSInfoData(final String separator, final boolean isCaseSensitive, final List<String> roots) {
318             this.isCaseSensitive = isCaseSensitive;
319             this.roots = roots;
320             this.separator = separator;
321         }
322 
323         /**
324          * Constructor for an arbitrary file system.
325          * This constructor can be processor intensive as it has to check the file system for case sensitivity.
326          * @param fileSystem the file system.
327          */
328         FSInfoData(final FileSystem fileSystem) {
329             isCaseSensitive = isCaseSensitive(fileSystem);
330             roots = new ArrayList<>();
331             fileSystem.getRootDirectories().forEach(r -> roots.add(r.toString()));
332             separator = fileSystem.getSeparator();
333         }
334 
335         /**
336          * Determines if the file system is case-sensitive.
337          * @param fileSystem the file system to check.
338          * @return {@code true} if the file system is case-sensitive.
339          */
340         private static boolean isCaseSensitive(final FileSystem fileSystem) {
341             boolean isCaseSensitive = false;
342             Path nameSet = null;
343             Path filea = null;
344             Path fileA = null;
345             try {
346                 try {
347                     Path root = fileSystem.getPath("");
348                     nameSet = Files.createTempDirectory(root, "NameSet");
349                     filea = nameSet.resolve("a");
350                     fileA = nameSet.resolve("A");
351                     Files.createFile(filea);
352                     Files.createFile(fileA);
353                     isCaseSensitive = true;
354                 } catch (IOException e) {
355                     // do nothing
356                 } finally {
357                     if (filea != null) {
358                         Files.deleteIfExists(filea);
359                     }
360                     if (fileA != null) {
361                         Files.deleteIfExists(fileA);
362                     }
363                     if (nameSet != null) {
364                         Files.deleteIfExists(nameSet);
365                     }
366                 }
367             } catch (IOException e) {
368                 // do nothing.
369             }
370             return isCaseSensitive;
371         }
372 
373     }
374     /**
375      * The file system information needed to process document names.
376      */
377     public static final class FSInfo implements Comparable<FSInfo> {
378         /**
379          * The map of FileSystem to FSInfoData used to avoid expensive FileSystem processing.
380          */
381         private static final Map<FileSystem, FSInfoData> REGISTRY = new ConcurrentHashMap<>();
382 
383         /** The case-sensitivity flag. */
384         private final FSInfoData data;
385 
386         /** The common name for the file system */
387         private final String name;
388 
389         /**
390          * Gets the FSInfo for the default file system.
391          * If the System property {@code FSInfo} is set, the {@code FSInfo} stored there is used, otherwise
392          * the {@link FileSystem} returned from {@link FileSystems#getDefault()} is used.
393          * @return the FSInfo for the default file system.
394          */
395         public static FSInfo getDefault() {
396             FSInfo result = (FSInfo) System.getProperties().get("FSInfo");
397             return result == null ?
398                     new FSInfo(FileSystems.getDefault())
399                     : result;
400         }
401 
402         /**
403          * Constructor. Extracts the necessary data from the file system.
404          * @param fileSystem the file system to extract data from.
405          */
406         public FSInfo(final FileSystem fileSystem) {
407             this("anon", fileSystem);
408         }
409 
410         /**
411          * Constructor. Extracts the necessary data from the file system.
412          * @param name the common name for the file system.
413          * @param fileSystem the file system to extract data from.
414          */
415         FSInfo(final String name, final FileSystem fileSystem) {
416             this.data = REGISTRY.computeIfAbsent(fileSystem, k -> new FSInfoData(fileSystem));
417             this.name = name;
418         }
419 
420         /**
421          * Constructor for virtual/abstract file systems for example the entry names within an archive.
422          * @param name the common name for the file system.
423          * @param separator the separator string to use.
424          * @param isCaseSensitive the case-sensitivity flag.
425          * @param roots the roots for the file system.
426          */
427         FSInfo(final String name, final String separator, final boolean isCaseSensitive, final List<String> roots) {
428             data = new FSInfoData(separator, isCaseSensitive, roots);
429             this.name = name;
430         }
431 
432         /**
433          * Gets the common name for the underlying file system.
434          * @return the common file system name.
435          */
436         @Override
437         public String toString() {
438             return name;
439         }
440 
441         /**
442          * Gets the directory separator.
443          * @return The directory separator.
444          */
445         public String dirSeparator() {
446             return data.separator;
447         }
448 
449         /**
450          * Gets the case-sensitivity flag.
451          * @return the case-sensitivity flag.
452          */
453         public boolean isCaseSensitive() {
454             return data.isCaseSensitive;
455         }
456 
457         /**
458          * Retrieves the root extracted from the name.
459          * @param name the name to extract the root from
460          * @return an optional containing the root or empty.
461          */
462         public Optional<String> rootFor(final String name) {
463             for (String sysRoot : data.roots) {
464                 if (name.startsWith(sysRoot)) {
465                     return Optional.of(sysRoot);
466                 }
467             }
468             return Optional.empty();
469         }
470 
471         /**
472          * Tokenizes the string based on the directory separator of this DocumentName.
473          * @param source the source to tokenize.
474          * @return the array of tokenized strings.
475          */
476         public String[] tokenize(final String source) {
477             return source.split("\\Q" + dirSeparator() + "\\E");
478         }
479 
480         /**
481          * Removes {@code .} and {@code ..} from filenames.
482          * @param pattern the file name pattern
483          * @return the normalized file name.
484          */
485         public String normalize(final String pattern) {
486             if (StringUtils.isBlank(pattern)) {
487                 return "";
488             }
489             List<String> parts = new ArrayList<>(Arrays.asList(tokenize(pattern)));
490             for (int i = 0; i < parts.size(); i++) {
491                 String part = parts.get(i);
492                 if (part.equals("..")) {
493                     if (i == 0) {
494                         throw new IllegalStateException("Unable to create path before root");
495                     }
496                     parts.set(i - 1, null);
497                     parts.set(i, null);
498                 } else if (part.equals(".")) {
499                     parts.set(i, null);
500                 }
501             }
502             return parts.stream().filter(Objects::nonNull).collect(Collectors.joining(dirSeparator()));
503         }
504 
505         @Override
506         public int compareTo(final FSInfo other) {
507             return CompareToBuilder.reflectionCompare(this, other);
508         }
509 
510         @Override
511         public boolean equals(final Object other) {
512             return EqualsBuilder.reflectionEquals(this, other);
513         }
514 
515         @Override
516         public int hashCode() {
517             return HashCodeBuilder.reflectionHashCode(this);
518         }
519     }
520 
521     /**
522      * The Builder for a DocumentName.
523      */
524     public static final class Builder {
525         /** The name for the document. */
526         private String name;
527         /** The base name for the document. */
528         private DocumentName baseName;
529         /** The file system info. */
530         private final FSInfo fsInfo;
531         /** The file system root. */
532         private String root;
533         /** A flag for baseName same as this. */
534         private boolean sameNameFlag;
535 
536         /**
537          * Create with default settings.
538          */
539         private Builder(final FSInfo fsInfo) {
540             this.fsInfo = fsInfo;
541             root = "";
542         }
543 
544         /**
545          * Create with default settings.
546          */
547         private Builder(final FileSystem fileSystem) {
548             this(new FSInfo(fileSystem));
549         }
550 
551         /**
552          * Create based on the file provided.
553          * @param file the file to base the builder on.
554          */
555         private Builder(final File file) {
556             this(FSInfo.getDefault());
557             setName(file);
558         }
559 
560         /**
561          * Used in testing.
562          * @param fsInfo the FSInfo for the file.
563          * @param file the file to process.
564          */
565         Builder(final FSInfo fsInfo, final File file) {
566             this(fsInfo);
567             setName(file);
568         }
569 
570         /**
571          * Create a Builder that clones the specified DocumentName.
572          * @param documentName the DocumentName to clone.
573          */
574         Builder(final DocumentName documentName) {
575             this.root = documentName.root;
576             this.name = documentName.name;
577             this.baseName = documentName.baseName;
578             this.fsInfo = documentName.fsInfo;
579         }
580 
581         /**
582          * Get the directory separator for this builder.
583          * @return the directory separator fo this builder.
584          */
585         public String directorySeparator() {
586             return fsInfo.dirSeparator();
587         }
588 
589         /**
590          * Verify that the builder will build a proper DocumentName.
591          */
592         private void verify() {
593             Objects.requireNonNull(name, "Name must not be null");
594             if (name.startsWith(fsInfo.dirSeparator())) {
595                 name = name.substring(fsInfo.dirSeparator().length());
596             }
597             if (!sameNameFlag) {
598                 Objects.requireNonNull(baseName, "Basename must not be null");
599             }
600         }
601 
602         /**
603          * Sets the root for the DocumentName.
604          * @param root the root for the DocumentName.
605          * @return this.
606          */
607         public Builder setRoot(final String root) {
608             this.root = StringUtils.defaultIfBlank(root, "");
609             return this;
610         }
611 
612         /**
613          * Sets the name for this DocumentName relative to the baseName.
614          * If the {@code name} is {@code null} an empty string is used.
615          * <p>
616          *     To correctly parse the string it must use the directory separator specified by
617          *     this Document.
618          * </p>
619          * @param name the name for this Document name. Will be made relative to the baseName.
620          * @return this
621          */
622         public Builder setName(final String name) {
623             Pair<String, String> pair = splitRoot(StringUtils.defaultIfBlank(name, ""));
624             if (this.root.isEmpty()) {
625                 this.root = pair.getLeft();
626             }
627             this.name = fsInfo.normalize(pair.getRight());
628             if (this.baseName != null && !baseName.name.isEmpty()) {
629                 if (!this.name.startsWith(baseName.name)) {
630                     this.name = this.name.isEmpty() ? baseName.name :
631                             baseName.name + fsInfo.dirSeparator() + this.name;
632                 }
633             }
634             return this;
635         }
636 
637         /**
638          * Extracts the root/name pair from a name string.
639          * <p>
640          *     Package private for testing.
641          * </p>
642          * @param name the name to extract the root/name pair from.
643          * @return the root/name pair.
644          */
645         Pair<String, String> splitRoot(final String name) {
646             String workingName = name;
647             Optional<String> maybeRoot = fsInfo.rootFor(name);
648             String root = maybeRoot.orElse("");
649             if (!root.isEmpty()) {
650                 if (workingName.startsWith(root)) {
651                     workingName = workingName.substring(root.length());
652                     if (!workingName.startsWith(fsInfo.dirSeparator())) {
653                         if (root.endsWith(fsInfo.dirSeparator())) {
654                             root = root.substring(0, root.length() - fsInfo.dirSeparator().length());
655                         }
656                     }
657                 }
658             }
659             return ImmutablePair.of(root, workingName);
660         }
661 
662         /**
663          * Sets the builder root if it is empty.
664          * @param root the root to set the builder root to if it is empty.
665          */
666         private void setEmptyRoot(final String root) {
667             if (this.root.isEmpty()) {
668                 this.root = root;
669             }
670         }
671 
672         /**
673          * Sets the properties from the file. Will reset the baseName appropriately.
674          * @param file the file to set the properties from.
675          * @return this.
676          */
677         public Builder setName(final File file) {
678             Pair<String, String> pair = splitRoot(file.getAbsolutePath());
679             setEmptyRoot(pair.getLeft());
680             this.name = fsInfo.normalize(pair.getRight());
681             if (file.isDirectory()) {
682                 sameNameFlag = true;
683             } else {
684                 File p = file.getParentFile();
685                 if (p != null) {
686                     setBaseName(p);
687                 } else {
688                     Builder baseBuilder = new Builder(this.fsInfo).setName(this.directorySeparator());
689                     baseBuilder.sameNameFlag = true;
690                     setBaseName(baseBuilder.build());
691                 }
692             }
693             return this;
694         }
695 
696         /**
697          * Sets the baseName.
698          * Will set the root if it is not set.
699          * <p>
700          *     To correctly parse the string it must use the directory separator specified by this builder.
701          * </p>
702          * @param baseName the basename to use.
703          * @return this.
704          */
705         public Builder setBaseName(final String baseName) {
706             DocumentName.Builder builder = DocumentName.builder(fsInfo).setName(baseName);
707             builder.sameNameFlag = true;
708             setBaseName(builder);
709             return this;
710         }
711 
712         /**
713          * Sets the basename from the {@link #name} of the specified DocumentName.
714          * Will set the root the baseName has the root set.
715          * @param baseName the DocumentName to set the basename from.
716          * @return this.
717          */
718         public Builder setBaseName(final DocumentName baseName) {
719             this.baseName = baseName;
720             if (!baseName.getRoot().isEmpty()) {
721                 this.root = baseName.getRoot();
722             }
723             return this;
724         }
725 
726         /**
727          * Executes the builder, sets the base name and clears the sameName flag.
728          * @param builder the builder for the base name.
729          */
730         private void setBaseName(final DocumentName.Builder builder) {
731             this.baseName = builder.build();
732             this.sameNameFlag = false;
733         }
734 
735         /**
736          * Sets the basename from a File. Sets {@link #root} and the {@link #baseName}
737          * Will set the root.
738          * @param file the file to set the base name from.
739          * @return this.
740          */
741         public Builder setBaseName(final File file) {
742             DocumentName.Builder builder = DocumentName.builder(fsInfo).setName(file);
743             builder.sameNameFlag = true;
744             setBaseName(builder);
745             return this;
746         }
747 
748         /**
749          * Build a DocumentName from this builder.
750          * @return A new DocumentName.
751          */
752         public DocumentName build() {
753             verify();
754             return new DocumentName(this);
755         }
756     }
757 }