View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one   *
3    * or more contributor license agreements.  See the NOTICE file *
4    * distributed with this work for additional information        *
5    * regarding copyright ownership.  The ASF licenses this file   *
6    * to you under the Apache License, Version 2.0 (the            *
7    * "License"); you may not use this file except in compliance   *
8    * with the License.  You may obtain a copy of the License at   *
9    *                                                              *
10   *   http://www.apache.org/licenses/LICENSE-2.0                 *
11   *                                                              *
12   * Unless required by applicable law or agreed to in writing,   *
13   * software distributed under the License is distributed on an  *
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
15   * KIND, either express or implied.  See the License for the    *
16   * specific language governing permissions and limitations      *
17   * under the License.                                           *
18   */
19  package org.apache.rat.api;
20  
21  import java.io.IOException;
22  import java.io.InputStream;
23  import java.io.Reader;
24  import java.util.SortedSet;
25  
26  import org.apache.rat.analysis.TikaProcessor;
27  import org.apache.rat.document.DocumentName;
28  import org.apache.rat.document.DocumentNameMatcher;
29  import org.apache.tika.parser.txt.CharsetDetector;
30  
31  /**
32   * The representation of a document being scanned.
33   */
34  public abstract class Document implements Comparable<Document> {
35  
36      /**
37       * An enumeration of document types.
38       */
39      public enum Type {
40          /** An archive type document. */
41          ARCHIVE,
42          /** A binary file. */
43          BINARY,
44          /** A generated document that is ignored. */
45          IGNORED,
46          /** A notice document (e.g. LICENSE file). */
47          NOTICE,
48          /** A standard document. */
49          STANDARD,
50          /** An unknown document type. */
51          UNKNOWN
52      }
53  
54      /** The path matcher used by this document */
55      protected final DocumentNameMatcher nameMatcher;
56      /** The metadata for this document */
57      private final MetaData metaData;
58      /** The fully qualified name of this document */
59      protected final DocumentName name;
60  
61      /**
62       * Creates an instance.
63       * @param name the native NameSet of the resource.
64       * @param nameMatcher the document name matcher to filter directories/files.
65       */
66      protected Document(final DocumentName name, final DocumentNameMatcher nameMatcher) {
67          this.name = name;
68          this.nameMatcher = nameMatcher;
69          this.metaData = new MetaData();
70      }
71  
72      /**
73       * Gets the name of the current document.
74       * @return the name of the current document.
75       */
76      public final DocumentName getName() {
77          return name;
78      }
79  
80      /**
81       * Gets the file filter this document was created with.
82       * @return the file filter this document was created with.
83       */
84      public final DocumentNameMatcher getNameMatcher() {
85          return nameMatcher;
86      }
87  
88      @Override
89      public int compareTo(final Document doc) {
90          return name.compareTo(doc.name);
91      }
92  
93      @Override
94      public int hashCode() {
95          return name.hashCode();
96      }
97  
98      @Override
99      public boolean equals(final Object obj) {
100         if (!(obj instanceof Document)) {
101             return false;
102         }
103         return name.equals(((Document) obj).name);
104     }
105 
106     /**
107      * Reads the contents of this document.
108      * @return <code>Reader</code> not null
109      * @throws IOException if this document cannot be read.
110      */
111     public Reader reader() throws IOException {
112         return new CharsetDetector().getReader(TikaProcessor.markSupportedInputStream(inputStream()), getMetaData().getCharset().name());
113     }
114 
115     /**
116      * Streams the document's contents.
117      * @return a non-null input stream of the document.
118      * @throws IOException when stream could not be opened.
119      */
120     public abstract InputStream inputStream() throws IOException;
121 
122     /**
123      * Gets data describing this resource.
124      * @return a non-null MetaData object.
125      */
126     public final MetaData getMetaData() {
127         return metaData;
128     }
129 
130     /**
131      * Checks if document is ignored or not.
132      * @return {@code true} if the document is of type {@code IGNORED}.
133      */
134     public final boolean isIgnored() {
135         return Type.IGNORED == metaData.getDocumentType();
136     }
137 
138     /**
139      * Representations suitable for logging.
140      * @return a <code>String</code> representation
141      * of this object.
142      */
143     @Override
144     public String toString() {
145         return String.format("%s( name = %s metaData = %s )", this.getClass().getSimpleName(), getName().localized(), getMetaData());
146     }
147 
148     /**
149      * Determines if this document is a directory type.
150      * @return {@code true} if this is a directory.
151      */
152     public abstract boolean isDirectory();
153 
154     /**
155      * Gets a sorted set of documents that are children of this document.
156      * @return A sorted set of child Documents. May be empty.
157      */
158     public abstract SortedSet<Document> listChildren();
159 }