1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one *
3 * or more contributor license agreements. See the NOTICE file *
4 * distributed with this work for additional information *
5 * regarding copyright ownership. The ASF licenses this file *
6 * to you under the Apache License, Version 2.0 (the *
7 * "License"); you may not use this file except in compliance *
8 * with the License. You may obtain a copy of the License at *
9 * *
10 * http://www.apache.org/licenses/LICENSE-2.0 *
11 * *
12 * Unless required by applicable law or agreed to in writing, *
13 * software distributed under the License is distributed on an *
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
15 * KIND, either express or implied. See the License for the *
16 * specific language governing permissions and limitations *
17 * under the License. *
18 */
19 package org.apache.rat.api;
20
21 import java.io.IOException;
22 import java.io.InputStream;
23 import java.io.Reader;
24 import java.util.SortedSet;
25
26 import org.apache.rat.analysis.TikaProcessor;
27 import org.apache.rat.document.DocumentName;
28 import org.apache.rat.document.DocumentNameMatcher;
29 import org.apache.tika.parser.txt.CharsetDetector;
30
31 /**
32 * The representation of a document being scanned.
33 */
34 public abstract class Document implements Comparable<Document> {
35
36 /**
37 * An enumeration of document types.
38 */
39 public enum Type {
40 /** An archive type document. */
41 ARCHIVE,
42 /** A binary file. */
43 BINARY,
44 /** A generated document that is ignored. */
45 IGNORED,
46 /** A notice document (e.g. LICENSE file). */
47 NOTICE,
48 /** A standard document. */
49 STANDARD,
50 /** An unknown document type. */
51 UNKNOWN
52 }
53
54 /** The path matcher used by this document */
55 protected final DocumentNameMatcher nameMatcher;
56 /** The metadata for this document */
57 private final MetaData metaData;
58 /** The fully qualified name of this document */
59 protected final DocumentName name;
60
61 /**
62 * Creates an instance.
63 * @param name the native NameSet of the resource.
64 * @param nameMatcher the document name matcher to filter directories/files.
65 */
66 protected Document(final DocumentName name, final DocumentNameMatcher nameMatcher) {
67 this.name = name;
68 this.nameMatcher = nameMatcher;
69 this.metaData = new MetaData();
70 }
71
72 /**
73 * Gets the name of the current document.
74 * @return the name of the current document.
75 */
76 public final DocumentName getName() {
77 return name;
78 }
79
80 /**
81 * Gets the file filter this document was created with.
82 * @return the file filter this document was created with.
83 */
84 public final DocumentNameMatcher getNameMatcher() {
85 return nameMatcher;
86 }
87
88 @Override
89 public int compareTo(final Document doc) {
90 return name.compareTo(doc.name);
91 }
92
93 @Override
94 public int hashCode() {
95 return name.hashCode();
96 }
97
98 @Override
99 public boolean equals(final Object obj) {
100 if (!(obj instanceof Document)) {
101 return false;
102 }
103 return name.equals(((Document) obj).name);
104 }
105
106 /**
107 * Reads the contents of this document.
108 * @return <code>Reader</code> not null
109 * @throws IOException if this document cannot be read.
110 */
111 public Reader reader() throws IOException {
112 return new CharsetDetector().getReader(TikaProcessor.markSupportedInputStream(inputStream()), getMetaData().getCharset().name());
113 }
114
115 /**
116 * Streams the document's contents.
117 * @return a non-null input stream of the document.
118 * @throws IOException when stream could not be opened.
119 */
120 public abstract InputStream inputStream() throws IOException;
121
122 /**
123 * Gets data describing this resource.
124 * @return a non-null MetaData object.
125 */
126 public final MetaData getMetaData() {
127 return metaData;
128 }
129
130 /**
131 * Checks if document is ignored or not.
132 * @return {@code true} if the document is of type {@code IGNORED}.
133 */
134 public final boolean isIgnored() {
135 return Type.IGNORED == metaData.getDocumentType();
136 }
137
138 /**
139 * Representations suitable for logging.
140 * @return a <code>String</code> representation
141 * of this object.
142 */
143 @Override
144 public String toString() {
145 return String.format("%s( name = %s metaData = %s )", this.getClass().getSimpleName(), getName().localized(), getMetaData());
146 }
147
148 /**
149 * Determines if this document is a directory type.
150 * @return {@code true} if this is a directory.
151 */
152 public abstract boolean isDirectory();
153
154 /**
155 * Gets a sorted set of documents that are children of this document.
156 * @return A sorted set of child Documents. May be empty.
157 */
158 public abstract SortedSet<Document> listChildren();
159 }