View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one   *
3    * or more contributor license agreements.  See the NOTICE file *
4    * distributed with this work for additional information        *
5    * regarding copyright ownership.  The ASF licenses this file   *
6    * to you under the Apache License, Version 2.0 (the            *
7    * "License"); you may not use this file except in compliance   *
8    * with the License.  You may obtain a copy of the License at   *
9    *                                                              *
10   *   http://www.apache.org/licenses/LICENSE-2.0                 *
11   *                                                              *
12   * Unless required by applicable law or agreed to in writing,   *
13   * software distributed under the License is distributed on an  *
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
15   * KIND, either express or implied.  See the License for the    *
16   * specific language governing permissions and limitations      *
17   * under the License.                                           *
18   */
19  package org.apache.rat.document.impl.guesser;
20  
21  import org.apache.commons.io.IOUtils;
22  import org.apache.rat.document.MockDocument;
23  import org.apache.rat.document.impl.FileDocument;
24  import org.apache.rat.test.utils.Resources;
25  import org.junit.jupiter.api.Test;
26  
27  import java.io.IOException;
28  import java.io.Reader;
29  import java.util.Arrays;
30  import java.util.List;
31  
32  import static org.junit.jupiter.api.Assertions.assertEquals;
33  import static org.junit.jupiter.api.Assertions.assertFalse;
34  import static org.junit.jupiter.api.Assertions.assertTrue;
35  
36  public class BinaryGuesserTest {
37  
38      private static final List<String> BINARY_FILES = Arrays.asList(//
39              "image.png",//
40              "image.pdf",//
41              "image.psd",//
42              "image.gif",//
43              "image.giff",//
44              "image.jpg",//
45              "image.jpeg",//
46              "image.exe",//
47              "Whatever.class",//
48              "data.dat",//
49              "libicuda.so.34",//
50              "my.truststore",//
51              //"foo.Java", //
52              //"manifest.Mf",//
53              "deprecatedtechnology.swf",
54              "xyz.aif",
55              "abc.iff",
56              // Audio Files
57              "test.m3u", "test.m4a",
58              "test-audio.mid", "test-audio.mp3",
59              "test-audio.mpa", "test-audio.wav",
60              "test-audio.wma"
61      );
62  
63      @Test
64      public void testMatches() {
65          for (String name : BINARY_FILES) {
66              assertTrue(BinaryGuesser.isBinary(new MockDocument(name)), ()->"'" + name + "' should be detected as a binary");
67          }
68  
69      }
70  
71      @Test
72      public void testIsBinary() {
73          for (String name : BINARY_FILES) {
74              assertTrue(BinaryGuesser.isBinary(name), ()->"'" + name + "' should be detected as a binary");
75          }
76      }
77  
78      /**
79       * Used to swallow a MalformedInputException and return false
80       * because the encoding of the stream was different from the
81       * platform's default encoding.
82       *
83       * @throws Exception
84       * @see "RAT-81"
85       */
86      @Test
87      public void binaryWithMalformedInputRAT81() throws Exception {
88          FileDocument doc = new FileDocument(Resources.getResourceFile("/binaries/UTF16_with_signature.xml"));
89          Reader r = doc.reader(); // this will fail test if file is not readable
90          try {
91              char[] dummy = new char[100];
92              r.read(dummy);
93              // if we get here, the UTF-16 encoded file didn't throw
94              // any exception, try the UTF-8 encoded one
95              r.close();
96              r = null; // ensure we detect failure to read second file
97              doc = new FileDocument(Resources.getResourceFile("/binaries/UTF8_with_signature.xml"));
98              r = doc.reader();
99              r.read(dummy);
100             // still here?  can't test on this platform
101             System.err.println("Skipping testBinaryWithMalformedInput");
102         } catch (IOException e) {
103             if (r != null) {
104                 IOUtils.closeQuietly(r);
105             } else {
106                 throw e; // could not open the second file
107             }
108             r = null;
109             assertTrue(BinaryGuesser.isBinary(doc), "Expected binary for " + doc.getName());
110         } finally {
111             IOUtils.closeQuietly(r);
112         }
113     }
114 
115     @Test
116     public void realBinaryContent() throws IOException {
117         // This test is not accurate on all platforms
118         final String encoding = System.getProperty("file.encoding");
119         final boolean isBinary = BinaryGuesser.isBinary(new FileDocument(Resources.getResourceFile("/binaries/Image-png.not")));
120         if (encoding.startsWith("ANSI")) {
121             assertTrue(isBinary);
122         } else {
123             if (isBinary) {
124                 System.out.println("BinaryGuesserTest.realBinaryContent() succeeded when using encoding " + encoding);
125             } else {
126                 System.err.println("BinaryGuesserTest.realBinaryContent() failed when using encoding " + encoding);
127             }
128         }
129     }
130 
131     @Test
132     public void textualContent() throws IOException {
133         assertFalse(BinaryGuesser.isBinary(new FileDocument(Resources.getResourceFile("/elements/Text.txt"))));
134     }
135 
136     @Test
137     public void emptyFile() throws IOException {
138         assertFalse(BinaryGuesser.isBinary(new FileDocument(Resources.getResourceFile("/elements/sub/Empty.txt"))));
139     }
140 
141     @Test
142     public void testFileEncodingCanBeSetAndHasFallbackInCaseOfErrors() {
143         System.setProperty(BinaryGuesser.FILE_ENCODING, "shouldThrowAnExceptionBecauseNotFound");
144         assertEquals("UTF-8", BinaryGuesser.getFileEncodingOrUTF8AsFallback().displayName());
145 
146         final String usAscii = "US-ASCII";
147         System.setProperty(BinaryGuesser.FILE_ENCODING, usAscii);
148         assertEquals(usAscii, BinaryGuesser.getFileEncodingOrUTF8AsFallback().displayName());
149     }
150 }