View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *     http://www.apache.org/licenses/LICENSE-2.0
10   *
11   *  Unless required by applicable law or agreed to in writing, software
12   *  distributed under the License is distributed on an "AS IS" BASIS,
13   *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   *  See the License for the specific language governing permissions and
15   *  limitations under the License.
16   */
17  package org.apache.creadur.tentacles;
18  
19  import org.apache.http.Header;
20  import org.apache.http.HttpHeaders;
21  import org.apache.http.client.methods.CloseableHttpResponse;
22  import org.apache.http.client.methods.HttpGet;
23  import org.apache.http.client.methods.HttpHead;
24  import org.apache.http.client.methods.HttpUriRequest;
25  import org.apache.http.impl.client.CloseableHttpClient;
26  import org.apache.http.impl.client.HttpClientBuilder;
27  import org.apache.logging.log4j.*;
28  import org.codehaus.swizzle.stream.StreamLexer;
29  
30  import java.io.File;
31  import java.io.IOException;
32  import java.io.InputStream;
33  import java.net.URI;
34  import java.util.LinkedHashSet;
35  import java.util.Set;
36  
37  public class NexusClient {
38  
39      private static final Logger log = LogManager.getLogger(NexusClient.class);
40      private static final String SLASH = "/";
41      private static final String ONE_UP = "../";
42      private static final String USER_AGENT_CONTENTS = "Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.13) Gecko/20101206 Ubuntu/10.10 (maverick) Firefox/3.6.13";
43  
44      private final CloseableHttpClient client;
45      private final FileSystem fileSystem;
46      private final IOSystem ioSystem;
47      private final int retries;
48  
49      public NexusClient(final Platform platform) {
50  
51          System.setProperty("http.keepAlive", "false");
52          System.setProperty("http.maxConnections", "50");
53  
54          this.retries = Integer.parseInt(System.getProperty("NexusClient.retries", "5"));
55  
56          this.client = HttpClientBuilder.create().disableContentCompression()
57                  .build();
58          this.fileSystem = platform.getFileSystem();
59          this.ioSystem = platform.getIoSystem();
60      }
61  
62      public File download(final URI uri, final File file) throws IOException {
63          if (file.exists()) {
64  
65              final long length = getContentLength(uri);
66  
67              if (file.length() == length) {
68                  log.info("Exists " + uri);
69                  return file;
70              } else {
71                  log.info("Incomplete " + uri);
72              }
73          }
74  
75          log.info("Download " + uri);
76  
77          final CloseableHttpResponse response = get(uri);
78  
79          InputStream content = null;
80          try {
81              content = response.getEntity().getContent();
82  
83              this.fileSystem.mkparent(file);
84  
85              this.ioSystem.copy(content, file);
86          } finally {
87              if (content != null) {
88                  content.close();
89              }
90  
91              response.close();
92          }
93  
94          return file;
95      }
96  
97      private Long getContentLength(final URI uri) throws IOException {
98          final CloseableHttpResponse head = head(uri);
99          final Header[] headers = head.getHeaders(HttpHeaders.CONTENT_LENGTH);
100 
101         if (headers != null && headers.length >= 1) {
102             return Long.valueOf(headers[0].getValue());
103         }
104 
105         head.close();
106 
107         return (long) -1;
108     }
109 
110     private CloseableHttpResponse get(final URI uri) throws IOException {
111         return get(new HttpGet(uri), this.retries);
112     }
113 
114     private CloseableHttpResponse head(final URI uri) throws IOException {
115         return get(new HttpHead(uri), this.retries);
116     }
117 
118     private CloseableHttpResponse get(final HttpUriRequest request, int tries) throws IOException {
119         try {
120             request.setHeader(HttpHeaders.USER_AGENT, USER_AGENT_CONTENTS);
121             return this.client.execute(request);
122         } catch (final IOException e) {
123             if (tries > 0) {
124                 try {
125                     Thread.sleep(250);
126                 } catch (final InterruptedException ie) {
127                     Thread.interrupted();
128                     throw new IOException("Interrupted", ie);
129                 }
130                 return get(request, tries--);
131             } else {
132                 throw e;
133             }
134         }
135     }
136 
137     public Set<URI> crawl(final URI index) throws IOException {
138         log.info("Crawl " + index);
139         final Set<URI> resources = new LinkedHashSet<URI>();
140 
141         final CloseableHttpResponse response = get(index);
142 
143         final InputStream content = response.getEntity().getContent();
144         final StreamLexer lexer = new StreamLexer(content);
145 
146         final Set<URI> crawl = new LinkedHashSet<URI>();
147 
148         // <a
149         // href="https://repository.apache.org/content/repositories/orgapacheopenejb-094/archetype-catalog.xml">archetype-catalog.xml</a>
150         while (lexer.readAndMark("<a ", "/a>")) {
151 
152             try {
153                 final String link = lexer.peek("href=\"", "\"");
154                 final String name = lexer.peek(">", "<");
155 
156                 final URI uri = index.resolve(link);
157 
158                 if (name.equals(ONE_UP)) {
159                     continue;
160                 }
161                 if (link.equals(ONE_UP)) {
162                     continue;
163                 }
164 
165                 if (name.endsWith(SLASH)) {
166                     crawl.add(uri);
167                     continue;
168                 }
169 
170                 resources.add(uri);
171 
172             } finally {
173                 lexer.unmark();
174             }
175         }
176 
177         content.close();
178         response.close();
179 
180         for (final URI uri : crawl) {
181             resources.addAll(crawl(uri));
182         }
183 
184         return resources;
185     }
186 }