f06e1a6212cd84bc365d04a3e2594b2c6c6463df
[bluesky.git] / libs3-1.4 / src / request.c
1 /** **************************************************************************
2  * request.c
3  * 
4  * Copyright 2008 Bryan Ischo <bryan@ischo.com>
5  * 
6  * This file is part of libs3.
7  * 
8  * libs3 is free software: you can redistribute it and/or modify it under the
9  * terms of the GNU General Public License as published by the Free Software
10  * Foundation, version 3 of the License.
11  *
12  * In addition, as a special exception, the copyright holders give
13  * permission to link the code of this library and its programs with the
14  * OpenSSL library, and distribute linked combinations including the two.
15  *
16  * libs3 is distributed in the hope that it will be useful, but WITHOUT ANY
17  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
18  * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
19  * details.
20  *
21  * You should have received a copy of the GNU General Public License version 3
22  * along with libs3, in a file named COPYING.  If not, see
23  * <http://www.gnu.org/licenses/>.
24  *
25  ************************************************************************** **/
26
27 #include <ctype.h>
28 #include <pthread.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <sys/utsname.h>
32 #include "request.h"
33 #include "request_context.h"
34 #include "response_headers_handler.h"
35 #include "util.h"
36
37
// Size, in bytes, of the userAgentG buffer
38 #define USER_AGENT_SIZE 256
// Number of Request pointers that requestStackG can hold
39 #define REQUEST_STACK_SIZE 32
40
// NOTE(review): presumably the User-Agent string sent with every request; it
// is filled in outside the part of the file reviewed here - confirm
41 static char userAgentG[USER_AGENT_SIZE];
42
// NOTE(review): presumably guards requestStackG and requestStackCountG; the
// lock/unlock sites are outside the part of the file reviewed here - confirm
43 static pthread_mutex_t requestStackMutexG;
44
// Fixed-size array of Request pointers
// NOTE(review): the name suggests a stack of reusable requests - confirm
45 static Request *requestStackG[REQUEST_STACK_SIZE];
46
// NOTE(review): presumably the number of entries of requestStackG currently
// in use - confirm
47 static int requestStackCountG;
48
49
50 typedef struct RequestComputedValues
51 {
52     // All x-amz- headers, in normalized form (i.e. NAME: VALUE, no other ws)
53     char *amzHeaders[S3_MAX_METADATA_COUNT + 2]; // + 2 for acl and date
54
55     // The number of x-amz- headers
56     int amzHeadersCount;
57
58     // Storage for amzHeaders (the +256 is for x-amz-acl and x-amz-date)
59     char amzHeadersRaw[COMPACTED_METADATA_BUFFER_SIZE + 256 + 1];
60
61     // Canonicalized x-amz- headers
62     string_multibuffer(canonicalizedAmzHeaders,
63                        COMPACTED_METADATA_BUFFER_SIZE + 256 + 1);
64
65     // URL-Encoded key
66     char urlEncodedKey[MAX_URLENCODED_KEY_SIZE + 1];
67
68     // Canonicalized resource
69     char canonicalizedResource[MAX_CANONICALIZED_RESOURCE_SIZE + 1];
70
71     // Cache-Control header (or empty)
72     char cacheControlHeader[128];
73
74     // Content-Type header (or empty)
75     char contentTypeHeader[128];
76
77     // Content-MD5 header (or empty)
78     char md5Header[128];
79
80     // Content-Disposition header (or empty)
81     char contentDispositionHeader[128];
82
83     // Content-Encoding header (or empty)
84     char contentEncodingHeader[128];
85
86     // Expires header (or empty)
87     char expiresHeader[128];
88
89     // If-Modified-Since header
90     char ifModifiedSinceHeader[128];
91
92     // If-Unmodified-Since header
93     char ifUnmodifiedSinceHeader[128];
94
95     // If-Match header
96     char ifMatchHeader[128];
97
98     // If-None-Match header
99     char ifNoneMatchHeader[128];
100
101     // Range header
102     char rangeHeader[128];
103
104     // Authorization header
105     char authorizationHeader[128];
106 } RequestComputedValues;
107
108
109 // Called whenever we detect that the request headers have been completely
110 // processed; which happens either when we get our first read/write callback,
111 // or the request is finished being procesed.  Returns nonzero on success,
112 // zero on failure.
113 static void request_headers_done(Request *request)
114 {
115     if (request->propertiesCallbackMade) {
116         return;
117     }
118
119     request->propertiesCallbackMade = 1;
120
121     // Get the http response code
122     long httpResponseCode;
123     request->httpResponseCode = 0;
124     if (curl_easy_getinfo(request->curl, CURLINFO_RESPONSE_CODE, 
125                           &httpResponseCode) != CURLE_OK) {
126         // Not able to get the HTTP response code - error
127         request->status = S3StatusInternalError;
128         return;
129     }
130     else {
131         request->httpResponseCode = httpResponseCode;
132     }
133
134     response_headers_handler_done(&(request->responseHeadersHandler), 
135                                   request->curl);
136
137     // Only make the callback if it was a successful request; otherwise we're
138     // returning information about the error response itself
139     if (request->propertiesCallback &&
140         (request->httpResponseCode >= 200) &&
141         (request->httpResponseCode <= 299)) {
142         request->status = (*(request->propertiesCallback))
143             (&(request->responseHeadersHandler.responseProperties), 
144              request->callbackData);
145     }
146 }
147
148
149 static size_t curl_header_func(void *ptr, size_t size, size_t nmemb,
150                                void *data)
151 {
152     Request *request = (Request *) data;
153
154     int len = size * nmemb;
155
156     response_headers_handler_add
157         (&(request->responseHeadersHandler), (char *) ptr, len);
158
159     return len;
160 }
161
162
163 static size_t curl_read_func(void *ptr, size_t size, size_t nmemb, void *data)
164 {
165     Request *request = (Request *) data;
166
167     int len = size * nmemb;
168
169     request_headers_done(request);
170
171     if (request->status != S3StatusOK) {
172         return CURL_READFUNC_ABORT;
173     }
174
175     // If there is no data callback, or the data callback has already returned
176     // contentLength bytes, return 0;
177     if (!request->toS3Callback || !request->toS3CallbackBytesRemaining) {
178         return 0;
179     }
180     
181     // Don't tell the callback that we are willing to accept more data than we
182     // really are
183     if (len > request->toS3CallbackBytesRemaining) {
184         len = request->toS3CallbackBytesRemaining;
185     }
186
187     // Otherwise, make the data callback
188     int ret = (*(request->toS3Callback))
189         (len, (char *) ptr, request->callbackData);
190     if (ret < 0) {
191         request->status = S3StatusAbortedByCallback;
192         return CURL_READFUNC_ABORT;
193     }
194     else {
195         if (ret > request->toS3CallbackBytesRemaining) {
196             ret = request->toS3CallbackBytesRemaining;
197         }
198         request->toS3CallbackBytesRemaining -= ret;
199         return ret;
200     }
201 }
202
203
204 static size_t curl_write_func(void *ptr, size_t size, size_t nmemb,
205                               void *data)
206 {
207     Request *request = (Request *) data;
208
209     int len = size * nmemb;
210
211     request_headers_done(request);
212
213     if (request->status != S3StatusOK) {
214         return 0;
215     }
216
217     // On HTTP error, we expect to parse an HTTP error response
218     if ((request->httpResponseCode < 200) || 
219         (request->httpResponseCode > 299)) {
220         request->status = error_parser_add
221             (&(request->errorParser), (char *) ptr, len);
222     }
223     // If there was a callback registered, make it
224     else if (request->fromS3Callback) {
225         request->status = (*(request->fromS3Callback))
226             (len, (char *) ptr, request->callbackData);
227     }
228     // Else, consider this an error - S3 has sent back data when it was not
229     // expected
230     else {
231         request->status = S3StatusInternalError;
232     }
233
234     return ((request->status == S3StatusOK) ? len : 0);
235 }
236
237
238 // This function 'normalizes' all x-amz-meta headers provided in
239 // params->requestHeaders, which means it removes all whitespace from
240 // them such that they all look exactly like this:
241 // x-amz-meta-${NAME}: ${VALUE}
242 // It also adds the x-amz-acl, x-amz-copy-source, and x-amz-metadata-directive
243 // headers if necessary, and always adds the x-amz-date header.  It copies the
244 // raw string values into params->amzHeadersRaw, and creates an array of
245 // string pointers representing these headers in params->amzHeaders (and also
246 // sets params->amzHeadersCount to be the count of the total number of x-amz-
247 // headers thus created).
248 static S3Status compose_amz_headers(const RequestParams *params,
249                                     RequestComputedValues *values)
250 {
251     const S3PutProperties *properties = params->putProperties;
252
253     values->amzHeadersCount = 0;
254     values->amzHeadersRaw[0] = 0;
255     int len = 0;
256
257     // Append a header to amzHeaders, trimming whitespace from the end.
258     // Does NOT trim whitespace from the beginning.
259 #define headers_append(isNewHeader, format, ...)                        \
260     do {                                                                \
261         if (isNewHeader) {                                              \
262             values->amzHeaders[values->amzHeadersCount++] =             \
263                 &(values->amzHeadersRaw[len]);                          \
264         }                                                               \
265         len += snprintf(&(values->amzHeadersRaw[len]),                  \
266                         sizeof(values->amzHeadersRaw) - len,            \
267                         format, __VA_ARGS__);                           \
268         if (len >= (int) sizeof(values->amzHeadersRaw)) {               \
269             return S3StatusMetaDataHeadersTooLong;                      \
270         }                                                               \
271         while ((len > 0) && (values->amzHeadersRaw[len - 1] == ' ')) {  \
272             len--;                                                      \
273         }                                                               \
274         values->amzHeadersRaw[len++] = 0;                               \
275     } while (0)
276
277 #define header_name_tolower_copy(str, l)                                \
278     do {                                                                \
279         values->amzHeaders[values->amzHeadersCount++] =                 \
280             &(values->amzHeadersRaw[len]);                              \
281         if ((len + l) >= (int) sizeof(values->amzHeadersRaw)) {         \
282             return S3StatusMetaDataHeadersTooLong;                      \
283         }                                                               \
284         int todo = l;                                                   \
285         while (todo--) {                                                \
286             if ((*(str) >= 'A') && (*(str) <= 'Z')) {                   \
287                 values->amzHeadersRaw[len++] = 'a' + (*(str) - 'A');    \
288             }                                                           \
289             else {                                                      \
290                 values->amzHeadersRaw[len++] = *(str);                  \
291             }                                                           \
292             (str)++;                                                    \
293         }                                                               \
294     } while (0)
295
296     // Check and copy in the x-amz-meta headers
297     if (properties) {
298         int i;
299         for (i = 0; i < properties->metaDataCount; i++) {
300             const S3NameValue *property = &(properties->metaData[i]);
301             char headerName[S3_MAX_METADATA_SIZE - sizeof(": v")];
302             int l = snprintf(headerName, sizeof(headerName),
303                              S3_METADATA_HEADER_NAME_PREFIX "%s",
304                              property->name);
305             char *hn = headerName;
306             header_name_tolower_copy(hn, l);
307             // Copy in the value
308             headers_append(0, ": %s", property->value);
309         }
310
311         // Add the x-amz-acl header, if necessary
312         const char *cannedAclString;
313         switch (params->putProperties->cannedAcl) {
314         case S3CannedAclPrivate:
315             cannedAclString = 0;
316             break;
317         case S3CannedAclPublicRead:
318             cannedAclString = "public-read";
319             break;
320         case S3CannedAclPublicReadWrite:
321             cannedAclString = "public-read-write";
322             break;
323         default: // S3CannedAclAuthenticatedRead
324             cannedAclString = "authenticated-read";
325             break;
326         }
327         if (cannedAclString) {
328             headers_append(1, "x-amz-acl: %s", cannedAclString);
329         }
330     }
331
332     // Add the x-amz-date header
333     time_t now = time(NULL);
334     char date[64];
335     strftime(date, sizeof(date), "%a, %d %b %Y %H:%M:%S GMT", gmtime(&now));
336     headers_append(1, "x-amz-date: %s", date);
337
338     if (params->httpRequestType == HttpRequestTypeCOPY) {
339         // Add the x-amz-copy-source header
340         if (params->copySourceBucketName && params->copySourceBucketName[0] &&
341             params->copySourceKey && params->copySourceKey[0]) {
342             headers_append(1, "x-amz-copy-source: /%s/%s",
343                            params->copySourceBucketName,
344                            params->copySourceKey);
345         }
346         // And the x-amz-metadata-directive header
347         if (params->putProperties) {
348             headers_append(1, "%s", "x-amz-metadata-directive: REPLACE");
349         }
350     }
351
352     return S3StatusOK;
353 }
354
355
356 // Composes the other headers
357 static S3Status compose_standard_headers(const RequestParams *params,
358                                          RequestComputedValues *values)
359 {
360
361 #define do_put_header(fmt, sourceField, destField, badError, tooLongError)  \
362     do {                                                                    \
363         if (params->putProperties &&                                        \
364             params->putProperties-> sourceField &&                          \
365             params->putProperties-> sourceField[0]) {                       \
366             /* Skip whitespace at beginning of val */                       \
367             const char *val = params->putProperties-> sourceField;          \
368             while (*val && is_blank(*val)) {                                \
369                 val++;                                                      \
370             }                                                               \
371             if (!*val) {                                                    \
372                 return badError;                                            \
373             }                                                               \
374             /* Compose header, make sure it all fit */                      \
375             int len = snprintf(values-> destField,                          \
376                                sizeof(values-> destField), fmt, val);       \
377             if (len >= (int) sizeof(values-> destField)) {                  \
378                 return tooLongError;                                        \
379             }                                                               \
380             /* Now remove the whitespace at the end */                      \
381             while (is_blank(values-> destField[len])) {                     \
382                 len--;                                                      \
383             }                                                               \
384             values-> destField[len] = 0;                                    \
385         }                                                                   \
386         else {                                                              \
387             values-> destField[0] = 0;                                      \
388         }                                                                   \
389     } while (0)
390
391 #define do_get_header(fmt, sourceField, destField, badError, tooLongError)  \
392     do {                                                                    \
393         if (params->getConditions &&                                        \
394             params->getConditions-> sourceField &&                          \
395             params->getConditions-> sourceField[0]) {                       \
396             /* Skip whitespace at beginning of val */                       \
397             const char *val = params->getConditions-> sourceField;          \
398             while (*val && is_blank(*val)) {                                \
399                 val++;                                                      \
400             }                                                               \
401             if (!*val) {                                                    \
402                 return badError;                                            \
403             }                                                               \
404             /* Compose header, make sure it all fit */                      \
405             int len = snprintf(values-> destField,                          \
406                                sizeof(values-> destField), fmt, val);       \
407             if (len >= (int) sizeof(values-> destField)) {                  \
408                 return tooLongError;                                        \
409             }                                                               \
410             /* Now remove the whitespace at the end */                      \
411             while (is_blank(values-> destField[len])) {                     \
412                 len--;                                                      \
413             }                                                               \
414             values-> destField[len] = 0;                                    \
415         }                                                                   \
416         else {                                                              \
417             values-> destField[0] = 0;                                      \
418         }                                                                   \
419     } while (0)
420
421     // Cache-Control
422     do_put_header("Cache-Control: %s", cacheControl, cacheControlHeader,
423                   S3StatusBadCacheControl, S3StatusCacheControlTooLong);
424     
425     // ContentType
426     do_put_header("Content-Type: %s", contentType, contentTypeHeader,
427                   S3StatusBadContentType, S3StatusContentTypeTooLong);
428
429     // MD5
430     do_put_header("Content-MD5: %s", md5, md5Header, S3StatusBadMD5,
431                   S3StatusMD5TooLong);
432
433     // Content-Disposition
434     do_put_header("Content-Disposition: attachment; filename=\"%s\"",
435                   contentDispositionFilename, contentDispositionHeader,
436                   S3StatusBadContentDispositionFilename,
437                   S3StatusContentDispositionFilenameTooLong);
438     
439     // ContentEncoding
440     do_put_header("Content-Encoding: %s", contentEncoding, 
441                   contentEncodingHeader, S3StatusBadContentEncoding,
442                   S3StatusContentEncodingTooLong);
443     
444     // Expires
445     if (params->putProperties && (params->putProperties->expires >= 0)) {
446         time_t t = (time_t) params->putProperties->expires;
447         strftime(values->expiresHeader, sizeof(values->expiresHeader),
448                  "Expires: %a, %d %b %Y %H:%M:%S UTC", gmtime(&t));
449     }
450     else {
451         values->expiresHeader[0] = 0;
452     }
453
454     // If-Modified-Since
455     if (params->getConditions &&
456         (params->getConditions->ifModifiedSince >= 0)) {
457         time_t t = (time_t) params->getConditions->ifModifiedSince;
458         strftime(values->ifModifiedSinceHeader,
459                  sizeof(values->ifModifiedSinceHeader),
460                  "If-Modified-Since: %a, %d %b %Y %H:%M:%S UTC", gmtime(&t));
461     }
462     else {
463         values->ifModifiedSinceHeader[0] = 0;
464     }
465
466     // If-Unmodified-Since header
467     if (params->getConditions &&
468         (params->getConditions->ifNotModifiedSince >= 0)) {
469         time_t t = (time_t) params->getConditions->ifNotModifiedSince;
470         strftime(values->ifUnmodifiedSinceHeader,
471                  sizeof(values->ifUnmodifiedSinceHeader),
472                  "If-Unmodified-Since: %a, %d %b %Y %H:%M:%S UTC", gmtime(&t));
473     }
474     else {
475         values->ifUnmodifiedSinceHeader[0] = 0;
476     }
477     
478     // If-Match header
479     do_get_header("If-Match: %s", ifMatchETag, ifMatchHeader,
480                   S3StatusBadIfMatchETag, S3StatusIfMatchETagTooLong);
481     
482     // If-None-Match header
483     do_get_header("If-None-Match: %s", ifNotMatchETag, ifNoneMatchHeader,
484                   S3StatusBadIfNotMatchETag, 
485                   S3StatusIfNotMatchETagTooLong);
486     
487     // Range header
488     if (params->startByte || params->byteCount) {
489         if (params->byteCount) {
490             snprintf(values->rangeHeader, sizeof(values->rangeHeader),
491                      "Range: bytes=%llu-%llu", 
492                      (unsigned long long) params->startByte,
493                      (unsigned long long) (params->startByte + 
494                                            params->byteCount - 1));
495         }
496         else {
497             snprintf(values->rangeHeader, sizeof(values->rangeHeader),
498                      "Range: bytes=%llu-", 
499                      (unsigned long long) params->startByte);
500         }
501     }
502     else {
503         values->rangeHeader[0] = 0;
504     }
505
506     return S3StatusOK;
507 }
508
509
510 // URL encodes the params->key value into params->urlEncodedKey
511 static S3Status encode_key(const RequestParams *params,
512                            RequestComputedValues *values)
513 {
514     return (urlEncode(values->urlEncodedKey, params->key, S3_MAX_KEY_SIZE) ?
515             S3StatusOK : S3StatusUriTooLong);
516 }
517
518
// Less-than-or-equal comparison of the names of two HTTP header lines.  The
// name of a header line is everything before its first ':' (or before the
// end of the string, should there be no ':').  Returns nonzero if header1's
// name sorts before header2's name or the two names are equal, and zero if
// it sorts after.  A name that is a proper prefix of the other name sorts
// before it.
static int headerle(const char *header1, const char *header2)
{
    for ( ; ; header1++, header2++) {
        int ended1 = ((*header1 == ':') || (*header1 == 0));
        int ended2 = ((*header2 == ':') || (*header2 == 0));

        // header1's name ran out first (or at the same time): it is equal
        // to, or a prefix of, header2's name
        if (ended1) {
            return 1;
        }
        // header2's name is a proper prefix of header1's name
        if (ended2) {
            return 0;
        }
        // The first differing character decides
        if (*header1 != *header2) {
            return (*header1 < *header2);
        }
    }
}
540
541
// Sorts the size header lines in headers in place, ordering them with
// headerle().  This is a gnome sort: simple, and good enough for the small
// number of elements typically sorted (a merge sort would be the better
// choice should that ever change).  One twist on the textbook version: after
// an element has been moved back into position, the scan does not walk
// forward one step at a time but resumes just past the furthest index
// examined so far, which avoids comparing elements already known to be in
// order.
static void header_gnome_sort(const char **headers, int size)
{
    int pos = 0;
    int furthest = 0;

    while (pos < size) {
        if ((pos > 0) && !headerle(headers[pos - 1], headers[pos])) {
            // Out of order: swap with the predecessor and follow the element
            // one step back
            const char *moved = headers[pos];
            headers[pos] = headers[pos - 1];
            headers[pos - 1] = moved;
            pos--;
        }
        else {
            // In order (or at the very start): jump ahead to the next index
            // that has not been examined yet
            furthest++;
            pos = furthest;
        }
    }
}
565
566
567 // Canonicalizes the x-amz- headers into the canonicalizedAmzHeaders buffer
568 static void canonicalize_amz_headers(RequestComputedValues *values)
569 {
570     // Make a copy of the headers that will be sorted
571     const char *sortedHeaders[S3_MAX_METADATA_COUNT];
572
573     memcpy(sortedHeaders, values->amzHeaders,
574            (values->amzHeadersCount * sizeof(sortedHeaders[0])));
575
576     // Now sort these
577     header_gnome_sort(sortedHeaders, values->amzHeadersCount);
578
579     // Now copy this sorted list into the buffer, all the while:
580     // - folding repeated headers into single lines, and
581     // - folding multiple lines
582     // - removing the space after the colon
583     int lastHeaderLen = 0, i;
584     char *buffer = values->canonicalizedAmzHeaders;
585     for (i = 0; i < values->amzHeadersCount; i++) {
586         const char *header = sortedHeaders[i];
587         const char *c = header;
588         // If the header names are the same, append the next value
589         if ((i > 0) && 
590             !strncmp(header, sortedHeaders[i - 1], lastHeaderLen)) {
591             // Replacing the previous newline with a comma
592             *(buffer - 1) = ',';
593             // Skip the header name and space
594             c += (lastHeaderLen + 1);
595         }
596         // Else this is a new header
597         else {
598             // Copy in everything up to the space in the ": "
599             while (*c != ' ') {
600                 *buffer++ = *c++;
601             }
602             // Save the header len since it's a new header
603             lastHeaderLen = c - header;
604             // Skip the space
605             c++;
606         }
607         // Now copy in the value, folding the lines
608         while (*c) {
609             // If c points to a \r\n[whitespace] sequence, then fold
610             // this newline out
611             if ((*c == '\r') && (*(c + 1) == '\n') && is_blank(*(c + 2))) {
612                 c += 3;
613                 while (is_blank(*c)) {
614                     c++;
615                 }
616                 // Also, what has most recently been copied into buffer amy
617                 // have been whitespace, and since we're folding whitespace
618                 // out around this newline sequence, back buffer up over
619                 // any whitespace it contains
620                 while (is_blank(*(buffer - 1))) {
621                     buffer--;
622                 }
623                 continue;
624             }
625             *buffer++ = *c++;
626         }
627         // Finally, add the newline
628         *buffer++ = '\n';
629     }
630
631     // Terminate the buffer
632     *buffer = 0;
633 }
634
635
// Writes the canonicalized resource for a request into buffer:
//   ["/" bucketName] "/" [urlEncodedKey] ["?" subResource]
// where a part in brackets is left out when the corresponding argument is
// NULL or the empty string.  buffer must be large enough for the result;
// no size checking is done here.
static void canonicalize_resource(const char *bucketName,
                                  const char *subResource,
                                  const char *urlEncodedKey,
                                  char *buffer)
{
    int len = 0;

#define append(str) len += sprintf(&(buffer[len]), "%s", str)

    const int hasBucket = (bucketName && bucketName[0]);
    const int hasKey = (urlEncodedKey && urlEncodedKey[0]);
    const int hasSubResource = (subResource && subResource[0]);

    buffer[0] = 0;

    if (hasBucket) {
        append("/");
        append(bucketName);
    }

    // The slash in front of the key is always present
    append("/");

    if (hasKey) {
        append(urlEncodedKey);
    }

    if (hasSubResource) {
        append("?");
        append(subResource);
    }
}
664
665
666 // Convert an HttpRequestType to an HTTP Verb string
667 static const char *http_request_type_to_verb(HttpRequestType requestType)
668 {
669     switch (requestType) {
670     case HttpRequestTypeGET:
671         return "GET";
672     case HttpRequestTypeHEAD:
673         return "HEAD";
674     case HttpRequestTypePUT:
675     case HttpRequestTypeCOPY:
676         return "PUT";
677     default: // HttpRequestTypeDELETE
678         return "DELETE";
679     }
680 }
681
682
683 // Composes the Authorization header for the request
684 static S3Status compose_auth_header(const RequestParams *params,
685                                     RequestComputedValues *values)
686 {
687     // We allow for:
688     // 17 bytes for HTTP-Verb + \n
689     // 129 bytes for Content-MD5 + \n
690     // 129 bytes for Content-Type + \n
691     // 1 byte for empty Date + \n
692     // CanonicalizedAmzHeaders & CanonicalizedResource
693     char signbuf[17 + 129 + 129 + 1 + 
694                  (sizeof(values->canonicalizedAmzHeaders) - 1) +
695                  (sizeof(values->canonicalizedResource) - 1) + 1];
696     int len = 0;
697
698 #define signbuf_append(format, ...)                             \
699     len += snprintf(&(signbuf[len]), sizeof(signbuf) - len,     \
700                     format, __VA_ARGS__)
701
702     signbuf_append
703         ("%s\n", http_request_type_to_verb(params->httpRequestType));
704
705     // For MD5 and Content-Type, use the value in the actual header, because
706     // it's already been trimmed
707     signbuf_append("%s\n", values->md5Header[0] ? 
708                    &(values->md5Header[sizeof("Content-MD5: ") - 1]) : "");
709
710     signbuf_append
711         ("%s\n", values->contentTypeHeader[0] ? 
712          &(values->contentTypeHeader[sizeof("Content-Type: ") - 1]) : "");
713
714     signbuf_append("%s", "\n"); // Date - we always use x-amz-date
715
716     signbuf_append("%s", values->canonicalizedAmzHeaders);
717
718     signbuf_append("%s", values->canonicalizedResource);
719
720     // Generate an HMAC-SHA-1 of the signbuf
721     unsigned char hmac[20];
722
723     HMAC_SHA1(hmac, (unsigned char *) params->bucketContext.secretAccessKey,
724               strlen(params->bucketContext.secretAccessKey),
725               (unsigned char *) signbuf, len);
726
727     // Now base-64 encode the results
728     char b64[((20 + 1) * 4) / 3];
729     int b64Len = base64Encode(hmac, 20, b64);
730     
731     snprintf(values->authorizationHeader, sizeof(values->authorizationHeader),
732              "Authorization: AWS %s:%.*s", params->bucketContext.accessKeyId,
733              b64Len, b64);
734
735     return S3StatusOK;
736 }
737
738
739 // Compose the URI to use for the request given the request parameters
740 static S3Status compose_uri(char *buffer, int bufferSize,
741                             const S3BucketContext *bucketContext,
742                             const char *urlEncodedKey,
743                             const char *subResource, const char *queryParams)
744 {
745     int len = 0;
746     
747 #define uri_append(fmt, ...)                                                 \
748     do {                                                                     \
749         len += snprintf(&(buffer[len]), bufferSize - len, fmt, __VA_ARGS__); \
750         if (len >= bufferSize) {                                             \
751             return S3StatusUriTooLong;                                       \
752         }                                                                    \
753     } while (0)
754
755     uri_append("http%s://", 
756                (bucketContext->protocol == S3ProtocolHTTP) ? "" : "s");
757
758     if (bucketContext->bucketName && 
759         bucketContext->bucketName[0]) {
760         if (bucketContext->uriStyle == S3UriStyleVirtualHost) {
761             uri_append("%s.s3.amazonaws.com", bucketContext->bucketName);
762         }
763         else {
764             uri_append("s3.amazonaws.com/%s", bucketContext->bucketName);
765         }
766     }
767     else {
768         uri_append("%s", "s3.amazonaws.com");
769     }
770
771     uri_append("%s", "/");
772
773     uri_append("%s", urlEncodedKey);
774     
775     if (subResource && subResource[0]) {
776         uri_append("?%s", subResource);
777     }
778     
779     if (queryParams) {
780         uri_append("%s%s", (subResource && subResource[0]) ? "&" : "?",
781                    queryParams);
782     }
783     
784     return S3StatusOK;
785 }
786
787
788 // Sets up the curl handle given the completely computed RequestParams
789 static S3Status setup_curl(Request *request,
790                            const RequestParams *params,
791                            const RequestComputedValues *values)
792 {
793     CURLcode status;
794
795 #define curl_easy_setopt_safe(opt, val)                                 \
796     if ((status = curl_easy_setopt                                      \
797          (request->curl, opt, val)) != CURLE_OK) {                      \
798         return S3StatusFailedToInitializeRequest;                       \
799     }
800
801     // Debugging only
802     // curl_easy_setopt_safe(CURLOPT_VERBOSE, 1);
803     
804     // Set private data to request for the benefit of S3RequestContext
805     curl_easy_setopt_safe(CURLOPT_PRIVATE, request);
806     
807     // Set header callback and data
808     curl_easy_setopt_safe(CURLOPT_HEADERDATA, request);
809     curl_easy_setopt_safe(CURLOPT_HEADERFUNCTION, &curl_header_func);
810     
811     // Set read callback, data, and readSize
812     curl_easy_setopt_safe(CURLOPT_READFUNCTION, &curl_read_func);
813     curl_easy_setopt_safe(CURLOPT_READDATA, request);
814     
815     // Set write callback and data
816     curl_easy_setopt_safe(CURLOPT_WRITEFUNCTION, &curl_write_func);
817     curl_easy_setopt_safe(CURLOPT_WRITEDATA, request);
818
819     // Ask curl to parse the Last-Modified header.  This is easier than
820     // parsing it ourselves.
821     curl_easy_setopt_safe(CURLOPT_FILETIME, 1);
822
823     // Curl docs suggest that this is necessary for multithreaded code.
824     // However, it also points out that DNS timeouts will not be honored
825     // during DNS lookup, which can be worked around by using the c-ares
826     // library, which we do not do yet.
827     curl_easy_setopt_safe(CURLOPT_NOSIGNAL, 1);
828
829     // Turn off Curl's built-in progress meter
830     curl_easy_setopt_safe(CURLOPT_NOPROGRESS, 1);
831
832     // xxx todo - support setting the proxy for Curl to use (can't use https
833     // for proxies though)
834
835     // xxx todo - support setting the network interface for Curl to use
836
837     // I think this is useful - we don't need interactive performance, we need
838     // to complete large operations quickly
839     curl_easy_setopt_safe(CURLOPT_TCP_NODELAY, 1);
840     
841     // Don't use Curl's 'netrc' feature
842     curl_easy_setopt_safe(CURLOPT_NETRC, CURL_NETRC_IGNORED);
843
844     // Don't verify S3's certificate, there are known to be issues with
845     // them sometimes
846     // xxx todo - support an option for verifying the S3 CA (default false)
847     curl_easy_setopt_safe(CURLOPT_SSL_VERIFYPEER, 0);
848
849     // Follow any redirection directives that S3 sends
850     curl_easy_setopt_safe(CURLOPT_FOLLOWLOCATION, 1);
851
852     // A safety valve in case S3 goes bananas with redirects
853     curl_easy_setopt_safe(CURLOPT_MAXREDIRS, 10);
854
855     // Set the User-Agent; maybe Amazon will track these?
856     curl_easy_setopt_safe(CURLOPT_USERAGENT, userAgentG);
857
858     // Set the low speed limit and time; we abort transfers that stay at
859     // less than 1K per second for more than 15 seconds.
860     // xxx todo - make these configurable
861     // xxx todo - allow configurable max send and receive speed
862     curl_easy_setopt_safe(CURLOPT_LOW_SPEED_LIMIT, 1024);
863     curl_easy_setopt_safe(CURLOPT_LOW_SPEED_TIME, 15);
864
865     // Append standard headers
866 #define append_standard_header(fieldName)                               \
867     if (values-> fieldName [0]) {                                       \
868         request->headers = curl_slist_append(request->headers,          \
869                                              values-> fieldName);       \
870     }
871
872     // Would use CURLOPT_INFILESIZE_LARGE, but it is buggy in libcurl
873     if (params->httpRequestType == HttpRequestTypePUT) {
874         char header[256];
875         snprintf(header, sizeof(header), "Content-Length: %llu",
876                  (unsigned long long) params->toS3CallbackTotalSize);
877         request->headers = curl_slist_append(request->headers, header);
878         request->headers = curl_slist_append(request->headers, 
879                                              "Transfer-Encoding:");
880     }
881     else if (params->httpRequestType == HttpRequestTypeCOPY) {
882         request->headers = curl_slist_append(request->headers, 
883                                              "Transfer-Encoding:");
884     }
885     
886     append_standard_header(cacheControlHeader);
887     append_standard_header(contentTypeHeader);
888     append_standard_header(md5Header);
889     append_standard_header(contentDispositionHeader);
890     append_standard_header(contentEncodingHeader);
891     append_standard_header(expiresHeader);
892     append_standard_header(ifModifiedSinceHeader);
893     append_standard_header(ifUnmodifiedSinceHeader);
894     append_standard_header(ifMatchHeader);
895     append_standard_header(ifNoneMatchHeader);
896     append_standard_header(rangeHeader);
897     append_standard_header(authorizationHeader);
898
899     // Append x-amz- headers
900     int i;
901     for (i = 0; i < values->amzHeadersCount; i++) {
902         request->headers = 
903             curl_slist_append(request->headers, values->amzHeaders[i]);
904     }
905
906     // Set the HTTP headers
907     curl_easy_setopt_safe(CURLOPT_HTTPHEADER, request->headers);
908
909     // Set URI
910     curl_easy_setopt_safe(CURLOPT_URL, request->uri);
911
912     // Set request type.
913     switch (params->httpRequestType) {
914     case HttpRequestTypeHEAD:
915     curl_easy_setopt_safe(CURLOPT_NOBODY, 1);
916         break;
917     case HttpRequestTypePUT:
918     case HttpRequestTypeCOPY:
919         curl_easy_setopt_safe(CURLOPT_UPLOAD, 1);
920         break;
921     case HttpRequestTypeDELETE:
922     curl_easy_setopt_safe(CURLOPT_CUSTOMREQUEST, "DELETE");
923         break;
924     default: // HttpRequestTypeGET
925         break;
926     }
927     
928     return S3StatusOK;
929 }
930
931
932 static void request_deinitialize(Request *request)
933 {
934     if (request->headers) {
935         curl_slist_free_all(request->headers);
936     }
937     
938     error_parser_deinitialize(&(request->errorParser));
939
940     // curl_easy_reset prevents connections from being re-used for some
941     // reason.  This makes HTTP Keep-Alive meaningless and is very bad for
942     // performance.  But it is necessary to allow curl to work properly.
943     // xxx todo figure out why
944     curl_easy_reset(request->curl);
945 }
946
947
948 static S3Status request_get(const RequestParams *params, 
949                             const RequestComputedValues *values,
950                             Request **reqReturn)
951 {
952     Request *request = 0;
953     
954     // Try to get one from the request stack.  We hold the lock for the
955     // shortest time possible here.
956     pthread_mutex_lock(&requestStackMutexG);
957
958     if (requestStackCountG) {
959         request = requestStackG[--requestStackCountG];
960     }
961     
962     pthread_mutex_unlock(&requestStackMutexG);
963
964     // If we got one, deinitialize it for re-use
965     if (request) {
966         request_deinitialize(request);
967     }
968     // Else there wasn't one available in the request stack, so create one
969     else {
970         if (!(request = (Request *) malloc(sizeof(Request)))) {
971             return S3StatusOutOfMemory;
972         }
973         if (!(request->curl = curl_easy_init())) {
974             free(request);
975             return S3StatusFailedToInitializeRequest;
976         }
977     }
978
979     // Initialize the request
980     request->prev = 0;
981     request->next = 0;
982
983     // Request status is initialized to no error, will be updated whenever
984     // an error occurs
985     request->status = S3StatusOK;
986
987     S3Status status;
988                         
989     // Start out with no headers
990     request->headers = 0;
991
992     // Compute the URL
993     if ((status = compose_uri
994          (request->uri, sizeof(request->uri), 
995           &(params->bucketContext), values->urlEncodedKey,
996           params->subResource, params->queryParams)) != S3StatusOK) {
997         curl_easy_cleanup(request->curl);
998         free(request);
999         return status;
1000     }
1001
1002     // Set all of the curl handle options
1003     if ((status = setup_curl(request, params, values)) != S3StatusOK) {
1004         curl_easy_cleanup(request->curl);
1005         free(request);
1006         return status;
1007     }
1008
1009     request->propertiesCallback = params->propertiesCallback;
1010
1011     request->toS3Callback = params->toS3Callback;
1012
1013     request->toS3CallbackBytesRemaining = params->toS3CallbackTotalSize;
1014
1015     request->fromS3Callback = params->fromS3Callback;
1016
1017     request->completeCallback = params->completeCallback;
1018
1019     request->callbackData = params->callbackData;
1020
1021     response_headers_handler_initialize(&(request->responseHeadersHandler));
1022
1023     request->propertiesCallbackMade = 0;
1024     
1025     error_parser_initialize(&(request->errorParser));
1026
1027     *reqReturn = request;
1028     
1029     return S3StatusOK;
1030 }
1031
1032
1033 static void request_destroy(Request *request)
1034 {
1035     request_deinitialize(request);
1036     curl_easy_cleanup(request->curl);
1037     free(request);
1038 }
1039
1040
1041 static void request_release(Request *request)
1042 {
1043     pthread_mutex_lock(&requestStackMutexG);
1044
1045     // If the request stack is full, destroy this one
1046     if (requestStackCountG == REQUEST_STACK_SIZE) {
1047         pthread_mutex_unlock(&requestStackMutexG);
1048         request_destroy(request);
1049     }
1050     // Else put this one at the front of the request stack; we do this because
1051     // we want the most-recently-used curl handle to be re-used on the next
1052     // request, to maximize our chances of re-using a TCP connection before it
1053     // times out
1054     else {
1055         requestStackG[requestStackCountG++] = request;
1056         pthread_mutex_unlock(&requestStackMutexG);
1057     }
1058 }
1059
1060
1061 S3Status request_api_initialize(const char *userAgentInfo, int flags)
1062 {
1063     if (curl_global_init(CURL_GLOBAL_ALL & 
1064                          ~((flags & S3_INIT_WINSOCK) ? 0 : CURL_GLOBAL_WIN32))
1065         != CURLE_OK) {
1066         return S3StatusInternalError;
1067     }
1068
1069     pthread_mutex_init(&requestStackMutexG, 0);
1070
1071     requestStackCountG = 0;
1072
1073     if (!userAgentInfo || !*userAgentInfo) {
1074         userAgentInfo = "Unknown";
1075     }
1076
1077     char platform[96];
1078     struct utsname utsn;
1079     if (uname(&utsn)) {
1080         strncpy(platform, "Unknown", sizeof(platform));
1081         // Because strncpy doesn't always zero terminate
1082         platform[sizeof(platform) - 1] = 0;
1083     }
1084     else {
1085         snprintf(platform, sizeof(platform), "%s%s%s", utsn.sysname, 
1086                  utsn.machine[0] ? " " : "", utsn.machine);
1087     }
1088
1089     snprintf(userAgentG, sizeof(userAgentG), 
1090              "Mozilla/4.0 (Compatible; %s; libs3 %s.%s; %s)",
1091              userAgentInfo, LIBS3_VER_MAJOR, LIBS3_VER_MINOR, platform);
1092     
1093     return S3StatusOK;
1094 }
1095
1096
1097 void request_api_deinitialize()
1098 {
1099     pthread_mutex_destroy(&requestStackMutexG);
1100
1101     while (requestStackCountG--) {
1102         request_destroy(requestStackG[requestStackCountG]);
1103     }
1104 }
1105
1106
1107 void request_perform(const RequestParams *params, S3RequestContext *context)
1108 {
1109     Request *request;
1110     S3Status status;
1111
1112 #define return_status(status)                                           \
1113     (*(params->completeCallback))(status, 0, params->callbackData);     \
1114     return
1115
1116     // These will hold the computed values
1117     RequestComputedValues computed;
1118
1119     // Validate the bucket name
1120     if (params->bucketContext.bucketName && 
1121         ((status = S3_validate_bucket_name
1122           (params->bucketContext.bucketName, 
1123            params->bucketContext.uriStyle)) != S3StatusOK)) {
1124         return_status(status);
1125     }
1126
1127     // Compose the amz headers
1128     if ((status = compose_amz_headers(params, &computed)) != S3StatusOK) {
1129         return_status(status);
1130     }
1131
1132     // Compose standard headers
1133     if ((status = compose_standard_headers
1134          (params, &computed)) != S3StatusOK) {
1135         return_status(status);
1136     }
1137
1138     // URL encode the key
1139     if ((status = encode_key(params, &computed)) != S3StatusOK) {
1140         return_status(status);
1141     }
1142
1143     // Compute the canonicalized amz headers
1144     canonicalize_amz_headers(&computed);
1145
1146     // Compute the canonicalized resource
1147     canonicalize_resource(params->bucketContext.bucketName,
1148                           params->subResource, computed.urlEncodedKey,
1149                           computed.canonicalizedResource);
1150
1151     // Compose Authorization header
1152     if ((status = compose_auth_header(params, &computed)) != S3StatusOK) {
1153         return_status(status);
1154     }
1155     
1156     // Get an initialized Request structure now
1157     if ((status = request_get(params, &computed, &request)) != S3StatusOK) {
1158         return_status(status);
1159     }
1160
1161     // If a RequestContext was provided, add the request to the curl multi
1162     if (context) {
1163         CURLMcode code = curl_multi_add_handle(context->curlm, request->curl);
1164         if (code == CURLM_OK) {
1165             if (context->requests) {
1166                 request->prev = context->requests->prev;
1167                 request->next = context->requests;
1168                 context->requests->prev->next = request;
1169                 context->requests->prev = request;
1170             }
1171             else {
1172                 context->requests = request->next = request->prev = request;
1173             }
1174         }
1175         else {
1176             if (request->status == S3StatusOK) {
1177                 request->status = (code == CURLM_OUT_OF_MEMORY) ?
1178                     S3StatusOutOfMemory : S3StatusInternalError;
1179             }
1180             request_finish(request);
1181         }
1182     }
1183     // Else, perform the request immediately
1184     else {
1185         CURLcode code = curl_easy_perform(request->curl);
1186         if ((code != CURLE_OK) && (request->status == S3StatusOK)) {
1187             request->status = request_curl_code_to_status(code);
1188         }
1189         // Finish the request, ensuring that all callbacks have been made, and
1190         // also releases the request
1191         request_finish(request);
1192     }
1193 }
1194
1195
1196 void request_finish(Request *request)
1197 {
1198     // If we haven't detected this already, we now know that the headers are
1199     // definitely done being read in
1200     request_headers_done(request);
1201     
1202     // If there was no error processing the request, then possibly there was
1203     // an S3 error parsed, which should be converted into the request status
1204     if (request->status == S3StatusOK) {
1205         error_parser_convert_status(&(request->errorParser), 
1206                                     &(request->status));
1207         // If there still was no error recorded, then it is possible that
1208         // there was in fact an error but that there was no error XML
1209         // detailing the error
1210         if ((request->status == S3StatusOK) &&
1211             ((request->httpResponseCode < 200) ||
1212              (request->httpResponseCode > 299))) {
1213             switch (request->httpResponseCode) {
1214             case 0:
1215                 // This happens if the request never got any HTTP response
1216                 // headers at all, we call this a ConnectionFailed error
1217                 request->status = S3StatusConnectionFailed;
1218                 break;
1219             case 100: // Some versions of libcurl erroneously set HTTP
1220                       // status to this
1221                 break;
1222             case 301:
1223                 request->status = S3StatusErrorPermanentRedirect;
1224                 break;
1225             case 307:
1226                 request->status = S3StatusHttpErrorMovedTemporarily;
1227                 break;
1228             case 400:
1229                 request->status = S3StatusHttpErrorBadRequest;
1230                 break;
1231             case 403: 
1232                 request->status = S3StatusHttpErrorForbidden;
1233                 break;
1234             case 404:
1235                 request->status = S3StatusHttpErrorNotFound;
1236                 break;
1237             case 405:
1238                 request->status = S3StatusErrorMethodNotAllowed;
1239                 break;
1240             case 409:
1241                 request->status = S3StatusHttpErrorConflict;
1242                 break;
1243             case 411:
1244                 request->status = S3StatusErrorMissingContentLength;
1245                 break;
1246             case 412:
1247                 request->status = S3StatusErrorPreconditionFailed;
1248                 break;
1249             case 416:
1250                 request->status = S3StatusErrorInvalidRange;
1251                 break;
1252             case 500:
1253                 request->status = S3StatusErrorInternalError;
1254                 break;
1255             case 501:
1256                 request->status = S3StatusErrorNotImplemented;
1257                 break;
1258             case 503:
1259                 request->status = S3StatusErrorSlowDown;
1260                 break;
1261             default:
1262                 request->status = S3StatusHttpErrorUnknown;
1263                 break;
1264             }
1265         }
1266     }
1267
1268     (*(request->completeCallback))
1269         (request->status, &(request->errorParser.s3ErrorDetails),
1270          request->callbackData);
1271
1272     request_release(request);
1273 }
1274
1275
1276 S3Status request_curl_code_to_status(CURLcode code)
1277 {
1278     switch (code) {
1279     case CURLE_OUT_OF_MEMORY:
1280         return S3StatusOutOfMemory;
1281     case CURLE_COULDNT_RESOLVE_PROXY:
1282     case CURLE_COULDNT_RESOLVE_HOST:
1283         return S3StatusNameLookupError;
1284     case CURLE_COULDNT_CONNECT:
1285         return S3StatusFailedToConnect;
1286     case CURLE_WRITE_ERROR:
1287     case CURLE_OPERATION_TIMEDOUT:
1288         return S3StatusConnectionFailed;
1289     case CURLE_PARTIAL_FILE:
1290         return S3StatusOK;
1291     case CURLE_SSL_CACERT:
1292         return S3StatusServerFailedVerification;
1293     default:
1294         return S3StatusInternalError;
1295     }
1296 }
1297
1298
1299 S3Status S3_generate_authenticated_query_string
1300     (char *buffer, const S3BucketContext *bucketContext,
1301      const char *key, int64_t expires, const char *resource)
1302 {
1303 #define MAX_EXPIRES (((int64_t) 1 << 31) - 1)
1304     // S3 seems to only accept expiration dates up to the number of seconds
1305     // representably by a signed 32-bit integer
1306     if (expires < 0) {
1307         expires = MAX_EXPIRES;
1308     }
1309     else if (expires > MAX_EXPIRES) {
1310         expires = MAX_EXPIRES;
1311     }
1312
1313     // xxx todo: rework this so that it can be incorporated into shared code
1314     // with request_perform().  It's really unfortunate that this code is not
1315     // shared with request_perform().
1316
1317     // URL encode the key
1318     char urlEncodedKey[S3_MAX_KEY_SIZE * 3];
1319     if (key) {
1320         urlEncode(urlEncodedKey, key, strlen(key));
1321     }
1322     else {
1323         urlEncodedKey[0] = 0;
1324     }
1325
1326     // Compute canonicalized resource
1327     char canonicalizedResource[MAX_CANONICALIZED_RESOURCE_SIZE];
1328     canonicalize_resource(bucketContext->bucketName, resource, urlEncodedKey,
1329                           canonicalizedResource);
1330                           
1331     // We allow for:
1332     // 17 bytes for HTTP-Verb + \n
1333     // 1 byte for empty Content-MD5 + \n
1334     // 1 byte for empty Content-Type + \n
1335     // 20 bytes for Expires + \n
1336     // 0 bytes for CanonicalizedAmzHeaders
1337     // CanonicalizedResource
1338     char signbuf[17 + 1 + 1 + 1 + 20 + sizeof(canonicalizedResource) + 1];
1339     int len = 0;
1340
1341 #define signbuf_append(format, ...)                             \
1342     len += snprintf(&(signbuf[len]), sizeof(signbuf) - len,     \
1343                     format, __VA_ARGS__)
1344
1345     signbuf_append("%s\n", "GET"); // HTTP-Verb
1346     signbuf_append("%s\n", ""); // Content-MD5
1347     signbuf_append("%s\n", ""); // Content-Type
1348     signbuf_append("%llu\n", (unsigned long long) expires);
1349     signbuf_append("%s", canonicalizedResource);
1350
1351     // Generate an HMAC-SHA-1 of the signbuf
1352     unsigned char hmac[20];
1353
1354     HMAC_SHA1(hmac, (unsigned char *) bucketContext->secretAccessKey,
1355               strlen(bucketContext->secretAccessKey),
1356               (unsigned char *) signbuf, len);
1357
1358     // Now base-64 encode the results
1359     char b64[((20 + 1) * 4) / 3];
1360     int b64Len = base64Encode(hmac, 20, b64);
1361
1362     // Now urlEncode that
1363     char signature[sizeof(b64) * 3];
1364     urlEncode(signature, b64, b64Len);
1365
1366     // Finally, compose the uri, with params:
1367     // ?AWSAccessKeyId=xxx[&Expires=]&Signature=xxx
1368     char queryParams[sizeof("AWSAccessKeyId=") + 20 + 
1369                      sizeof("&Expires=") + 20 + 
1370                      sizeof("&Signature=") + sizeof(signature) + 1];
1371
1372     sprintf(queryParams, "AWSAccessKeyId=%s&Expires=%ld&Signature=%s",
1373             bucketContext->accessKeyId, (long) expires, signature);
1374
1375     return compose_uri(buffer, S3_MAX_AUTHENTICATED_QUERY_STRING_SIZE,
1376                        bucketContext, urlEncodedKey, resource, queryParams);
1377 }