From b7bdb07161d10827af011cab9852d97959501090 Mon Sep 17 00:00:00 2001 From: Edward Thomson Date: Sat, 30 May 2020 15:21:48 +0100 Subject: [PATCH 1/4] online::clone: test a googlesource URL Google Git (googlesource.com) behaves differently than git proper. Test that we can communicate with it. --- tests/online/clone.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/online/clone.c b/tests/online/clone.c index 034d0c2e8..9107956bd 100644 --- a/tests/online/clone.c +++ b/tests/online/clone.c @@ -11,6 +11,7 @@ #define BB_REPO_URL "https://libgit3@bitbucket.org/libgit2/testgitrepository.git" #define BB_REPO_URL_WITH_PASS "https://libgit3:libgit3@bitbucket.org/libgit2/testgitrepository.git" #define BB_REPO_URL_WITH_WRONG_PASS "https://libgit3:wrong@bitbucket.org/libgit2/testgitrepository.git" +#define GOOGLESOURCE_REPO_URL "https://chromium.googlesource.com/external/github.com/sergi/go-diff" #define SSH_REPO_URL "ssh://github.com/libgit2/TestGitRepository" @@ -463,6 +464,13 @@ void test_online_clone__bitbucket_falls_back_to_specified_creds(void) cl_fixture_cleanup("./foo"); } +void test_online_clone__googlesource(void) +{ + cl_git_pass(git_clone(&g_repo, GOOGLESOURCE_REPO_URL, "./foo", &g_options)); + git_repository_free(g_repo); g_repo = NULL; + cl_fixture_cleanup("./foo"); +} + static int cancel_at_half(const git_indexer_progress *stats, void *payload) { GIT_UNUSED(payload); From 570f0340f06f49b06ff536463cec8a2e2411b948 Mon Sep 17 00:00:00 2001 From: Edward Thomson Date: Mon, 1 Jun 2020 19:10:38 +0100 Subject: [PATCH 2/4] httpclient: read_body should return 0 at EOF When users call `git_http_client_read_body`, it should return 0 at the end of a message. When the `on_message_complete` callback is called, this will set `client->state` to `DONE`. In our read loop, we look for this condition and exit. Without this, when there is no data left except the end of message chunk (`0\r\n`) in the http stream, we would block by reading the three bytes off the stream but not making progress in any `on_body` callbacks. Listening to the `on_message_complete` callback allows us to stop trying to read from the socket when we've read the end of message chunk. --- src/transports/httpclient.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/transports/httpclient.c b/src/transports/httpclient.c index bde67ca9f..af90129df 100644 --- a/src/transports/httpclient.c +++ b/src/transports/httpclient.c @@ -1419,15 +1419,20 @@ int git_http_client_read_body( client->parser.data = &parser_context; /* - * Clients expect to get a non-zero amount of data from us. - * With a sufficiently small buffer, one might only read a chunk - * length. Loop until we actually have data to return. + * Clients expect to get a non-zero amount of data from us, + * so we either block until we have data to return, until we + * hit EOF or there's an error. Do this in a loop, since we + * may end up reading only some stream metadata (like chunk + * information). */ while (!parser_context.output_written) { error = client_read_and_parse(client); if (error <= 0) goto done; + + if (client->state == DONE) + break; } assert(parser_context.output_written <= INT_MAX); From aa8b2c0f6068454d30e9b0f20f424311337a37f1 Mon Sep 17 00:00:00 2001 From: Edward Thomson Date: Mon, 1 Jun 2020 23:53:55 +0100 Subject: [PATCH 3/4] httpclient: don't read more than the client wants When `git_http_client_read_body` is invoked, it provides the size of the buffer that can be read into. This will be set as the parser context's `output_size` member. Use this as an upper limit on our reads, and ensure that we do not read more than the client requests. --- src/transports/httpclient.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/transports/httpclient.c b/src/transports/httpclient.c index af90129df..72a65f00f 100644 --- a/src/transports/httpclient.c +++ b/src/transports/httpclient.c @@ -1038,6 +1038,7 @@ on_error: GIT_INLINE(int) client_read(git_http_client *client) { + http_parser_context *parser_context = client->parser.data; git_stream *stream; char *buf = client->read_buf.ptr + client->read_buf.size; size_t max_len; @@ -1054,6 +1055,9 @@ GIT_INLINE(int) client_read(git_http_client *client) max_len = client->read_buf.asize - client->read_buf.size; max_len = min(max_len, INT_MAX); + if (parser_context->output_size) + max_len = min(max_len, parser_context->output_size); + if (max_len == 0) { git_error_set(GIT_ERROR_HTTP, "no room in output buffer"); return -1; From 04c7bdb42e4e8b374d5880358c7911236795fbcb Mon Sep 17 00:00:00 2001 From: Edward Thomson Date: Mon, 1 Jun 2020 22:44:14 +0100 Subject: [PATCH 4/4] httpclient: clear the read_buf on new requests The httpclient implementation keeps a `read_buf` that holds the data in the body of the response after the headers have been written. We store that data for subsequent calls to `git_http_client_read_body`. If we want to stop reading body data and send another request, we need to clear that cached data. Clear the cached body data on new requests, just like we read any outstanding data from the socket. --- src/transports/httpclient.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/transports/httpclient.c b/src/transports/httpclient.c index 72a65f00f..010baa604 100644 --- a/src/transports/httpclient.c +++ b/src/transports/httpclient.c @@ -1195,7 +1195,7 @@ static void complete_response_body(git_http_client *client) /* If we're not keeping alive, don't bother. */ if (!client->keepalive) { client->connected = 0; - return; + goto done; } parser_context.client = client; @@ -1209,6 +1209,9 @@ static void complete_response_body(git_http_client *client) git_error_clear(); client->connected = 0; } + +done: + git_buf_clear(&client->read_buf); } int git_http_client_send_request(