summaryrefslogtreecommitdiff
path: root/src/httpfetch.cpp
diff options
context:
space:
mode:
authorKahrl <kahrl@gmx.net>2013-08-29 05:04:56 +0200
committerKahrl <kahrl@gmx.net>2013-12-13 18:05:10 +0100
commit0ea3e6dbe2288854d9d4a971fc6539c2e740a95a (patch)
tree2bd1e8485e0fe14cc69fefb8cae66a76f1086aee /src/httpfetch.cpp
parent67bf7130ce3f1780a432ae03eabdec3118ecab70 (diff)
downloadminetest-0ea3e6dbe2288854d9d4a971fc6539c2e740a95a.tar.gz
minetest-0ea3e6dbe2288854d9d4a971fc6539c2e740a95a.tar.bz2
minetest-0ea3e6dbe2288854d9d4a971fc6539c2e740a95a.zip
Implement httpfetch module and initialize it from main()
Add curl_parallel_limit setting that will replace media_fetch_threads in a later commit. Fix a typo in MutexedQueue::pop_back() that made it impossible to compile code that used this function. (Noticed this while implementing httpfetch.)
Diffstat (limited to 'src/httpfetch.cpp')
-rw-r--r--src/httpfetch.cpp718
1 files changed, 718 insertions, 0 deletions
diff --git a/src/httpfetch.cpp b/src/httpfetch.cpp
new file mode 100644
index 000000000..4342a8b2a
--- /dev/null
+++ b/src/httpfetch.cpp
@@ -0,0 +1,718 @@
+/*
+Minetest
+Copyright (C) 2013 celeron55, Perttu Ahola <celeron55@gmail.com>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+*/
+
+#include "httpfetch.h"
+#include <iostream>
+#include <sstream>
+#include <list>
+#include <map>
+#include <errno.h>
+#include "jthread/jevent.h"
+#include "config.h"
+#include "exceptions.h"
+#include "debug.h"
+#include "log.h"
+#include "util/container.h"
+#include "util/thread.h"
+#include "socket.h" // for select()
+
+JMutex g_httpfetch_mutex;
+std::map<unsigned long, std::list<HTTPFetchResult> > g_httpfetch_results;
+
+static void httpfetch_deliver_result(const HTTPFetchResult &fetchresult)
+{
+ unsigned long caller = fetchresult.caller;
+ if (caller != HTTPFETCH_DISCARD) {
+ JMutexAutoLock lock(g_httpfetch_mutex);
+ g_httpfetch_results[caller].push_back(fetchresult);
+ }
+}
+
+static void httpfetch_request_clear(unsigned long caller);
+
+unsigned long httpfetch_caller_alloc()
+{
+ JMutexAutoLock lock(g_httpfetch_mutex);
+
+ // Check each caller ID except HTTPFETCH_DISCARD
+ const unsigned long discard = HTTPFETCH_DISCARD;
+ for (unsigned long caller = discard + 1; caller != discard; ++caller) {
+ std::map<unsigned long, std::list<HTTPFetchResult> >::iterator
+ it = g_httpfetch_results.find(caller);
+ if (it == g_httpfetch_results.end()) {
+ verbosestream<<"httpfetch_caller_alloc: allocating "
+ <<caller<<std::endl;
+ // Access element to create it
+ g_httpfetch_results[caller];
+ return caller;
+ }
+ }
+
+ assert("httpfetch_caller_alloc: ran out of caller IDs" == 0);
+ return discard;
+}
+
+void httpfetch_caller_free(unsigned long caller)
+{
+ verbosestream<<"httpfetch_caller_free: freeing "
+ <<caller<<std::endl;
+
+ httpfetch_request_clear(caller);
+ if (caller != HTTPFETCH_DISCARD) {
+ JMutexAutoLock lock(g_httpfetch_mutex);
+ g_httpfetch_results.erase(caller);
+ }
+}
+
+bool httpfetch_async_get(unsigned long caller, HTTPFetchResult &fetchresult)
+{
+ JMutexAutoLock lock(g_httpfetch_mutex);
+
+ // Check that caller exists
+ std::map<unsigned long, std::list<HTTPFetchResult> >::iterator
+ it = g_httpfetch_results.find(caller);
+ if (it == g_httpfetch_results.end())
+ return false;
+
+ // Check that result queue is nonempty
+ std::list<HTTPFetchResult> &callerresults = it->second;
+ if (callerresults.empty())
+ return false;
+
+ // Pop first result
+ fetchresult = callerresults.front();
+ callerresults.pop_front();
+ return true;
+}
+
+#if USE_CURL
+#include <curl/curl.h>
+
+/*
+ USE_CURL is on: use cURL based httpfetch implementation
+*/
+
+static size_t httpfetch_writefunction(
+ char *ptr, size_t size, size_t nmemb, void *userdata)
+{
+ std::ostringstream *stream = (std::ostringstream*)userdata;
+ size_t count = size * nmemb;
+ stream->write(ptr, count);
+ return count;
+}
+
+static size_t httpfetch_discardfunction(
+ char *ptr, size_t size, size_t nmemb, void *userdata)
+{
+ return size * nmemb;
+}
+
+class CurlHandlePool
+{
+ std::list<CURL*> handles;
+
+public:
+ CurlHandlePool() {}
+ ~CurlHandlePool()
+ {
+ for (std::list<CURL*>::iterator it = handles.begin();
+ it != handles.end(); ++it) {
+ curl_easy_cleanup(*it);
+ }
+ }
+ CURL * alloc()
+ {
+ CURL *curl;
+ if (handles.empty()) {
+ curl = curl_easy_init();
+ if (curl == NULL) {
+ errorstream<<"curl_easy_init returned NULL"<<std::endl;
+ }
+ }
+ else {
+ curl = handles.front();
+ handles.pop_front();
+ }
+ return curl;
+ }
+ void free(CURL *handle)
+ {
+ if (handle)
+ handles.push_back(handle);
+ }
+};
+
+struct HTTPFetchOngoing
+{
+ CurlHandlePool *pool;
+ CURL *curl;
+ CURLM *multi;
+ HTTPFetchRequest request;
+ HTTPFetchResult result;
+ std::ostringstream oss;
+ char *post_fields;
+ struct curl_slist *httpheader;
+
+ HTTPFetchOngoing(HTTPFetchRequest request_, CurlHandlePool *pool_):
+ pool(pool_),
+ curl(NULL),
+ multi(NULL),
+ request(request_),
+ result(request_),
+ oss(std::ios::binary),
+ httpheader(NULL)
+ {
+ curl = pool->alloc();
+ if (curl != NULL) {
+ // Set static cURL options
+ curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1);
+ curl_easy_setopt(curl, CURLOPT_FAILONERROR, 1);
+ curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1);
+ curl_easy_setopt(curl, CURLOPT_MAXREDIRS, 1);
+
+#if LIBCURL_VERSION_NUM >= 0x071304
+ // Restrict protocols so that curl vulnerabilities in
+ // other protocols don't affect us.
+ // These settings were introduced in curl 7.19.4.
+ long protocols =
+ CURLPROTO_HTTP |
+ CURLPROTO_HTTPS |
+ CURLPROTO_FTP |
+ CURLPROTO_FTPS;
+ curl_easy_setopt(curl, CURLOPT_PROTOCOLS, protocols);
+ curl_easy_setopt(curl, CURLOPT_REDIR_PROTOCOLS, protocols);
+#endif
+
+ // Set cURL options based on HTTPFetchRequest
+ curl_easy_setopt(curl, CURLOPT_URL,
+ request.url.c_str());
+ curl_easy_setopt(curl, CURLOPT_TIMEOUT_MS,
+ request.timeout);
+ curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT_MS,
+ request.connect_timeout);
+ // Set up a write callback that writes to the
+ // ostringstream ongoing->oss, unless the data
+ // is to be discarded
+ if (request.caller == HTTPFETCH_DISCARD) {
+ curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION,
+ httpfetch_discardfunction);
+ curl_easy_setopt(curl, CURLOPT_WRITEDATA, NULL);
+ }
+ else {
+ curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION,
+ httpfetch_writefunction);
+ curl_easy_setopt(curl, CURLOPT_WRITEDATA, &oss);
+ }
+ // Set POST (or GET) data
+ if (request.post_fields.empty()) {
+ curl_easy_setopt(curl, CURLOPT_HTTPGET, 1);
+ }
+ else {
+ curl_easy_setopt(curl, CURLOPT_POST, 1);
+ curl_easy_setopt(curl, CURLOPT_POSTFIELDSIZE,
+ request.post_fields.size());
+ curl_easy_setopt(curl, CURLOPT_POSTFIELDS,
+ request.post_fields.c_str());
+ // request.post_fields must now *never* be
+ // modified until CURLOPT_POSTFIELDS is cleared
+ }
+ // Set additional HTTP headers
+ for (size_t i = 0; i < request.extra_headers.size(); ++i) {
+ httpheader = curl_slist_append(
+ httpheader,
+ request.extra_headers[i].c_str());
+ }
+ curl_easy_setopt(curl, CURLOPT_HTTPHEADER, httpheader);
+ }
+ }
+
+ CURLcode start(CURLM *multi_)
+ {
+ if (curl == NULL)
+ return CURLE_FAILED_INIT;
+
+ if (multi_) {
+ // Multi interface (async)
+ CURLMcode mres = curl_multi_add_handle(multi_, curl);
+ if (mres != CURLM_OK) {
+ errorstream<<"curl_multi_add_handle"
+ <<" returned error code "<<mres
+ <<std::endl;
+ return CURLE_FAILED_INIT;
+ }
+ multi = multi_; // store for curl_multi_remove_handle
+ return CURLE_OK;
+ }
+ else {
+ // Easy interface (sync)
+ return curl_easy_perform(curl);
+ }
+ }
+
+ void complete(CURLcode res)
+ {
+ result.succeeded = (res == CURLE_OK);
+ result.timeout = (res == CURLE_OPERATION_TIMEDOUT);
+ result.data = oss.str();
+
+ // Get HTTP/FTP response code
+ result.response_code = 0;
+ if (curl != NULL) {
+ if (curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE,
+ &result.response_code) != CURLE_OK) {
+ result.response_code = 0;
+ }
+ }
+
+ if (res != CURLE_OK) {
+ infostream<<request.url<<" not found ("
+ <<curl_easy_strerror(res)<<")"
+ <<" (response code "<<result.response_code<<")"
+ <<std::endl;
+ }
+ }
+
+ ~HTTPFetchOngoing()
+ {
+ if (multi != NULL) {
+ CURLMcode mres = curl_multi_remove_handle(multi, curl);
+ if (mres != CURLM_OK) {
+ errorstream<<"curl_multi_remove_handle"
+ <<" returned error code "<<mres
+ <<std::endl;
+ }
+ }
+
+ // Set safe options for the reusable cURL handle
+ curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION,
+ httpfetch_discardfunction);
+ curl_easy_setopt(curl, CURLOPT_WRITEDATA, NULL);
+ curl_easy_setopt(curl, CURLOPT_POSTFIELDS, NULL);
+ if (httpheader != NULL) {
+ curl_easy_setopt(curl, CURLOPT_HTTPHEADER, NULL);
+ curl_slist_free_all(httpheader);
+ }
+
+ // Store the cURL handle for reuse
+ pool->free(curl);
+ }
+};
+
+class CurlFetchThread : public SimpleThread
+{
+protected:
+ enum RequestType {
+ RT_FETCH,
+ RT_CLEAR,
+ RT_WAKEUP,
+ };
+
+ struct Request {
+ RequestType type;
+ HTTPFetchRequest fetchrequest;
+ Event *event;
+ };
+
+ CURLM *m_multi;
+ MutexedQueue<Request> m_requests;
+ size_t m_parallel_limit;
+
+ // Variables exclusively used within thread
+ std::vector<HTTPFetchOngoing*> m_all_ongoing;
+ std::list<HTTPFetchRequest> m_queued_fetches;
+
+public:
+ CurlFetchThread(int parallel_limit)
+ {
+ if (parallel_limit >= 1)
+ m_parallel_limit = parallel_limit;
+ else
+ m_parallel_limit = 1;
+ }
+
+ void requestFetch(const HTTPFetchRequest &fetchrequest)
+ {
+ Request req;
+ req.type = RT_FETCH;
+ req.fetchrequest = fetchrequest;
+ req.event = NULL;
+ m_requests.push_back(req);
+ }
+
+ void requestClear(unsigned long caller, Event *event)
+ {
+ Request req;
+ req.type = RT_CLEAR;
+ req.fetchrequest.caller = caller;
+ req.event = event;
+ m_requests.push_back(req);
+ }
+
+ void requestWakeUp()
+ {
+ Request req;
+ req.type = RT_WAKEUP;
+ req.event = NULL;
+ m_requests.push_back(req);
+ }
+
+protected:
+ // Handle a request from some other thread
+ // E.g. new fetch; clear fetches for one caller; wake up
+ void processRequest(const Request &req)
+ {
+ if (req.type == RT_FETCH) {
+ // New fetch, queue until there are less
+ // than m_parallel_limit ongoing fetches
+ m_queued_fetches.push_back(req.fetchrequest);
+
+ // see processQueued() for what happens next
+
+ }
+ else if (req.type == RT_CLEAR) {
+ unsigned long caller = req.fetchrequest.caller;
+
+ // Abort all ongoing fetches for the caller
+ for (std::vector<HTTPFetchOngoing*>::iterator
+ it = m_all_ongoing.begin();
+ it != m_all_ongoing.end();) {
+ if ((*it)->request.caller == caller) {
+ delete (*it);
+ it = m_all_ongoing.erase(it);
+ }
+ else
+ ++it;
+ }
+
+ // Also abort all queued fetches for the caller
+ for (std::list<HTTPFetchRequest>::iterator
+ it = m_queued_fetches.begin();
+ it != m_queued_fetches.end();) {
+ if ((*it).caller == caller)
+ it = m_queued_fetches.erase(it);
+ else
+ ++it;
+ }
+ }
+ else if (req.type == RT_WAKEUP) {
+ // Wakeup: Nothing to do, thread is awake at this point
+ }
+
+ if (req.event != NULL)
+ req.event->signal();
+ }
+
+ // Start new ongoing fetches if m_parallel_limit allows
+ void processQueued(CurlHandlePool *pool)
+ {
+ while (m_all_ongoing.size() < m_parallel_limit &&
+ !m_queued_fetches.empty()) {
+ HTTPFetchRequest request = m_queued_fetches.front();
+ m_queued_fetches.pop_front();
+
+ // Create ongoing fetch data and make a cURL handle
+ // Set cURL options based on HTTPFetchRequest
+ HTTPFetchOngoing *ongoing =
+ new HTTPFetchOngoing(request, pool);
+
+ // Initiate the connection (curl_multi_add_handle)
+ CURLcode res = ongoing->start(m_multi);
+ if (res == CURLE_OK) {
+ m_all_ongoing.push_back(ongoing);
+ }
+ else {
+ ongoing->complete(res);
+ httpfetch_deliver_result(ongoing->result);
+ delete ongoing;
+ }
+ }
+ }
+
+ // Process CURLMsg (indicates completion of a fetch)
+ void processCurlMessage(CURLMsg *msg)
+ {
+ // Determine which ongoing fetch the message pertains to
+ size_t i = 0;
+ bool found = false;
+ for (i = 0; i < m_all_ongoing.size(); ++i) {
+ if (m_all_ongoing[i]->curl == msg->easy_handle) {
+ found = true;
+ break;
+ }
+ }
+ if (msg->msg == CURLMSG_DONE && found) {
+ // m_all_ongoing[i] succeeded or failed.
+ HTTPFetchOngoing *ongoing = m_all_ongoing[i];
+ ongoing->complete(msg->data.result);
+ httpfetch_deliver_result(ongoing->result);
+ delete ongoing;
+ m_all_ongoing.erase(m_all_ongoing.begin() + i);
+ }
+ }
+
+ // Wait for a request from another thread, or timeout elapses
+ void waitForRequest(long timeout)
+ {
+ if (m_queued_fetches.empty()) {
+ try {
+ Request req = m_requests.pop_front(timeout);
+ processRequest(req);
+ }
+ catch (ItemNotFoundException &e) {}
+ }
+ }
+
+ // Wait until some IO happens, or timeout elapses
+ void waitForIO(long timeout)
+ {
+ fd_set read_fd_set;
+ fd_set write_fd_set;
+ fd_set exc_fd_set;
+ int max_fd;
+ long select_timeout = -1;
+ struct timeval select_tv;
+ CURLMcode mres;
+
+ FD_ZERO(&read_fd_set);
+ FD_ZERO(&write_fd_set);
+ FD_ZERO(&exc_fd_set);
+
+ mres = curl_multi_fdset(m_multi, &read_fd_set,
+ &write_fd_set, &exc_fd_set, &max_fd);
+ if (mres != CURLM_OK) {
+ errorstream<<"curl_multi_fdset"
+ <<" returned error code "<<mres
+ <<std::endl;
+ select_timeout = 0;
+ }
+
+ mres = curl_multi_timeout(m_multi, &select_timeout);
+ if (mres != CURLM_OK) {
+ errorstream<<"curl_multi_timeout"
+ <<" returned error code "<<mres
+ <<std::endl;
+ select_timeout = 0;
+ }
+
+ // Limit timeout so new requests get through
+ if (select_timeout < 0 || select_timeout > timeout)
+ select_timeout = timeout;
+
+ if (select_timeout > 0) {
+ select_tv.tv_sec = select_timeout / 1000;
+ select_tv.tv_usec = (select_timeout % 1000) * 1000;
+ int retval = select(max_fd + 1, &read_fd_set,
+ &write_fd_set, &exc_fd_set,
+ &select_tv);
+ if (retval == -1) {
+ #ifdef _WIN32
+ errorstream<<"select returned error code "
+ <<WSAGetLastError()<<std::endl;
+ #else
+ errorstream<<"select returned error code "
+ <<errno<<std::endl;
+ #endif
+ }
+ }
+ }
+
+ void * Thread()
+ {
+ ThreadStarted();
+ log_register_thread("CurlFetchThread");
+ DSTACK(__FUNCTION_NAME);
+
+ CurlHandlePool pool;
+
+ m_multi = curl_multi_init();
+ if (m_multi == NULL) {
+ errorstream<<"curl_multi_init returned NULL\n";
+ return NULL;
+ }
+
+ assert(m_all_ongoing.empty());
+
+ while (getRun()) {
+ BEGIN_DEBUG_EXCEPTION_HANDLER
+
+ /*
+ Handle new async requests
+ */
+
+ while (!m_requests.empty()) {
+ Request req = m_requests.pop_front();
+ processRequest(req);
+ }
+ processQueued(&pool);
+
+ /*
+ Handle ongoing async requests
+ */
+
+ int still_ongoing = 0;
+ while (curl_multi_perform(m_multi, &still_ongoing) ==
+ CURLM_CALL_MULTI_PERFORM)
+ /* noop */;
+
+ /*
+ Handle completed async requests
+ */
+ if (still_ongoing < (int) m_all_ongoing.size()) {
+ CURLMsg *msg;
+ int msgs_in_queue;
+ msg = curl_multi_info_read(m_multi, &msgs_in_queue);
+ while (msg != NULL) {
+ processCurlMessage(msg);
+ msg = curl_multi_info_read(m_multi, &msgs_in_queue);
+ }
+ }
+
+ /*
+ If there are ongoing requests, wait for data
+ (with a timeout of 100ms so that new requests
+ can be processed).
+
+ If no ongoing requests, wait for a new request.
+ (Possibly an empty request that signals
+ that the thread should be stopped.)
+ */
+ if (m_all_ongoing.empty())
+ waitForRequest(100000000);
+ else
+ waitForIO(100);
+
+ END_DEBUG_EXCEPTION_HANDLER(errorstream)
+ }
+
+ // Call curl_multi_remove_handle and cleanup easy handles
+ for (size_t i = 0; i < m_all_ongoing.size(); ++i) {
+ delete m_all_ongoing[i];
+ }
+ m_all_ongoing.clear();
+
+ m_queued_fetches.clear();
+
+ CURLMcode mres = curl_multi_cleanup(m_multi);
+ if (mres != CURLM_OK) {
+ errorstream<<"curl_multi_cleanup"
+ <<" returned error code "<<mres
+ <<std::endl;
+ }
+
+ return NULL;
+ }
+};
+
+CurlFetchThread *g_httpfetch_thread = NULL;
+
+void httpfetch_init(int parallel_limit)
+{
+ verbosestream<<"httpfetch_init: parallel_limit="<<parallel_limit
+ <<std::endl;
+
+ CURLcode res = curl_global_init(CURL_GLOBAL_DEFAULT);
+ assert(res == CURLE_OK);
+
+ g_httpfetch_thread = new CurlFetchThread(parallel_limit);
+}
+
+void httpfetch_cleanup()
+{
+ verbosestream<<"httpfetch_cleanup: cleaning up"<<std::endl;
+
+ g_httpfetch_thread->setRun(false);
+ g_httpfetch_thread->requestWakeUp();
+ g_httpfetch_thread->stop();
+ delete g_httpfetch_thread;
+
+ curl_global_cleanup();
+}
+
+void httpfetch_async(const HTTPFetchRequest &fetchrequest)
+{
+ g_httpfetch_thread->requestFetch(fetchrequest);
+ if (!g_httpfetch_thread->IsRunning())
+ g_httpfetch_thread->Start();
+}
+
+static void httpfetch_request_clear(unsigned long caller)
+{
+ if (g_httpfetch_thread->IsRunning()) {
+ Event event;
+ g_httpfetch_thread->requestClear(caller, &event);
+ event.wait();
+ }
+ else {
+ g_httpfetch_thread->requestClear(caller, NULL);
+ }
+}
+
+void httpfetch_sync(const HTTPFetchRequest &fetchrequest,
+ HTTPFetchResult &fetchresult)
+{
+ // Create ongoing fetch data and make a cURL handle
+ // Set cURL options based on HTTPFetchRequest
+ CurlHandlePool pool;
+ HTTPFetchOngoing ongoing(fetchrequest, &pool);
+ // Do the fetch (curl_easy_perform)
+ CURLcode res = ongoing.start(NULL);
+ // Update fetchresult
+ ongoing.complete(res);
+ fetchresult = ongoing.result;
+}
+
+#else // USE_CURL
+
+/*
+ USE_CURL is off:
+
+ Dummy httpfetch implementation that always returns an error.
+*/
+
+void httpfetch_init(int parallel_limit)
+{
+}
+
+void httpfetch_cleanup()
+{
+}
+
+void httpfetch_async(const HTTPFetchRequest &fetchrequest)
+{
+ errorstream<<"httpfetch_async: unable to fetch "<<fetchrequest.url
+ <<" because USE_CURL=0"<<std::endl;
+
+ HTTPFetchResult fetchresult(fetchrequest); // sets succeeded = false etc.
+ httpfetch_deliver_result(fetchresult);
+}
+
+static void httpfetch_request_clear(unsigned long caller)
+{
+}
+
+void httpfetch_sync(const HTTPFetchRequest &fetchrequest,
+ HTTPFetchResult &fetchresult)
+{
+ errorstream<<"httpfetch_sync: unable to fetch "<<fetchrequest.url
+ <<" because USE_CURL=0"<<std::endl;
+
+ fetchresult = HTTPFetchResult(fetchrequest); // sets succeeded = false etc.
+}
+
+#endif // USE_CURL