35 #include <sys/types.h>
// The single HTTPCache object handed out by HTTPCache::instance(); starts as null.
63 HTTPCache *HTTPCache::_instance = 0;
// File-local mutex that instance() locks before it touches _instance.
72 static pthread_mutex_t instance_mutex;
// pthread_once control block passed together with once_init_routine to pthread_once.
73 static pthread_once_t once_block = PTHREAD_ONCE_INIT;
// Tuning constants for the HTTP cache. The size-related values are expressed
// in megabytes (or percent where noted); multiply by MEGA to obtain bytes.
#define NO_LM_EXPIRATION (24 * 3600) // 24 hours, in seconds; parenthesized so it expands safely inside larger expressions
#define DUMP_FREQUENCY 10 // Dump index every x loads
#define MEGA 0x100000L // Bytes per megabyte (2^20)
#define CACHE_TOTAL_SIZE 20 // Default cache size is 20M
#define CACHE_FOLDER_PCT 10 // 10% of cache size for metainfo etc.
#define CACHE_GC_PCT 10 // 10% of cache size free after GC
#define MIN_CACHE_TOTAL_SIZE 5 // 5M Min cache size
#define MAX_CACHE_ENTRY_SIZE 3 // 3M Max size of single cached entry
// Fragment of the one-time initializer; its header is outside this view
// (presumably once_init_routine, the routine instance() hands to pthread_once
// -- confirm). It initializes instance_mutex through INIT and keeps the result
// in status. The throw below reports a failed initialization; the condition
// guarding it is not visible here.
91 status =
INIT(&instance_mutex);
94 throw InternalErr(__FILE__, __LINE__,
"Could not initialize the HTTP Cache mutex. Exiting.");
// Accessor for the process-wide HTTPCache singleton.
//
// cache_root and force are forwarded unchanged to the HTTPCache constructor.
// Visible steps: run the one-time mutex initializer, lock instance_mutex,
// construct the cache and store it in _instance, register delete_instance with
// atexit, then register handlers for SIGINT, SIGPIPE and SIGTERM. The
// conditions, the try/catch and the unlock/return statements are not part of
// this view.
// Throws InternalErr with the messages shown below; the exact conditions for
// each throw are outside this view.
126 HTTPCache::instance(
const string &cache_root,
bool force)
// Make sure instance_mutex has been initialized exactly once.
128 int status = pthread_once(&once_block, once_init_routine);
130 throw InternalErr(__FILE__, __LINE__,
"Could not initialize the HTTP Cache mutex. Exiting.");
// Serialize access to _instance.
132 LOCK(&instance_mutex);
134 DBG(cerr <<
"Entering instance(); (" << hex << _instance << dec <<
")"
139 _instance =
new HTTPCache(cache_root, force);
141 DBG(cerr <<
"New instance: " << _instance <<
", cache root: "
142 << _instance->d_cache_root << endl);
// Have the C runtime destroy the singleton at normal process exit.
144 atexit(delete_instance);
// For each signal, register_handler returns the previously installed handler
// in old_eh. The calls below that pass old_eh back put that handler in place
// again, and the messages are reported when a handler would be superseded
// (guarding conditions not shown).
155 EventHandler *old_eh = SignalHandler::instance()->register_handler
158 SignalHandler::instance()->register_handler(SIGINT, old_eh);
160 "Could not register event handler for SIGINT without superseding an existing one.");
163 old_eh = SignalHandler::instance()->register_handler
166 SignalHandler::instance()->register_handler(SIGPIPE, old_eh);
168 "Could not register event handler for SIGPIPE without superseding an existing one.");
171 old_eh = SignalHandler::instance()->register_handler
174 SignalHandler::instance()->register_handler(SIGTERM, old_eh);
176 "Could not register event handler for SIGTERM without superseding an existing one.");
182 DBG2(cerr <<
"The constructor threw an Error!" << endl);
188 DBGN(cerr <<
"returning " << hex << _instance << dec << endl);
// Destroys the singleton if one exists and resets _instance to null, so a
// second call finds nothing to delete. instance() registers this function
// with atexit.
197 HTTPCache::delete_instance()
199 DBG(cerr <<
"Entering delete_instance()..." << endl);
200 if (HTTPCache::_instance) {
201 DBG(cerr <<
"Deleting the cache: " << HTTPCache::_instance << endl);
202 delete HTTPCache::_instance;
203 HTTPCache::_instance = 0;
206 DBG(cerr <<
"Exiting delete_instance()" << endl);
// Constructor.
//
// cache_root: directory handed to set_cache_root().
// force:      forwarded to get_single_user_lock().
//
// The initializers visible here set d_locked_open_file and d_http_cache_table
// to 0 and the four boolean flags to false; other initializers are outside
// this view. The body then runs the one-time mutex initializer, initializes
// d_cache_mutex, sets the cache root, takes the single user lock, reads the
// file system block size with stat, creates the HTTPCacheTable and finally
// marks the cache as enabled.
// Throws InternalErr or Error with the messages shown below.
223 HTTPCache::HTTPCache(
string cache_root,
bool force) :
224 d_locked_open_file(0),
225 d_cache_enabled(false),
226 d_cache_protected(false),
227 d_expire_ignored(false),
228 d_always_validate(false),
237 d_http_cache_table(0)
239 DBG(cerr <<
"Entering the constructor for " <<
this <<
"... ");
241 int status = pthread_once(&once_block, once_init_routine);
243 throw InternalErr(__FILE__, __LINE__,
"Could not initialize the HTTP Cache mutex. Exiting.");
// Per-object mutex, distinct from the file-level instance_mutex.
245 INIT(&d_cache_mutex);
257 set_cache_root(cache_root);
260 if (!get_single_user_lock(force))
261 throw Error(
"Could not get single user lock for the cache");
// NOTE(review): stat is applied to the cache_root argument, while the table
// below is built from d_cache_root -- confirm the two always name the same
// directory after set_cache_root().
271 if (stat(cache_root.c_str(), &s) == 0)
272 block_size = s.st_blksize;
274 throw Error(
"Could not set file system block size.");
276 d_http_cache_table =
new HTTPCacheTable(d_cache_root, block_size);
// The cache is switched on only after every step above has been executed.
277 d_cache_enabled =
true;
279 DBGN(cerr <<
"exiting" << endl);
// Destructor fragment (header outside this view). Visible order of work: run
// garbage collection, delete the cache table, then release the single user
// lock. Any error handling around these calls is not shown here.
296 DBG(cerr <<
"Entering the destructor for " <<
this <<
"... ");
300 perform_garbage_collection();
312 delete d_http_cache_table;
314 release_single_user_lock();
316 DBGN(cerr <<
"exiting destructor." << endl);
// Returns true when the table size plus d_folder_size has dropped below
// d_total_size minus d_gc_buffer.
329 HTTPCache::stopGC()
const
331 return (d_http_cache_table->
get_current_size() + d_folder_size < d_total_size - d_gc_buffer);
// Returns true when the table size plus d_folder_size exceeds d_total_size.
341 HTTPCache::startGC()
const
344 return (d_http_cache_table->
get_current_size() + d_folder_size > d_total_size);
// Garbage collection entry point; only the debug trace of its body is visible.
// NOTE(review): presumably it delegates to expired_gc() and too_big_gc()
// declared below -- confirm against the full file.
362 HTTPCache::perform_garbage_collection()
364 DBG(cerr <<
"Performing garbage collection" << endl);
// Expiration-based collection; its visible work is guarded by d_expire_ignored
// being false. The guarded statements are outside this view.
383 HTTPCache::expired_gc()
385 if (!d_expire_ignored) {
// Size-based collection; the body is outside this view.
423 void HTTPCache::too_big_gc() {
// Tries to take the single user lock for the cache directory; the constructor
// treats a false return as failure.
//
// Visible steps, executed only while d_locked_open_file is null: create the
// cache root, probe the lock file by opening it for reading (closing it again
// when that succeeds), then open the lock file for writing and keep the FILE
// pointer in d_locked_open_file. How the lock file name is built, how force is
// used and the return statements are outside this view.
440 bool HTTPCache::get_single_user_lock(
bool force)
442 if (!d_locked_open_file) {
448 create_cache_root(d_cache_root);
454 DBG(cerr <<
"Failure to create the cache root" << endl);
// A successful read-open means the lock file already exists.
460 if ((fp = fopen(lock.c_str(),
"r")) != NULL) {
461 int res = fclose(fp);
463 DBG(cerr <<
"Failed to close " << (
void *)fp << endl);
471 if ((fp = fopen(lock.c_str(),
"w")) == NULL) {
472 DBG(cerr <<
"Could not open for write access" << endl);
// The open FILE pointer itself serves as the lock-held marker.
476 d_locked_open_file = fp;
480 cerr <<
"locked_open_file is true" << endl;
// Releases the single user lock: when a lock file is open it is closed and
// d_locked_open_file is reset to null. The fclose result is kept in res for
// the debug trace below. Removal of the lock file, if any, is outside this
// view.
487 HTTPCache::release_single_user_lock()
489 if (d_locked_open_file) {
490 int res = fclose(d_locked_open_file);
492 DBG(cerr <<
"Failed to close " << (
void *)d_locked_open_file << endl) ;
494 d_locked_open_file = 0;
// Creates the directories that make up cache_root.
//
// For each prefix dir of cache_root (the first cur characters) the code stats
// the path and, when stat fails, creates the directory with mode 0777 after
// calling UMASK(0). The loop that advances cur is outside this view.
// Throws Error when MKDIR fails, naming the prefix that could not be created.
523 HTTPCache::create_cache_root(
const string &cache_root)
525 struct stat stat_info;
526 string::size_type cur = 0;
// Start after a three character prefix when the second character is a colon,
// otherwise after the first character (presumably a drive letter versus a
// leading slash -- confirm against the surrounding platform guards).
529 cur = cache_root[1] ==
':' ? 3 : 1;
535 string dir = cache_root.substr(0, cur);
536 if (stat(dir.c_str(), &stat_info) == -1) {
537 DBG2(cerr <<
"Cache....... Creating " << dir << endl);
// The previous mask is saved in mask; restoring it is not shown in this view.
538 mode_t mask =
UMASK(0);
539 if (
MKDIR(dir.c_str(), 0777) < 0) {
540 DBG2(cerr <<
"Error: can't create." << endl);
542 throw Error(
string(
"Could not create the directory for the cache. Failed when building path at ") + dir +
string(
"."));
547 DBG2(cerr <<
"Cache....... Found " << dir << endl);
// Sets the cache root directory from root.
//
// The visible fallback chain reads the environment variables DODS_CACHE, TMP
// and TEMP in that order, stopping at the first one that is set. When this
// fallback is taken (presumably for an empty root -- confirm) and what happens
// when d_http_cache_table is non-null are outside this view.
568 HTTPCache::set_cache_root(
const string &root)
580 char * cr = (
char *) getenv(
"DODS_CACHE");
581 if (!cr) cr = (
char *) getenv(
"TMP");
582 if (!cr) cr = (
char *) getenv(
"TEMP");
598 if (d_http_cache_table)
// Bodies of six small accessors whose headers are outside this view. Each
// setter stores its mode argument in the matching member and each getter
// returns that member; any locking around them is not shown here.
// Enabled flag: setter, then getter with a debug trace.
618 d_cache_enabled = mode;
628 DBG2(cerr <<
"In HTTPCache::is_cache_enabled: (" << d_cache_enabled <<
")"
630 return d_cache_enabled;
// Disconnected mode: setter, then getter.
648 d_cache_disconnected = mode;
658 return d_cache_disconnected;
// Expire-ignored flag: setter, then getter.
674 d_expire_ignored = mode;
685 return d_expire_ignored;
// Fragment of the total-size setter (header and the start of this expression
// are outside this view). size is a count of megabytes; the visible branch
// converts it to bytes.
//
// The guard compares size against ULONG_MAX / MEGA so that a value whose byte
// count would not fit in an unsigned long is clamped to ULONG_MAX. Comparing
// against ULONG_MAX itself could never detect that overflow.
711 (size > ULONG_MAX / MEGA ? ULONG_MAX : size *
MEGA);
712 unsigned long old_size = d_total_size;
713 d_total_size = new_size;
// Collect only when the cache shrank and is now over its limit.
717 if (new_size < old_size && startGC()) {
718 perform_garbage_collection();
724 DBGN(cerr <<
"Unlocking interface." << endl);
728 DBG2(cerr <<
"Cache....... Total cache size: " << d_total_size
729 <<
" with " << d_folder_size
730 <<
" bytes for meta information and folders and at least "
731 << d_gc_buffer <<
" bytes free after every gc" << endl);
// Body of the total-size getter (header outside this view): reports
// d_total_size converted from bytes to megabytes.
741 return d_total_size /
MEGA;
// Fragment of the max-entry-size setter (header outside this view). size is
// multiplied by MEGA to get bytes. The new value is accepted only when it is
// non-zero and smaller than d_total_size minus d_folder_size.
// NOTE(review): size * MEGA is not checked for overflow here.
758 unsigned long new_size = size *
MEGA;
759 if (new_size > 0 && new_size < d_total_size - d_folder_size) {
760 unsigned long old_size = d_max_entry_size;
761 d_max_entry_size = new_size;
// Collect only when the limit shrank and the cache is over its total size.
762 if (new_size < old_size && startGC()) {
763 perform_garbage_collection();
773 DBG2(cerr <<
"Cache...... Max entry cache size is "
774 << d_max_entry_size << endl);
// Body of the max-entry-size getter: bytes converted back to megabytes.
786 return d_max_entry_size /
MEGA;
// Bodies of four small accessors whose headers are outside this view.
// Default expiration: store exp_time, then return the stored value.
804 d_default_expiration = exp_time;
814 return d_default_expiration;
// Always-validate flag: store validate, then return the stored value.
824 d_always_validate = validate;
833 return d_always_validate;
// Fragment of the Cache-Control setter (header outside this view).
//
// cc is stored in d_cache_control and every element is then split into the
// header name (text before the first colon) and its value (text after the
// first colon-space pair). An element whose name is not Cache-Control raises
// InternalErr. The values no-cache and no-store switch the cache off; for
// max-age, max-stale and min-fresh the text after the equals sign is
// extracted. What is done with the extracted text is outside this view.
//
// The extraction adds 1 to the position returned by find so that substr
// starts just past the equals sign. Adding 1 to the string literal instead
// would advance the pointer to its terminating NUL and search for an empty
// string, which always matches at position 0.
858 d_cache_control = cc;
860 vector<string>::const_iterator i;
861 for (i = cc.begin(); i != cc.end(); ++i) {
862 string header = (*i).substr(0, (*i).find(
':'));
863 string value = (*i).substr((*i).find(
": ") + 2);
864 if (header !=
"Cache-Control") {
865 throw InternalErr(__FILE__, __LINE__,
"Expected cache control header not found.");
868 if (value ==
"no-cache" || value ==
"no-store")
869 d_cache_enabled =
false;
870 else if (value.find(
"max-age") != string::npos) {
871 string max_age = value.substr(value.find(
"=") + 1);
874 else if (value ==
"max-stale")
876 else if (value.find(
"max-stale") != string::npos) {
877 string max_stale = value.substr(value.find(
"=") + 1);
880 else if (value.find(
"min-fresh") != string::npos) {
881 string min_fresh = value.substr(value.find(
"=") + 1);
// Body of the matching getter: returns the headers stored above.
903 return d_cache_control;
// Reports whether url has an entry in the cache: status is true when the
// looked-up entry is non-null. The lookup itself and the return statement are
// outside this view.
917 HTTPCache::is_url_in_cache(
const string &url)
919 DBG(cerr <<
"Is this url in the cache? (" << url <<
")" << endl);
922 bool status = entry != 0;
// Body of a header predicate whose signature is outside this view. It returns
// true when header contains any of the six names below as a substring
// anywhere in the string; the match is case-sensitive and not anchored to the
// start of the header.
937 return header.find(
"Connection") != string::npos
938 || header.find(
"Keep-Alive") != string::npos
939 || header.find(
"Proxy-Authenticate") != string::npos
940 || header.find(
"Proxy-Authorization") != string::npos
941 || header.find(
"Transfer-Encoding") != string::npos
942 || header.find(
"Upgrade") != string::npos;
// Writes headers to the metadata file for the cache entry cachename, one
// header per line.
//
// The file name fname is pushed onto d_open_files before the file is opened
// and popped again after it is closed. How fname is derived from cachename
// and any filtering of headers are outside this view.
// Throws InternalErr when the file cannot be opened or a write fails (the
// conditions guarding the throws are not shown here).
957 HTTPCache::write_metadata(
const string &cachename,
const vector<string> &headers)
960 d_open_files.push_back(fname);
962 FILE *dest = fopen(fname.c_str(),
"w");
964 throw InternalErr(__FILE__, __LINE__,
965 "Could not open named cache entry file.");
968 vector<string>::const_iterator i;
969 for (i = headers.begin(); i != headers.end(); ++i) {
// The whole header is written as one item, so fwrite reports 1 on success.
// NOTE(review): with an item size of zero (an empty header) fwrite returns 0;
// confirm that empty headers cannot reach this call.
971 int s = fwrite((*i).c_str(), (*i).size(), 1, dest);
974 throw InternalErr(__FILE__, __LINE__,
"could not write header: '" + (*i) +
"' " +
long_to_string(s));
// Terminate the header line.
976 s = fwrite(
"\n", 1, 1, dest);
979 throw InternalErr(__FILE__, __LINE__,
"could not write header: " +
long_to_string(s));
984 int res = fclose(dest);
986 DBG(cerr <<
"HTTPCache::write_metadata - Failed to close "
990 d_open_files.pop_back();
// Reads the metadata file for the cache entry cachename (its name is
// cachename followed by CACHE_META) and appends each line to headers.
//
// cachename: base name of the cache entry.
// headers:   output; one element is appended per line read.
// Throws InternalErr when the metadata file cannot be opened (the guarding
// condition is outside this view).
1004 HTTPCache::read_metadata(
const string &cachename, vector<string> &headers)
1006 FILE *md = fopen(
string(cachename + CACHE_META).c_str(),
"r");
1008 throw InternalErr(__FILE__, __LINE__,
1009 "Could not open named cache entry meta data file.");
1013 while (!feof(md) && fgets(line, 1024, md)) {
// fgets keeps the newline when one was read. Strip it only when it is really
// there, so that an empty read never writes before the buffer and a line
// without a trailing newline keeps its last character.
1014 const size_t line_len = strlen(line);
if (line_len > 0 && line[line_len - 1] == '\n')
line[line_len - 1] = '\0';
1015 headers.push_back(
string(line));
1018 int res = fclose(md);
1020 DBG(cerr <<
"HTTPCache::read_metadata - Failed to close "
// Copies the stream src into the cache file cachename.
//
// cachename is pushed onto d_open_files before the file is opened and popped
// after it is closed. The data is copied in chunks of at most 1024 bytes and
// total accumulates the number of bytes written. On a stream error the
// partial file is closed and unlinked before InternalErr is thrown. On
// success src is rewound. The return statement is outside this view.
// Throws InternalErr when the file cannot be opened or the copy fails.
1047 HTTPCache::write_body(
const string &cachename,
const FILE *src)
1049 d_open_files.push_back(cachename);
1051 FILE *dest = fopen(cachename.c_str(),
"wb");
1053 throw InternalErr(__FILE__, __LINE__,
1054 "Could not open named cache entry file.");
// src is declared const FILE *, so the const is cast away for the stdio calls.
1062 while ((n = fread(line, 1, 1024, const_cast<FILE *>(src))) > 0) {
1063 total += fwrite(line, 1, n, dest);
1067 if (ferror(const_cast<FILE *>(src)) || ferror(dest)) {
1068 int res = fclose(dest);
// The two results are combined with a bitwise AND; res feeds the debug trace.
1069 res = res & unlink(cachename.c_str());
1071 DBG(cerr <<
"HTTPCache::write_body - Failed to close/unlink "
1074 throw InternalErr(__FILE__, __LINE__,
1075 "I/O error transferring data to the cache.");
// Leave src positioned at its start.
1078 rewind(const_cast<FILE *>(src));
1080 int res = fclose(dest);
1082 DBG(cerr <<
"HTTPCache::write_body - Failed to close "
1086 d_open_files.pop_back();
// Opens the cached body file cachename for binary reading.
// Throws InternalErr with the message below; the guarding condition and the
// return statement are outside this view.
1100 HTTPCache::open_body(
const string &cachename)
1102 DBG(cerr <<
"cachename: " << cachename << endl);
1104 FILE *src = fopen(cachename.c_str(),
"rb");
1106 throw InternalErr(__FILE__, __LINE__,
"Could not open cache file.");
// Fragment of the routine that stores a response in the cache; the start of
// its signature, the creation of entry and all return statements are outside
// this view.
//
// Visible steps: test url for the http: and https: markers (both tests are
// substring searches, not prefix checks), let the table parse the headers
// into entry using d_max_entry_size, delete entry on the not-cacheable and
// too-big paths, compute entry timing from d_default_expiration and
// request_time, and run garbage collection.
1138 const vector<string> &headers,
const FILE *body)
1142 DBG(cerr <<
"Caching url: " << url <<
"." << endl);
1146 if (url.find(
"http:") == string::npos &&
1147 url.find(
"https:") == string::npos) {
1161 d_http_cache_table->
parse_headers(entry, d_max_entry_size, headers);
1163 DBG(cerr <<
"Not cache-able; deleting HTTPCacheTable::CacheEntry: " << entry
1164 <<
"(" << url <<
")" << endl);
// entry is owned by this routine on the rejection paths, so it is freed here.
1166 delete entry; entry = 0;
1172 d_http_cache_table->
calculate_time(entry, d_default_expiration, request_time);
1186 DBG(cerr <<
"Too big; deleting HTTPCacheTable::CacheEntry: " << entry <<
"(" << url
1189 delete entry; entry = 0;
1196 perform_garbage_collection();
// Fragment of the routine that builds conditional request headers for url;
// its signature, the conditions between the statements and the return are
// outside this view.
//
// Visible steps: fetch the locked table entry for url (Error is thrown when
// there is none), add an If-None-Match header carrying the entry etag, and
// add an If-Modified-Since header. Three alternative If-Modified-Since pushes
// are visible; the first is guarded by a positive last-modified time, and the
// date operands of all three are not shown here.
1235 vector<string> headers;
1237 DBG(cerr <<
"Getting conditional request headers for " << url << endl);
1240 entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
1242 throw Error(
"There is no cache entry for the URL: " + url);
1245 headers.push_back(
string(
"If-None-Match: ") + entry->
get_etag());
1247 if (entry->
get_lm() > 0) {
1248 time_t lm = entry->
get_lm();
1249 headers.push_back(
string(
"If-Modified-Since: ")
1254 headers.push_back(
string(
"If-Modified-Since: ")
1259 headers.push_back(
string(
"If-Modified-Since: ")
// Strict weak ordering for header lines that compares only the field name,
// that is the text before the first colon. Two headers with the same name are
// therefore equivalent regardless of their values. When a line has no colon,
// find returns npos and the whole line is compared.
1279 struct HeaderLess: binary_function<const string&, const string&, bool>
1281 bool operator()(
const string &s1,
const string &s2)
const {
1282 return s1.substr(0, s1.find(
':')) < s2.substr(0, s2.find(
':'));
// Fragment of the routine that refreshes the stored headers of a cached url;
// the start of its signature, how old_headers is filled and how result is
// written back are outside this view.
//
// Visible steps: throw Error when url has no entry, re-parse headers into the
// entry, recompute its timing, then merge the new and the old headers in a
// set ordered by HeaderLess.
1301 const vector<string> &headers)
1306 DBG(cerr <<
"Updating the response headers for: " << url << endl);
1311 throw Error(
"There is no cache entry for the URL: " + url);
1314 d_http_cache_table->
parse_headers(entry, d_max_entry_size, headers);
1317 d_http_cache_table->
calculate_time(entry, d_default_expiration, request_time);
1325 set<string, HeaderLess> merged_headers;
// The new headers go in first. A set keeps the first element of each
// equivalence class, so an old header with the same field name is dropped
// when it is inserted afterwards.
1328 copy(headers.begin(), headers.end(),
1329 inserter(merged_headers, merged_headers.begin()));
1332 vector<string> old_headers;
1334 copy(old_headers.begin(), old_headers.end(),
1335 inserter(merged_headers, merged_headers.begin()));
// The merged set is copied out in reverse order of the HeaderLess ordering.
1340 vector<string> result;
1341 copy(merged_headers.rbegin(), merged_headers.rend(),
1342 back_inserter(result));
// Fragment of the freshness check for a cached url; its signature, the
// computation of current_age and most return statements are outside this
// view.
//
// Visible steps: a branch on d_always_validate, a lookup of the locked entry
// (Error is thrown when there is none), a max-age test that applies only when
// d_max_age is non-negative, a min-fresh test that applies only when
// d_min_fresh is non-negative, and a final comparison against current_age in
// which a non-negative d_max_stale is added as extra allowance.
1376 DBG(cerr <<
"Is this URL valid? (" << url <<
")" << endl);
1379 if (d_always_validate) {
1384 entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
1386 throw Error(
"There is no cache entry for the URL: " + url);
1404 if (d_max_age >= 0 && current_age > d_max_age) {
1405 DBG(cerr <<
"Cache....... Max-age validation" << endl);
1410 if (d_min_fresh >= 0
1412 DBG(cerr <<
"Cache....... Min-fresh validation" << endl);
1419 + (d_max_stale >= 0 ? d_max_stale : 0) > current_age);
// Fragment of the routine that returns a cached response for url; the start
// of its signature, the reading of headers and the body, and the return are
// outside this view. headers and cacheName are non-const reference
// parameters; NOTE(review): presumably both are outputs -- confirm. The entry
// is obtained through get_locked_entry_from_cache_table; where it is unlocked
// is not shown here.
1462 vector<string> &headers,
string &cacheName) {
1468 DBG(cerr <<
"Getting the cached response for " << url << endl);
1471 entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
1480 DBG(cerr <<
"Headers just read from cache: " << endl);
1481 DBGN(copy(headers.begin(), headers.end(), ostream_iterator<string>(cerr,
"\n")));
1485 DBG(cerr <<
"Returning: " << url <<
" from the cache." << endl);
// Isolated statements from several routines whose headers are outside this
// view.
// NOTE(review): discard_name and discard_headers look like throwaway locals
// for callers that do not need those outputs -- confirm against the full file.
1517 string discard_name;
1534 string discard_name;
1535 vector<string> discard_headers;
// Raised when a purge is attempted while cache entries are still in use; the
// guarding condition is not shown here.
1587 throw Error(
"Attempt to purge the cache with entries in use.");