Main Page | Modules | Alphabetical List | Data Structures | File List | Data Fields | Globals | Related Pages

gwcache.c File Reference


Detailed Description

Gnutella Web Cache.

Author:
Raphael Manfredi
Date:
2001-2003

#include "common.h"
#include "gwcache.h"
#include "http.h"
#include "hosts.h"
#include "hcache.h"
#include "version.h"
#include "settings.h"
#include "lib/atoms.h"
#include "lib/cq.h"
#include "lib/file.h"
#include "lib/getline.h"
#include "lib/glib-missing.h"
#include "lib/misc.h"
#include "lib/tm.h"
#include "lib/url.h"
#include "lib/walloc.h"
#include "if/gnet_property_priv.h"
#include "if/core/nodes.h"
#include "if/bridge/c2ui.h"
#include "lib/override.h"

Data Structures

struct  gwc
struct  parse_context

Defines

#define MAX_GWC_URLS   200 /**< Max URLs we store */
 Max URLs we store.

#define MAX_GWC_REUSE   1 /**< Max amount of uses for one URL */
 Max amount of uses for one URL.

#define MAX_URL_LINES   50 /**< Max lines on a urlfile req */
 Max lines on a urlfile req.

#define MAX_IP_LINES   150 /**< Max lines on a hostfile req */
 Max lines on a hostfile req.

#define MAX_OK_LINES   3 /**< Max lines when expecting OK */
 Max lines when expecting OK.

#define MIN_IP_LINES   5 /**< Min lines expected */
 Min lines expected.

#define MIN_URL_LINES   5 /**< Min lines expected */
 Min lines expected.

#define HOUR_MS   (3600 * 1000) /**< Callout queue time in ms */
 Callout queue time in ms.

#define URL_RETRY_MS   (20 * 1000) /**< Retry timer for urlfile, in ms */
 Retry timer for urlfile, in ms.

#define REFRESH_MS   (8 * HOUR_MS) /**< Refresh every 8 hours */
 Refresh every 8 hours.

#define REUSE_PERIOD   3600 /**< Period between GET hostfile */
 Period between GET hostfile.

#define CLIENT_INFO   "client=GTKG&version=" GTA_VERSION_NUMBER

Typedefs

typedef gboolean( parse_dispatch_t )(struct parse_context *c, gchar *buf, gint len)
typedef void( parse_eof_t )(struct parse_context *c)

Functions

 RCSID ("$Id: gwcache.c,v 1.38 2005/09/11 13:39:57 rmanfredi Exp $")
void gwc_get_urls (void)
 Retrieve more web caches, asynchronously.

void gwc_update_ip_url (void)
 Publish our IP to the web cache and propagate one random URL.

void gwc_seed_cache (gchar *cache_url)
 Publish our IP to the named cache `cache_url' and propagate one random URL.

void gwc_add (const gchar *new_url)
 Add new URL to cache, possibly pushing off an older one if cache is full.

gchar * gwc_pick (void)
 Pickup a cache randomly from the known set.

void gwc_store (void)
 Store known GWC URLs.

void gwc_store_if_dirty (void)
 Store known GWC URLs if dirty.

void gwc_retrieve (void)
 Retrieve known GWC URLs.

void gwc_hourly_update (cqueue_t *unused_cq, gpointer unused_obj)
 Hourly web cache update.

void gwc_periodic_refresh (cqueue_t *unused_cq, gpointer unused_obj)
 Periodic web cache refresh.

void gwc_urlfile_retry (cqueue_t *unused_cq, gpointer unused_obj)
 Called when we failed last urlfile request, after some delay.

void gwc_init (void)
 Initialize web cache.

gboolean check_current_url (void)
 Ensures that we have a valid `current_url' or pick a new one.

void forget_url (gchar *url)
 Removes the URL from the set of known URLs, but does not free its memory, and keeps it in the set of failed URLs for the session.

void clear_current_url (gboolean discard)
 Dispose of current URL atom, if defined.

void free_failed_url (gpointer key, gpointer unused_value, gpointer unused_udata)
 Frees the atom used as hash table key.

void gwc_close (void)
 Called when servent shuts down.

void parse_context_free (gpointer obj)
 Free parsing context.

void parse_context_set (gpointer handle, gint maxlines)
 Allocate new parsing context for handle and record it.

void parse_dispatch_lines (gpointer handle, gchar *buf, gint len, parse_dispatch_t cb, parse_eof_t eof)
 Analyze the data we have received, and give each line to the supplied dispatcher callback `cb', after having chomped it.

gboolean gwc_url_line (struct parse_context *ctx, gchar *buf, gint len)
 Called from parse_dispatch_lines() for each complete line of output.

void gwc_url_eof (struct parse_context *ctx)
 Called from parse_dispatch_lines() on EOF.

void gwc_url_data_ind (gpointer handle, gchar *data, gint len)
 Populate callback: more data available.

void gwc_url_error_ind (gpointer handle, http_errtype_t type, gpointer v)
 HTTP request is being stopped.

gboolean gwc_is_waiting (void)
 Check whether we're waiting for a hostfile request.

gboolean gwc_host_line (struct parse_context *ctx, gchar *buf, gint len)
 Called from parse_dispatch_lines() for each complete line of output.

void gwc_host_eof (struct parse_context *ctx)
 Called from parse_dispatch_lines() on EOF.

void gwc_host_data_ind (gpointer handle, gchar *data, gint len)
 Populate callback: more data available.

void gwc_host_error_ind (gpointer handle, http_errtype_t type, gpointer v)
 HTTP request is being stopped.

void gwc_get_hosts (void)
 Retrieve more hosts from web cache, asynchronously.

gboolean gwc_update_line (struct parse_context *ctx, gchar *buf, gint len)
 Called from parse_dispatch_lines() for each complete line of output.

void gwc_update_data_ind (gpointer handle, gchar *data, gint len)
 Populate callback: more data available.

void gwc_update_error_ind (gpointer handle, http_errtype_t type, gpointer v)
 HTTP request is being stopped.

void gwc_update_this (gchar *cache_url)
 Publish our IP to the named cache `cache_url' and propagate one random URL.


Variables

gchar gwc_tmp [1024]
gwc gwc_url [MAX_GWC_URLS]
 Holds string atoms.

gint gwc_url_slot = -1
GHashTable * gwc_known_url = NULL
GHashTable * gwc_failed_url = NULL
gchar * current_url = NULL
 Cache we're currently using.

gint current_reused = 0
 Amount of times we reused it.

const gchar gwc_file [] = "gwcache"
const gchar gwc_bootfile [] = "gwcache.boot"
const gchar gwc_what [] = "web cache URLs"
gpointer hourly_update_ev = NULL
gpointer periodic_refresh_ev = NULL
gpointer urlfile_retry_ev = NULL
gboolean gwc_file_dirty = FALSE
const gchar *const  boot_url []
 The following URLs are there for bootstrapping purposes only.

gboolean hostfile_running = FALSE


Define Documentation

#define CLIENT_INFO   "client=GTKG&version=" GTA_VERSION_NUMBER
 

#define HOUR_MS   (3600 * 1000) /**< Callout queue time in ms */
 

Callout queue time in ms.

#define MAX_GWC_REUSE   1 /**< Max amount of uses for one URL */
 

Max amount of uses for one URL.

#define MAX_GWC_URLS   200 /**< Max URLs we store */
 

Max URLs we store.

#define MAX_IP_LINES   150 /**< Max lines on a hostfile req */
 

Max lines on a hostfile req.

#define MAX_OK_LINES   3 /**< Max lines when expecting OK */
 

Max lines when expecting OK.

#define MAX_URL_LINES   50 /**< Max lines on a urlfile req */
 

Max lines on a urlfile req.

#define MIN_IP_LINES   5 /**< Min lines expected */
 

Min lines expected.

#define MIN_URL_LINES   5 /**< Min lines expected */
 

Min lines expected.

#define REFRESH_MS   (8 * HOUR_MS) /**< Refresh every 8 hours */
 

Refresh every 8 hours.

#define REUSE_PERIOD   3600 /**< Period between GET hostfile */
 

Period between GET hostfile.

#define URL_RETRY_MS   (20 * 1000) /**< Retry timer for urlfile, in ms */
 

Retry timer for urlfile, in ms.


Typedef Documentation

typedef gboolean( parse_dispatch_t)(struct parse_context *c, gchar *buf, gint len)
 

typedef void( parse_eof_t)(struct parse_context *c)
 


Function Documentation

gboolean check_current_url (void) [static]
 

Ensures that we have a valid `current_url' or pick a new one.

Also forces a change of the current URL after too many uses.

Returns:
TRUE if we got a valid URL.

void clear_current_url (gboolean discard) [static]
 

Dispose of current URL atom, if defined.

When `discard' is set, we remove the current URL physically from our cache.

void forget_url (gchar *url) [static]
 

Removes the URL from the set of known URLs, but does not free its memory, and keeps it in the set of failed URLs for the session.

void free_failed_url (gpointer key, gpointer unused_value, gpointer unused_udata) [static]
 

Frees the atom used as hash table key.

void gwc_add (const gchar *new_url) [static]
 

Add new URL to cache, possibly pushing off an older one if cache is full.

void gwc_close (void)
 

Called when servent shuts down.

void gwc_get_hosts (void)
 

Retrieve more hosts from web cache, asynchronously.

void gwc_get_urls (void) [static]
 

Retrieve more web caches, asynchronously.

We'll try again and again, until we reach a good cache that answers with at least one request.

void gwc_host_data_ind (gpointer handle, gchar *data, gint len) [static]
 

Populate callback: more data available.

void gwc_host_eof (struct parse_context *ctx) [static]
 

Called from parse_dispatch_lines() on EOF.

void gwc_host_error_ind (gpointer handle, http_errtype_t type, gpointer v) [static]
 

HTTP request is being stopped.

gboolean gwc_host_line (struct parse_context *ctx, gchar *buf, gint len) [static]
 

Called from parse_dispatch_lines() for each complete line of output.

Returns:
FALSE to stop processing of any remaining data.

void gwc_hourly_update (cqueue_t *unused_cq, gpointer unused_obj) [static]
 

Hourly web cache update.

Scheduled as a callout queue event.

void gwc_init (void)
 

Initialize web cache.

gboolean gwc_is_waiting (void)
 

Check whether we're waiting for a hostfile request.

void gwc_periodic_refresh (cqueue_t *unused_cq, gpointer unused_obj) [static]
 

Periodic web cache refresh.

Scheduled as a callout queue event.

gchar * gwc_pick (void) [static]
 

Pickup a cache randomly from the known set.

If there is no URL that has not been used recently, NULL will be returned. The timestamp for the picked URL is updated automatically.

Try to avoid using default bootstrapping URLs if we have more than the minimum set of caches in stock...

Returns:
a GWebCache URL or NULL on failure.

void gwc_retrieve (void) [static]
 

Retrieve known GWC URLs.

They are normally saved in ~/.gtk-gnutella/gwcache.

void gwc_seed_cache (gchar *cache_url) [static]
 

Publish our IP to the named cache `cache_url' and propagate one random URL.

We sometimes forcefully call this routine with a cache that does not return anything to us, to try to "bootstrap" it by feeding some data.

void gwc_store (void) [static]
 

Store known GWC URLs.

They are normally saved in ~/.gtk-gnutella/gwcache.

void gwc_store_if_dirty (void)
 

Store known GWC URLs if dirty.

void gwc_update_data_ind (gpointer handle, gchar *data, gint len) [static]
 

Populate callback: more data available.

void gwc_update_error_ind (gpointer handle, http_errtype_t type, gpointer v) [static]
 

HTTP request is being stopped.

void gwc_update_ip_url (void) [static]
 

Publish our IP to the web cache and propagate one random URL.

gboolean gwc_update_line (struct parse_context *ctx, gchar *buf, gint len) [static]
 

Called from parse_dispatch_lines() for each complete line of output.

Returns:
FALSE to stop processing of any remaining data.

void gwc_update_this (gchar *cache_url) [static]
 

Publish our IP to the named cache `cache_url' and propagate one random URL.

void gwc_url_data_ind (gpointer handle, gchar *data, gint len) [static]
 

Populate callback: more data available.

void gwc_url_eof (struct parse_context *ctx) [static]
 

Called from parse_dispatch_lines() on EOF.

void gwc_url_error_ind (gpointer handle, http_errtype_t type, gpointer v) [static]
 

HTTP request is being stopped.

gboolean gwc_url_line (struct parse_context *ctx, gchar *buf, gint len) [static]
 

Called from parse_dispatch_lines() for each complete line of output.

Returns:
FALSE to stop processing of any remaining data.

void gwc_urlfile_retry (cqueue_t *unused_cq, gpointer unused_obj) [static]
 

Called when we failed last urlfile request, after some delay.

Scheduled as a callout queue event.

void parse_context_free (gpointer obj) [static]
 

Free parsing context.

void parse_context_set (gpointer handle, gint maxlines) [static]
 

Allocate new parsing context for handle and record it.

Parameters:
`handle' the asynchronous HTTP request handle.
`maxlines' the max number of lines we want to parse.

void parse_dispatch_lines (gpointer handle, gchar *buf, gint len, parse_dispatch_t cb, parse_eof_t eof) [static]
 

Analyze the data we have received, and give each line to the supplied dispatcher callback `cb', after having chomped it.

On EOF, call `eof' to finalize parsing.

RCSID ("$Id: gwcache.c,v 1.38 2005/09/11 13:39:57 rmanfredi Exp $")
 


Variable Documentation

const gchar* const boot_url[] [static]
 

Initial value:

 {
    "http://cache.kicks-ass.net:8000/",
    "http://galvatron.dyndns.org:59009/gwcache",
    "http://krill.shacknet.nu:20095/gwc",
}
The following URLs are there for bootstrapping purposes only.

gint current_reused = 0 [static]
 

Amount of times we reused it.

gchar* current_url = NULL [static]
 

Cache we're currently using.

const gchar gwc_bootfile[] = "gwcache.boot" [static]
 

GHashTable* gwc_failed_url = NULL [static]
 

const gchar gwc_file[] = "gwcache" [static]
 

gboolean gwc_file_dirty = FALSE [static]
 

GHashTable* gwc_known_url = NULL [static]
 

gchar gwc_tmp[1024] [static]
 

struct gwc gwc_url[MAX_GWC_URLS]
 

Holds string atoms.

gint gwc_url_slot = -1 [static]
 

const gchar gwc_what[] = "web cache URLs" [static]
 

gboolean hostfile_running = FALSE [static]
 

gpointer hourly_update_ev = NULL [static]
 

gpointer periodic_refresh_ev = NULL [static]
 

gpointer urlfile_retry_ev = NULL [static]
 


Generated on Sun Feb 12 10:50:02 2006 for Gtk-Gnutella by doxygen 1.3.6