Main Page | Modules | Alphabetical List | Data Structures | File List | Data Fields | Globals | Related Pages

dmesh.c File Reference


Detailed Description

Download mesh.

Author:
Raphael Manfredi
Date:
2002-2003

#include "common.h"
#include "gnutella.h"
#include "downloads.h"
#include "uploads.h"
#include "dmesh.h"
#include "huge.h"
#include "http.h"
#include "hostiles.h"
#include "guid.h"
#include "share.h"
#include "fileinfo.h"
#include "settings.h"
#include "hosts.h"
#include "if/gnet_property_priv.h"
#include "lib/atoms.h"
#include "lib/base32.h"
#include "lib/cq.h"
#include "lib/endian.h"
#include "lib/file.h"
#include "lib/fuzzy.h"
#include "lib/getdate.h"
#include "lib/glib-missing.h"
#include "lib/header.h"
#include "lib/tm.h"
#include "lib/url.h"
#include "lib/urn.h"
#include "lib/walloc.h"
#include "lib/override.h"

Data Structures

struct  dmesh
struct  dmesh_entry
struct  dmesh_banned
struct  dmesh_deferred_url_t
 A simple container for the dmesh info that the deferred checking code needs, although to be honest it may be worth refactoring the dmesh code so it all works on dmesh_entries? More...


Defines

#define MAX_LIFETIME   86400 /**< 1 day */
 1 day

#define MAX_ENTRIES   64 /**< Max amount of entries kept in list */
 Max amount of entries kept in list.

#define MAX_STAMP   ((time_t) -1)
#define MIN_PFSP_SIZE   524288 /**< 512K, min size for PFSP advertising */
 512K, min size for PFSP advertising

#define MIN_PFSP_PCT   10 /**< 10%, min available data for PFSP */
 10%, min available data for PFSP

#define FUZZY_DROP   ((60 << FUZZY_SHIFT) / 100)
 If not at least 60% alike, dump!

#define FUZZY_MATCH   ((80 << FUZZY_SHIFT) / 100)
 If more than 80% alike, equal!

#define BAN_LIFETIME   7200 /**< 2 hours */
 2 hours

#define DMESH_MAX   MAX_ENTRIES

Typedefs

typedef void(* header_func_t )(FILE *out)

Functions

 RCSID ("$Id:dmesh.c, v 1.35 2005/11/10 23:54:15 cbiere Exp $")
void dmesh_retrieve (void)
 Retrieve download mesh and add entries that have not expired yet.

void dmesh_ban_retrieve (void)
 Retrieve banned mesh and add entries that have not expired yet.

gchar * dmesh_urlinfo_to_string (const dmesh_urlinfo_t *info)
 Format the `info' URL and return pointer to static string.

guint urlinfo_hash (gconstpointer key)
 Hash a URL info.

gint urlinfo_eq (gconstpointer a, gconstpointer b)
 Test equality of two URL infos.

void dmesh_init (void)
 Initialize the download mesh.

gint dmesh_entry_cmp (gconstpointer a, gconstpointer b)
 Compare two dmesh_entry, based on the timestamp.

void dmesh_entry_free (struct dmesh_entry *dme)
 Free download mesh entry.

void dmesh_urlinfo_free (dmesh_urlinfo_t *info)
 Free a dmesh_urlinfo_t structure.

void dmesh_ban_expire (cqueue_t *unused_cq, gpointer obj)
 Called from callout queue when it's time to expire the URL ban.

void dmesh_ban_add (const gchar *sha1, dmesh_urlinfo_t *info, time_t stamp)
 Add new URL to the banned hash.

gboolean dmesh_is_banned (dmesh_urlinfo_t *info)
 Check whether URL is banned from the mesh.

const gchar * dmesh_url_strerror (dmesh_url_error_t errnum)
gboolean dmesh_url_parse (const gchar *url, dmesh_urlinfo_t *info)
 Parse URL `url', and fill a structure `info' representing this URL.

void dm_expire (struct dmesh *dm, guint32 agemax, const gchar *sha1)
 Expire entries older than `agemax' in a given mesh bucket `dm'.

gboolean dm_remove (struct dmesh *dm, const host_addr_t addr, guint16 port, guint idx, const gchar *name, time_t stamp)
 Remove specified entry from mesh bucket, if it is older than `stamp'.

void dmesh_dispose (const gchar *sha1)
 Dispose of the entry slot, which must be empty.

void dmesh_fill_info (dmesh_urlinfo_t *info, const gchar *sha1, const host_addr_t addr, guint16 port, guint idx, gchar *name)
 Fill URL info from externally supplied sha1, addr, port, idx and name.

gboolean dmesh_remove (const gchar *sha1, const host_addr_t addr, guint16 port, guint idx, gchar *name)
 Remove entry from mesh due to a failed download attempt.

gint dmesh_count (const gchar *sha1)
 Get the number of dmesh entries for a given SHA1.

gboolean dmesh_raw_add (gchar *sha1, dmesh_urlinfo_t *info, time_t stamp)
 Add entry to the download mesh, indexed by the binary `sha1' digest.

gboolean dmesh_add (gchar *sha1, const host_addr_t addr, guint16 port, guint idx, gchar *name, time_t stamp)
 Same as dmesh_raw_add(), but this is for public consumption.

size_t dmesh_urlinfo (const dmesh_urlinfo_t *info, gchar *buf, size_t len, gboolean *quoting)
 Format the URL described by `info' into the provided buffer `buf', which can hold `len' bytes.

size_t dmesh_entry_compact (const struct dmesh_entry *dme, gchar *buf, size_t size)
 Format mesh_entry in the provided buffer, as a compact addr:port address.

size_t dmesh_entry_url_stamp (const struct dmesh_entry *dme, gchar *buf, size_t size)
 Format dmesh_entry in the provided buffer, as an URL with an appended timestamp in ISO format, GMT time.

const gchar * dmesh_entry_to_string (const struct dmesh_entry *dme)
 Format the `dme' mesh entry as "URL timestamp".

gint dmesh_fill_alternate (const gchar *sha1, gnet_host_t *hvec, gint hcnt)
 Fill supplied vector `hvec' whose size is `hcnt' with some alternate locations for a given SHA1 key, that can be requested by hash directly.

gint dmesh_alternate_location (const gchar *sha1, gchar *buf, size_t size, const host_addr_t addr, time_t last_sent, const gchar *vendor, fileinfo_t *fi, gboolean request)
 Build alternate location header for a given SHA1 key.

GSList * dmesh_get_nonurn_altlocs (const gchar *sha1)
 Create a list of nonurn alternative locations for a given sha1.

GSList * dmesh_defer_nonurn_altloc (GSList *list, dmesh_urlinfo_t *url, time_t stamp)
 This function defers adding a proposed alternate location that has been given as an old style string (i.e.

void dmesh_free_deferred_altloc (dmesh_deferred_url_t *info, gpointer unused_udata)
 Called to deallocate the dmesh_urlinfo and de-reference names from the GSList.

void dmesh_check_deferred_against_existing (gchar *sha1, GSList *existing_urls, GSList *deferred_urls)
 This routine checks deferred dmesh entries against any existing nonurn alternative locations.

void dmesh_check_deferred_against_themselves (gchar *sha1, GSList *deferred_urls)
 This routine checks deferred dmesh entries against themselves.

void dmesh_check_deferred_altlocs (gchar *sha1, GSList *deferred_urls)
 This function is called once dmesh_collect_locations is done and makes it then decides how its going to vet the quality of the deferred nonurn altlocs before decing if they are going to be added for the given sha1.

gboolean dmesh_collect_sha1 (const gchar *value, gchar *digest)
 Parse the value of the X-Gnutella-Content-URN header in `value', looking for a SHA1.

void dmesh_collect_compact_locations (gchar *sha1, gchar *value)
 Parse the value of the "X-Alt" header to extract alternate sources for a given SHA1 key given in the new compact form.

void dmesh_collect_locations (gchar *sha1, gchar *value, gboolean defer)
 Parse value of the "X-Gnutella-Alternate-Location" to extract alternate sources for a given SHA1 key.

gint dmesh_alt_loc_fill (const gchar *sha1, dmesh_urlinfo_t *buf, gint count)
 Fill buffer with at most `count' alternative locations for sha1.

void dmesh_check_results_set (gnet_results_set_t *rs)
 Parse query hit (result set) for entries whose SHA1 match something we have into the mesh or share, and insert them if needed.

void dmesh_multiple_downloads (gchar *sha1, filesize_t size, fileinfo_t *fi)
 This is called when swarming is first requested to get a list of all the servers with the requested file known by dmesh.

void dmesh_store_kv (gpointer key, gpointer value, gpointer udata)
 Store key/value pair in file.

void dmesh_store_hash (const gchar *what, GHashTable *hash, const gchar *file, header_func_t header_cb, GHFunc store_cb)
 Store hash table `hash' into `file'.

void dmesh_header_print (FILE *out)
 Prints header to dmesh store file.

void dmesh_store (void)
 Store download mesh onto file.

void dmesh_ban_store_kv (gpointer key, gpointer value, gpointer udata)
 Store key/value pair in file.

void dmesh_ban_header_print (FILE *out)
 Prints header to banned mesh store file.

void dmesh_ban_store (void)
 Store banned mesh onto file.

gboolean dmesh_free_kv (gpointer key, gpointer value, gpointer unused_udata)
 Free key/value pair in download mesh hash.

void dmesh_ban_prepend_list (gpointer key, gpointer value, gpointer user)
 Prepend the value to the list, given by reference.

void dmesh_close (void)
 Called at servent shutdown time.


Variables

dmesh_url_error_t dmesh_url_errno
 Error from dmesh_url_parse().

GHashTable * mesh = NULL
 The download mesh records all the known sources for a given SHA1.

const gchar dmesh_file [] = "dmesh"
GHashTable * ban_mesh = NULL
 If we get a "bad" URL into the mesh ("bad" = gives 404 or other error when trying to download it), we must remember it for some time and prevent it from re-entering the mesh again within that period to prevent rescheduling for download and a further failure: that would be hammering the poor host, and we're wasting our time and bandwidth.

GHashTable * ban_mesh_by_sha1 = NULL
 This table stores the banned entries by SHA1.

const gchar dmesh_ban_file [] = "dmesh_ban"
const gchar *const  parse_errstr []


Define Documentation

#define BAN_LIFETIME   7200 /**< 2 hours */
 

2 hours

#define DMESH_MAX   MAX_ENTRIES
 

#define FUZZY_DROP   ((60 << FUZZY_SHIFT) / 100)
 

If not at least 60% alike, dump!

#define FUZZY_MATCH   ((80 << FUZZY_SHIFT) / 100)
 

If more than 80% alike, equal!

#define MAX_ENTRIES   64 /**< Max amount of entries kept in list */
 

Max amount of entries kept in list.

#define MAX_LIFETIME   86400 /**< 1 day */
 

1 day

#define MAX_STAMP   ((time_t) -1)
 

#define MIN_PFSP_PCT   10 /**< 10%, min available data for PFSP */
 

10%, min available data for PFSP

#define MIN_PFSP_SIZE   524288 /**< 512K, min size for PFSP advertising */
 

512K, min size for PFSP advertising


Typedef Documentation

typedef void(* header_func_t)(FILE *out)
 


Function Documentation

void dm_expire struct dmesh dm,
guint32  agemax,
const gchar *  sha1
[static]
 

Expire entries older than `agemax' in a given mesh bucket `dm'.

`sha1' is only passed in case we want to log the removal.

gboolean dm_remove struct dmesh dm,
const host_addr_t  addr,
guint16  port,
guint  idx,
const gchar *  name,
time_t  stamp
[static]
 

Remove specified entry from mesh bucket, if it is older than `stamp'.

Returns:
TRUE if entry was removed or not found, FALSE otherwise.

gboolean dmesh_add gchar *  sha1,
const host_addr_t  addr,
guint16  port,
guint  idx,
gchar *  name,
time_t  stamp
 

Same as dmesh_raw_add(), but this is for public consumption.

gint dmesh_alt_loc_fill const gchar *  sha1,
dmesh_urlinfo_t buf,
gint  count
[static]
 

Fill buffer with at most `count' alternative locations for sha1.

Returns:
the amount of locations inserted.

gint dmesh_alternate_location const gchar *  sha1,
gchar *  buf,
size_t  size,
const host_addr_t  addr,
time_t  last_sent,
const gchar *  vendor,
fileinfo_t fi,
gboolean  request
 

Build alternate location header for a given SHA1 key.

We generate at most `size' bytes of data into `alt'.

Parameters:
`sha1' no brief description.
`buf' no brief description.
'size' no brief description.
`addr' is the host to which those alternate locations are meant: we skip any data pertaining to that host.
`last_sent' is the time at which we sent some previous alternate locations. If there has been no change to the mesh since then, we'll return an empty string. Otherwise we return entries inserted after `last_sent'.
`vendor' is given to determine whether it is apt to read our X-Alt and X-Nalt fields formatted with continuations or not.
`fi' when it is non-NULL, it means we're sharing that file and we're sending alternate locations to remote servers: include ourselves in the list of alternate locations if PFSP-server is enabled.
`request' if it is true, then the mesh entries are generated in an HTTP request; otherwise it's for an HTTP reply.
unless the `vendor' is GTKG, don't use continuation: most servent authors don't bother with a proper HTTP header parsing layer.

Returns:
amount of generated data.

void dmesh_ban_add const gchar *  sha1,
dmesh_urlinfo_t info,
time_t  stamp
[static]
 

Add new URL to the banned hash.

If stamp is 0, the current timestamp is used.

void dmesh_ban_expire cqueue_t unused_cq,
gpointer  obj
[static]
 

Called from callout queue when it's time to expire the URL ban.

void dmesh_ban_header_print FILE *  out  )  [static]
 

Prints header to banned mesh store file.

void dmesh_ban_prepend_list gpointer  key,
gpointer  value,
gpointer  user
[static]
 

Prepend the value to the list, given by reference.

void dmesh_ban_retrieve void   )  [static]
 

Retrieve banned mesh and add entries that have not expired yet.

The mesh is normally retrieved from ~/.gtk-gnutella/dmesh_ban.

void dmesh_ban_store void   ) 
 

Store banned mesh onto file.

The banned mesh is normally stored in ~/.gtk-gnutella/dmesh_ban.

void dmesh_ban_store_kv gpointer  key,
gpointer  value,
gpointer  udata
[static]
 

Store key/value pair in file.

void dmesh_check_deferred_against_existing gchar *  sha1,
GSList *  existing_urls,
GSList *  deferred_urls
[static]
 

This routine checks deferred dmesh entries against any existing nonurn alternative locations.

The two factors that control the quality of hits queued are:

a) Fuzzy factor of compares b) Number of the altlocs they have to match (threshold)

These factors are currently semi-empirical guesses and hardwired.

Note:
This is an O(m*n) process, when `m' is the amount of new entries and `n' the amount of existing entries.

void dmesh_check_deferred_against_themselves gchar *  sha1,
GSList *  deferred_urls
[static]
 

This routine checks deferred dmesh entries against themselves.

They have to be 100% consistent with what was being passed otherwise we are conservative a throw them away. The main factor is the fuzzy factor of the compare.

Apologies for the returns but this function doesn't need to clean up after itself yet.

void dmesh_check_deferred_altlocs gchar *  sha1,
GSList *  deferred_urls
[static]
 

This function is called once dmesh_collect_locations is done and makes it then decides how its going to vet the quality of the deferred nonurn altlocs before decing if they are going to be added for the given sha1.

A couple of approaches are taken:

a) if any non-urn urls already exist compare against them and add the ones that match a) otherwise only add urls if they are all the same(ish)

Another possible algorithm (majority wins) has been tried but empirically did allow the occasional dmesh pollution. As we are going for a non-polluted dmesh we shall be conservative. Besides using urn's for files is generally a better idea.

void dmesh_check_results_set gnet_results_set_t rs  ) 
 

Parse query hit (result set) for entries whose SHA1 match something we have into the mesh or share, and insert them if needed.

void dmesh_close void   ) 
 

Called at servent shutdown time.

void dmesh_collect_compact_locations gchar *  sha1,
gchar *  value
 

Parse the value of the "X-Alt" header to extract alternate sources for a given SHA1 key given in the new compact form.

void dmesh_collect_locations gchar *  sha1,
gchar *  value,
gboolean  defer
 

Parse value of the "X-Gnutella-Alternate-Location" to extract alternate sources for a given SHA1 key.

gboolean dmesh_collect_sha1 const gchar *  value,
gchar *  digest
 

Parse the value of the X-Gnutella-Content-URN header in `value', looking for a SHA1.

When found, the SHA1 is extracted and placed into the given `digest' buffer.

Returns:
whether we successfully extracted the SHA1.

gint dmesh_count const gchar *  sha1  ) 
 

Get the number of dmesh entries for a given SHA1.

Returns:
the number of dmesh entries

GSList* dmesh_defer_nonurn_altloc GSList *  list,
dmesh_urlinfo_t url,
time_t  stamp
[static]
 

This function defers adding a proposed alternate location that has been given as an old style string (i.e.

not a URN). After dmesh_collect_locations() has parsed all the alt locations another function (dmesh_check_deferred_altlocs) will be called to make a judgement on the quality of these results, and determine whether they should be added to the dmesh.

void dmesh_dispose const gchar *  sha1  )  [static]
 

Dispose of the entry slot, which must be empty.

gint dmesh_entry_cmp gconstpointer  a,
gconstpointer  b
[static]
 

Compare two dmesh_entry, based on the timestamp.

The greater the time stamp, the samller the entry (i.e. the more recent).

size_t dmesh_entry_compact const struct dmesh_entry dme,
gchar *  buf,
size_t  size
[static]
 

Format mesh_entry in the provided buffer, as a compact addr:port address.

The port is even omitted if it is the standard Gnutella one.

Returns:
length of formatted entry, -1 if the address would be larger than the buffer, or if no compact form can be derived for this entry (not an URN_INDEX kind).

void dmesh_entry_free struct dmesh_entry dme  )  [static]
 

Free download mesh entry.

const gchar* dmesh_entry_to_string const struct dmesh_entry dme  )  [static]
 

Format the `dme' mesh entry as "URL timestamp".

Returns:
pointer to static string.

size_t dmesh_entry_url_stamp const struct dmesh_entry dme,
gchar *  buf,
size_t  size
[static]
 

Format dmesh_entry in the provided buffer, as an URL with an appended timestamp in ISO format, GMT time.

Returns:
length of formatted entry, -1 if the URL would be larger than the buffer.

gint dmesh_fill_alternate const gchar *  sha1,
gnet_host_t hvec,
gint  hcnt
 

Fill supplied vector `hvec' whose size is `hcnt' with some alternate locations for a given SHA1 key, that can be requested by hash directly.

Returns:
the amount of locations filled.

void dmesh_fill_info dmesh_urlinfo_t info,
const gchar *  sha1,
const host_addr_t  addr,
guint16  port,
guint  idx,
gchar *  name
[static]
 

Fill URL info from externally supplied sha1, addr, port, idx and name.

When `idx' is URN_INDEX, then `name' is ignored, and we use the stringified SHA1.

void dmesh_free_deferred_altloc dmesh_deferred_url_t info,
gpointer  unused_udata
[inline, static]
 

Called to deallocate the dmesh_urlinfo and de-reference names from the GSList.

Intended to be called from a g_slist_foreach. The list itself needs to be freed afterwards

gboolean dmesh_free_kv gpointer  key,
gpointer  value,
gpointer  unused_udata
[static]
 

Free key/value pair in download mesh hash.

GSList* dmesh_get_nonurn_altlocs const gchar *  sha1  )  [static]
 

Create a list of nonurn alternative locations for a given sha1.

This is used in preference to self-consitancy checking as these urls are already "known" to be valid and hence we do not waste potentially valid urls on a "all or nothing" approach.

void dmesh_header_print FILE *  out  )  [static]
 

Prints header to dmesh store file.

void dmesh_init void   ) 
 

Initialize the download mesh.

gboolean dmesh_is_banned dmesh_urlinfo_t info  )  [static]
 

Check whether URL is banned from the mesh.

void dmesh_multiple_downloads gchar *  sha1,
filesize_t  size,
fileinfo_t fi
 

This is called when swarming is first requested to get a list of all the servers with the requested file known by dmesh.

It creates a new download for every server found.

Parameters:
`sha1' (atom) the SHA1 of the file.
`size' the original file size.
`fi' no brief description.

gboolean dmesh_raw_add gchar *  sha1,
dmesh_urlinfo_t info,
time_t  stamp
[static]
 

Add entry to the download mesh, indexed by the binary `sha1' digest.

If `stamp' is 0, then the current time is used.

If `idx' is URN_INDEX, then we can access this file only through an /uri-res request, the URN being given as `name'.

Returns:
whether the entry was added in the mesh, or was discarded because it was the oldest record and we have enough already.

gboolean dmesh_remove const gchar *  sha1,
const host_addr_t  addr,
guint16  port,
guint  idx,
gchar *  name
 

Remove entry from mesh due to a failed download attempt.

void dmesh_retrieve void   )  [static]
 

Retrieve download mesh and add entries that have not expired yet.

The mesh is normally retrieved from ~/.gtk-gnutella/dmesh.

void dmesh_store void   ) 
 

Store download mesh onto file.

The download mesh is normally stored in ~/.gtk-gnutella/dmesh.

void dmesh_store_hash const gchar *  what,
GHashTable *  hash,
const gchar *  file,
header_func_t  header_cb,
GHFunc  store_cb
[static]
 

Store hash table `hash' into `file'.

The file header is emitted by `header_cb'. The storing callback for each item is `store_cb'.

void dmesh_store_kv gpointer  key,
gpointer  value,
gpointer  udata
[static]
 

Store key/value pair in file.

gboolean dmesh_url_parse const gchar *  url,
dmesh_urlinfo_t info
 

Parse URL `url', and fill a structure `info' representing this URL.

Returns:
TRUE if OK, FALSE if we could not parse it. The variable `dmesh_url_errno' is set accordingly.

const gchar* dmesh_url_strerror dmesh_url_error_t  errnum  ) 
 

Returns:
human-readable error string corresponding to error code `errnum'.

size_t dmesh_urlinfo const dmesh_urlinfo_t info,
gchar *  buf,
size_t  len,
gboolean *  quoting
[static]
 

Format the URL described by `info' into the provided buffer `buf', which can hold `len' bytes.

Returns:
length of formatted entry, -1 if the URL would be larger than the buffer. If `quoting' is non-NULL, set it to indicate whether the formatted URL should be quoted if emitted in a header, because it contains a "," character.

void dmesh_urlinfo_free dmesh_urlinfo_t info  )  [static]
 

Free a dmesh_urlinfo_t structure.

gchar * dmesh_urlinfo_to_string const dmesh_urlinfo_t info  )  [static]
 

Format the `info' URL and return pointer to static string.

RCSID "$Id:dmesh.  c,
v 1.35 2005/11/10 23:54:15 cbiere Exp $" 
 

gint urlinfo_eq gconstpointer  a,
gconstpointer  b
[static]
 

Test equality of two URL infos.

guint urlinfo_hash gconstpointer  key  )  [static]
 

Hash a URL info.


Variable Documentation

GHashTable* ban_mesh = NULL [static]
 

If we get a "bad" URL into the mesh ("bad" = gives 404 or other error when trying to download it), we must remember it for some time and prevent it from re-entering the mesh again within that period to prevent rescheduling for download and a further failure: that would be hammering the poor host, and we're wasting our time and bandwidth.

Therefore, each time we get a "bad" URL, we insert it in a hash table. The table entry is then scheduled to be removed after some grace period occurs. The table is keyed by the dmesh_urlinfo_t, and points to a dmesh_banned structure.

The table is persisted at regular intervals.

GHashTable* ban_mesh_by_sha1 = NULL [static]
 

This table stores the banned entries by SHA1.

const gchar dmesh_ban_file[] = "dmesh_ban" [static]
 

const gchar dmesh_file[] = "dmesh" [static]
 

dmesh_url_error_t dmesh_url_errno
 

Error from dmesh_url_parse().

GHashTable* mesh = NULL [static]
 

The download mesh records all the known sources for a given SHA1.

It is implemented as a big hash table, where SHA1 are keys, each value being a struct dmesh pointer.

const gchar* const parse_errstr[] [static]
 

Initial value:

 {
    "OK",                                   
    "HTTP parsing error",                   
    "File prefix neither /uri-res nor /get",
    "Index in /get/index is reserved",      
    "No filename after /get/index",         
    "Bad URL encoding",                     
}


Generated on Sun Feb 12 10:49:59 2006 for Gtk-Gnutella by doxygen 1.3.6