#include "common.h"
#include "gnutella.h"
#include "downloads.h"
#include "uploads.h"
#include "dmesh.h"
#include "huge.h"
#include "http.h"
#include "hostiles.h"
#include "guid.h"
#include "share.h"
#include "fileinfo.h"
#include "settings.h"
#include "hosts.h"
#include "if/gnet_property_priv.h"
#include "lib/atoms.h"
#include "lib/base32.h"
#include "lib/cq.h"
#include "lib/endian.h"
#include "lib/file.h"
#include "lib/fuzzy.h"
#include "lib/getdate.h"
#include "lib/glib-missing.h"
#include "lib/header.h"
#include "lib/tm.h"
#include "lib/url.h"
#include "lib/urn.h"
#include "lib/walloc.h"
#include "lib/override.h"
Data Structures | |
struct | dmesh |
struct | dmesh_entry |
struct | dmesh_banned |
struct | dmesh_deferred_url_t |
A simple container for the dmesh info that the deferred checking code needs, although to be honest it may be worth refactoring the dmesh code so it all works on dmesh_entries? More... | |
Defines | |
#define | MAX_LIFETIME 86400 /**< 1 day */ |
1 day | |
#define | MAX_ENTRIES 64 /**< Max amount of entries kept in list */ |
Max amount of entries kept in list. | |
#define | MAX_STAMP ((time_t) -1) |
#define | MIN_PFSP_SIZE 524288 /**< 512K, min size for PFSP advertising */ |
512K, min size for PFSP advertising | |
#define | MIN_PFSP_PCT 10 /**< 10%, min available data for PFSP */ |
10%, min available data for PFSP | |
#define | FUZZY_DROP ((60 << FUZZY_SHIFT) / 100) |
If not at least 60% alike, dump! | |
#define | FUZZY_MATCH ((80 << FUZZY_SHIFT) / 100) |
If more than 80% alike, equal! | |
#define | BAN_LIFETIME 7200 /**< 2 hours */ |
2 hours | |
#define | DMESH_MAX MAX_ENTRIES |
Typedefs | |
typedef void(* | header_func_t )(FILE *out) |
Functions | |
RCSID ("$Id:dmesh.c, v 1.35 2005/11/10 23:54:15 cbiere Exp $") | |
void | dmesh_retrieve (void) |
Retrieve download mesh and add entries that have not expired yet. | |
void | dmesh_ban_retrieve (void) |
Retrieve banned mesh and add entries that have not expired yet. | |
gchar * | dmesh_urlinfo_to_string (const dmesh_urlinfo_t *info) |
Format the `info' URL and return pointer to static string. | |
guint | urlinfo_hash (gconstpointer key) |
Hash a URL info. | |
gint | urlinfo_eq (gconstpointer a, gconstpointer b) |
Test equality of two URL infos. | |
void | dmesh_init (void) |
Initialize the download mesh. | |
gint | dmesh_entry_cmp (gconstpointer a, gconstpointer b) |
Compare two dmesh_entry, based on the timestamp. | |
void | dmesh_entry_free (struct dmesh_entry *dme) |
Free download mesh entry. | |
void | dmesh_urlinfo_free (dmesh_urlinfo_t *info) |
Free a dmesh_urlinfo_t structure. | |
void | dmesh_ban_expire (cqueue_t *unused_cq, gpointer obj) |
Called from callout queue when it's time to expire the URL ban. | |
void | dmesh_ban_add (const gchar *sha1, dmesh_urlinfo_t *info, time_t stamp) |
Add new URL to the banned hash. | |
gboolean | dmesh_is_banned (dmesh_urlinfo_t *info) |
Check whether URL is banned from the mesh. | |
const gchar * | dmesh_url_strerror (dmesh_url_error_t errnum) |
gboolean | dmesh_url_parse (const gchar *url, dmesh_urlinfo_t *info) |
Parse URL `url', and fill a structure `info' representing this URL. | |
void | dm_expire (struct dmesh *dm, guint32 agemax, const gchar *sha1) |
Expire entries older than `agemax' in a given mesh bucket `dm'. | |
gboolean | dm_remove (struct dmesh *dm, const host_addr_t addr, guint16 port, guint idx, const gchar *name, time_t stamp) |
Remove specified entry from mesh bucket, if it is older than `stamp'. | |
void | dmesh_dispose (const gchar *sha1) |
Dispose of the entry slot, which must be empty. | |
void | dmesh_fill_info (dmesh_urlinfo_t *info, const gchar *sha1, const host_addr_t addr, guint16 port, guint idx, gchar *name) |
Fill URL info from externally supplied sha1, addr, port, idx and name. | |
gboolean | dmesh_remove (const gchar *sha1, const host_addr_t addr, guint16 port, guint idx, gchar *name) |
Remove entry from mesh due to a failed download attempt. | |
gint | dmesh_count (const gchar *sha1) |
Get the number of dmesh entries for a given SHA1. | |
gboolean | dmesh_raw_add (gchar *sha1, dmesh_urlinfo_t *info, time_t stamp) |
Add entry to the download mesh, indexed by the binary `sha1' digest. | |
gboolean | dmesh_add (gchar *sha1, const host_addr_t addr, guint16 port, guint idx, gchar *name, time_t stamp) |
Same as dmesh_raw_add(), but this is for public consumption. | |
size_t | dmesh_urlinfo (const dmesh_urlinfo_t *info, gchar *buf, size_t len, gboolean *quoting) |
Format the URL described by `info' into the provided buffer `buf', which can hold `len' bytes. | |
size_t | dmesh_entry_compact (const struct dmesh_entry *dme, gchar *buf, size_t size) |
Format mesh_entry in the provided buffer, as a compact addr:port address. | |
size_t | dmesh_entry_url_stamp (const struct dmesh_entry *dme, gchar *buf, size_t size) |
Format dmesh_entry in the provided buffer, as an URL with an appended timestamp in ISO format, GMT time. | |
const gchar * | dmesh_entry_to_string (const struct dmesh_entry *dme) |
Format the `dme' mesh entry as "URL timestamp". | |
gint | dmesh_fill_alternate (const gchar *sha1, gnet_host_t *hvec, gint hcnt) |
Fill supplied vector `hvec' whose size is `hcnt' with some alternate locations for a given SHA1 key, that can be requested by hash directly. | |
gint | dmesh_alternate_location (const gchar *sha1, gchar *buf, size_t size, const host_addr_t addr, time_t last_sent, const gchar *vendor, fileinfo_t *fi, gboolean request) |
Build alternate location header for a given SHA1 key. | |
GSList * | dmesh_get_nonurn_altlocs (const gchar *sha1) |
Create a list of nonurn alternative locations for a given sha1. | |
GSList * | dmesh_defer_nonurn_altloc (GSList *list, dmesh_urlinfo_t *url, time_t stamp) |
This function defers adding a proposed alternate location that has been given as an old style string (i.e. | |
void | dmesh_free_deferred_altloc (dmesh_deferred_url_t *info, gpointer unused_udata) |
Called to deallocate the dmesh_urlinfo and de-reference names from the GSList. | |
void | dmesh_check_deferred_against_existing (gchar *sha1, GSList *existing_urls, GSList *deferred_urls) |
This routine checks deferred dmesh entries against any existing nonurn alternative locations. | |
void | dmesh_check_deferred_against_themselves (gchar *sha1, GSList *deferred_urls) |
This routine checks deferred dmesh entries against themselves. | |
void | dmesh_check_deferred_altlocs (gchar *sha1, GSList *deferred_urls) |
This function is called once dmesh_collect_locations is done and makes it then decides how its going to vet the quality of the deferred nonurn altlocs before decing if they are going to be added for the given sha1. | |
gboolean | dmesh_collect_sha1 (const gchar *value, gchar *digest) |
Parse the value of the X-Gnutella-Content-URN header in `value', looking for a SHA1. | |
void | dmesh_collect_compact_locations (gchar *sha1, gchar *value) |
Parse the value of the "X-Alt" header to extract alternate sources for a given SHA1 key given in the new compact form. | |
void | dmesh_collect_locations (gchar *sha1, gchar *value, gboolean defer) |
Parse value of the "X-Gnutella-Alternate-Location" to extract alternate sources for a given SHA1 key. | |
gint | dmesh_alt_loc_fill (const gchar *sha1, dmesh_urlinfo_t *buf, gint count) |
Fill buffer with at most `count' alternative locations for sha1. | |
void | dmesh_check_results_set (gnet_results_set_t *rs) |
Parse query hit (result set) for entries whose SHA1 match something we have into the mesh or share, and insert them if needed. | |
void | dmesh_multiple_downloads (gchar *sha1, filesize_t size, fileinfo_t *fi) |
This is called when swarming is first requested to get a list of all the servers with the requested file known by dmesh. | |
void | dmesh_store_kv (gpointer key, gpointer value, gpointer udata) |
Store key/value pair in file. | |
void | dmesh_store_hash (const gchar *what, GHashTable *hash, const gchar *file, header_func_t header_cb, GHFunc store_cb) |
Store hash table `hash' into `file'. | |
void | dmesh_header_print (FILE *out) |
Prints header to dmesh store file. | |
void | dmesh_store (void) |
Store download mesh onto file. | |
void | dmesh_ban_store_kv (gpointer key, gpointer value, gpointer udata) |
Store key/value pair in file. | |
void | dmesh_ban_header_print (FILE *out) |
Prints header to banned mesh store file. | |
void | dmesh_ban_store (void) |
Store banned mesh onto file. | |
gboolean | dmesh_free_kv (gpointer key, gpointer value, gpointer unused_udata) |
Free key/value pair in download mesh hash. | |
void | dmesh_ban_prepend_list (gpointer key, gpointer value, gpointer user) |
Prepend the value to the list, given by reference. | |
void | dmesh_close (void) |
Called at servent shutdown time. | |
Variables | |
dmesh_url_error_t | dmesh_url_errno |
Error from dmesh_url_parse(). | |
GHashTable * | mesh = NULL |
The download mesh records all the known sources for a given SHA1. | |
const gchar | dmesh_file [] = "dmesh" |
GHashTable * | ban_mesh = NULL |
If we get a "bad" URL into the mesh ("bad" = gives 404 or other error when trying to download it), we must remember it for some time and prevent it from re-entering the mesh again within that period to prevent rescheduling for download and a further failure: that would be hammering the poor host, and we're wasting our time and bandwidth. | |
GHashTable * | ban_mesh_by_sha1 = NULL |
This table stores the banned entries by SHA1. | |
const gchar | dmesh_ban_file [] = "dmesh_ban" |
const gchar *const | parse_errstr [] |
|
2 hours
|
|
|
|
If not at least 60% alike, dump!
|
|
If more than 80% alike, equal!
|
|
Max amount of entries kept in list.
|
|
1 day
|
|
|
|
10%, min available data for PFSP
|
|
512K, min size for PFSP advertising
|
|
|
|
Expire entries older than `agemax' in a given mesh bucket `dm'. `sha1' is only passed in case we want to log the removal. |
|
Remove specified entry from mesh bucket, if it is older than `stamp'.
|
|
Same as dmesh_raw_add(), but this is for public consumption.
|
|
Fill buffer with at most `count' alternative locations for sha1.
|
|
Build alternate location header for a given SHA1 key. We generate at most `size' bytes of data into `alt'.
|
|
Add new URL to the banned hash. If stamp is 0, the current timestamp is used. |
|
Called from callout queue when it's time to expire the URL ban.
|
|
Prints header to banned mesh store file.
|
|
Prepend the value to the list, given by reference.
|
|
Retrieve banned mesh and add entries that have not expired yet. The mesh is normally retrieved from ~/.gtk-gnutella/dmesh_ban. |
|
Store banned mesh onto file. The banned mesh is normally stored in ~/.gtk-gnutella/dmesh_ban. |
|
Store key/value pair in file.
|
|
This routine checks deferred dmesh entries against any existing nonurn alternative locations. The two factors that control the quality of hits queued are: a) Fuzzy factor of compares b) Number of the altlocs they have to match (threshold) These factors are currently semi-empirical guesses and hardwired.
|
|
This routine checks deferred dmesh entries against themselves. They have to be 100% consistent with what was being passed otherwise we are conservative a throw them away. The main factor is the fuzzy factor of the compare. Apologies for the returns but this function doesn't need to clean up after itself yet. |
|
This function is called once dmesh_collect_locations is done and makes it then decides how its going to vet the quality of the deferred nonurn altlocs before decing if they are going to be added for the given sha1. A couple of approaches are taken: a) if any non-urn urls already exist compare against them and add the ones that match a) otherwise only add urls if they are all the same(ish) Another possible algorithm (majority wins) has been tried but empirically did allow the occasional dmesh pollution. As we are going for a non-polluted dmesh we shall be conservative. Besides using urn's for files is generally a better idea. |
|
Parse query hit (result set) for entries whose SHA1 match something we have into the mesh or share, and insert them if needed.
|
|
Called at servent shutdown time.
|
|
Parse the value of the "X-Alt" header to extract alternate sources for a given SHA1 key given in the new compact form.
|
|
Parse value of the "X-Gnutella-Alternate-Location" to extract alternate sources for a given SHA1 key.
|
|
Parse the value of the X-Gnutella-Content-URN header in `value', looking for a SHA1. When found, the SHA1 is extracted and placed into the given `digest' buffer.
|
|
Get the number of dmesh entries for a given SHA1.
|
|
This function defers adding a proposed alternate location that has been given as an old style string (i.e. not a URN). After dmesh_collect_locations() has parsed all the alt locations another function (dmesh_check_deferred_altlocs) will be called to make a judgement on the quality of these results, and determine whether they should be added to the dmesh. |
|
Dispose of the entry slot, which must be empty.
|
|
Compare two dmesh_entry, based on the timestamp. The greater the time stamp, the samller the entry (i.e. the more recent). |
|
Format mesh_entry in the provided buffer, as a compact addr:port address. The port is even omitted if it is the standard Gnutella one.
|
|
Free download mesh entry.
|
|
Format the `dme' mesh entry as "URL timestamp".
|
|
Format dmesh_entry in the provided buffer, as an URL with an appended timestamp in ISO format, GMT time.
|
|
Fill supplied vector `hvec' whose size is `hcnt' with some alternate locations for a given SHA1 key, that can be requested by hash directly.
|
|
Fill URL info from externally supplied sha1, addr, port, idx and name. When `idx' is URN_INDEX, then `name' is ignored, and we use the stringified SHA1. |
|
Called to deallocate the dmesh_urlinfo and de-reference names from the GSList. Intended to be called from a g_slist_foreach. The list itself needs to be freed afterwards |
|
Free key/value pair in download mesh hash.
|
|
Create a list of nonurn alternative locations for a given sha1. This is used in preference to self-consitancy checking as these urls are already "known" to be valid and hence we do not waste potentially valid urls on a "all or nothing" approach. |
|
Prints header to dmesh store file.
|
|
Initialize the download mesh.
|
|
Check whether URL is banned from the mesh.
|
|
This is called when swarming is first requested to get a list of all the servers with the requested file known by dmesh. It creates a new download for every server found.
|
|
Add entry to the download mesh, indexed by the binary `sha1' digest. If `stamp' is 0, then the current time is used. If `idx' is URN_INDEX, then we can access this file only through an /uri-res request, the URN being given as `name'.
|
|
Remove entry from mesh due to a failed download attempt.
|
|
Retrieve download mesh and add entries that have not expired yet. The mesh is normally retrieved from ~/.gtk-gnutella/dmesh. |
|
Store download mesh onto file. The download mesh is normally stored in ~/.gtk-gnutella/dmesh. |
|
Store hash table `hash' into `file'. The file header is emitted by `header_cb'. The storing callback for each item is `store_cb'. |
|
Store key/value pair in file.
|
|
Parse URL `url', and fill a structure `info' representing this URL.
|
|
|
|
Format the URL described by `info' into the provided buffer `buf', which can hold `len' bytes.
|
|
Free a dmesh_urlinfo_t structure.
|
|
Format the `info' URL and return pointer to static string.
|
|
|
|
Test equality of two URL infos.
|
|
Hash a URL info.
|
|
If we get a "bad" URL into the mesh ("bad" = gives 404 or other error when trying to download it), we must remember it for some time and prevent it from re-entering the mesh again within that period to prevent rescheduling for download and a further failure: that would be hammering the poor host, and we're wasting our time and bandwidth. Therefore, each time we get a "bad" URL, we insert it in a hash table. The table entry is then scheduled to be removed after some grace period occurs. The table is keyed by the dmesh_urlinfo_t, and points to a dmesh_banned structure. The table is persisted at regular intervals. |
|
This table stores the banned entries by SHA1.
|
|
|
|
|
|
Error from dmesh_url_parse().
|
|
The download mesh records all the known sources for a given SHA1. It is implemented as a big hash table, where SHA1 are keys, each value being a struct dmesh pointer. |
|
Initial value: { "OK", "HTTP parsing error", "File prefix neither /uri-res nor /get", "Index in /get/index is reserved", "No filename after /get/index", "Bad URL encoding", } |