Hash函数与取模运算

2019-04-13 17:34发布生成海报

站内文章 / 模拟电子

14287 0

1. hash 函数 哈希表也称散列表，是一种数据结构，它可以提供快速的插入操作和查找操作，不论有多少数据项，插入与删除只需要接近常量的时间：O(1) 时间级。但哈希表也有缺点，它是基于数组的，数组一旦被创建，就难以扩展。某些哈希表被填满时，性能急剧下降，所以程序员必须清楚表中要存储多少数据。哈希表操作的平均时间是基于统计特性而不是随机输入的期望值。哈希表的重要特性就是哈希函数。我们用一个将大数（或解释为数的字符串）映射成一个较小的、更容易管理的数的函数来达到这个目的。将一个项映射成一个较小的下标的函数称为哈希函数。需要哈希函数，是因为哈希表是基于数组的，而且关键字值的范围通常比数组容量大，关键字值通过哈希函数映射为数组的下标。哈希函数一般是通过取模运算来实现（key = tag % constant），这就是一个简单的哈希函数。但也带来了不可避免的麻烦：两个或更多的不同项可能被散列到同一个位置，引起冲突。 2. 减少冲突 2.1. 开放地址法 开放地址法：让指定的数组大小两倍于需要存储的数据量。因此，可能一半的单元是空的。当冲突发生时，一个方法是通过系统的方法找到数组的一个空位，并把这个数据填入，而不再用哈希函数得到的数组下标。在开放地址法中，如果数据不能直接放在由哈希函数得到的数组下标处，就得寻找空白位置，其中有三种简单的探测方法：线性探测法、二次探测法、再哈希法。 2.2. 链地址法 链地址法：创建一个存放数据链表的数组，数组内不直接存储数据。这样，当发生冲突的时候，新的数据项直接接到这个数组下标所指的链表中。在链地址法中，数据项的关键字值还是映射到数组下标，而数据本身保存在每个单元的链表中。当然如果链表中有许多项，存取时间变长，找到初始单元的时间为 O(1)，而搜索链表的时间与链表中的数据项数目 M 成正比，为 O(M)。因此并不希望链表太满。 3. 经验 专家发现以下哈希函数形式很好：key = (tag % constant)，其中 constant 为质数，并且小于哈希表容量。这样哈希值就分布得比较均匀，效率就不会受影响。 4. 开源项目哈希实现 hashtable.h

/*
 * A hash table (hashtab) maintains associations between
 * key values and datum values.  The type of the key values
 * and the type of the datum values is arbitrary.  The
 * functions for hash computation and key comparison are
 * provided by the creator of the table.
 *
 * Author : Stephen Smalley, <sds@epoch.ncsc.mil>
 */
/* Include guard for the hash table header. */
#ifndef _SS_HASHTAB_H_
#define _SS_HASHTAB_H_
/* Element-count ceiling: hashtab_insert refuses new entries once nel equals this value. */
#define HASHTAB_MAX_NODES       0xffffffff
/*
 * One entry in a slot's chain: a caller-supplied key/datum pointer pair
 * plus the link to the next entry hashed to the same slot.
 */
struct hashtab_node {
        void *key;                      /* key pointer, stored exactly as passed to hashtab_insert */
        void *datum;                    /* datum pointer, stored exactly as passed to hashtab_insert */
        struct hashtab_node *next;      /* next node in the same slot, or NULL at the end of the chain */
};
/*
 * The hash table itself: an array of chain heads plus the two
 * caller-supplied callbacks used to place and order entries.
 */
struct hashtab {
        struct hashtab_node **htable;   /* hash table: array of `size` chain heads */
        unsigned long size;                     /* number of slots in hash table */
        unsigned long nel;                      /* number of elements in hash table */
        unsigned long  (*hash_value)(struct hashtab *h, void *key);
                                        /* hash function: its result is used directly as an index into htable */
        int (*keycmp)(struct hashtab *h, void *key1, void *key2);
                                        /* key comparison function: 0 means equal keys; the sign orders entries within a chain */
};
/* Occupancy statistics filled in by hashtab_stat. */
struct hashtab_info {
  unsigned long slots_used;     /* number of slots whose chain is non-empty */
  unsigned long max_chain_len;  /* length of the longest chain found */
};
/*
 * Creates a new hash table with the specified characteristics.
 *
 * hash_value: callback mapping a key to a slot index.
 * keycmp:     callback comparing two keys (0 means equal).
 * size:       number of slots in the table.
 *
 * Returns NULL if insufficient space is available or
 * the new hash table otherwise.
 */
struct hashtab *hashtab_create(unsigned long (*hash_value)(struct hashtab *h, void *key),
                               int (*keycmp)(struct hashtab *h, void *key1, void *key2),
                               unsigned long size);
/*
 * Inserts the specified (key, datum) pair into the specified hash table.
 * The table stores the k and d pointers themselves; nothing is copied.
 *
 * Returns -ENOMEM on memory allocation error,
 * -EEXIST if there is already an entry with the same key,
 * -EINVAL for general errors (for example a NULL table, or a table
 *  that already holds HASHTAB_MAX_NODES entries) or
 * 0 otherwise.
 */
int hashtab_insert(struct hashtab *h, void *k, void *d);

/*
 * Deletes the entry with the specified key from the specified hash table.
 *
 * Only the datum pointer is handed back; the key pointer that was stored
 * in the entry is not returned to the caller.
 *
 * Returns
 * pointer to datum on success
 * NULL if key not found
 *
 */
void * hashtab_delete(struct hashtab *h, void *k);

/*
 * Searches for the entry with the specified key in the hash table.
 * The table is not modified.
 *
 * Returns NULL if no entry has the specified key (or if h is NULL) or
 * the datum of the entry otherwise.
 */
void *hashtab_search(struct hashtab *h, void *k);
/*
 * Destroys the specified hash table: releases every node, the slot
 * array and the table object itself.  A NULL table is ignored.
 *
 * key_free / datum_free: callbacks for releasing the stored keys and data.
 * NOTE(review): confirm in the implementation whether they are invoked.
 */
void hashtab_destroy(struct hashtab *h,
                     void (*key_free)(void *ptr),
                     void (*datum_free)(void *ptr)
                     );
/*
 * Applies the specified apply function to (key,datum,args)
 * for each entry in the specified hash table.
 *
 * The order in which the function is applied to the entries
 * is dependent upon the internal structure of the hash table.
 *
 * If apply returns a non-zero status, then hashtab_map will cease
 * iterating through the hash table and will propagate the error
 * return to its caller.  Returns 0 when every entry was visited
 * or when h is NULL.
 */
int hashtab_map(struct hashtab *h,
                int (*apply)(void *k, void *d, void *args),
                void *args);
/*
 * Fill info with some hash table statistics: the number of non-empty
 * slots and the length of the longest chain.
 */
void hashtab_stat(struct hashtab *h, struct hashtab_info *info);
/*
 * Prints the nodes of every slot of the hash table, followed by the
 * total entry count, to standard output.
 *
 * print: callback taking an entry's key and data.
 * NOTE(review): confirm in the implementation how it is used.
 */
void hashtab_print(struct hashtab *h, void (*print)(void *key, void *data));
/*
 * Iterate through the hashtable.
 *
 * Protocol: pass iterator == NULL to start; hashtab_iterate allocates an
 * iterator and returns it positioned on the first entry.  Pass the returned
 * iterator back in to advance to the next entry.  When no entry is left the
 * function frees the iterator and returns NULL, so the iterator must not be
 * used (or freed again) after a NULL return.  A caller that stops early still
 * owns the iterator it last received.
 */
typedef struct hashtab_iterator {
  int slot_id;                      /* index of the slot that node_ptr belongs to */
  struct hashtab_node *node_ptr;    /* entry the iterator currently points at */
} hashtab_iterator;
hashtab_iterator * hashtab_iterate(struct hashtab *h, hashtab_iterator *iterator);
#endif  /* _SS_HASHTAB_H_ */

hashtable.c

/*
 * Implementation of the hash table type.
 *
 * Author : Stephen Smalley, <sds@epoch.ncsc.mil>
 */
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <hashtable.h>
/*
 * Allocates and initialises an empty hash table.
 *
 * hash_value: maps a key to a slot index; the result is used directly as
 *             an index into the slot array, so it must be below size.
 * keycmp:     key comparison; 0 means equal, and the sign decides where
 *             an entry sits within its chain.
 * size:       number of slots to allocate.
 *
 * Returns the new table, or NULL when memory cannot be obtained or when
 * size is so large that the slot array's byte count would overflow.
 */
struct hashtab *hashtab_create(unsigned long (*hash_value)(struct hashtab *h, void *key),
                               int (*keycmp)(struct hashtab *h, void *key1, void *key2),
                               unsigned long size)
{
  struct hashtab *table;
  unsigned long slot;

  /*
   * Refuse sizes whose slot array cannot be expressed in a size_t: the
   * unchecked product would wrap, under-allocate, and the initialisation
   * loop below would then write past the end of the allocation.
   */
  if (size > (size_t)-1 / sizeof(*table->htable))
    return NULL;

  table = malloc(sizeof(*table));
  if (table == NULL)
    return NULL;

  table->htable = malloc(sizeof(*table->htable) * size);
  if (table->htable == NULL) {
    perror("malloc");
    free(table);
    return NULL;
  }

  /* Every chain starts out empty. */
  for (slot = 0; slot < size; slot++)
    table->htable[slot] = NULL;

  table->size = size;
  table->nel = 0;
  table->hash_value = hash_value;
  table->keycmp = keycmp;
  return table;
}
/*
 * Inserts a (key, datum) pair, keeping the slot's chain sorted in
 * ascending keycmp order.  The pointers are stored as given; nothing
 * is copied.
 *
 * Returns 0 on success,
 * -EINVAL if h is NULL, the table already holds HASHTAB_MAX_NODES
 *         entries, or the hash callback yields an index outside the table,
 * -EEXIST if an entry with an equal key is already present,
 * -ENOMEM if the new node cannot be allocated.
 */
int hashtab_insert(struct hashtab *h, void *key, void *datum)
{
  unsigned long hvalue;
  struct hashtab_node **link, *newnode;

  if (!h || h->nel == HASHTAB_MAX_NODES)
    return -EINVAL;

  hvalue = h->hash_value(h, key);
  /* A misbehaving hash callback must not index past the slot array. */
  if (hvalue >= h->size)
    return -EINVAL;

  /*
   * Walk the chain through the address of each link, so the head slot
   * and interior nodes are handled by the same splice code.  Stop at
   * the first entry that does not sort before the new key.
   */
  for (link = &h->htable[hvalue]; *link != NULL; link = &(*link)->next) {
    int order = h->keycmp(h, key, (*link)->key);

    if (order == 0)
      return -EEXIST;
    if (order < 0)
      break;
  }

  newnode = malloc(sizeof(*newnode));
  if (newnode == NULL)
    return -ENOMEM;

  newnode->key = key;
  newnode->datum = datum;
  newnode->next = *link;
  *link = newnode;
  h->nel++;
  return 0;
}
/*
 * Removes the entry whose key compares equal to `key` and releases its node.
 *
 * Returns the datum pointer that was stored in the entry, or NULL when h
 * is NULL, the hash callback yields an index outside the table, or no
 * entry matches.  The stored key pointer is not returned; a missing key
 * is an ordinary outcome and produces no diagnostic output.
 */
void * hashtab_delete(struct hashtab *h, void *key)
{
  unsigned long hvalue;
  struct hashtab_node **link;

  if (!h)
    return NULL;

  hvalue = h->hash_value(h, key);
  /* A misbehaving hash callback must not index past the slot array. */
  if (hvalue >= h->size)
    return NULL;

  /*
   * Walk the chain through the address of each link, so the head slot
   * and interior nodes are unlinked (and freed) by the same code path.
   */
  for (link = &h->htable[hvalue]; *link != NULL; link = &(*link)->next) {
    struct hashtab_node *victim = *link;

    if (h->keycmp(h, key, victim->key) == 0) {
      void *datum = victim->datum;

      *link = victim->next;
      free(victim);
      h->nel--;
      return datum;
    }
  }
  return NULL;
}
/*
 * Looks up the entry whose key compares equal to `key`.
 *
 * The chain is scanned only while keycmp reports that `key` sorts after
 * the current entry; this relies on the chain order that hashtab_insert
 * maintains.
 *
 * Returns the entry's datum pointer, or NULL when h is NULL, the hash
 * callback yields an index outside the table, or no entry matches.
 */
void *hashtab_search(struct hashtab *h, void *key)
{
  unsigned long hvalue;
  struct hashtab_node *cur;

  if (!h)
    return NULL;

  hvalue = h->hash_value(h, key);
  /* A misbehaving hash callback must not index past the slot array. */
  if (hvalue >= h->size)
    return NULL;

  for (cur = h->htable[hvalue]; cur != NULL; cur = cur->next) {
    int order = h->keycmp(h, key, cur->key);

    if (order == 0)
      return cur->datum;
    if (order < 0)
      break;      /* every later entry sorts after the key too */
  }
  return NULL;
}
/*
 * Destroys the table: releases every node, the slot array and the table
 * object itself.  A NULL table is ignored.
 *
 * key_free:   if non-NULL, called once with each stored key pointer.
 * datum_free: if non-NULL, called once with each stored datum pointer.
 *
 * Pass NULL for a callback when the caller keeps ownership of the
 * corresponding pointers.
 */
void hashtab_destroy(struct hashtab *h,
                     void (*key_free)(void *ptr),
                     void (*datum_free)(void *ptr)
                     )
{
  unsigned long i;
  struct hashtab_node *cur, *temp;

  if (!h)
    return;

  for (i = 0; i < h->size; i++) {
    cur = h->htable[i];
    while (cur != NULL) {
      temp = cur;
      cur = cur->next;      /* read the link before the node is released */
      if (key_free)
        key_free(temp->key);
      if (datum_free)
        datum_free(temp->datum);
      free(temp);
    }
    h->htable[i] = NULL;
  }
  free(h->htable);
  h->htable = NULL;
  free(h);
}
/*
 * Calls apply(key, datum, args) for every entry, slot by slot and in
 * chain order within a slot.
 *
 * Stops at the first non-zero status returned by apply and hands that
 * status back; returns 0 after a complete pass or when h is NULL.
 */
int hashtab_map(struct hashtab *h,
                int (*apply)(void *k, void *d, void *args),
                void *args)
{
  unsigned long slot = 0;

  if (h == NULL)
    return 0;

  while (slot < h->size) {
    struct hashtab_node *node;

    for (node = h->htable[slot]; node != NULL; node = node->next) {
      int status = apply(node->key, node->datum, args);

      if (status != 0)
        return status;
    }
    slot++;
  }
  return 0;
}

/*
 * Fills info with occupancy statistics: the number of non-empty slots
 * and the length of the longest chain.
 *
 * Does nothing when info is NULL; reports zeros when h is NULL.
 */
void hashtab_stat(struct hashtab *h, struct hashtab_info *info)
{
  unsigned long i;
  unsigned long slots_used = 0;
  unsigned long max_chain_len = 0;

  if (!info)
    return;

  if (h) {
    for (i = 0; i < h->size; i++) {
      struct hashtab_node *cur = h->htable[i];
      unsigned long chain_len = 0;

      if (!cur)
        continue;     /* empty slot: contributes to neither statistic */

      slots_used++;
      for (; cur; cur = cur->next)
        chain_len++;
      if (chain_len > max_chain_len)
        max_chain_len = chain_len;
    }
  }

  info->slots_used = slots_used;
  info->max_chain_len = max_chain_len;
}
/*
 * Dumps the table to standard output: one line per slot listing the
 * address of every node in that slot's chain, then a final line with
 * the total number of entries.  A NULL table prints nothing.
 *
 * print: if non-NULL, called with each entry's key and datum right after
 *        that node's address has been printed, so the caller can render
 *        the entry's contents.
 */
void hashtab_print(struct hashtab *h, void (*print)(void *key, void *data)) {
  unsigned long i;
  unsigned long count = 0;
  struct hashtab_node *cur;

  if (!h)
    return;

  for (i = 0; i < h->size; i++) {
    printf("SLOT [%lu]:", i);
    for (cur = h->htable[i]; cur != NULL; cur = cur->next) {
      /* %p with a void * is the portable way to print an address. */
      printf("[%p]", (void *) cur);
      if (print)
        print(cur->key, cur->datum);
      count++;
    }
    printf("\n");
  }
  fprintf(stdout, "Total items: %lu\n", count);
}

/*
 * Advances an iteration over every entry of the table.
 *
 * Pass iterator == NULL to begin: an iterator is allocated and returned
 * positioned on the first entry.  Pass the returned iterator back in to
 * move to the next entry.
 *
 * Returns the iterator positioned on the next entry, or NULL when no
 * entry is left, when h is NULL, or when the iterator cannot be
 * allocated.  On every NULL return the iterator passed in has been
 * freed and must not be used again.  A caller that abandons the
 * iteration early still owns the iterator it last received.
 */
hashtab_iterator * hashtab_iterate(struct hashtab *h, hashtab_iterator *iterator) {
  struct hashtab_node *cur;
  unsigned long level;

  if (h == NULL) {
    free(iterator);
    return NULL;
  }

  if (iterator == NULL) {
    iterator = malloc(sizeof(*iterator));
    if (iterator == NULL)
      return NULL;
    iterator->slot_id = 0;
    iterator->node_ptr = NULL;
  }

  /* A negative slot_id converts to a huge value and ends the iteration. */
  level = (unsigned long) iterator->slot_id;

  /* find the next node */
  if (iterator->node_ptr != NULL)
    cur = iterator->node_ptr->next;
  else
    cur = (level < h->size) ? h->htable[level] : NULL;

  while (level < h->size) {
    if (cur != NULL) {
      iterator->slot_id = (int) level;
      iterator->node_ptr = cur;
      return iterator;
    }
    /* Current chain exhausted: move on to the head of the next slot. */
    level++;
    cur = (level < h->size) ? h->htable[level] : NULL;
  }

  free(iterator);
  return NULL;
}

/***********************************************
*           Driver
************************************************/
/*
 * Hash callback used by the driver: treats the key pointer itself as an
 * integer, keeps its low 32 bits and reduces them modulo 257.
 * The table argument is unused.
 *
 * NOTE(review): the result is always in [0, 256] regardless of the
 * table's slot count, and the parameter type is struct hashtable * while
 * the callbacks declared for hashtab_create take struct hashtab * and
 * return unsigned long - confirm which table API this driver targets.
 */
static uint32_t hash(struct hashtable *h, void * key)
{
    enum { HASH_MODULUS = 257 };
    /* uintptr_t is the integer type meant for pointer round-trips. */
    uintptr_t addr = (uintptr_t)key;

    (void)h;
    return (uint32_t)addr % HASH_MODULUS;
}
/*
 * Key comparison callback used by the driver: orders keys by the signed
 * integer value of the pointers themselves.  The table argument is unused.
 *
 * Returns a negative value, zero or a positive value when key1 is
 * respectively below, equal to or above key2.  The result is computed
 * from comparisons rather than a subtraction narrowed to int, which could
 * overflow or truncate to 0 for two different keys.
 */
static int cmp(struct hashtable *h, void * key1, void * key2)
{
    intptr_t lhs = (intptr_t)key1;
    intptr_t rhs = (intptr_t)key2;

    (void)h;
    return (lhs > rhs) - (lhs < rhs);
}

Hash函数与取模运算

Ta的文章更多 >>

热门文章

Hash函数与取模运算

Ta的文章 更多 >>

热门文章

举报内容

检举类型

检举原因

检举说明(必填)

打开微信“扫一扫”，打开网页后点击屏幕右上角分享按钮

Ta的文章更多 >>