在start_kernel函数中调用了这个函数:build_all_zonelists();它的实现在mm/page_alloc.c中:
void __meminit build_all_zonelists(void)
{
	if (system_state == SYSTEM_BOOTING) {
		__build_all_zonelists(NULL);
		cpuset_init_current_mems_allowed();
	} else {
		///* we have to stop all cpus to guarantee there is no user
		//of zonelist */
		//stop_machine_run(__build_all_zonelists, NULL, NR_CPUS);
		///* cpuset refresh routine should be here */
		WARN();
	}
	vm_total_pages = nr_free_pagecache_pages();
	printk("Built %i zonelists. Total pages: %ld\n",
		num_online_nodes(), vm_total_pages);
}
对于这个函数,只会在start_kernel中调用一次,此时system_state == SYSTEM_BOOTING。当然,如果需要支持memory_hot_plug,还会有另外的调用,在此忽略它,因此实际只会执行if的第一个分支。在这个函数中对cpuset_init_current_mems_allowed的调用什么事也不做。
static void __meminit build_zonelists(pg_data_t *pgdat)
{
	int node, local_node;
	enum zone_type i,j;

	local_node = pgdat->node_id;
	for (i = 0; i < MAX_NR_ZONES; i++) {
		struct zonelist *zonelist;

		zonelist = pgdat->node_zonelists + i;
		j = build_zonelists_node(pgdat, zonelist, 0, i);
		/*
		 * Now we build the zonelist so that it contains the zones
		 * of all the other nodes.
		 * We don't want to pressure a particular node, so when
		 * building the zones for node N, we make sure that the
		 * zones coming right after the local ones are those from
		 * node N+1 (modulo N)
		 */
		for (node = local_node + 1; node < MAX_NUMNODES; node++) {
			if (!node_online(node))
				continue;
			j = build_zonelists_node(NODE_DATA(node), zonelist, j, i);
		}
		for (node = 0; node < local_node; node++) {
			if (!node_online(node))
				continue;
			j = build_zonelists_node(NODE_DATA(node), zonelist, j, i);
		}
		zonelist->zones[j] = NULL;
	}
}
在这个函数中有
#define NODES_SHIFT	0
#define MAX_NUMNODES	(1 << NODES_SHIFT)
而local_node的值为0。因此这个函数实际上就相当于:
static void __meminit build_zonelists(pg_data_t *pgdat)
{
	int node, local_node;
	enum zone_type i,j;

	local_node = pgdat->node_id;
	for (i = 0; i < MAX_NR_ZONES; i++) {
		struct zonelist *zonelist;

		zonelist = pgdat->node_zonelists + i;
		j = build_zonelists_node(pgdat, zonelist, 0, i);
		zonelist->zones[j] = NULL;
	}
}
实际上,我们只要关心build_zonelists_node函数就行了。
/*
 * Builds allocation fallback zone lists.
 *
 * Add all populated zones of a node to the zonelist.
 */
static int __meminit build_zonelists_node(pg_data_t *pgdat,
		struct zonelist *zonelist, int nr_zones, enum zone_type zone_type)
{
	struct zone *zone;

	BUG_ON(zone_type >= MAX_NR_ZONES);
	zone_type++;
	do {
		zone_type--;
		zone = pgdat->node_zones + zone_type;
		if (populated_zone(zone)) {	// 只要present_pages不为0,则此条件为真
			zonelist->zones[nr_zones++] = zone;
			check_highest_zone(zone_type);	// 空调用
		}
	} while (zone_type);
	return nr_zones;
}
在内核中有两个ZONE,ZONE_DMA和ZONE_NORMAL,但是ZONE_NORMAL的内存大小为0,其present_pages也为0,因此在初始化后,zonelist->zones数组实际只有一个元素,它指向ZONE_DMA,即contig_page_data.node_zones[0]。
nr_free_pagecache_pages函数的实现为:
/*
 * Amount of free RAM allocatable within all zones
 */
unsigned int nr_free_pagecache_pages(void)
{
	return nr_free_zone_pages(gfp_zone(GFP_HIGHUSER));
}
在这里gfp_zone(GFP_HIGHUSER)将返回GFP_HIGHUSER所在的内存区域,因为内核只使用ZONE_DMA,故这个调用返回0,即ZONE_DMA。下面看看nr_free_zone_pages的实现:
static unsigned int nr_free_zone_pages(int offset)
{
	/* Just pick one node, since fallback list is circular */
	pg_data_t *pgdat = NODE_DATA(numa_node_id());
	unsigned int sum = 0;
	struct zonelist *zonelist = pgdat->node_zonelists + offset;
	struct zone **zonep = zonelist->zones;
	struct zone *zone;

	for (zone = *zonep++; zone; zone = *zonep++) {
		unsigned long size = zone->present_pages;
		unsigned long high = zone->pages_high;
		if (size > high)
			sum += size - high;
	}
	return sum;
}
传递进来的参数为0,而且我们知道zonelist->zones实际只有一个元素,且指向pgdat->node_zones[0],即ZONE_DMA的描述结构zone。因而这个函数的功能就简单了,就是返回ZONE_DMA中present_pages超出pages_high的那部分页数,即可供分配的空闲页数。对于64M内存(限制为60M),其值将为0x3b6a。