diff --git a/nsd-mem.c b/nsd-mem.c index 6d0ec264e..4c6362de7 100644 --- a/nsd-mem.c +++ b/nsd-mem.c @@ -10,6 +10,7 @@ #include "config.h" #include +#include <ctype.h> #include #include #include @@ -45,6 +46,8 @@ struct zone_mem { size_t data; /* unused space (in db.region) due to alignment */ size_t data_unused; + /* space in recycle_bin */ + size_t recycle_bin; /* count of number of domains */ size_t domaincount; @@ -56,6 +59,8 @@ struct tot_mem { size_t data; /* unused space (in db.region) due to alignment */ size_t data_unused; + /* space in recycle_bin */ + size_t recycle_bin; /* count of number of domains */ size_t domaincount; @@ -64,6 +69,8 @@ struct tot_mem { size_t opt_data; /* unused in options region */ size_t opt_unused; + /* space in recycle_bin */ + size_t opt_recycle_bin; /* dname compression table */ size_t compresstable; #ifdef RATELIMIT @@ -80,29 +87,55 @@ account_zone(struct namedb* db, struct zone_mem* zmem) { zmem->data = region_get_mem(db->region); zmem->data_unused = region_get_mem_unused(db->region); + zmem->recycle_bin = region_get_recycle_size(db->region); zmem->domaincount = domain_table_count(db->domains); } +static char* +pretty_num(size_t x) +{ + static char buf[32]; + memset(buf, 0, sizeof(buf)); + if(snprintf(buf, sizeof(buf), "%12lld", (long long)x) <= 12) { + snprintf(buf, sizeof(buf), "%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c", + buf[0], buf[1], buf[2], (buf[2]==' '?' ':'.'), + buf[3], buf[4], buf[5], (buf[5]==' '?' ':'.'), + buf[6], buf[7], buf[8], (buf[8]==' '?' 
':'.'), + buf[9], buf[10], buf[11]); + } + return buf; +} + +static const char* +skip_ws(const char* str) +{ + while (isspace((unsigned char)*str)) + str++; + return str; +} + static void pretty_mem(size_t x, const char* s) { - char buf[32]; - memset(buf, 0, sizeof(buf)); - if(snprintf(buf, sizeof(buf), "%12lld", (long long)x) > 12) { - printf("%12lld %s\n", (long long)x, s); - return; + printf("%s %s\n", pretty_num(x), s); +} + +static void +pretty_mem_recycle_bin(size_t x, const char* s, size_t recycle_bin_sz) +{ + printf("%s %s", pretty_num(x), s); + if (recycle_bin_sz) { + printf(" (of which %s in the recycle bin)", + skip_ws(pretty_num(recycle_bin_sz))); } - printf("%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c %s\n", - buf[0], buf[1], buf[2], (buf[2]==' '?' ':'.'), - buf[3], buf[4], buf[5], (buf[5]==' '?' ':'.'), - buf[6], buf[7], buf[8], (buf[8]==' '?' ':'.'), - buf[9], buf[10], buf[11], s); + printf("\n"); } + static void print_zone_mem(struct zone_mem* z) { - pretty_mem(z->data, "zone data"); + pretty_mem_recycle_bin(z->data, "zone data", z->recycle_bin); pretty_mem(z->data_unused, "zone unused space (due to alignment)"); } @@ -111,6 +144,7 @@ account_total(struct nsd_options* opt, struct tot_mem* t) { t->opt_data = region_get_mem(opt->region); t->opt_unused = region_get_mem_unused(opt->region); + t->opt_recycle_bin = region_get_recycle_size(opt->region); t->compresstable = sizeof(uint16_t) * (t->domaincount + 1 + EXTRA_DOMAIN_NUMBERS); t->compresstable *= opt->server_count; @@ -132,9 +166,9 @@ static void print_tot_mem(struct tot_mem* t) { printf("\ntotal\n"); - pretty_mem(t->data, "data"); + pretty_mem_recycle_bin(t->data, "data", t->recycle_bin); pretty_mem(t->data_unused, "unused space (due to alignment)"); - pretty_mem(t->opt_data, "options"); + pretty_mem_recycle_bin(t->opt_data, "options", t->opt_recycle_bin); pretty_mem(t->opt_unused, "options unused space (due to alignment)"); pretty_mem(t->compresstable, "name table (depends on servercount)"); #ifdef RATELIMIT @@ -150,6 
+184,7 @@ add_mem(struct tot_mem* t, struct zone_mem* z) { t->data += z->data; t->data_unused += z->data_unused; + t->recycle_bin += z->recycle_bin; t->domaincount += z->domaincount; } diff --git a/zonec.c b/zonec.c index 3acbbc366..4bae0d701 100644 --- a/zonec.c +++ b/zonec.c @@ -109,11 +109,116 @@ struct zonec_state { struct namedb *database; struct domain_table *domains; struct zone *zone; - struct domain *domain; size_t errors; size_t records; + + /* Some zones, such as DNS-SD zones, have RRsets with many RRs in them. + * By minimizing the need to reallocate the list of RRs in an RRset, + * we reduce memory fragmentation significantly for such zones. + * To this end, `domain`, `type`, `rrset`, `rrset_prev`, `rr_count` and + * `rrs` are used to commit the RRs within an RRset, that are grouped + * together in a zone file, to the database in batches. + */ + struct domain *domain; + int type; + struct rrset *rrset; +#ifdef PACKED_STRUCTS + struct rrset *rrset_prev; +#endif + int rr_count; + struct rr* rrs[256]; }; +static void zonec_commit_rrset(zone_parser_t *parser, struct zonec_state *state) +{ + struct rrset *rrset; + +#ifdef PACKED_STRUCTS + rrset_type* rrset_prev; +#endif + int priority = parser->options.secondary ? ZONE_WARNING : ZONE_ERROR; + + if(!state->domain || state->rr_count == 0) + return; + if (!state->rrset) { + rrset = region_alloc(state->database->region, sizeof(*rrset) +#ifdef PACKED_STRUCTS + + sizeof(rr_type*) * state->rr_count /* Add space for RRs. 
*/ +#endif + ); + rrset->zone = state->zone; + rrset->rr_count = state->rr_count; +#ifndef PACKED_STRUCTS + rrset->rrs = region_alloc(state->database->region, + sizeof(rr_type*) * state->rr_count); +#endif + memcpy(rrset->rrs, state->rrs, state->rr_count * sizeof(rr_type*)); + switch (state->type) { + case TYPE_CNAME: + if (!domain_find_non_cname_rrset(state->domain, state->zone)) + break; + zone_log(parser, priority, "CNAME and other data at the same name"); + break; + case TYPE_RRSIG: + case TYPE_NXT: + case TYPE_SIG: + case TYPE_NSEC: + case TYPE_NSEC3: + break; + default: + if (!domain_find_rrset(state->domain, state->zone, TYPE_CNAME)) + break; + zone_log(parser, priority, "CNAME and other data at the same name"); + break; + } + /* Add it */ + domain_add_rrset(state->domain, rrset); + } else { +#ifndef PACKED_STRUCTS + struct rr **rrs; +#else + struct rrset *rrset_orig; +#endif + /* Add it... */ + rrset = state->rrset; +#ifndef PACKED_STRUCTS + rrs = rrset->rrs; + rrset->rrs = region_alloc_array( + state->database->region, rrset->rr_count + state->rr_count, sizeof(*rrs)); + memcpy(rrset->rrs, rrs, rrset->rr_count * sizeof(*rrs)); + region_recycle(state->database->region, rrs, rrset->rr_count * sizeof(*rrs)); +#else + rrset_orig = rrset; + rrset = region_alloc(state->database->region, + sizeof(rrset_type) + + (rrset_orig->rr_count+state->rr_count)*sizeof(rr_type*)); + memcpy(rrset, rrset_orig, + sizeof(rrset_type) + + rrset_orig->rr_count*sizeof(rr_type*)); + if(state->rrset_prev) + state->rrset_prev->next = rrset; + else state->domain->rrsets = rrset; + region_recycle(state->database->region, rrset_orig, + sizeof(rrset_type) + + rrset_orig->rr_count*sizeof(rr_type*)); +#endif /* PACKED_STRUCTS */ + memcpy(rrset->rrs + rrset->rr_count, state->rrs, state->rr_count * sizeof(rr_type*)); + rrset->rr_count += state->rr_count; + } + state->records += state->rr_count; + /* Check we have SOA */ + if (state->rrs[0]->owner == state->zone->apex) + 
apex_rrset_checks(state->database, rrset, state->rrs[0]->owner); + + state->domain = NULL; + state->type = -1; + state->rrset = NULL; +#ifdef PACKED_STRUCTS + state->rrset_prev = NULL; +#endif + state->rr_count = 0; +} + int32_t zonec_accept( zone_parser_t *parser, const zone_name_t *owner, @@ -125,7 +230,6 @@ int32_t zonec_accept( void *user_data) { struct rr *rr; - struct rrset *rrset; struct dname_buffer dname; struct domain *domain; struct buffer buffer; @@ -133,10 +237,6 @@ int32_t zonec_accept( int32_t code; const struct nsd_type_descriptor *descriptor; struct zonec_state *state = (struct zonec_state *)user_data; -#ifdef PACKED_STRUCTS - rrset_type* rrset_prev; -#endif - assert(state); buffer_create_from(&buffer, rdata, rdlength); @@ -150,10 +250,18 @@ int32_t zonec_accept( zone_log(parser, ZONE_ERROR, "the owner cannot be converted"); return ZONE_BAD_PARAMETER; } - domain = domain_table_insert(state->domains, (void*)&dname); assert(domain); - + if (domain != state->domain || type != state->type + || state->rr_count >= (int)(sizeof(state->rrs) / sizeof(*state->rrs))){ + zonec_commit_rrset(parser, state); + state->domain = domain; + state->type = type; + state->rrset = NULL; +#ifdef PACKED_STRUCTS + state->rrset_prev = NULL; +#endif + } descriptor = nsd_type_descriptor(type); code = descriptor->read_rdata(state->domains, rdlength, &buffer, &rr); if(code < 0) { @@ -192,61 +300,45 @@ int32_t zonec_accept( return ZONE_SEMANTIC_ERROR; } } - - /* Do we have this type of rrset already? 
*/ + /* With the first RR for an RRset in this position in the zone file, + * find the RRset */ + if (state->rr_count == 0) { #ifndef PACKED_STRUCTS - rrset = domain_find_rrset(domain, state->zone, type); + state->rrset = domain_find_rrset(state->domain, state->zone, state->type); #else - rrset = domain_find_rrset_and_prev(domain, state->zone, type, &rrset_prev); -#endif - if (!rrset) { - rrset = region_alloc(state->database->region, sizeof(*rrset) -#ifdef PACKED_STRUCTS - + sizeof(rr_type*) /* Add space for one RR. */ -#endif - ); - rrset->zone = state->zone; - rrset->rr_count = 0; -#ifndef PACKED_STRUCTS - rrset->rrs = region_alloc(state->database->region, sizeof(rr_type*)); + state->rrset = domain_find_rrset_and_prev(state->domain, state->zone, state->type, &state->rrset_prev); #endif - + } + if (type == TYPE_RRSIG) + ; /* pass */ + else if (state->rrset && ttl != state->rrset->rrs[0]->ttl) { + zone_log(parser, ZONE_WARNING, + "%s TTL %"PRIu32" does not match TTL %u of %s RRset", + domain_to_string(domain), ttl, + state->rrset->rrs[0]->ttl, rrtype_to_string(type)); + + } else if (state->rr_count && ttl != state->rrs[0]->ttl) { + zone_log(parser, ZONE_WARNING, + "%s TTL %"PRIu32" does not match TTL %u of %s RRset", + domain_to_string(domain), ttl, + state->rrs[0]->ttl, rrtype_to_string(type)); + } + if (state->rrset || state->rr_count) { switch (type) { case TYPE_CNAME: - if (!domain_find_non_cname_rrset(domain, state->zone)) - break; - zone_log(parser, priority, "CNAME and other data at the same name"); + zone_log(parser, priority, "multiple CNAMEs at the same name"); break; - case TYPE_RRSIG: - case TYPE_NXT: - case TYPE_SIG: - case TYPE_NSEC: - case TYPE_NSEC3: + case TYPE_DNAME: + zone_log(parser, priority, "multiple DNAMEs at the same name"); break; default: - if (!domain_find_rrset(domain, state->zone, TYPE_CNAME)) - break; - zone_log(parser, priority, "CNAME and other data at the same name"); break; } - - /* Add it */ - domain_add_rrset(domain, rrset); - } 
else { -#ifndef PACKED_STRUCTS - struct rr **rrs; -#else - struct rrset *rrset_orig; -#endif - if (type != TYPE_RRSIG && ttl != rrset->rrs[0]->ttl) { - zone_log(parser, ZONE_WARNING, "%s TTL %"PRIu32" does not match TTL %u of %s RRset", - domain_to_string(domain), ttl, rrset->rrs[0]->ttl, - rrtype_to_string(type)); - } - - /* Search for possible duplicates... */ - for (int i = 0; i < rrset->rr_count; i++) { - if (!equal_rr_rdata(descriptor, rr, rrset->rrs[i])) + } + if (state->rrset) { + /* Search for possible duplicates in existing RRset */ + for (int i = 0; i < state->rrset->rr_count; i++) { + if (!equal_rr_rdata(descriptor, rr, state->rrset->rrs[i])) continue; /* Discard the duplicates... */ /* Lower the usage counter for domains in the rdata. */ @@ -254,49 +346,18 @@ int32_t zonec_accept( region_recycle(state->database->region, rr, sizeof(*rr) + rr->rdlength); return 0; } - - switch (type) { - case TYPE_CNAME: - zone_log(parser, priority, "multiple CNAMEs at the same name"); - break; - case TYPE_DNAME: - zone_log(parser, priority, "multiple DNAMEs at the same name"); - break; - default: - break; - } - - /* Add it... 
*/ -#ifndef PACKED_STRUCTS - rrs = rrset->rrs; - rrset->rrs = region_alloc_array( - state->database->region, rrset->rr_count + 1, sizeof(*rrs)); - memcpy(rrset->rrs, rrs, rrset->rr_count * sizeof(*rrs)); - region_recycle(state->database->region, rrs, rrset->rr_count * sizeof(*rrs)); -#else - rrset_orig = rrset; - rrset = region_alloc(state->database->region, - sizeof(rrset_type) + - (rrset_orig->rr_count+1)*sizeof(rr_type*)); - memcpy(rrset, rrset_orig, - sizeof(rrset_type) + - rrset_orig->rr_count*sizeof(rr_type*)); - if(rrset_prev) - rrset_prev->next = rrset; - else domain->rrsets = rrset; - region_recycle(state->database->region, rrset_orig, - sizeof(rrset_type) + - rrset_orig->rr_count*sizeof(rr_type*)); -#endif /* PACKED_STRUCTS */ } - - rrset->rrs[rrset->rr_count++] = rr; - - /* Check we have SOA */ - if (rr->owner == state->zone->apex) - apex_rrset_checks(state->database, rrset, rr->owner); - - state->records++; + /* Search for possible duplicates in already batched RRs */ + for (int i = 0; i < state->rr_count; i++) { + if (!equal_rr_rdata(descriptor, rr, state->rrs[i])) + continue; + /* Discard the duplicates... */ + /* Lower the usage counter for domains in the rdata. 
*/ + rr_lower_usage(state->database, rr); + region_recycle(state->database->region, rr, sizeof(*rr) + rr->rdlength); + return 0; + } + state->rrs[state->rr_count++] = rr; return 0; } @@ -393,10 +454,16 @@ zonec_read( state.database = database; state.domains = domains; state.zone = zone; - state.domain = NULL; state.errors = 0; state.records = 0; + state.domain = NULL; + state.type = -1; + state.rrset = NULL; +#ifdef PACKED_STRUCTS + state.rrset_prev = NULL; +#endif + state.rr_count = 0; origin = domain_dname(zone->apex); memset(&options, 0, sizeof(options)); options.origin.octets = dname_name(origin); @@ -413,6 +480,7 @@ zonec_read( if (zone_parse(&parser, &options, &buffers, zonefile, &state) != 0) { return state.errors; } + zonec_commit_rrset(&parser, &state); /* Check if zone file contained a correct SOA record */ if (!zone) {