From: Dan Sugalski Date: Sun, 23 Feb 2003 17:03:51 +0000 (-0800) Subject: import Devel-Size 0.55 from CPAN X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=b98fcdb93f447c4f556f859c61b15c5fdd502bee;p=p5sagit%2FDevel-Size.git import Devel-Size 0.55 from CPAN git-cpan-module: Devel-Size git-cpan-version: 0.55 git-cpan-authorid: DSUGAL git-cpan-file: authors/id/D/DS/DSUGAL/Devel-Size-0.55.tar.gz --- diff --git a/Changes b/Changes index c1860da..e35fa9b 100644 --- a/Changes +++ b/Changes @@ -11,3 +11,9 @@ Revision history for Perl extension Devel::Size. 0.54 Sat Oct 12 14:11:00 2002 - Applied a patch to get it working on 5.8.0 under Tru64 + +0.55 Sat Feb 22 17:21:00 2003 + - Fixed a bad size calculation (we were overestimating by one byte) + - Updated the docs to show some of the places that there might be 'dark' + memory that Devel::Size can't see. + - Added in tests from Ken Williams \ No newline at end of file diff --git a/Size.pm b/Size.pm index 0dac1ce..dd1764a 100644 --- a/Size.pm +++ b/Size.pm @@ -24,7 +24,7 @@ require DynaLoader; @EXPORT = qw( ); -$VERSION = '0.54'; +$VERSION = '0.55'; bootstrap Devel::Size $VERSION; @@ -76,15 +76,138 @@ total size of a multidimensional data structure. At the moment there is no way to get the size of an array or a hash and its elements without using this function. -=head2 EXPORT +=head1 EXPORT None but default, but optionally C<size> and C<total_size>. +=head1 UNDERSTANDING MEMORY ALLOCATION + +Please note that the following discussion of memory allocation in perl +is based on the perl 5.8.0 sources. While this is generally +applicable to all versions of perl, some of the gory details are +omitted. It also makes some presumptions on how your system memory +allocator works so, while it will be generally correct, it may not +exactly reflect your system. 
(Generally the only issue is the size of +the constant values we'll talk about, not their existence) + +=head2 The C library + +It's important first to understand how your OS and libraries handle +memory. When the perl interpreter needs some memory, it asks the C +runtime library for it, using the C<malloc> call. C<malloc> has one +parameter, the size of the memory allocation you want, and returns a +pointer to that memory. C<malloc> also makes sure that the pointer it +returns to you is properly aligned. When you're done with the memory +you hand it back to the library with the C<free> call. C<free> has +one parameter, the pointer that C<malloc> returned. There are a couple of interesting ramifications to this. + +Because malloc has to return an aligned pointer, it will round up the +memory allocation to make sure that the memory it returns is aligned +right. What that alignment is depends on your CPU, OS, and compiler +settings, but things are generally aligned to either a 4 or 8 byte +boundary. That means that if you ask for 1 byte, C<malloc> will +silently round up to either 4 or 8 bytes, though it doesn't tell the +program making the request, so the extra memory can't be used. + +Since C<free> isn't given the size of the memory chunk you're +freeing, it has to track it another way. Most libraries do this by +tacking on a length field just before the memory it hands to your +program. (It's put before the beginning rather than after the end +because it's less likely to get mangled by program bugs) This size +field is the size of your platform integer, generally either 4 or 8 +bytes. + +So, if you asked for 1 byte, malloc would build something like this: + + +------------------+ + | 4 byte length | + +------------------+ <----- the pointer malloc returns + | your 1 byte | + +------------------+ + | 3 bytes padding | + +------------------+ + +As you can see, you asked for 1 byte but C<malloc> used 8. If your +integers were 8 bytes rather than 4, C<malloc> would have used 16 bytes +to satisfy your 1 byte request. 
+ +The C memory allocation system also keeps a list of free memory +chunks, so it can recycle freed memory. For performance reasons, some +C memory allocation systems put a limit to the number of free +segments that are on the free list, or only search through a small +number of memory chunks waiting to be recycled before just +allocating more memory from the system. + +The memory allocation system tries to keep as few chunks on the free +list as possible. It does this by trying to notice if there are two +adjacent chunks of memory on the free list and, if there are, +coalescing them into a single larger chunk. This works pretty well, +but there are ways to have a lot of memory on the free list yet still +not have anything that can be allocated. If a program allocates one +million eight-byte chunks, for example, then frees every other chunk, +there will be four million bytes of memory on the free list, but none +of that memory can be handed out to satisfy a request for 10 +bytes. This is what's referred to as a fragmented free list, and can +be one reason why your program could have a lot of free memory yet +still not be able to allocate more, or have a huge process size and +still have almost no memory actually allocated to the program running. + +=head2 Perl + +Perl's memory allocation scheme is a bit convoluted, and more complex +than can really be addressed here, but there is one common spot where perl's +memory allocation is unintuitive, and that's for hash keys. + +When you have a hash, each entry has a structure that points to the +key and the value for that entry. The value is just a pointer to the +scalar in the entry, and doesn't take up any special amount of +memory. The key structure holds the hash value for the key, the key +length, and the key string. 
(The entry and key structures are +separate so perl can potentially share keys across multiple hashes) + +The entry structure has three pointers in it, and takes up either 12 +or 24 bytes, depending on whether you're on a 32 bit or 64 bit +system. Since these structures are of fixed size, perl can keep a big +pool of them internally (generally called an arena) so it doesn't +have to allocate memory for each one. + +The key structure, though, is of variable length because the key +string is of variable length, so perl has to ask the system for a +memory allocation for each key. The base size of this structure is +8 or 16 bytes (once again, depending on whether you're on a 32 bit or +64 bit system) plus the string length plus two bytes. + +Since this memory has to be allocated from the system there's the +malloc size-field overhead (4 or 8 bytes) plus the alignment bytes (0 +to 7, depending on your system and the key length) +that get added on to the chunk perl requests. If the key is only 1 +character, and you're on a 32 bit system, the allocation will be 16 +bytes. If the key is 7 characters then the allocation is 24 bytes on +a 32 bit system. If you're on a 64 bit system the numbers get even +larger. + +This does mean that hashes eat up a I<lot> of memory, both in memory +Devel::Size can track (the memory actually in the structures and +strings) and that it can't (the malloc alignment and length overhead). + +=head1 DANGERS + +Devel::Size, because of the way it works, can consume a +considerable amount of memory as it runs. It will use five +pointers, two integers, and two bytes worth of storage, plus +potential alignment and bucket overhead, per thing it looks at. This +memory is released at the end, but it may fragment your free pool, +and will definitely expand your process' memory footprint. + =head1 BUGS Doesn't currently walk all the bits for code refs, formats, and IO. Those throw a warning, but a minimum size for them is returned. 
+Devel::Size only counts the memory that perl actually allocates. It +doesn't count 'dark' memory--memory that is lost due to fragmented free lists, +allocation alignments, or C library overhead. + =head1 AUTHOR Dan Sugalski dan@sidhe.org diff --git a/Size.xs b/Size.xs index 43b45ce..02c16c3 100644 --- a/Size.xs +++ b/Size.xs @@ -153,8 +153,7 @@ UV thing_size(SV *orig_thing, HV *tracking_hash) { if (cur_entry->hent_hek) { /* Hash keys can be shared. Have we seen this before? */ if (check_new(tracking_hash, cur_entry->hent_hek)) { - total_size += sizeof(HEK); - total_size += cur_entry->hent_hek->hek_len - 1; + total_size += HEK_BASESIZE + cur_entry->hent_hek->hek_len + 2; } } cur_entry = cur_entry->hent_next; @@ -235,6 +234,9 @@ CODE: /* Hash to track our seen pointers */ HV *tracking_hash = newHV(); AV *pending_array = newAV(); + IV size = 0; + + IV count = 0; /* Size starts at zero */ RETVAL = 0; @@ -318,11 +320,15 @@ CODE: } } - RETVAL += thing_size(thing, tracking_hash); + + size = thing_size(thing, tracking_hash); + RETVAL += size; + // printf("added thing of size %i, thing #%i\n", size, count++); } } /* Clean up after ourselves */ + // printf("For info, refcounts are %i, %i\n", SvREFCNT(tracking_hash), SvREFCNT(pending_array)); SvREFCNT_dec(tracking_hash); SvREFCNT_dec(pending_array); } diff --git a/t/basic.t b/t/basic.t index b096f73..05bdc6a 100644 --- a/t/basic.t +++ b/t/basic.t @@ -6,9 +6,9 @@ # Change 1..1 below to 1..last_test_to_print . # (It may become useful if the test is moved to ./t subdirectory.) 
-BEGIN { $| = 1; print "1..1\n"; } +BEGIN { $| = 1; print "1..5\n"; } END {print "not ok 1\n" unless $loaded;} -use Devel::Size; +use Devel::Size qw(size total_size); $loaded = 1; print "ok 1\n"; @@ -18,3 +18,33 @@ print "ok 1\n"; # (correspondingly "not ok 13") depending on the success of chunk 13 # of the test code): + +my $x = "A string"; +my $y = "A longer string"; +if (size($x) < size($y)) { + print "ok 2\n"; +} else { + print "not ok 2\n"; +} + +if (total_size($x) < total_size($y)) { + print "ok 3\n"; +} else { + print "not ok 3\n"; +} + +my @x = (1..4); +my @y = (1..10); + +if (size(\@x) < size(\@y)) { + print "ok 4\n"; +} else { + print "not ok 4\n"; +} + +if (total_size(\@x) < total_size(\@y)) { + print "ok 5\n"; +} else { + print "not ok 5\n"; +} +