proposal to fix problem when we lose multipart meta info
John Napiorkowski [Tue, 20 Jan 2015 20:05:31 +0000 (14:05 -0600)]
Changes
dist.ini
lib/HTTP/Body.pm
lib/HTTP/Body/MultiPart.pm
t/10mixparamcontent.t [new file with mode: 0644]
t/utf8.txt [new file with mode: 0644]

diff --git a/Changes b/Changes
index 2087c9d..d1ab121 100644 (file)
--- a/Changes
+++ b/Changes
@@ -1,7 +1,11 @@
 This file documents the revision history for Perl extension HTTP::Body.
 
 {{$NEXT}}
-       - Release for cleanup of dzil
+        -new method 'part_data' which preserves multipart meta information just in cause
+          you have a form upload with unexpected charsets, etc.
+
+1.19
+        - Release for cleanup of dzil
 
 1.18      2013-12-06 10:05:33 America/New_York
         - Added configurable basename regexp, test added with fixed regexp for next release
index 2f153a9..953ed0f 100644 (file)
--- a/dist.ini
+++ b/dist.ini
@@ -23,6 +23,10 @@ Digest::MD5     = 0
 [Prereqs / TestRequires]
 Test::More      = 0.86
 Test::Deep      = 0
+HTTP::Request::Common = 0
+Encode          = 0
+File::Spec::Functions = 0
+File::Temp      = 0
 
 [PruneFiles]
 match           = ^t/data/benchmark/*
index 8a6e22a..807703d 100644 (file)
@@ -114,6 +114,7 @@ sub new {
         param_order    => [],
         state          => 'buffering',
         upload         => {},
+        part_data      => {},
         tmpdir         => File::Spec->tmpdir(),
     };
 
@@ -384,6 +385,45 @@ sub upload {
     return $self->{upload};
 }
 
+=item part_data
+
+Just like 'param' but gives you a hash of the full data associated with the
+part in a multipart type POST/PUT.  Example:
+
+    {
+      data => "test",
+      done => 1,
+      headers => {
+        "Content-Disposition" => "form-data; name=\"arg2\"",
+        "Content-Type" => "text/plain"
+      },
+      name => "arg2",
+      size => 4
+    }
+
+=cut
+
+sub part_data {
+    my $self = shift;
+
+    if ( @_ == 2 ) {
+
+        my ( $name, $data ) = @_;
+
+        if ( exists $self->{part_data}->{$name} ) {
+            for ( $self->{part_data}->{$name} ) {
+                $_ = [$_] unless ref($_) eq "ARRAY";
+                push( @$_, $data );
+            }
+        }
+        else {
+            $self->{part_data}->{$name} = $data;
+        }
+    }
+
+    return $self->{part_data};
+}
+
 =item tmpdir 
 
 Specify a different path for temporary files.  Defaults to the system temporary path.
index 41c7bab..cecc5d6 100644 (file)
@@ -300,8 +300,11 @@ sub handler {
                 $self->upload( $part->{name}, $part );
             }
         }
+        # If we have more than the content-disposition, we need to create a
+        # data key so that we don't waste the headers.
         else {
             $self->param( $part->{name}, $part->{data} );
+            $self->part_data( $part->{name}, $part )
         }
     }
 }
diff --git a/t/10mixparamcontent.t b/t/10mixparamcontent.t
new file mode 100644 (file)
index 0000000..88a09a4
--- /dev/null
@@ -0,0 +1,60 @@
+use utf8;
+use warnings;
+use strict;
+
+use Test::More;
+use HTTP::Body;
+use HTTP::Request::Common;
+use Encode;
+use HTTP::Message::PSGI ();
+use File::Spec::Functions;
+use File::Temp qw/ tempdir /;
+
+
+my $utf8 = 'test ♥';
+my $shiftjs = 'test テスト';
+my $path = File::Spec->catfile('t', 'utf8.txt');
+
+ok my $req = POST '/root/echo_arg',
+  Content_Type => 'form-data',
+    Content =>  [
+      arg0 => 'helloworld',
+      arg1 => [
+        undef, '',
+        'Content-Type' =>'text/plain; charset=UTF-8',
+        'Content' => Encode::encode('UTF-8', $utf8)],
+      arg2 => [
+        undef, '',
+        'Content-Type' =>'text/plain; charset=SHIFT_JIS',
+        'Content' => Encode::encode('SHIFT_JIS', $shiftjs)],
+      arg2 => [
+        undef, '',
+        'Content-Type' =>'text/plain; charset=SHIFT_JIS',
+        'Content' => Encode::encode('SHIFT_JIS', $shiftjs)],
+      file => [
+        "$path", Encode::encode_utf8('♥ttachment.txt'), 'Content-Type' =>'text/html; charset=UTF-8'
+      ],
+    ];
+
+
+ok my $env = HTTP::Message::PSGI::req_to_psgi($req);
+ok my $fh = $env->{'psgi.input'};
+ok my $body = HTTP::Body->new( $req->header('Content-Type'), $req->header('Content-Length') );
+ok my $tempdir = tempdir( 'XXXXXXX', CLEANUP => 1, DIR => File::Spec->tmpdir() );
+$body->tmpdir($tempdir);
+
+binmode $fh, ':raw';
+
+while ( $fh->read( my $buffer, 1024 ) ) {
+  $body->add($buffer);
+}
+
+is $body->param->{'arg0'}, 'helloworld';
+is Encode::decode('UTF-8', $body->param->{'arg1'}), $utf8;
+is Encode::decode('SHIFT_JIS', $body->param->{'arg2'}[0]), $shiftjs;
+
+is $body->part_data->{'arg0'}->{data}, 'helloworld';
+is Encode::decode('UTF-8', $body->part_data->{'arg1'}->{data}), $utf8;
+is Encode::decode('SHIFT_JIS', $body->part_data->{'arg2'}[0]->{data}), $shiftjs;
+
+done_testing;
diff --git a/t/utf8.txt b/t/utf8.txt
new file mode 100644 (file)
index 0000000..484d2cb
--- /dev/null
@@ -0,0 +1 @@
+<p>This is stream_body_fh action ♥</p>