| Encode documentation | Contained in the Encode distribution. |
Encode::MIME::Header -- MIME 'B' and 'Q' header encoding
use Encode qw/encode decode/;
$utf8 = decode('MIME-Header', $header);
$header = encode('MIME-Header', $utf8);
This module implements RFC 2047 Mime Header Encoding. There are 3
variant encoding names; MIME-Header, MIME-B and MIME-Q. The
difference is described below
decode() encode() ---------------------------------------------- MIME-Header Both B and Q =?UTF-8?B?....?= MIME-B B only; Q croaks =?UTF-8?B?....?= MIME-Q Q only; B croaks =?UTF-8?Q?....?=
When you decode(=?encoding?X?ENCODED WORD?=), ENCODED WORD is extracted and decoded for X encoding (B for Base64, Q for Quoted-Printable). Then the decoded chunk is fed to decode(encoding). So long as encoding is supported by Encode, any source encoding is fine.
When you encode, it just encodes UTF-8 string with X encoding then quoted with =?UTF-8?X?....?= . The parts that RFC 2047 forbids to encode are left as is and long lines are folded within 76 bytes per line.
It would be nice to support encoding to non-UTF8, such as =?ISO-2022-JP? and =?ISO-8859-1?= but that makes the implementation too complicated. These days major mail agents all support =?UTF-8? so I think it is just good enough.
Due to popular demand, 'MIME-Header-ISO_2022_JP' was introduced by Makamaka. Thre are still too many MUAs especially cellular phone handsets which does not grok UTF-8.
RFC 2047, http://www.faqs.org/rfcs/rfc2047.html and many other locations.
| Encode documentation | Contained in the Encode distribution. |
package Encode::MIME::Header; use strict; use warnings; no warnings 'redefine'; our $VERSION = do { my @r = ( q$Revision: 2.13 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; use Encode qw(find_encoding encode_utf8 decode_utf8); use MIME::Base64; use Carp; my %seed = ( decode_b => '1', # decodes 'B' encoding ? decode_q => '1', # decodes 'Q' encoding ? encode => 'B', # encode with 'B' or 'Q' ? bpl => 75, # bytes per line ); $Encode::Encoding{'MIME-Header'} = bless { %seed, Name => 'MIME-Header', } => __PACKAGE__; $Encode::Encoding{'MIME-B'} = bless { %seed, decode_q => 0, Name => 'MIME-B', } => __PACKAGE__; $Encode::Encoding{'MIME-Q'} = bless { %seed, decode_q => 1, encode => 'Q', Name => 'MIME-Q', } => __PACKAGE__; use base qw(Encode::Encoding); sub needs_lines { 1 } sub perlio_ok { 0 } sub decode($$;$) { use utf8; my ( $obj, $str, $chk ) = @_; # zap spaces between encoded words $str =~ s/\?=\s+=\?/\?==\?/gos; # multi-line header to single line $str =~ s/(?:\r\n|[\r\n])[ \t]//gos; 1 while ( $str =~ s/(=\?[-0-9A-Za-z_]+\?[Qq]\?)(.*?)\?=\1(.*?\?=)/$1$2$3/ ) ; # Concat consecutive QP encoded mime headers # Fixes breaking inside multi-byte characters $str =~ s{ =\? # begin encoded word ([-0-9A-Za-z_]+) # charset (encoding) (?:\*[A-Za-z]{1,8}(?:-[A-Za-z]{1,8})*)? # language (RFC 2231) \?([QqBb])\? # delimiter (.*?) # Base64-encodede contents \?= # end encoded word }{ if (uc($2) eq 'B'){ $obj->{decode_b} or croak qq(MIME "B" unsupported); decode_b($1, $3, $chk); } elsif (uc($2) eq 'Q'){ $obj->{decode_q} or croak qq(MIME "Q" unsupported); decode_q($1, $3, $chk); } else { croak qq(MIME "$2" encoding is nonexistent!); } }egox; $_[1] = $str if $chk; return $str; } sub decode_b { my $enc = shift; my $d = find_encoding($enc) or croak qq(Unknown encoding "$enc"); my $db64 = decode_base64(shift); my $chk = shift; return $d->name eq 'utf8' ? Encode::decode_utf8($db64) : $d->decode( $db64, $chk || Encode::FB_PERLQQ ); } sub decode_q { my ( $enc, $q, $chk ) = @_; my $d = find_encoding($enc) or croak qq(Unknown encoding "$enc"); $q =~ s/_/ /go; $q =~ s/=([0-9A-Fa-f]{2})/pack("C", hex($1))/ego; return $d->name eq 'utf8' ? Encode::decode_utf8($q) : $d->decode( $q, $chk || Encode::FB_PERLQQ ); } my $especials = join( '|' => map { quotemeta( chr($_) ) } unpack( "C*", qq{()<>,;:"'/[]?=} ) ); my $re_encoded_word = qr{ =\? # begin encoded word (?:[-0-9A-Za-z_]+) # charset (encoding) (?:\*[A-Za-z]{1,8}(?:-[A-Za-z]{1,8})*)? # language (RFC 2231) \?(?:[QqBb])\? # delimiter (?:.*?) # Base64-encodede contents \?= # end encoded word }xo; my $re_especials = qr{$re_encoded_word|$especials}xo; sub encode($$;$) { my ( $obj, $str, $chk ) = @_; my @line = (); for my $line ( split /\r\n|[\r\n]/o, $str ) { my ( @word, @subline ); for my $word ( split /($re_especials)/o, $line ) { if ( $word =~ /[^\x00-\x7f]/o or $word =~ /^$re_encoded_word$/o ) { push @word, $obj->_encode($word); } else { push @word, $word; } } my $subline = ''; for my $word (@word) { use bytes (); if ( bytes::length($subline) + bytes::length($word) > $obj->{bpl} - 1 ) { push @subline, $subline; $subline = ''; } $subline .= ' ' if ($subline =~ /\?=$/ and $word =~ /^=\?/); $subline .= $word; } $subline and push @subline, $subline; push @line, join( "\n " => @subline ); } $_[1] = '' if $chk; return join( "\n", @line ); } use constant HEAD => '=?UTF-8?'; use constant TAIL => '?='; use constant SINGLE => { B => \&_encode_b, Q => \&_encode_q, }; sub _encode { my ( $o, $str ) = @_; my $enc = $o->{encode}; my $llen = ( $o->{bpl} - length(HEAD) - 2 - length(TAIL) ); # to coerce a floating-point arithmetics, the following contains # .0 in numbers -- dankogai $llen *= $enc eq 'B' ? 3.0 / 4.0 : 1.0 / 3.0; my @result = (); my $chunk = ''; while ( length( my $chr = substr( $str, 0, 1, '' ) ) ) { use bytes (); if ( bytes::length($chunk) + bytes::length($chr) > $llen ) { push @result, SINGLE->{$enc}($chunk); $chunk = ''; } $chunk .= $chr; } length($chunk) and push @result, SINGLE->{$enc}($chunk); return @result; } sub _encode_b { HEAD . 'B?' . encode_base64( encode_utf8(shift), '' ) . TAIL; } sub _encode_q { my $chunk = shift; $chunk = encode_utf8($chunk); $chunk =~ s{ ([^0-9A-Za-z]) }{ join("" => map {sprintf "=%02X", $_} unpack("C*", $1)) }egox; return HEAD . 'Q?' . $chunk . TAIL; } 1; __END__