24 #include "kmime_util.h"
25 #include "kmime_util_p.h"
29 #include "kmime_header_parsing.h"
30 #include "kmime_message.h"
31 #include "kmime_warning.h"
33 #include <config-kmime.h>
37 #include <kcharsets.h>
41 #include <QtCore/QList>
42 #include <QtCore/QString>
43 #include <QtCore/QTextCodec>
49 #include <boost/concept_check.hpp>
51 using namespace KMime;
55 QList<QByteArray> c_harsetCache;
56 QList<QByteArray> l_anguageCache;
57 QString f_allbackCharEnc;
58 bool u_seOutlookEncoding =
false;
62 foreach (
const QByteArray& charset, c_harsetCache ) {
63 if ( qstricmp( name.data(), charset.data() ) == 0 ) {
68 c_harsetCache.append( name.toUpper() );
70 return c_harsetCache.last();
75 foreach (
const QByteArray& language, l_anguageCache ) {
76 if ( qstricmp( name.data(), language.data() ) == 0 ) {
81 l_anguageCache.append( name.toUpper() );
83 return l_anguageCache.last();
88 uint sLength = s.length();
89 for ( uint i=0; i<sLength; i++ ) {
90 if ( s.at( i ).toLatin1() <= 0 ) {
100 case Headers::CE7Bit:
return QString::fromLatin1(
"7bit" );
101 case Headers::CE8Bit:
return QString::fromLatin1(
"8bit" );
102 case Headers::CEquPr:
return QString::fromLatin1(
"quoted-printable" );
103 case Headers::CEbase64:
return QString::fromLatin1(
"base64" );
104 case Headers::CEuuenc:
return QString::fromLatin1(
"uuencode" );
105 case Headers::CEbinary:
return QString::fromLatin1(
"binary" );
106 default:
return QString::fromLatin1(
"unknown" );
112 QList<Headers::contentEncoding> allowed;
115 switch ( cf.
type() ) {
117 allowed << Headers::CE7Bit;
119 allowed << Headers::CE8Bit;
125 allowed << Headers::CEquPr;
126 allowed << Headers::CEbase64;
128 allowed << Headers::CEbase64;
129 allowed << Headers::CEquPr;
133 allowed << Headers::CEbase64;
144 const uchar specialsMap[16] = {
145 0x00, 0x00, 0x00, 0x00,
146 0x20, 0xCA, 0x00, 0x3A,
147 0x80, 0x00, 0x00, 0x1C,
148 0x00, 0x00, 0x00, 0x00
152 const uchar tSpecialsMap[16] = {
153 0x00, 0x00, 0x00, 0x00,
154 0x20, 0xC9, 0x00, 0x3F,
155 0x80, 0x00, 0x00, 0x1C,
156 0x00, 0x00, 0x00, 0x00
160 const uchar aTextMap[16] = {
161 0x00, 0x00, 0x00, 0x00,
162 0x5F, 0x35, 0xFF, 0xC5,
163 0x7F, 0xFF, 0xFF, 0xE3,
164 0xFF, 0xFF, 0xFF, 0xFE
168 const uchar tTextMap[16] = {
169 0x00, 0x00, 0x00, 0x00,
170 0x5F, 0x36, 0xFF, 0xC0,
171 0x7F, 0xFF, 0xFF, 0xE3,
172 0xFF, 0xFF, 0xFF, 0xFE
176 const uchar eTextMap[16] = {
177 0x00, 0x00, 0x00, 0x00,
178 0x40, 0x35, 0xFF, 0xC0,
179 0x7F, 0xFF, 0xFF, 0xE0,
180 0x7F, 0xFF, 0xFF, 0xE0
185 f_allbackCharEnc = fallbackCharEnc;
190 return f_allbackCharEnc;
195 u_seOutlookEncoding = violateStandard;
200 return u_seOutlookEncoding;
205 const QByteArray &defaultCS,
bool forceCS )
208 QByteArray spaceBuffer;
209 const char *scursor = src.constData();
210 const char *send = scursor + src.length();
211 bool onlySpacesSinceLastWord =
false;
213 while ( scursor != send ) {
215 if ( isspace( *scursor ) && onlySpacesSinceLastWord ) {
216 spaceBuffer += *scursor++;
221 if ( *scursor ==
'=' ) {
225 const char *start = scursor;
226 if ( HeaderParsing::parseEncodedWord( scursor, send, decoded, language, usedCS, defaultCS, forceCS ) ) {
227 result += decoded.toUtf8();
228 onlySpacesSinceLastWord =
true;
231 if ( onlySpacesSinceLastWord ) {
232 result += spaceBuffer;
233 onlySpacesSinceLastWord =
false;
241 if ( onlySpacesSinceLastWord ) {
242 result += spaceBuffer;
243 onlySpacesSinceLastWord =
false;
251 const QString tryUtf8 = QString::fromUtf8( result );
252 if ( tryUtf8.contains( 0xFFFD ) && !f_allbackCharEnc.isEmpty() ) {
253 QTextCodec* codec = KGlobal::charsets()->codecForName( f_allbackCharEnc );
254 return codec->toUnicode( result );
266 static const char *reservedCharacters =
"\"()<>@,.;:\\[]=";
269 bool addressHeader,
bool allow8BitHeaders )
273 bool nonAscii=
false, ok=
true, useQEncoding=
false;
276 const QTextCodec *codec = KGlobal::charsets()->codecForName( QString::fromLatin1( charset ), ok );
281 usedCS = KGlobal::locale()->encoding();
282 codec = KGlobal::charsets()->codecForName( QString::fromLatin1( usedCS ), ok );
285 if ( charset.isEmpty() ) {
286 usedCS = codec->name();
292 QTextCodec::ConverterState converterState( QTextCodec::IgnoreHeader );
293 QByteArray encoded8Bit = codec->fromUnicode( src.constData(), src.length(), &converterState );
294 if ( converterState.invalidChars > 0 ) {
296 codec = QTextCodec::codecForName( usedCS );
297 encoded8Bit = codec->fromUnicode( src );
300 if ( usedCS.contains(
"8859-" ) ) {
304 if ( allow8BitHeaders ) {
308 uint encoded8BitLength = encoded8Bit.length();
309 for (
unsigned int i=0; i<encoded8BitLength; i++ ) {
310 if ( encoded8Bit[i] ==
' ' ) {
315 if ( ( (
signed char)encoded8Bit[i] < 0 ) || ( encoded8Bit[i] ==
'\033' ) ||
316 ( addressHeader && ( strchr(
"\"()<>@,.;:\\[]=", encoded8Bit[i] ) != 0 ) ) ) {
324 while ( ( end < encoded8Bit.length() ) && ( encoded8Bit[end] !=
' ' ) ) {
329 for (
int x=end; x<encoded8Bit.length(); x++ ) {
330 if ( ( (
signed char)encoded8Bit[x] < 0 ) || ( encoded8Bit[x] ==
'\033' ) ||
331 ( addressHeader && ( strchr( reservedCharacters, encoded8Bit[x] ) != 0 ) ) ) {
334 while ( ( end < encoded8Bit.length() ) && ( encoded8Bit[end] !=
' ' ) ) {
341 result = encoded8Bit.left( start ) +
"=?" + usedCS;
343 if ( useQEncoding ) {
347 for (
int i=start; i<end; i++ ) {
352 if ( ( ( c >=
'a' ) && ( c <=
'z' ) ) ||
353 ( ( c >=
'A' ) && ( c <=
'Z' ) ) ||
354 ( ( c >=
'0' ) && ( c <=
'9' ) ) ) {
358 hexcode = ( ( c & 0xF0 ) >> 4 ) + 48;
359 if ( hexcode >= 58 ) {
363 hexcode = ( c & 0x0F ) + 48;
364 if ( hexcode >= 58 ) {
372 result +=
"?B?" + encoded8Bit.mid( start, end - start ).toBase64();
376 result += encoded8Bit.right( encoded8Bit.length() - end );
378 result = encoded8Bit;
384 QByteArray encodeRFC2047Sentence(
const QString& src,
const QByteArray& charset )
387 QList<QChar> splitChars;
388 splitChars << QLatin1Char(
',' ) << QLatin1Char(
'\"' ) << QLatin1Char(
';' ) << QLatin1Char(
'\\' );
389 const QChar *ch = src.constData();
390 const int length = src.length();
397 while ( pos < length ) {
399 const bool isAscii = ch->unicode() < 127;
400 const bool isReserved = ( strchr( reservedCharacters, ch->toLatin1() ) != 0 );
401 if ( isAscii && isReserved ) {
402 const int wordSize = pos - wordStart;
403 if ( wordSize > 0 ) {
404 const QString word = src.mid( wordStart, wordSize );
408 result += ch->toLatin1();
416 const int wordSize = pos - wordStart;
417 if ( wordSize > 0 ) {
418 const QString word = src.mid( wordStart, pos - wordStart );
430 if ( str.isEmpty() ) {
434 const QTextCodec *codec = KGlobal::charsets()->codecForName( QString::fromLatin1( charset ) );
436 if ( charset ==
"us-ascii" ) {
437 latin = str.toLatin1();
438 }
else if ( codec ) {
439 latin = codec->fromUnicode( str );
441 latin = str.toLocal8Bit();
445 for ( l = latin.data(); *l; ++l ) {
446 if ( ( ( *l & 0xE0 ) == 0 ) || ( *l & 0x80 ) ) {
455 QByteArray result = charset +
"''";
456 for ( l = latin.data(); *l; ++l ) {
457 bool needsQuoting = ( *l & 0x80 ) || ( *l ==
'%' );
458 if ( !needsQuoting ) {
459 const QByteArray especials =
"()<>@,;:\"/[]?.= \033";
460 int len = especials.length();
461 for (
int i = 0; i < len; i++ ) {
462 if ( *l == especials[i] ) {
468 if ( needsQuoting ) {
470 unsigned char hexcode;
471 hexcode = ( ( *l & 0xF0 ) >> 4 ) + 48;
472 if ( hexcode >= 58 ) {
476 hexcode = ( *l & 0x0F ) + 48;
477 if ( hexcode >= 58 ) {
493 int p = str.indexOf(
'\'' );
495 return KGlobal::charsets()->codecForName( QString::fromLatin1( defaultCS ) )->toUnicode( str );
499 QByteArray charset = str.left( p );
501 QByteArray st = str.mid( str.lastIndexOf(
'\'' ) + 1 );
505 while ( p < (
int)st.length() ) {
506 if ( st.at( p ) == 37 ) {
509 if ( p + 2 < st.length() ) {
510 ch = st.at( p + 1 ) - 48;
514 ch2 = st.at( p + 2 ) - 48;
518 st[p] = ch * 16 + ch2;
519 st.remove( p + 1, 2 );
524 kDebug() <<
"Got pre-decoded:" << st;
526 const QTextCodec * charsetcodec = KGlobal::charsets()->codecForName( QString::fromLatin1( charset ) );
527 if ( !charsetcodec || forceCS ) {
528 charsetcodec = KGlobal::charsets()->codecForName( QString::fromLatin1( defaultCS ) );
531 usedCS = charsetcodec->name();
532 return charsetcodec->toUnicode( st );
543 static char chars[] =
"0123456789abcdefghijklmnopqrstuvxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
547 unsigned int timeval;
551 ran = 1 + (int)( 1000.0 * rand() / ( RAND_MAX + 1.0 ) );
552 timeval = ( now / ran ) + getpid();
554 for (
int i = 0; i < 10; i++ ) {
555 pos = (int) ( 61.0 * rand() / ( RAND_MAX + 1.0 ) );
561 ret.setNum( timeval );
576 if ( header.isEmpty() ) {
580 int pos = 0, foldBegin = 0, foldMid = 0, foldEnd = 0;
581 while ( ( foldMid = header.indexOf(
'\n', pos ) ) >= 0 ) {
582 foldBegin = foldEnd = foldMid;
584 while ( foldBegin > 0 ) {
585 if ( !QChar::fromLatin1( header[foldBegin - 1] ).isSpace() ) {
591 while ( foldEnd <= header.length() - 1 ) {
592 if ( QChar::fromLatin1( header[foldEnd] ).isSpace() ) {
594 }
else if ( foldEnd > 0 && header[foldEnd - 1] ==
'\n' &&
595 header[foldEnd] ==
'=' && foldEnd + 2 < header.length() &&
596 ( ( header[foldEnd + 1] ==
'0' &&
597 header[foldEnd + 2] ==
'9' ) ||
598 ( header[foldEnd + 1] ==
'2' &&
599 header[foldEnd + 2] ==
'0' ) ) ) {
608 result += header.mid( pos, foldBegin - pos );
609 if ( foldEnd < header.length() - 1 ) {
614 const int len = header.length();
616 result += header.mid( pos, len - pos );
621 int findHeaderLineEnd(
const QByteArray &src,
int &dataBegin,
bool *folded )
624 int len = src.length() - 1;
630 if ( dataBegin < 0 ) {
635 if ( dataBegin > len ) {
643 if ( src.at( end ) ==
'\n' && end + 1 < len &&
644 ( src[end + 1] ==
' ' || src[end + 1] ==
'\t' ) ) {
651 if ( src.at( end ) !=
'\n' ) {
653 end = src.indexOf(
'\n', end + 1 );
654 if ( end == -1 || end == len ) {
657 }
else if ( src[end + 1] ==
' ' || src[end + 1] ==
'\t' ||
658 ( src[end + 1] ==
'=' && end + 3 <= len &&
659 ( ( src[end + 2] ==
'0' && src[end + 3] ==
'9' ) ||
660 ( src[end + 2] ==
'2' && src[end + 3] ==
'0' ) ) ) ) {
678 int indexOfHeader(
const QByteArray &src,
const QByteArray &name,
int &end,
int &dataBegin,
bool *folded )
684 if ( qstrnicmp( n.constData(), src.constData(), n.length() ) == 0 ) {
688 const char *p = strcasestr( src.constData(), n.constData() );
692 begin = p - src.constData();
698 dataBegin = begin + name.length() + 1;
700 if ( src.at( dataBegin ) ==
' ' ) {
703 end = findHeaderLineEnd( src, dataBegin, folded );
719 if ( src.isEmpty() || indexOfHeader( src, name, end, begin, &folded ) < 0 ) {
725 result = src.mid( begin, end - begin );
728 QByteArray hdrValue = src.mid( begin, end - begin );
736 QList<QByteArray>
extractHeaders(
const QByteArray &src,
const QByteArray &name )
740 QList<QByteArray> result;
741 QByteArray copySrc( src );
743 if ( indexOfHeader( copySrc, name, end, begin, &folded ) < 0 ) {
747 while ( begin >= 0 ) {
749 result.append( copySrc.mid( begin, end - begin ) );
751 QByteArray hdrValue = copySrc.mid( begin, end - begin );
756 copySrc = copySrc.mid( end );
757 if ( indexOfHeader( copySrc, name, end, begin, &folded ) < 0 ) {
764 void removeHeader( QByteArray &header,
const QByteArray &name )
766 int begin, end, dummy;
767 begin = indexOfHeader( header, name, end, dummy );
769 header.remove( begin, end - begin + 1 );
776 ret.replace(
"\r\n",
"\n" );
789 ret.replace(
'\n',
"\r\n" );
793 QByteArray
LFtoCRLF(
const char *s )
800 template <
typename StringType,
typename CharType >
void removeQuotesGeneric( StringType & str )
802 bool inQuote =
false;
803 for (
int i = 0; i < str.length(); ++i ) {
804 if ( str[i] == CharType(
'"' ) ) {
809 if ( inQuote && ( str[i] == CharType(
'\\' ) ) ) {
819 removeQuotesGeneric<QByteArray, char>( str );
824 removeQuotesGeneric<QString, QLatin1Char>( str );
827 template<
class StringType,
class CharType,
class CharConverterType,
class StringConverterType,
class ToString>
828 void addQuotes_impl( StringType &str,
bool forceQuotes )
830 bool needsQuotes=
false;
831 for (
int i=0; i < str.length(); i++ ) {
832 const CharType cur = str.at( i );
833 if ( QString( ToString( str ) ).contains( QRegExp( QLatin1String(
"\"|\\\\|=|\\]|\\[|:|;|,|\\.|,|@|<|>|\\)|\\(" ) ) ) ) {
836 if ( cur == CharConverterType(
'\\' ) || cur == CharConverterType(
'\"' ) ) {
837 str.insert( i, CharConverterType(
'\\' ) );
842 if ( needsQuotes || forceQuotes ) {
843 str.insert( 0, CharConverterType(
'\"' ) );
844 str.append( StringConverterType(
"\"" ) );
850 addQuotes_impl<QByteArray, char, char, char*, QLatin1String>( str, forceQuotes );
855 addQuotes_impl<QString, QChar, QLatin1Char, QLatin1String, QString>( str, forceQuotes );
860 const int LRO = 0x202D;
861 const int RLO = 0x202E;
862 const int LRE = 0x202A;
863 const int RLE = 0x202B;
864 const int PDF = 0x202C;
866 QString result = input;
868 int openDirChangers = 0;
869 int numPDFsRemoved = 0;
870 for (
int i = 0; i < input.length(); i++ ) {
871 const ushort &code = input.at( i ).unicode();
872 if ( code == LRO || code == RLO || code == LRE || code == RLE ) {
874 }
else if ( code == PDF ) {
875 if ( openDirChangers > 0 ) {
879 kWarning() <<
"Possible Unicode spoofing (unexpected PDF) detected in" << input;
880 result.remove( i - numPDFsRemoved, 1 );
886 if ( openDirChangers > 0 ) {
887 kWarning() <<
"Possible Unicode spoofing detected in" << input;
892 for (
int i = openDirChangers; i > 0; i-- ) {
893 if ( result.endsWith( QLatin1Char(
'"' ) ) ) {
894 result.insert( result.length() - 1, QChar( PDF ) );
896 result += QChar( PDF );
906 const int LRO = 0x202D;
907 const int RLO = 0x202E;
908 const int LRE = 0x202A;
909 const int RLE = 0x202B;
910 QString result = input;
911 result.remove( LRO );
912 result.remove( RLO );
913 result.remove( LRE );
914 result.remove( RLE );
918 static bool isCryptoPart(
Content* content )
930 const QByteArray lowerSubType = contentType->
subType().toLower();
931 return ( contentType->
mediaType().toLower() ==
"application" &&
932 ( lowerSubType ==
"pgp-encrypted" ||
933 lowerSubType ==
"pgp-signature" ||
934 lowerSubType ==
"pkcs7-mime" ||
935 lowerSubType ==
"pkcs7-signature" ||
936 lowerSubType ==
"x-pkcs7-signature" ||
937 ( lowerSubType ==
"octet-stream" &&
947 bool emptyFilename =
true;
950 emptyFilename =
false;
953 if ( emptyFilename &&
956 emptyFilename =
false;
960 if ( !emptyFilename && !isCryptoPart( content ) ) {
982 if ( contentType->
isSubtype(
"signed" ) ||
983 contentType->
isSubtype(
"pgp-signature" ) ||
984 contentType->
isSubtype(
"pkcs7-signature" ) ||
985 contentType->
isSubtype(
"x-pkcs7-signature" ) ||
988 message->
mainBodyPart(
"application/pkcs7-signature" ) ||
989 message->
mainBodyPart(
"application/x-pkcs7-signature" ) ) {
1002 if ( contentType->
isSubtype(
"encrypted" ) ||
1003 contentType->
isSubtype(
"pgp-encrypted" ) ||
1004 contentType->
isSubtype(
"pkcs7-mime" ) ||
1006 message->
mainBodyPart(
"application/pgp-encrypted" ) ||
1022 if ( contentType && contentType->
isMediatype(
"text" ) && contentType->
isSubtype(
"calendar" ) ) {
This file is part of the API for handling MIME data and defines the Codec class.
QByteArray unfoldHeader(const QByteArray &header)
Unfolds the given header if necessary.
List contents() const
For multipart contents, this will return a list of all multipart child contents.
bool hasAttachment(Content *content)
Returns whether or not the given MIME node contains an attachment part.
void addQuotes(QByteArray &str, bool forceQuotes)
Converts the given string into a quoted-string if the string contains any special characters (ie...
bool isSigned(Message *message)
Returns whether or not the given message is partly or fully signed.
QByteArray cachedLanguage(const QByteArray &name)
Consult the language cache.
QByteArray cachedCharset(const QByteArray &name)
Consult the charset cache.
QString decodeRFC2231String(const QByteArray &str, QByteArray &usedCS, const QByteArray &defaultCS, bool forceCS)
Decodes string str according to RFC2231.
void setUseOutlookAttachmentEncoding(bool violateStandard)
Set whether or not to use Outlook-compatible attachment filename encoding.
QByteArray encodeRFC2231String(const QString &str, const QByteArray &charset)
Encodes string str according to RFC2231 using charset charset.
QByteArray LFtoCRLF(const QByteArray &s)
Converts all occurrences of "\n" (LF) in s to "\r\n" (CRLF).
Content * mainBodyPart(const QByteArray &type=QByteArray())
Returns the first main body part of a given type, taking multipart/mixed and multipart/alternative no...
Headers::ContentDisposition * contentDisposition(bool create=true)
Returns the Content-Disposition header.
void removeQuots(QByteArray &str)
Removes quote (DQUOTE) characters and decodes "quoted-pairs" (ie.
QString nameForEncoding(Headers::contentEncoding enc)
Returns a user-visible string for a contentEncoding, for example "quoted-printable" for CEquPr...
QList< QByteArray > extractHeaders(const QByteArray &src, const QByteArray &name)
Tries to extract the headers with name name from the string src, unfolding it if necessary.
This file is part of the API for handling MIME data and defines the CharFreq class.
bool isEncrypted(Message *message)
Returns whether or not the given message is partly or fully encrypted.
QByteArray multiPartBoundary()
Constructs a random string (sans leading/trailing "--") that can be used as a multipart delimiter (ie...
float printableRatio() const
Returns the percentage of printable characters in the data.
Headers::ContentType * contentType(bool create=true)
Returns the Content-Type header.
void setFallbackCharEncoding(const QString &fallbackCharEnc)
Set the fallback charset to use when decoding RFC2047-encoded headers.
QList< Headers::contentEncoding > encodingsForData(const QByteArray &data)
Returns a list of encodings that can correctly encode the data.
Represents an (email) message.
QString decodeRFC2047String(const QByteArray &src, QByteArray &usedCS, const QByteArray &defaultCS, bool forceCS)
Decodes string src according to RFC2047, i.e., the construct =?charset?[qb]?encoded?=.
QByteArray uniqueString()
Uses current time, pid and random numbers to construct a string that aims to be unique on a per-host ...
A class that encapsulates MIME encoded Content.
bool isUsAscii(const QString &s)
Checks whether s contains any non-us-ascii characters.
A class for performing basic data typing using frequency count heuristics.
QString balanceBidiState(const QString &input)
Makes sure that the bidirectional state at the end of the string is the same as at the beginning of t...
Type type() const
Returns the data Type as derived from the class heuristics.
bool isInvitation(Content *content)
Returns whether or not the given MIME content is an invitation message of the iTIP protocol...
QString fallbackCharEncoding()
Retrieve the set fallback charset if there is one set.
bool useOutlookAttachmentEncoding()
Retrieve whether or not to use Outlook-compatible encodings for attachments.
QString removeBidiControlChars(const QString &input)
Similar to balanceBidiState(), but removes the bidirectional control characters from the string instead of balancing them.
QByteArray extractHeader(const QByteArray &src, const QByteArray &name)
Tries to extract the header with name name from the string src, unfolding it if necessary.
QByteArray encodeRFC2047String(const QString &src, const QByteArray &charset, bool addressHeader, bool allow8BitHeaders)
Encodes string src according to RFC2047 using charset charset.
QByteArray CRLFtoLF(const QByteArray &s)
Converts all occurrences of "\r\n" (CRLF) in s to "\n" (LF).