Main Page   Class Hierarchy   Alphabetical List   Compound List   Examples  
qp.h
00001 /***************************************************************************
00002     copyright            : (C) 2002-2008 by Stefano Barbato
00003     email                : stefano@codesink.org
00004 
00005     $Id: qp.h,v 1.20 2008-10-07 11:06:26 tat Exp $
00006  ***************************************************************************/
00007 #ifndef _MIMETIC_CODEC_QP_H_
00008 #define _MIMETIC_CODEC_QP_H_
00009 #include <iostream>
00010 #include <string>
00011 #include <sstream>
00012 #include <cassert>
00013 #include <mimetic/libconfig.h>
00014 #include <mimetic/utils.h>
00015 #include <mimetic/circular_buffer.h>
00016 #include <mimetic/codec/codec_base.h>
00017 #include <mimetic/codec/codec_chain.h>
00018 
00019 namespace mimetic
00020 {
00021 
00022 class QP
00023 {
00024     friend class test_qp;
00025     enum { LF = 0xA, CR = 0xD, NL = LF, TAB = 9, SP = 32 };
00026     enum { default_maxlen = 76 };
00027     enum { 
00028         printable,  /* print as-is */
00029         tab,        /* print if !isBinary */
00030         sp,         /* ' ' */
00031         newline,    /* cr or lf; encode if isBinary*/    
00032         binary,     /* rest of the ascii map */
00033         unsafe      /* "!\"#$@[]\\^`{}|~" */
00034     };
00035     static char sTb[256];
00036 
00037 public:
00038 
00039 /// quoted-printable encoder
00040 /*!
00041 
00042  \sa encode decode
00043  */
00044 class Encoder: public buffered_codec, public chainable_codec<Encoder>
00045 {
00046     enum { laBufSz = 5 }; // look-ahead buffer
00047     size_t m_pos, m_maxlen;
00048     bool m_binary;
00049     circular_buffer<char_type> m_cbuf;
00050 
00051     template<typename OutIt>
00052     void hardLineBrk(OutIt& out)
00053     {
00054         *out = NL; ++out;
00055         m_pos = 1;
00056     }
00057     template<typename OutIt>
00058     void softLineBrk(OutIt& out)
00059     {
00060         *out = '='; ++out;
00061         hardLineBrk(out);
00062     }
00063     template<typename OutIt>
00064     void write(char_type ch, OutIt& out)
00065     {
00066         bool is_last_ch = m_cbuf.empty();
00067         if(!is_last_ch && m_pos == m_maxlen)
00068             softLineBrk(out);
00069         *out = ch; ++out;
00070         m_pos++;
00071     }
00072     template<typename OutIt>
00073     void writeHex(char_type ch, OutIt& out)
00074     {
00075         static char_type hexc[] =
00076         { 
00077             '0', '1', '2', '3', '4', '5' ,'6', '7', '8', '9',
00078             'A', 'B', 'C', 'D', 'E', 'F'
00079         };        
00080         bool is_last_ch = m_cbuf.empty();
00081         if(m_pos + (is_last_ch ? 1 : 2) >= m_maxlen)
00082             softLineBrk(out);
00083         // write out =HH
00084         *out = '='; ++out;
00085         *out = hexc[ch >> 4]; ++out;
00086         *out = hexc[ch & 0xf]; ++out;
00087         m_pos += 3;
00088     } 
00089     template<typename OutIt>
00090     void encodeChar(char_type c, OutIt& out)
00091     {
00092         int cnt = m_cbuf.count();
00093         switch(sTb[c])
00094         {
00095         case printable:
00096             if(m_pos == 1)
00097             {
00098                 switch(c)
00099                 {
00100                 case 'F': // hex enc on "^From .*"
00101                     if(cnt>=4 && m_cbuf.compare(0,4,"rom "))
00102                     {
00103                         writeHex(c,out);
00104                         return;
00105                     }
00106                     break;
00107                 case '.': // hex encode if "^.[\r\n]" or on eof
00108                     if(!cnt || sTb[ m_cbuf[0] ] == newline)
00109                     {
00110                         writeHex(c,out);
00111                         return;
00112                     }
00113                     break;
00114                 }
00115             } 
00116             write(c,out);
00117             break;
00118         case tab:
00119         case sp:
00120             // on binary encoding, or last input ch or newline
00121             if(m_binary || !cnt || sTb[ m_cbuf[0] ] == newline)
00122                 writeHex(c,out);
00123             else
00124                 write(c,out);
00125             break;
00126         case newline:
00127             if(m_binary)
00128                 writeHex(c, out);
00129             else {
00130                 if(cnt && m_cbuf[0] == (c == CR ? LF : CR))
00131                     m_cbuf.pop_front(); // eat it 
00132                 hardLineBrk(out);
00133             }
00134             break;
00135         case binary:
00136             if(!m_binary) m_binary = 1; // switch to binary mode
00137             writeHex(c, out);
00138             break;
00139         case unsafe:
00140             writeHex(c, out);
00141             break;
00142         }
00143     }
00144 public:
00145     /*! return the multiplier of the required (max) size of the output buffer 
00146      * when encoding */
00147     double codeSizeMultiplier() const
00148     {
00149         // worse case is *3 but we'll use the (euristic) average value of 1.5.
00150         // this may decrease performance when encoding messages with many 
00151         // non-ASCII (> 127) characters 
00152         return 1.5;
00153     }
00154     /*!
00155      Constructor
00156      \param isBinary if true all space and newline characters will be
00157      treated like binary chars and will be hex encoded (useful if you
00158      want to encode a binary file).
00159      */
00160     Encoder(bool isBinary = false)
00161     : m_pos(1), m_maxlen(default_maxlen), 
00162       m_binary(isBinary), m_cbuf(laBufSz) 
00163     {
00164     }
00165     /*! Returns the name of the codec ("Quoted-Printable") */
00166     const char* name() const { return "Quoted-Printable"; }
00167     /*! Returns the max line length */
00168     size_t maxlen()
00169     {
00170         return m_maxlen;
00171     }
00172     /*! 
00173         Set the max line length. No more then \p i chars will be 
00174         printed on one line.
00175     */
00176     void maxlen(size_t i)
00177     {
00178         m_maxlen = i;
00179     }
00180     /*! 
00181      Encodes [\p bit,\p eit) and write any encoded char to \p out.
00182      */
00183     template<typename InIt, typename OutIt>
00184     void process(InIt bit, InIt eit, OutIt out)
00185     {
00186         for(; bit != eit; ++bit)
00187             process(*bit, out);
00188         flush(out);
00189     }
00190     /*! 
00191      Encodes \p ic and write any encoded output char to \p out.
00192      \warning You must call flush() when all chars have been 
00193      processed by the encode funcion.
00194      \n
00195      \code
00196         while( (c = getchar()) != EOF )
00197             qp.process(c, out);    
00198         qp.flush();
00199      \endcode
00200      \n
00201      \sa flush()
00202      */
00203     template<typename OutIt>
00204     void process(char_type ic, OutIt& out)
00205     {
00206         m_cbuf.push_back(ic);
00207         if(m_cbuf.count() < laBufSz)
00208             return;
00209         char_type c = m_cbuf.front();
00210         m_cbuf.pop_front();
00211         encodeChar(c, out);
00212     }
00213     /*!
00214     Write to \p out any buffered encoded char.
00215      */
00216     template<typename OutIt>
00217     void flush(OutIt& out)
00218     {
00219         char_type c;
00220         while(!m_cbuf.empty())
00221         {
00222             c = m_cbuf.front();
00223             m_cbuf.pop_front();
00224             encodeChar(c, out);
00225         }
00226     }
00227 };
00228 
00229 /// quoted-printable decoder
00230 /*!
00231 
00232  \sa encode decode
00233  */
00234 class Decoder: public buffered_codec, public chainable_codec<Encoder>
00235 {
00236     enum { laBufSz = 80 }; // look-ahead buffer
00237     enum {
00238         sWaitingChar,
00239         sAfterEq,
00240         sWaitingFirstHex,
00241         sWaitingSecondHex,
00242         sBlank,
00243         sNewline,
00244         sOtherChar
00245     };
00246     size_t m_pos, m_maxlen;
00247 
00248 
00249     int m_state, m_nl;
00250     std::string m_prev;
00251 
00252     template<typename OutIt>
00253     void hardLineBrk(OutIt& out) const
00254     {
00255         *out = NL; ++out;
00256     }
00257     template<typename OutIt>
00258     void write(char_type ch, OutIt& out) const
00259     {
00260         *out = ch; ++out;
00261     }
00262     bool isnl(char_type c) const
00263     {
00264         return (c == CR || c == LF);
00265     }
00266     template<typename OutIt>
00267     void flushPrev(OutIt& out)
00268     {
00269         copy(m_prev.begin(), m_prev.end(), out);
00270         m_prev.clear();
00271     }
00272     int hex_to_int(char_type c) const
00273     {
00274         if( c >= '0' && c <='9') return c - '0';
00275         else if( c >= 'A' && c <='F') return c - 'A' + 10;
00276         else if( c >= 'a' && c <='f') return c - 'a' + 10;
00277         else return 0;
00278     }
00279     bool ishex(char_type c) const
00280     {
00281         return  (c >= '0' && c <= '9') || 
00282             (c >= 'A' && c <= 'F') || 
00283             (c >= 'a' && c <= 'f');
00284     }
00285     template<typename OutIt>
00286     void decodeChar(char_type c, OutIt& out)
00287     {
00288         for(;;)
00289         {
00290             switch(m_state)
00291             {
00292             case sBlank:
00293                 if(isblank(c))
00294                     m_prev.append(1,c);
00295                 else if(isnl(c)) {
00296                     // soft linebrk & ignore trailing blanks
00297                     m_prev.clear(); 
00298                     m_state = sWaitingChar;
00299                 } else {
00300                     flushPrev(out);
00301                     m_state = sWaitingChar;
00302                     continue;
00303                 }
00304                 return;
00305             case sAfterEq:
00306                 if(isblank(c))
00307                     m_prev.append(1,c);
00308                 else if(isnl(c)) {
00309                     // soft linebrk 
00310                     m_state = sNewline;
00311                     continue;
00312                 } else {
00313                     if(m_prev.length() > 1) 
00314                     {
00315                         // there're blanks after =
00316                         flushPrev(out);
00317                         m_state = sWaitingChar;
00318                     } else
00319                         m_state = sWaitingFirstHex;
00320                     continue;
00321                 }
00322                 return;
00323             case sWaitingFirstHex:
00324                 if(!ishex(c))
00325                 {
00326                     // malformed: =[not-hexch]
00327                     flushPrev(out);
00328                     write(c, out);
00329                     m_state = sWaitingChar;
00330                     return;
00331                 } else {
00332                     m_prev.append(1,c);
00333                     m_state = sWaitingSecondHex;
00334                 }
00335                 return;
00336             case sWaitingSecondHex:
00337                 if(!ishex(c))
00338                 { // malformed (=[hexch][not-hexch])
00339                     flushPrev(out);
00340                     write(c, out);
00341                 } else {
00342                     char_type oc, last;
00343                     assert(m_prev.length());
00344                     last = m_prev[m_prev.length()-1];
00345                     oc = hex_to_int(last) << 4 | 
00346                         hex_to_int(c) ;
00347                     write(oc,out);
00348                     m_prev.clear();
00349                 }
00350                 m_state = sWaitingChar;
00351                 return;
00352             case sNewline:
00353                 if(m_nl == 0)
00354                 {
00355                     m_nl = c;
00356                     return;
00357                 } else {
00358                     int len = m_prev.length();
00359                     if(!len || m_prev[0] != '=')
00360                         hardLineBrk(out);
00361                     m_prev.clear();
00362                     m_state = sWaitingChar;
00363                     bool is2Ch;
00364                     is2Ch = (c == (m_nl == CR ? LF : CR));
00365                     m_nl = 0;
00366                     if(is2Ch)
00367                         return;
00368                     continue;
00369                 }
00370             case sWaitingChar:
00371                 if(isblank(c))
00372                 {
00373                     m_state = sBlank;
00374                     continue;
00375                 } else if(isnl(c)) {
00376                     m_state = sNewline;
00377                     continue;
00378                 } else if(c == '=') {
00379                     m_state = sAfterEq;
00380                     m_prev.append(1, c);
00381                     return;
00382                 } else {
00383                     // WARNING: NOT ignoring chars > 126
00384                     // as suggested in rfc2045 6.7 note 4
00385                     if(c < 32 && c != TAB)
00386                     {
00387                         // malformed, CTRL ch found
00388                         // ignore (rfc2045 6.7 note 4)
00389                         return;
00390                     }
00391                     write(c,out);
00392                 }
00393                 return;
00394             }
00395         }
00396     }
00397 public:
00398     /*! Constructor */
00399     Decoder()
00400     : m_state(sWaitingChar), m_nl(0)
00401     {
00402     }
00403     /*! Returns the name of the codec ("Quoted-Printable") */
00404     const char* name() const { return "Quoted-Printable"; }
00405     /*! Returns the max line length */
00406     size_t maxlen()
00407     {
00408         return m_maxlen;
00409     }
00410     /*! 
00411     Set the max line length. No more then \p i chars will be 
00412     printed on one line.
00413     */
00414     void maxlen(size_t i)
00415     {
00416         m_maxlen = i;
00417     }
00418     /*! 
00419      Decodes [\p bit,\p eit) and write any decoded char to \p out.
00420      */
00421     template<typename InIt, typename OutIt>
00422     void process(InIt bit, InIt eit, OutIt out)
00423     {
00424         for(;bit != eit; ++bit)
00425             decodeChar(*bit, out);
00426         flush(out);
00427     }
00428     /*! 
00429      Decodes \p ic and write any decoded output char to \p out.
00430      
00431      \warning You must call flush() when all chars have been 
00432      processed by the code(...) funcion.
00433      \n
00434      \code
00435         while( (c = getchar()) != EOF )
00436             qp.process(c, out);    
00437         qp.flush();
00438      \endcode
00439      \n
00440      \sa flush()
00441      */
00442     template<typename OutIt>
00443     void process(char_type ic, OutIt& out)
00444     {
00445         decodeChar(ic, out);
00446     }
00447     /*!
00448     Write to \p out any buffered decoded char.
00449      */
00450     template<typename OutIt>
00451     void flush(OutIt& out)
00452     {
00453         /* m_prev can be (regex):
00454             empty: 
00455                 ok
00456             '=' : 
00457               malformed, '=' is last stream char, print as is
00458               (rfc2045 6.7 note 3)
00459             '=[a-zA-Z]'
00460               malformed, print as is
00461               (rfc2045 6.7 note 2)
00462             '= +'
00463               malformed, just print '=' and ignore trailing
00464               blanks (rfc2045 6.7 (3) )
00465         */
00466         int len = m_prev.length();
00467         if(len)
00468         {
00469             if(len == 1)
00470             {
00471                 /* malformed if m_prev[0] == '=' */
00472                 write('=', out);
00473             } else {
00474                 write('=', out);
00475                 if(m_prev[1] != ' ')
00476                     write(m_prev[1], out);
00477             }
00478         } else if(m_nl != 0) // stream ends with newline
00479             hardLineBrk(out);
00480 
00481     }
00482 };
00483 
00484 };
00485 
00486 
00487 } // namespace
00488 
00489 #endif
00490