21 #include <libbasexx-0/base64_encode.h>
22 #include <libbasexx-0/base64_decode.h>
23 #include <libjpiconv-0/iconv.h>
24 #include <libssiconv-0/iconv.h>
25 #include <libuciconv-0/iconv.h>
29 #include "fileutils.h"
58 #define MAIN_ERR_PREFIX "ENC: "
61 #define ENC_UC_NORM_DEBUG 0
64 #define ENC_MIME_PARA_LENGTH_MAX (size_t) 127
79 #define ENC_MIME_HEADER_FOLD_ASCII_LINES 1
152 struct mime_parameter
156 size_t attribute_len;
157 unsigned int section;
159 const char* value_start;
160 const char* value_end;
169 #define ENC_RC 0xFFFDL
175 #define ENC_UC_DECOMPOSITION_BUFSIZE (size_t) 16
178 #define ENC_HDR_BUFSIZE (size_t) 998
181 #define ENC_FMT_BUFLEN (size_t) 7
184 #include "../uc_cdc.c"
187 #include "../uc_hst.c"
190 #include "../uc_qc_nfc.c"
193 #include "../uc_fce.c"
196 #include "../uc_cf.c"
203 static volatile int ign;
/*
 * Decode one hexadecimal digit character into its numeric value.
 *
 * The visible branches map ASCII '0'-'9' to 0-9 and 'A'-'F' / 'a'-'f' to
 * 10-15. Any other value is reported with PRINT_ERROR.
 * NOTE(review): the value returned on error is not visible in this excerpt;
 * the quoted-printable decoder in this file treats a negative result as
 * "invalid nibble" - confirm.
 */
216 static int enc_hex_decode_nibble(
char nibble)
/* ASCII digits '0'..'9' */
221 if(0x30 <= n && 0x39 >= n) { res = n - 0x30; }
/* Uppercase 'A'..'F' */
222 else if(0x41 <= n && 0x46 >= n) { res = n - 0x41 + 10; }
/* Lowercase 'a'..'f' */
223 else if(0x61 <= n && 0x66 >= n) { res = n - 0x61 + 10; }
224 else {
PRINT_ERROR(
"Can't decode invalid hexadecimal nibble"); }
/*
 * Convert a string from an 8-bit character set to UTF-8.
 *
 * The input is scanned for bytes with the high bit (0x80) set; the result
 * of that scan is tracked in 'us_ascii' (presumably to skip conversion for
 * pure US-ASCII input - confirm against the elided lines).
 * Conversion is delegated to ssic0_iconvstr() with invalid input replaced
 * (SSIC0_ICONV_REPLACE_INVALID). The output buffer is sized at four bytes
 * per input byte plus a terminator and is shrunk with realloc afterwards.
 * On conversion failure an error is printed and the output buffer is freed.
 */
242 static const char* enc_8bit_convert_to_utf8(
enum enc_mime_cs charset,
245 const char* res = NULL;
247 unsigned char us_ascii = 1;
/* High bit set => byte is outside the US-ASCII range */
255 if ((
const unsigned char) 0x80 & (
const unsigned char) s[inlen])
/* Private, writable copy of the input (including the terminator) */
267 char* inbuf = api_posix_malloc(inlen + (
size_t) 1);
/* Worst case assumed here: every input byte expands to 4 bytes of UTF-8 */
271 size_t outlen = inlen * (size_t) 4;
273 const char* cs_name = NULL;
274 char* outbuf = api_posix_malloc(outlen + (
size_t) 1);
317 size_t rv = (size_t) -1;
319 memcpy(inbuf, s, inlen + (
size_t) 1);
320 rv = ssic0_iconvstr(
"UTF-8", cs_name,
321 inbuf, &inlen, outbuf, &outlen,
322 SSIC0_ICONV_REPLACE_INVALID);
/* Failure: error return, or input bytes left unconsumed */
323 if ((
size_t) -1 == rv || (
size_t) 0 != inlen)
326 PRINT_ERROR(
"Conversion from 8-bit codepage to UTF-8 failed");
327 api_posix_free((
void*) outbuf);
/* Shrink the buffer; if realloc fails the original block is released */
334 res = api_posix_realloc((
void*) outbuf, len + (
size_t) 1);
335 if (NULL == res) { api_posix_free((
void*) outbuf); }
339 api_posix_free((
void*) inbuf);
/*
 * Scan a NUL-terminated byte string as UTF-8 style multibyte sequences.
 *
 * Each byte outside 0..127 marks the string as multibyte. Lead bytes select
 * a sequence length of 2, 3 or 4; continuation bytes must match 10xxxxxx.
 * The decoded value is accumulated in 'mbc'. Invalid lead or continuation
 * bytes are reported with PRINT_ERROR.
 * When 'utf' is nonzero, values in the surrogate range U+D800..U+DFFF take
 * part in a check whose other operands are not visible here.
 * NOTE(review): the exact meaning of the return value (and how it relates
 * to CESU-8 detection, as the name suggests) cannot be established from
 * the visible lines - confirm against the full function.
 */
368 static int enc_uc_check_cesu8(
const char* s,
unsigned char utf)
375 size_t remaining = 0;
376 unsigned long int mbc = 0;
379 while((c = (
int) s[i++]))
384 if(!(0 <= c && 127 >= c)) { multibyte = 1; }
/* Lead byte patterns: 110xxxxx, 1110xxxx, 11110xxx */
391 if((c & 0xE0) == 0xC0) { len = 2; }
392 else if((c & 0xF0) == 0xE0) { len = 3; }
393 else if((c & 0xF8) == 0xF0) { len = 4; }
396 PRINT_ERROR(
"Invalid start of code sequence in UTF-8 data");
/* Payload bits of the lead byte go to the top of the code point */
402 case 2: mbc |= (
unsigned long int) (c & 0x1F) << 6;
break;
403 case 3: mbc |= (
unsigned long int) (c & 0x0F) << 12;
break;
404 case 4: mbc |= (
unsigned long int) (c & 0x07) << 18;
break;
406 remaining = len - (size_t) 1;
/* Continuation bytes must be 10xxxxxx */
410 if((c & 0xC0) != 0x80)
412 PRINT_ERROR(
"Invalid continuation character in UTF-8 sequence");
/* Each continuation byte contributes 6 bits */
419 mbc |= (
unsigned long int) (c & 0x3F) << remaining * (size_t) 6;
438 || (utf && 0x00D800UL <= mbc && 0x00DFFFUL >= mbc))
/* Range test for supplementary planes (U+10000..U+10FFFF) */
447 if(0x010000UL > mbc || 0x10FFFFUL < mbc)
471 if(multibyte) { res = -1; }
/*
 * Decode the next code point from the UTF-8 string 's'.
 *
 * Reading starts at index '*i', and '*i' is advanced past every byte
 * consumed. A byte in 0..127 is returned directly. Otherwise the lead byte
 * selects a 2, 3 or 4 byte sequence whose payload bits are assembled in
 * 'mbc'. On a detected error the result is -1.
 * NOTE(review): callers in this file stop iterating when -1 is returned;
 * whether end-of-string also yields -1 depends on an initial value that is
 * not visible here - confirm.
 */
495 static long int enc_uc_decode_utf8(
const char* s,
size_t* i)
501 size_t remaining = 0;
502 unsigned long int mbc = 0;
506 while((c = (
int) s[(*i)++]))
/* Single-byte (ASCII) code point */
511 if(0 <= c && 127 >= c) { res = (
long int) c;
break; }
512 else { multibyte = 1; }
/* Lead byte patterns: 110xxxxx, 1110xxxx, 11110xxx */
519 if((c & 0xE0) == 0xC0) { len = 2; }
520 else if((c & 0xF0) == 0xE0) { len = 3; }
521 else if((c & 0xF8) == 0xF0) { len = 4; }
524 case 2: mbc |= (
unsigned long int) (c & 0x1F) << 6;
break;
525 case 3: mbc |= (
unsigned long int) (c & 0x0F) << 12;
break;
526 case 4: mbc |= (
unsigned long int) (c & 0x07) << 18;
break;
529 PRINT_ERROR(
"UTF-8 decoder called with invalid data");
534 if(error) { res = -1L;
break; }
535 remaining = len - (size_t) 1;
/* Each continuation byte contributes 6 bits */
540 mbc |= (
unsigned long int) (c & 0x3F) << remaining * (size_t) 6;
544 res = (
long int) mbc;
576 unsigned char prefix;
579 for(ii = 0; ii < *di; ++ii)
581 if (0L > dbuf[ii]) { inval = 1; }
582 else if(0x00007FL >= dbuf[ii]) { buf[(*i)++] = (char) dbuf[ii]; }
583 else if(0x0007FFL >= dbuf[ii])
585 data = (
unsigned char) ((dbuf[ii] >> 6) & 0x1FL);
587 buf[(*i)++] = (char) (prefix |
data);
588 data = (
unsigned char) (dbuf[ii] & 0x3FL);
590 buf[(*i)++] = (char) (prefix |
data);
592 else if(0x00FFFFL >= dbuf[ii])
594 data = (
unsigned char) ((dbuf[ii] >> 12) & 0x0FL);
596 buf[(*i)++] = (char) (prefix |
data);
597 data = (
unsigned char) ((dbuf[ii] >> 6) & 0x3FL);
599 buf[(*i)++] = (char) (prefix |
data);
600 data = (
unsigned char) (dbuf[ii] & 0x3FL);
602 buf[(*i)++] = (char) (prefix |
data);
604 else if(0x10FFFFL >= dbuf[ii])
606 data = (
unsigned char) ((dbuf[ii] >> 18) & 0x07L);
608 buf[(*i)++] = (char) (prefix |
data);
609 data = (
unsigned char) ((dbuf[ii] >> 12) & 0x3FL);
611 buf[(*i)++] = (char) (prefix |
data);
612 data = (
unsigned char) ((dbuf[ii] >> 6) & 0x3FL);
614 buf[(*i)++] = (char) (prefix |
data);
615 data = (
unsigned char) (dbuf[ii] & 0x3FL);
617 buf[(*i)++] = (char) (prefix |
data);
622 PRINT_ERROR(
"Unicode UTF-8 encoder: Invalid codepoint detected");
/*
 * Check whether a Unicode code point is one of the rejected control or
 * formatting code points.
 *
 * The result is set to -1 for the ranges listed below. The value used for
 * accepted code points is initialized in a line not visible here.
 */
647 static int enc_uc_check_control(
long int ucp)
/* C0 controls and DEL ... */
652 if(0x1FL >= ucp || 0x7FL == ucp)
/* ... except HT (0x09), LF (0x0A) and CR (0x0D) */
655 if(0x09L != ucp && 0x0AL != ucp && 0x0DL != ucp) { res = -1; }
/* C1 controls */
658 else if(0x80L <= ucp && 0x9FL >= ucp) { res = -1; }
/* U+FFF9..U+FFFB (interlinear annotation characters) */
660 else if(0xFFF9L <= ucp && 0xFFFBL >= ucp) { res = -1; }
/* U+2028 / U+2029 (line and paragraph separator) */
662 else if(0x2028L <= ucp && 0x2029L >= ucp) { res = -1; }
/* U+E0001 (language tag) */
664 else if( 0xE0001L == ucp) { res = -1; }
/* U+E0020..U+E007F (tag characters) */
673 else if(0xE0020L <= ucp && 0xE007FL >= ucp) { res = -1; }
/*
 * Look up a code point in uc_cdc_table and copy its entry to 'res'.
 *
 * The table is scanned linearly until the sentinel entry (cp == -1). On a
 * match the fields ccc, dc1 and dc2 are copied to 'res'. If no first
 * decomposition element is set afterwards (dc1 == -1), the code point is
 * recorded as its own decomposition: dc1 = ucp, dc2 = -1.
 * Other code in this file uses 'ccc' as the canonical combining class and
 * 'dc1'/'dc2' as the canonical decomposition pair.
 */
687 static void enc_uc_lookup_cdc(
long int ucp,
struct uc_cdc* res)
699 while(-1L != uc_cdc_table[i].cp)
701 if(ucp == uc_cdc_table[i].cp)
704 res->ccc = uc_cdc_table[i].ccc;
705 res->dc1 = uc_cdc_table[i].dc1;
706 res->dc2 = uc_cdc_table[i].dc2;
/* No decomposition: the code point maps to itself */
713 if(-1L == res->dc1) { res->dc1 = ucp; res->dc2 = -1L; }
/*
 * Look up the canonical composition of 'starter' followed by 'cm'.
 *
 * Visible steps:
 *  - uc_cdc_table is searched for an entry whose decomposition pair
 *    (dc1, dc2) equals (starter, cm); its code point becomes the candidate.
 *  - The candidate is checked against the ranges in uc_fce_table (the debug
 *    text calls a hit a "Canonical composition exception").
 *  - While no result is found (-1), uc_hst_table is consulted to compose
 *    Hangul syllables arithmetically: L + V gives an LV syllable and
 *    LV + T gives an LVT syllable, using the SBase/LBase/VBase/TBase
 *    constants below.
 *  - The Hangul result is verified against the syllable type in
 *    uc_hst_table.
 * NOTE(review): -1 appears to mean "no composition"; confirm against the
 * elided initialization and return statement.
 */
732 static long int enc_uc_lookup_cc(
long int starter,
long int cm)
/* Hangul syllable composition constants */
742 const long int SBase = 0xAC00L;
743 const long int LBase = 0x1100L;
744 const long int VBase = 0x1161L;
745 const long int TBase = 0x11A7L;
746 const long int NCount = 588L;
747 const long int LCount = 19L;
748 const long int VCount = 21L;
749 const long int TCount = 28L;
752 enum uc_hs_type hst2;
758 enc_uc_lookup_cdc(starter, &cdc);
/* Search for a code point that decomposes to exactly (starter, cm) */
763 while(-1L != uc_cdc_table[i].cp)
765 if(uc_cdc_table[i].dc1 == starter && uc_cdc_table[i].dc2 == cm)
768 res = uc_cdc_table[i].cp;
/* Check the candidate against the exclusion ranges */
771 while(-1L != uc_fce_table[ii].first)
773 first = uc_fce_table[ii].first;
774 last = uc_fce_table[ii].last;
775 if(first <= res && last >= res)
778 #if ENC_UC_NORM_DEBUG
779 printf(
" Canonical composition exception\n");
795 enc_uc_lookup_cdc(cm, &cdc);
/* No table hit so far: try algorithmic Hangul composition */
799 while(-1L == res && -1L != uc_hst_table[i].first)
801 first = uc_hst_table[i].first;
802 last = uc_hst_table[i].last;
803 hst = uc_hst_table[i].hst;
804 if(first <= starter && last >= starter)
806 if(UC_HST_L == hst || UC_HST_LV == hst)
/* Determine the Hangul syllable type of the second code point */
810 while(-1L != uc_hst_table[ii].first)
812 first = uc_hst_table[ii].first;
813 last = uc_hst_table[ii].last;
814 hst2 = uc_hst_table[ii].hst;
815 if(first <= cm && last >= cm)
/* L + V => LV syllable */
817 if(UC_HST_L == hst && UC_HST_V == hst2)
819 if(LBase <= starter && VBase <= cm)
821 LIndex = starter - LBase;
827 #if ENC_UC_NORM_DEBUG
828 printf(
" Canonical composition"
829 " for hangul LV-syllable found\n");
830 printf(
"Hangul LIndex: %ld\n", LIndex);
831 printf(
"Hangul VIndex: %ld\n", VIndex);
835 + LIndex * NCount + VIndex * TCount;
/* LV + T => LVT syllable */
840 else if(UC_HST_LV == hst && UC_HST_T == hst2)
847 #if ENC_UC_NORM_DEBUG
848 printf(
" Canonical composition"
849 " for hangul LVT-syllable found\n");
850 printf(
"Hangul TIndex: %ld\n", TIndex);
853 res = starter + TIndex;
862 " for hangul syllable failed");
/* Verify that the result has the expected syllable type */
869 while(-1L != uc_hst_table[ii].first)
871 first = uc_hst_table[ii].first;
872 last = uc_hst_table[ii].last;
873 hst = uc_hst_table[ii].hst;
874 if(first <= res && last >= res)
876 if(2 == jamo && UC_HST_LV != hst)
881 if(3 == jamo && UC_HST_LVT != hst)
/*
 * Look up a code point in uc_cf_table and store its mapping.
 *
 * The table is scanned linearly until the sentinel entry (cp == -1). On a
 * match the three fields first/second/third are copied to mapping[0..2].
 * NOTE(review): "cf" presumably stands for case folding, and the values
 * left in 'mapping' when nothing matches are set in lines not visible
 * here - confirm.
 */
927 static void enc_uc_lookup_cf(
long int ucp,
long int mapping[3])
933 while(-1L != uc_cf_table[i].cp)
935 if(ucp == uc_cf_table[i].cp)
938 mapping[0] = uc_cf_table[i].first;
939 mapping[1] = uc_cf_table[i].second;
940 mapping[2] = uc_cf_table[i].third;
/*
 * Count the code points of a UTF-8 string that have canonical combining
 * class zero.
 *
 * Code points are decoded one at a time; decoding stops on -1. When 'end'
 * is nonzero, counting stops once the byte index reaches 'end'.
 * U+00AD (soft hyphen) is excluded from counting while 'end' is nonzero
 * and the index is still below 'end'.
 */
977 static size_t enc_uc_get_glyph_count(
const char* s,
size_t end)
986 ucp = enc_uc_decode_utf8(s, &i);
987 if(-1L == ucp) {
break; }
/* Skip U+00AD unless it is at/after 'end' (or no limit was given) */
991 if (!((0x00ADL == ucp) && (end && i < end)))
994 enc_uc_lookup_cdc(ucp, &cdc);
/* Only code points with ccc == 0 are counted */
995 if(!cdc.ccc) { ++res; }
998 if(end && i >= end) {
break; }
/*
 * Quick check whether a UTF-8 string may not be in NFC.
 *
 * Each decoded code point is compared against the ranges in
 * uc_qc_nfc_table, and the canonical combining classes of consecutive
 * code points are checked for ordering: a nonzero ccc that is lower than
 * the previous one sets the result to -1 and stops the scan.
 * With ENC_UC_NORM_DEBUG enabled, the string is dumped as hex bytes.
 * NOTE(review): what a hit in uc_qc_nfc_table does to the result is not
 * visible here; the debug text says "Maybe not NFC" - confirm.
 */
1019 static int enc_uc_check_nfc(
const char* s)
1028 unsigned char ccc_last = 0;
1032 ucp = enc_uc_decode_utf8(s, &i);
1033 if(-1L == ucp) {
break; }
1039 while(-1L != uc_qc_nfc_table[ii].first)
1041 first = uc_qc_nfc_table[ii].first;
1042 last = uc_qc_nfc_table[ii].last;
1043 if(first <= ucp && last >= ucp)
1053 enc_uc_lookup_cdc(ucp, &cdc);
/* Combining marks out of canonical order => not normalized */
1054 if(cdc.ccc && (cdc.ccc < ccc_last)) { res = -1;
break; }
1057 else { ccc_last = 0; }
1060 #if ENC_UC_NORM_DEBUG
1063 printf(
"Maybe not NFC: %s (len: %u)\n ", s, (
unsigned int) strlen(s));
1066 printf(
" 0x%02X", (
unsigned int) (
unsigned char) s[i++]);
/*
 * Recursively decompose a code point into the decomposition buffer.
 *
 * 'dbuf' receives the resulting code points and '*di' is the current fill
 * index (advanced for every code point appended).
 * If fewer than two free slots remain (relative to
 * ENC_UC_DECOMPOSITION_BUFSIZE), an error is printed and the beginning of
 * the buffer is overwritten with the characters "[Error]".
 * Otherwise the first decomposition element is decomposed recursively
 * until it maps to itself, and the second element (if any) is appended.
 * NOTE(review): the return value on the error path and the value of '*di'
 * after writing "[Error]" are set in lines not visible here - confirm.
 */
1090 static int enc_uc_engine_decompose(
long int ucp,
long int* dbuf,
size_t* di)
/* Need room for up to two more code points */
1096 if(ENC_UC_DECOMPOSITION_BUFSIZE - (
size_t) 2 <= *di)
1099 PRINT_ERROR(
"Unicode canonical decomposition engine failed");
/* Replace buffer content with a visible error marker */
1100 dbuf[0] = (
long int) (
unsigned char)
'[';
1101 dbuf[1] = (
long int) (
unsigned char)
'E';
1102 dbuf[2] = (
long int) (
unsigned char)
'r';
1103 dbuf[3] = (
long int) (
unsigned char)
'r';
1104 dbuf[4] = (
long int) (
unsigned char)
'o';
1105 dbuf[5] = (
long int) (
unsigned char)
'r';
1106 dbuf[6] = (
long int) (
unsigned char)
']';
1113 enc_uc_lookup_cdc(ucp, &cdc);
/* Recurse while the first element still decomposes further */
1114 if(cdc.dc1 != ucp) { res = enc_uc_engine_decompose(cdc.dc1, dbuf, di); }
1115 else { dbuf[(*di)++] = cdc.dc1; }
1116 if(-1L != cdc.dc2) { dbuf[(*di)++] = cdc.dc2; }
/*
 * Reorder the code points in the decomposition buffer in place.
 *
 * The buffer 'dbuf' holds 'di' code points. For each position the
 * canonical combining class is looked up. For a nonstarter, the length of
 * the run up to the next code point with ccc == 0 is determined and that
 * run ("sort burst" in the debug output) is sorted by swapping adjacent
 * elements.
 * NOTE(review): the swap condition compares the combining classes of the
 * two neighbours in lines not visible here - presumably an ascending,
 * stable exchange sort as canonical ordering requires; confirm.
 */
1133 static void enc_uc_engine_reorder(
long int* dbuf,
size_t di)
1135 size_t i, ii, iii, iiii;
1142 for(i = 0; i < di; ++i)
1144 enc_uc_lookup_cdc(dbuf[i], &cdc);
1148 #if ENC_UC_NORM_DEBUG
1149 printf(
" Nonstarter: U+%04lX (ccc=%u)\n",
1150 dbuf[i], (
unsigned int) cdc.ccc);
/* Find the end of the nonstarter run */
1155 enc_uc_lookup_cdc(dbuf[ii], &cdc);
1156 if(!cdc.ccc) {
break; }
1160 #if ENC_UC_NORM_DEBUG
1161 printf(
" Sort burst: len=%u\n", (
unsigned int) len);
/* Exchange sort over the run [i, i + len) */
1163 for(iii = i; iii < i + len; ++iii)
1171 for(iiii = i + len - (
size_t) 1; iiii > iii; --iiii)
1173 enc_uc_lookup_cdc(dbuf[iiii - (
size_t) 1], &cdc);
1175 enc_uc_lookup_cdc(dbuf[iiii], &cdc);
/* Swap the adjacent pair */
1179 tmp = dbuf[iiii - (size_t) 1];
1180 dbuf[iiii - (size_t) 1] = dbuf[iiii];
1186 #if ENC_UC_NORM_DEBUG
1187 else { printf(
" Starter : U+%04lX\n", dbuf[i]); }
/*
 * Compose code points in the decomposition buffer in place.
 *
 * For each candidate dbuf[i], the canonical composition with the first
 * buffer element dbuf[0] is looked up. The code points between them are
 * inspected via their combining classes and may veto the composition
 * ("Don't compose" in the debug output). When a composition is accepted,
 * the consumed code point is removed by shifting the remaining elements
 * down by one.
 * NOTE(review): the exact blocking rule and the update of '*di' are in
 * lines not visible here - confirm.
 */
1200 static void enc_uc_engine_compose(
long int* dbuf,
size_t* di)
1212 #if ENC_UC_NORM_DEBUG
1214 printf(
" Starter at beginning : U+%04lX\n", dbuf[0]);
1215 printf(
" Codepoint in question: U+%04lX\n", dbuf[i]);
1217 ucp = enc_uc_lookup_cc(dbuf[0], dbuf[i]);
1221 enc_uc_lookup_cdc(dbuf[i], &cdc);
1223 #if ENC_UC_NORM_DEBUG
1224 printf(
" Codepoint has ccc : %u\n", (
unsigned int) ccc);
1225 printf(
" Canonical composition: U+%04lX\n", ucp);
/* Inspect the code points between the starter and the candidate */
1229 for(ii = 1; ii < i; ++ii)
1231 enc_uc_lookup_cdc(dbuf[ii], &cdc);
1235 #if ENC_UC_NORM_DEBUG
1236 printf(
" => Don't compose\n");
1242 if(skip) {
continue; }
1244 #if ENC_UC_NORM_DEBUG
1245 printf(
" => Compose\n");
/* Remove element i by shifting the tail down */
1247 for(ii = i; ii < *di - (size_t) 1; ++ii)
1249 dbuf[ii] = dbuf[ii + (size_t) 1];
1256 #if ENC_UC_NORM_DEBUG
/*
 * Common normalization engine used for both NFD and the first NFC pass.
 *
 * The input 's' is decoded code point by code point. Code points are
 * decomposed into 'dbuf', reordered, and - only when 'nfc' is nonzero -
 * composed again. The result buffer is grown with realloc whenever less
 * than 4 * ENC_UC_DECOMPOSITION_BUFSIZE bytes remain free; if realloc
 * fails the buffer is released and the result becomes NULL.
 * NOTE(review): the role of the length parameter 'l' and the flush points
 * of 'dbuf' are in lines not visible here - confirm.
 */
1280 static const char* enc_uc_engine_n(
const char* s,
size_t* l,
int nfc)
1290 long int dbuf[ENC_UC_DECOMPOSITION_BUFSIZE];
/* Ensure room for one fully encoded decomposition buffer */
1297 if(rlen - ri <= (
size_t) 4 * ENC_UC_DECOMPOSITION_BUFSIZE)
1304 if(!rlen) { rlen = (size_t) 4 * ENC_UC_DECOMPOSITION_BUFSIZE; }
1306 p = api_posix_realloc((
void*) res, rlen);
/* On allocation failure release the old buffer and give up */
1307 if(NULL == p) { api_posix_free((
void*) res); res = NULL;
break; }
1312 ucp = enc_uc_decode_utf8(s, &i);
1313 if(-1L == ucp) {
break; }
1314 enc_uc_lookup_cdc(ucp, &cdc);
1322 enc_uc_engine_reorder(dbuf, di);
1323 if(nfc) { enc_uc_engine_compose(dbuf, &di); }
1330 if(!error) { error = enc_uc_engine_decompose(ucp, dbuf, &di); }
1335 enc_uc_engine_reorder(dbuf, di);
1336 if(nfc) { enc_uc_engine_compose(dbuf, &di); }
/*
 * NFD entry point: runs the common engine with composition disabled
 * (third argument 0) and returns its result unchanged.
 */
1367 static const char* enc_uc_engine_nfd(
const char* s,
size_t* l)
1369 return(enc_uc_engine_n(s, l, 0));
/*
 * First NFC pass: runs the common engine with composition enabled
 * (third argument 1) and returns its result unchanged.
 */
1393 static const char* enc_uc_engine_nfc_part1(
const char* s,
size_t* l)
1395 return(enc_uc_engine_n(s, l, 1));
/*
 * Second NFC pass over an already processed string.
 *
 * A result buffer of l + 1 bytes is allocated, the input is decoded code
 * point by code point, and pairs collected in 'dbuf' (two entries) are
 * passed to the composition step. The result is NUL-terminated if the
 * allocation succeeded. With ENC_UC_NORM_DEBUG enabled the result is
 * dumped as hex bytes.
 * NOTE(review): the third parameter and the pairing logic are not visible
 * here; the NFC caller passes the address of a flag and repeats the first
 * pass when it is set - confirm its exact meaning.
 */
1416 static const char* enc_uc_engine_nfc_part2(
const char* s,
size_t l,
1428 #if ENC_UC_NORM_DEBUG
1429 printf(
" *** Part 2 ***\n");
/* One extra byte for the terminator */
1431 res = api_posix_malloc(++l);
1437 ucp = enc_uc_decode_utf8(s, &i);
1438 if(-1L == ucp) {
break; }
1441 enc_uc_lookup_cdc(ucp, &cdc);
1450 if((
size_t) 2 == di)
1452 enc_uc_engine_compose(dbuf, &di);
/* Still two entries => the pair did not compose */
1454 if((
size_t) 2 == di)
1473 if(NULL != res) { res[ri] = 0; }
1475 #if ENC_UC_NORM_DEBUG
1478 printf(
"Now NFC: %s (len: %u)\n ", res, (
unsigned int) strlen(res));
1479 i = 0;
while(res[i])
1481 printf(
" 0x%02X", (
unsigned int) (
unsigned char) res[i++]);
/*
 * Strip code points with a nonzero canonical combining class from the
 * beginning of a UTF-8 string.
 *
 * Code points are decoded from the start until the first one with
 * ccc == 0 (or a decoding error). If anything had to be skipped, a
 * "Semantic error in Unicode string" message is printed and a newly
 * allocated copy of the remaining string is produced.
 * NOTE(review): the condition guarding the PRINT_ERROR reads '!skip' in
 * the visible line, so the flag's polarity and the no-strip return value
 * (callers compare the result with 's') should be confirmed against the
 * elided lines.
 */
1510 static const char* enc_uc_strip_dccs(
const char* s)
1512 const char* res = NULL;
1523 ucp = enc_uc_decode_utf8(s, &i);
1524 if(-1L == ucp) {
break; }
1525 enc_uc_lookup_cdc(ucp, &cdc);
/* First starter found: stop skipping */
1526 if(!cdc.ccc) {
break; }
1530 if(!skip) {
PRINT_ERROR(
"Semantic error in Unicode string"); }
/* Copy the tail (including the terminator) into a new buffer */
1537 len = strlen(&s[i]);
1538 res = (
const char*) api_posix_malloc(++len);
1539 if(NULL != res) { memcpy((
void*) res, &s[i], len); }
/*
 * Normalize a UTF-8 string to NFD.
 *
 * Leading combining characters are stripped first, then the NFD engine is
 * run on the result. An intermediate buffer is freed only if it is not the
 * caller's original string 's'. A NULL result is reported with
 * PRINT_ERROR.
 */
1576 static const char* enc_uc_normalize_to_nfd(
const char* s)
1578 const char* res = NULL;
1583 res = enc_uc_strip_dccs(s);
1587 tgt = enc_uc_engine_nfd(res, &l);
/* Never free the caller's own string */
1588 if(res != s) { api_posix_free((
void*) res); }
1593 if(NULL == res) {
PRINT_ERROR(
"Unicode NFD normalization failed"); }
/*
 * Normalize a UTF-8 string to NFC.
 *
 * Visible pipeline: strip leading combining characters; if the quick check
 * reports that the string may not be NFC, run the NFD engine, then NFC
 * pass 1, then NFC pass 2; if pass 2 sets 'flag', run pass 1 once more.
 * Intermediate buffers are freed between stages; buffers that may still be
 * the caller's string 's' are freed only when they differ from it.
 * A NULL result is reported with PRINT_ERROR.
 */
1631 static const char* enc_uc_normalize_to_nfc(
const char* s)
1633 const char* res = NULL;
1639 res = enc_uc_strip_dccs(s);
/* Nonzero quick-check result => full normalization required */
1642 if(NULL != res && enc_uc_check_nfc(res))
1645 tgt = enc_uc_engine_nfd(res, &l);
1646 if(res != s) { api_posix_free((
void*) res); }
1651 tgt = enc_uc_engine_nfc_part1(res, &l);
1652 if(res != s) { api_posix_free((
void*) res); }
1657 tgt = enc_uc_engine_nfc_part2(res, l, &flag);
1658 api_posix_free((
void*) res);
/* Pass 2 requested another run of pass 1 */
1660 if(NULL != res && flag)
1663 tgt = enc_uc_engine_nfc_part1(res, &l);
1664 api_posix_free((
void*) res);
1672 if(NULL == res) {
PRINT_ERROR(
"Unicode NFC normalization failed"); }
/*
 * Convert a string in the encoding named by 'e' to UTF-8 (the error text
 * names CESU-8 and UTF-7 as the expected sources).
 *
 * The input is scanned for bytes that have the high bit set or equal 0x1B
 * (ESC) or 0x2B ('+'); the result of that scan is tracked in 'us_ascii'
 * (presumably to skip conversion for plain input - confirm).
 * Conversion is delegated to ucic0_iconvstr() with invalid input replaced.
 * The output buffer is sized at four bytes per input byte plus terminator
 * and shrunk with realloc afterwards. On failure an error is printed and
 * the output buffer is freed.
 */
1691 static const char* enc_uc_convert_nsutf_to_utf8(
const char* s,
const char* e)
1693 const char* res = NULL;
1695 unsigned char us_ascii = 1;
/* Bytes that can start a non-ASCII or shifted sequence */
1700 if ( ((
const unsigned char) 0x80 & (
const unsigned char) s[inlen]) ||
1701 ((
const unsigned char) 0x1B == (
const unsigned char) s[inlen]) ||
1702 ((
const unsigned char) 0x2B == (
const unsigned char) s[inlen]) )
/* Private, writable copy of the input (including the terminator) */
1714 char* inbuf = api_posix_malloc(inlen + (
size_t) 1);
1718 size_t outlen = inlen * (size_t) 4;
1719 size_t len = outlen;
1720 char* outbuf = api_posix_malloc(outlen + (
size_t) 1);
1724 size_t rv = (size_t) -1;
1726 memcpy(inbuf, s, inlen + (
size_t) 1);
1727 rv = ucic0_iconvstr(
"UTF-8", e, inbuf, &inlen, outbuf, &outlen,
1728 UCIC0_ICONV_REPLACE_INVALID);
/* Failure: error return, or input bytes left unconsumed */
1729 if ((
size_t) -1 == rv || (
size_t) 0 != inlen)
1732 PRINT_ERROR(
"Conversion from CESU-8 or UTF-7 to UTF-8 failed");
1733 api_posix_free((
void*) outbuf);
/* Shrink the buffer; if realloc fails the original block is released */
1740 res = api_posix_realloc((
void*) outbuf, len + (
size_t) 1);
1741 if (NULL == res) { api_posix_free((
void*) outbuf); }
1744 api_posix_free((
void*) inbuf);
/*
 * Convert an ISO-2022-JP encoded string to UTF-8.
 *
 * The input is scanned for bytes that have the high bit set or equal 0x1B
 * (ESC); the result of that scan is tracked in 'us_ascii' (presumably to
 * skip conversion for plain input - confirm).
 * Conversion is delegated to jpic0_iconvstr() with invalid input replaced.
 * The output buffer is sized at four bytes per input byte plus terminator
 * and shrunk with realloc afterwards. On failure an error is printed and
 * the output buffer is freed.
 */
1762 static const char* enc_iso2022jp_convert_to_utf8(
const char* s)
1764 const char* res = NULL;
1766 unsigned char us_ascii = 1;
/* 8-bit data or an escape sequence present */
1774 if ( ((
const unsigned char) 0x80 & (
const unsigned char) s[inlen]) ||
1775 ((
const unsigned char) 0x1B == (
const unsigned char) s[inlen]) )
/* Private, writable copy of the input (including the terminator) */
1787 char* inbuf = api_posix_malloc(inlen + (
size_t) 1);
1791 size_t outlen = inlen * (size_t) 4;
1792 size_t len = outlen;
1793 char* outbuf = api_posix_malloc(outlen + (
size_t) 1);
1797 size_t rv = (size_t) -1;
1799 memcpy(inbuf, s, inlen + (
size_t) 1);
1800 rv = jpic0_iconvstr(
"UTF-8",
"ISO-2022-JP",
1801 inbuf, &inlen, outbuf, &outlen,
1802 JPIC0_ICONV_REPLACE_INVALID);
/* Failure: error return, or input bytes left unconsumed */
1803 if ((
size_t) -1 == rv || (
size_t) 0 != inlen)
1806 PRINT_ERROR(
"Conversion from ISO-2022-JP to UTF-8 failed");
1807 api_posix_free((
void*) outbuf);
/* Shrink the buffer; if realloc fails the original block is released */
1814 res = api_posix_realloc((
void*) outbuf, len + (
size_t) 1);
1815 if (NULL == res) { api_posix_free((
void*) outbuf); }
1818 api_posix_free((
void*) inbuf);
/*
 * Map a MIME character set name to the internal charset ID.
 *
 * The first 'len' bytes of 's' are converted to upper case into a local
 * buffer and compared against a list of known names and aliases. Names
 * that are too long are rejected with an error message.
 * Several nonstandard spellings (e.g. "ISO8859-1", "MACROMAN", "CP1252",
 * "UTF7", "UTF8") are accepted as well; an "Invalid ... accepted" message
 * is printed for those.
 * If nothing matched (ENC_CS_UNKNOWN), an "Unsupported character set"
 * message including the name is assembled in a temporary buffer.
 */
1841 static enum enc_mime_cs enc_mime_get_charset(
const char* s,
size_t len)
1846 const char not_supported[] =
"MIME: Unsupported character set: ";
1856 PRINT_ERROR(
"MIME: Name of character set too long");
/* Case-insensitive matching: compare against an upper-case copy */
1861 for(i = 0; i < len; ++i)
/*
 * NOTE(review): s[i] is converted to int directly. Where plain char is
 * signed, bytes >= 0x80 become negative, which toupper() does not accept
 * (other than EOF); consider converting via unsigned char.
 */
1863 buf[i] = (char) toupper((
int) s[i]);
1896 else if(!strcmp(buf,
"KOI8-R")) { res =
ENC_CS_KOI8R; }
1897 else if(!strcmp(buf,
"KOI8-U")) { res =
ENC_CS_KOI8U; }
/* Aliases for US-ASCII */
1908 else if(!strcmp(buf,
"ANSI_X3.4-1968")) { res =
ENC_CS_ASCII; }
1909 else if(!strcmp(buf,
"ANSI_X3.4-1986")) { res =
ENC_CS_ASCII; }
1910 else if(!strcmp(buf,
"ISO-IR-6")) { res =
ENC_CS_ASCII; }
1911 else if(!strcmp(buf,
"ISO_646.IRV:1991")) { res =
ENC_CS_ASCII; }
1912 else if(!strcmp(buf,
"ISO646-US")) { res =
ENC_CS_ASCII; }
1913 else if(!strcmp(buf,
"IBM367")) { res =
ENC_CS_ASCII; }
1915 else if(!strcmp(buf,
"CSASCII")) { res =
ENC_CS_ASCII; }
1918 else if(!strcmp(buf,
"CSUTF-8")) { res =
ENC_CS_UTF_8; }
1923 else if(!strcmp(buf,
"CSUTF-7")) { res =
ENC_CS_UTF_7; }
2047 else if(!strcmp(buf,
"CSKOI8R")) { res =
ENC_CS_KOI8R; }
2048 else if(!strcmp(buf,
"CSKOI8U")) { res =
ENC_CS_KOI8U; }
/* Aliases for IBM codepages */
2056 else if(!strcmp(buf,
"CSPC8CODEPAGE437")) { res =
ENC_CS_IBM437; }
2060 else if(!strcmp(buf,
"CSPC775BALTIC")) { res =
ENC_CS_IBM775; }
2063 else if(!strcmp(buf,
"CSPC850MULTILINGUAL")) { res =
ENC_CS_IBM850; }
2071 else if(!strcmp(buf,
"PC-MULTILINGUAL-850+EURO"))
2073 else if(!strcmp(buf,
"CSIBM00858")) { res =
ENC_CS_IBM858; }
2074 else if(!strcmp(buf,
"CCSID00858")) { res =
ENC_CS_IBM858; }
/* ISO 8859 names written without the hyphen after "ISO" */
2081 else if(!strcmp(buf,
"ISO8859-1"))
2083 else if(!strcmp(buf,
"ISO8859-2"))
2085 else if(!strcmp(buf,
"ISO8859-3"))
2087 else if(!strcmp(buf,
"ISO8859-4"))
2089 else if(!strcmp(buf,
"ISO8859-5"))
2091 else if(!strcmp(buf,
"ISO8859-6"))
2093 else if(!strcmp(buf,
"ISO8859-7"))
2095 else if(!strcmp(buf,
"ISO8859-8"))
2097 else if(!strcmp(buf,
"ISO8859-9"))
2099 else if(!strcmp(buf,
"ISO8859-10"))
2101 else if(!strcmp(buf,
"ISO-8859-11"))
2103 else if(!strcmp(buf,
"ISO8859-11"))
2105 else if(!strcmp(buf,
"ISO8859-13"))
2107 else if(!strcmp(buf,
"ISO8859-14"))
2109 else if(!strcmp(buf,
"ISO8859-15"))
2111 else if(!strcmp(buf,
"ISO8859-16"))
/* Macintosh names */
2116 else if(!strcmp(buf,
"MACROMAN"))
2118 else if(!strcmp(buf,
"X-MAC-ROMAN"))
/* Codepage spellings flagged via 'ibminv' */
2122 else if(!strcmp(buf,
"CP-437")) { ibminv = 1; res =
ENC_CS_IBM437; }
2123 else if(!strcmp(buf,
"IBM858")) { ibminv = 1; res =
ENC_CS_IBM858; }
2124 else if(!strcmp(buf,
"CP858")) { ibminv = 1; res =
ENC_CS_IBM858; }
2125 else if(!strcmp(buf,
"CP1250"))
2127 else if(!strcmp(buf,
"CP1251"))
2129 else if(!strcmp(buf,
"CP1252"))
2131 else if(!strcmp(buf,
"CP1253"))
2133 else if(!strcmp(buf,
"CP1254"))
2135 else if(!strcmp(buf,
"CP1255"))
2137 else if(!strcmp(buf,
"CP1256"))
2139 else if(!strcmp(buf,
"CP1257"))
2141 else if(!strcmp(buf,
"CP1258"))
/* UTF names written without the hyphen */
2145 else if(!strcmp(buf,
"UTF7"))
2147 PRINT_ERROR(
"MIME: Invalid character set UTF7 accepted as UTF-7");
2150 else if(!strcmp(buf,
"UTF8"))
2152 PRINT_ERROR(
"MIME: Invalid character set UTF8 accepted as UTF-8");
/* Nothing matched: build a message that includes the offending name */
2158 if(ENC_CS_UNKNOWN == res)
2161 p = (
char*) api_posix_malloc(++l);
2166 strcat(p, not_supported);
2167 strncat(p, buf, len);
2169 api_posix_free((
void*) p);
2179 PRINT_ERROR(
"MIME: Invalid ISO 8859 character set accepted");
2183 PRINT_ERROR(
"MIME: Invalid Macintosh character set accepted");
2187 PRINT_ERROR(
"MIME: Invalid IBM codepage accepted");
/*
 * Decode quoted-printable data from the range [start, end).
 *
 * Visible structure:
 *  - A first pass copies the input into a work buffer 'src' while tracking
 *    the position 'ws' where a run of HT/SP characters began, so that
 *    whitespace directly before a CR LF line break can be handled
 *    (presumably removed - confirm against the elided lines).
 *  - A second pass decodes the data into 'tmp', which is grown by doubling
 *    when fewer than four bytes remain free. "=XY" sequences are decoded
 *    with enc_hex_decode_nibble(); invalid sequences are reported and the
 *    raw characters are kept.
 *  - When 'ec' is nonzero, '_' is decoded as a space (0x20) and literal
 *    HT/SP in the input is flagged as invalid; when 'ec' is zero, a CR LF
 *    pair following '=' is recognized (soft line break).
 *  - NUL bytes are never written to the output.
 * NOTE(review): 'dlen' presumably receives the decoded length; the
 * assignment is not visible here - confirm.
 */
2231 static char* enc_mime_decode_qp(
const char* start,
const char* end,
2232 int ec,
size_t* dlen)
/* SIZE_MAX marks "no pending whitespace run" */
2239 size_t ws = API_POSIX_SIZE_MAX;
2245 char nibble_high = 0;
2250 len = (size_t) (end - start);
2251 p = api_posix_malloc(len + (
size_t) 1);
2252 if(NULL == p) {
return(NULL); }
/* Pass 1: copy input, tracking whitespace before line breaks */
2256 for(i = 0; i < len; ++i)
2259 if((
char) 0x0A == start[i] && i)
2261 if((
char) 0x0D == start[i - (
size_t) 1] && API_POSIX_SIZE_MAX != ws)
2269 if((
char) 0x09 == start[i] || (
char) 0x20 == start[i])
2271 if(API_POSIX_SIZE_MAX == ws) { ws = bi; }
/* Anything but CR ends the whitespace run */
2273 else if((
char) 0x0D != start[i]) { ws = API_POSIX_SIZE_MAX; }
2274 src[bi++] = start[i];
/* Pass 2: decode */
2286 for(i = 0; i < (size_t) (end - start); ++i)
/* Grow the output buffer when it is nearly full */
2290 if(bi + (
size_t) 4 >= len)
2292 if(!len) { len = 64; }
2293 p = api_posix_realloc((
void*) tmp, len *= (
size_t) 2);
2296 api_posix_free((
void*) tmp);
/* Accepted literal octets: HT, LF, CR and printable ASCII */
2307 if(!((9 <= v && 10 >= v) || 13 == v || (32 <= v && 126 >= v)))
2312 if(ec && !invalid && (9 == v || 32 == v)) { invalid = 1; }
2316 PRINT_ERROR(
"MIME: Decoding invalid quoted printable data");
/* '=' starts an escape sequence */
2321 if(!state &&
'=' == current) { ++state; }
2333 nibble_high = current;
2341 ((
char) 0x09 == nibble_high || (
char) 0x20 == nibble_high) )
2343 if((
char) 0x09 == current || (
char) 0x20 == current) {
break; }
2344 else if((
char) 0x0D == current)
2346 nibble_high = current;
/* "=" CR LF */
2358 if(!ec && (
char) 0x0D == nibble_high && (
char) 0x0A == current)
/* Combine the two hex digits into one octet */
2365 v = enc_hex_decode_nibble(nibble_high);
2366 if(0 > v) { invalid = 1; }
2369 c = (
unsigned char) (v * 16);
2370 v = enc_hex_decode_nibble(current);
2371 if(0 > v) { invalid = 1; }
2372 else { c += (
unsigned char) v; }
2377 PRINT_ERROR(
"MIME: Invalid quoted printable encoded data");
/* Keep the raw characters of an invalid sequence */
2379 tmp[bi++] = nibble_high;
2380 c = (
unsigned char) current;
/* With 'ec' set, an underscore stands for a space */
2387 if(ec &&
'_' == current) { c = (
unsigned char) 0x20; }
2388 else { c = (
unsigned char) current; }
/* NUL is never emitted */
2392 if(c) { tmp[bi++] = (char) c; }
2396 if(NULL == tmp) { res = NULL; }
2404 api_posix_free((
void*) src);
/*
 * Decode quoted-printable style data in [start, end) and convert the
 * result from 'charset'.
 *
 * The raw decoding is done by enc_mime_decode_qp(). The intermediate
 * buffer is freed unless it is identical to the returned result.
 * NOTE(review): the charset conversion step itself is in lines not
 * visible here - confirm.
 */
2430 static const char* enc_mime_decode_q(
enum enc_mime_cs charset,
2431 const char* start,
const char* end,
2434 const char* res = NULL;
2438 tmp = enc_mime_decode_qp(start, end, ec, &len);
/* Free the intermediate buffer only if it is not the result itself */
2443 if(tmp != res) { api_posix_free((
void*) tmp); }
/*
 * Decode Base 64 data from the range [start, end) using libbasexx.
 *
 * The output buffer is sized with BXX0_BASE64_DECODE_LEN_OUT() plus one
 * extra byte. The decoder is called with the flags IGNORE, NOPAD, CONCAT
 * and INVTAIL set. Negative decoder results are mapped to specific error
 * messages and the buffer is freed. For non-error results the INVTAIL and
 * CONCAT bits of the return value select a diagnostic message.
 * The decoded length stored to '*dlen' is the difference between the
 * original and the remaining output size.
 * NOTE(review): the exact semantics of the decoder flags are defined by
 * libbasexx, not by this file - consult its documentation.
 */
2470 static char* enc_mime_decode_base64(
const char* start,
const char* end,
2473 const unsigned char* in = (
const unsigned char*) start;
2474 size_t len_in = end - start;
2475 unsigned char* out = NULL;
2476 size_t len_out = BXX0_BASE64_DECODE_LEN_OUT(len_in);
2477 unsigned char flags = BXX0_BASE64_DECODE_FLAG_IGNORE;
/* One extra byte beyond the decoder's output size */
2481 unsigned char* p = api_posix_malloc(len_out + (
size_t) 1);
2485 PRINT_ERROR(
"MIME: Base 64: Memory allocation for decoder failed");
2492 flags |= BXX0_BASE64_DECODE_FLAG_NOPAD;
2493 flags |= BXX0_BASE64_DECODE_FLAG_CONCAT;
2494 flags |= BXX0_BASE64_DECODE_FLAG_INVTAIL;
/* Remember the capacity to compute the decoded length afterwards */
2497 size_t len_out_orig = len_out;
2498 signed char rv = bxx0_base64_decode(out, &len_out, in, &len_in, flags);
/* Map decoder error codes to messages */
2503 if(BXX0_BASE64_DECODE_ERROR_SIZE == rv)
2504 PRINT_ERROR(
"MIME: Base 64: Error: Output buffer too small (bug)");
2505 else if(BXX0_BASE64_DECODE_ERROR_TAIL == rv)
2506 PRINT_ERROR(
"MIME: Base 64: Error: Invalid tail before padding");
2507 else if(BXX0_BASE64_DECODE_ERROR_PAD == rv)
2508 PRINT_ERROR(
"MIME: Base 64: Error: Invalid padding");
2509 else if(BXX0_BASE64_DECODE_ERROR_DAP == rv)
2510 PRINT_ERROR(
"MIME: Base 64: Error: Data after padding");
2512 PRINT_ERROR(
"MIME: Base 64: Error: Unknown error");
2514 api_posix_free((
void*) out);
/* Non-error result: flag bits select a diagnostic message */
2521 if(BXX0_BASE64_DECODE_FLAG_INVTAIL & rv)
2523 "Unused bits with nonzero value in tail");
2524 else if(BXX0_BASE64_DECODE_FLAG_CONCAT & rv)
2526 "Accepted additional data after correctly padded tail");
2532 PRINT_ERROR(
"MIME: Base 64: Error: Decoding data aborted (bug)");
2533 api_posix_free((
void*) out);
/* Bytes produced = capacity - remaining space */
2537 *dlen = len_out_orig - len_out;
/*
 * Decode Base 64 data in [start, end) and convert the result from
 * 'charset'.
 *
 * The raw decoding is done by enc_mime_decode_base64(). The intermediate
 * buffer is freed unless it is identical to the returned result.
 * NOTE(review): the charset conversion step itself is in lines not
 * visible here - confirm.
 */
2562 static const char* enc_mime_decode_b(
enum enc_mime_cs charset,
2563 const char* start,
const char* end)
2565 const char* res = NULL;
2569 tmp = enc_mime_decode_base64(start, end, &len);
/* Free the intermediate buffer only if it is not the result itself */
2574 if(tmp != res) { api_posix_free((
void*) tmp); }
/*
 * Gregorian leap year test.
 *
 * Returns 1 if 'year' is divisible by 400, or divisible by 4 but not by
 * 100. The return value for all other years is in a line not visible here
 * (callers treat it as false).
 */
2589 static int enc_check_leap_year(
unsigned int year)
2591 if(!(year % 400U) || (!(year % 4U) && (year % 100U))) {
return(1); }
/*
 * Convert a broken-down date and time plus a timezone offset into a
 * timestamp stored to '*pts'.
 *
 * Visible steps: whole years since 1970 are accumulated (with one extra
 * day of 86400 seconds for each leap year), then the months preceding
 * 'month' using the days-per-month table (which lists February with 29
 * days; the entry is special-cased for non-leap years), and finally the
 * zone offset is applied only if the timestamp is large enough.
 * An overflow of core_time_t is reported as a warning.
 * NOTE(review): the unit of 'zone' is not visible in this block; the
 * timestamp parser in this file builds the value it passes in minutes -
 * confirm.
 */
2615 static int enc_encode_posix_timestamp(
core_time_t* pts,
unsigned int year,
2616 unsigned int month,
unsigned int day,
2617 unsigned int hour,
unsigned int minute,
2618 unsigned int second,
int zone)
/* Days per month, February listed as 29 (leap year value) */
2620 static const unsigned int dom[12] = { 31U, 29U, 31U, 30U, 31U, 30U,
2621 31U, 31U, 30U, 31U, 30U, 31U };
2633 PRINT_ERROR(
"Warning: core_time_t overflow while decoding timestamp");
/* Add the complete years since the epoch */
2636 for(i = 1970U; i < year; ++i)
/* Leap years contribute one additional day */
2640 if(enc_check_leap_year(i)) { ts += (
core_time_t) 86400; }
/* Add the complete months of the current year */
2642 for(i = 0; i < month - 1U; ++i)
/* Index 1 is February: special case for non-leap years */
2646 if(1U == i && !enc_check_leap_year(year))
/* Apply the zone offset only if it cannot push the value below zero */
2664 if(ts >= zone_seconds)
/*
 * NOTE(review): "%lu" expects unsigned long but the argument is cast to
 * long int - the conversion specifier and the argument type do not match.
 */
2684 printf(
"Seconds : %lu\n", (
long int) ts);
/*
 * NOTE(review): the address of 'ts' is cast to api_posix_time_t*; this is
 * only valid if core_time_t and api_posix_time_t are the same type -
 * confirm.
 */
2686 t = gmtime((api_posix_time_t*) &ts);
2687 printf(
"Conv. UTC: %04d-%02d-%02d %02d:%02d:%02d\n",
2688 t->tm_year + 1900, t->tm_mon + 1, t->tm_mday,
2689 t->tm_hour, t->tm_min, t->tm_sec);
/*
 * Check whether the first character of 's' is allowed in an "atom".
 *
 * The result is set to 0 for ASCII digits, ASCII letters and the
 * characters ! # $ % & ' * + - / = ? ^ _ ` { | } ~ .
 * NOTE(review): this set matches the "atext" rule of RFC 5322; the result
 * for all other characters is initialized in a line not visible here
 * (callers treat nonzero as "not an atom character") - confirm.
 */
2707 static int enc_check_atom(
const char* s)
/* '0'..'9' */
2713 if(0x30 <= c && 0x39 >=c) { res = 0; }
/* 'A'..'Z' */
2714 else if(0x41 <= c && 0x5A >=c) { res = 0; }
/* 'a'..'z' */
2715 else if(0x61 <= c && 0x7A >=c) { res = 0; }
2716 else if((
int)
'!' == c) { res = 0; }
2717 else if((
int)
'#' == c) { res = 0; }
2718 else if((
int)
'$' == c) { res = 0; }
2719 else if((
int)
'%' == c) { res = 0; }
2720 else if((
int)
'&' == c) { res = 0; }
/* 0x27 is the apostrophe */
2721 else if(0x27 == c) { res = 0; }
2722 else if((
int)
'*' == c) { res = 0; }
2723 else if((
int)
'+' == c) { res = 0; }
2724 else if((
int)
'-' == c) { res = 0; }
2725 else if((
int)
'/' == c) { res = 0; }
2726 else if((
int)
'=' == c) { res = 0; }
2727 else if((
int)
'?' == c) { res = 0; }
2728 else if((
int)
'^' == c) { res = 0; }
2729 else if((
int)
'_' == c) { res = 0; }
2730 else if((
int)
'`' == c) { res = 0; }
2731 else if((
int)
'{' == c) { res = 0; }
2732 else if((
int)
'|' == c) { res = 0; }
2733 else if((
int)
'}' == c) { res = 0; }
2734 else if((
int)
'~' == c) { res = 0; }
/*
 * Check whether the first character of 's' is allowed in a "dot-atom":
 * a dot is accepted directly (result 0), everything else is delegated to
 * enc_check_atom().
 */
2750 static int enc_check_dotatom(
const char* s)
2755 if(
'.' == *s) { res = 0; }
2756 else { res = enc_check_atom(s); }
/*
 * Encode a display name in place.
 *
 * The string 's' is split into words at spaces. Each word is checked:
 * words containing bytes >= 0x80 and words containing characters that are
 * not atom characters are treated specially. Words that are plain atoms
 * are copied to the output buffer as they are; other words are emitted as
 * a quoted string with '"' and backslash (0x5C) escaped by a preceding
 * backslash. If a quoted word does not fit, the output index is reset to
 * where the word started. Finally the buffer is copied back over 's'.
 * NOTE(review): the handling of words with 8-bit data is in lines not
 * visible here - confirm. All buffers are bounded by ENC_HDR_BUFSIZE.
 */
2771 static void enc_encode_dispname(
char* s)
2774 char buf[ENC_HDR_BUFSIZE + (size_t) 1];
2776 char word[ENC_HDR_BUFSIZE + (size_t) 1];
/* Skip spaces, then locate the end of the next word */
2789 ii = i;
while(
' ' == s[ii]) { ++ii; }
2790 w = strchr(&s[ii], (
int)
' ');
2793 word_len = strlen(&s[i]);
2796 else { word_len = (size_t) (w - &s[i]); }
/* Oversized words are dropped (treated as empty) */
2797 if(ENC_HDR_BUFSIZE < word_len) { word[0] = 0; }
2800 memcpy((
void*) word, (
void*) &s[i], word_len);
2804 if(!last_word) { ++i; }
/* Detect 8-bit data in the word */
2811 if(0x80U <= (
unsigned int) (
unsigned char) word[ii])
2816 if(enc_check_atom(&word[ii])) { atom = 0; }
/* Word separator in the output */
2823 if(ENC_HDR_BUFSIZE <= bi) {
break; }
else { buf[bi++] =
' '; }
2829 if(ENC_HDR_BUFSIZE - bi < word_len) {
break; }
2832 memcpy((
void*) &buf[bi], (
void*) word, word_len);
/* Opening quote of a quoted string */
2842 if(ENC_HDR_BUFSIZE <= bi) { error = 1; }
else { buf[bi++] =
'"'; }
2844 for(ii = 0; ii < word_len; ++ii)
2847 cbuf[0] = word[ii]; cbuf[1] = 0;
/* Room for an escape character plus the character itself is required */
2850 if(ENC_HDR_BUFSIZE - bi < (
size_t) 2) { error = 1;
break; }
/* Escape '"' and backslash */
2852 if(
'"' == word[ii] || 0x5C == (
int) word[ii]) { buf[bi++] = 0x5C; }
2853 buf[bi++] = word[ii];
/* Closing quote */
2856 if(ENC_HDR_BUFSIZE <= bi) { error = 1; }
else { buf[bi++] =
'"'; }
/* On error discard the partially written word */
2857 if(error) { bi = start; }
2859 if(last_word) {
break; }
/*
 * NOTE(review): strncpy() copies bi + 1 bytes here; the result is only
 * terminated if buf[bi] was set to NUL beforehand (not visible in this
 * excerpt) - confirm.
 */
2864 strncpy(s, buf, ++bi);
/*
 * Decode a MIME parameter value 'buf' that is labeled with the character
 * set name 'cs'.
 *
 * Visible steps: a writable copy of 'buf' is created, the copy is checked
 * for nonprintable characters and percent-decoded (errors are reported),
 * the charset name is resolved with enc_mime_get_charset(), and the
 * converted string is copied into a newly allocated result buffer.
 * Intermediate buffers are freed; the converted string is freed only if it
 * is a different buffer than the working copy.
 * NOTE(review): the decoding and conversion calls themselves are in lines
 * not visible here - confirm.
 */
2886 static char* enc_mime_decode_parameter(
const char* buf,
const char* cs)
2890 const char* tmp2 = NULL;
/* Working copy including the terminator */
2899 tmp = (
char*) api_posix_malloc(++len);
2902 memcpy((
void*) tmp, (
void*) buf, len);
2905 PRINT_ERROR(
"MIME: Nonprintable characters in parameter");
2912 PRINT_ERROR(
"MIME: Percent encoding failed for parameter");
2916 charset = enc_mime_get_charset(cs, strlen(cs));
2920 PRINT_ERROR(
"MIME: Parameter charset not supported");
/* Result copy including the terminator */
2925 res = (
char*) api_posix_malloc(++len);
2928 memcpy((
void*) res, (
void*) tmp2, len);
/* Free the converted string only if it is a separate buffer */
2930 if(tmp != tmp2) { api_posix_free((
void*) tmp2); }
2936 api_posix_free((
void*) tmp);
2971 const char* res = NULL;
2979 char name[(size_t) 2 * ENC_HDR_BUFSIZE + (
size_t) 1];
2980 char addr_spec[ENC_HDR_BUFSIZE + (size_t) 1];
2983 if((
size_t) 2 * ENC_HDR_BUFSIZE < strlen(
data)) { error = 1; }
2988 if(!strlen(name)) { error = 1; }
2996 if(NULL == strchr(&name[i + (
size_t) 1], (
int)
'<'))
2998 if(!i) { name[0] = 0; }
2999 else { name[i - (size_t) 1] = 0; }
3000 if(ENC_HDR_BUFSIZE < strlen(&name[i])) { error = 1; }
3003 strcpy(addr_spec, &name[++i]);
3004 i = strlen(addr_spec) - (size_t) 1;
3005 if(
'>' != addr_spec[i]) { addr_spec[0] = 0; }
3006 else { addr_spec[i] = 0; }
3019 enc_encode_dispname(name);
3020 len += strlen(name);
3026 len += strlen(addr_spec);
3033 c = addr_spec[i];
if(!c) {
break; }
3034 if(
'@' != c && enc_check_dotatom(&c))
3043 if(!i || !addr_spec[i + (
size_t) 1])
3049 if(
'.' == addr_spec[i - (
size_t) 1]
3050 ||
'.' == addr_spec[i + (
size_t) 1])
3057 if(!error &&
'.' == c)
3059 if(!i || !addr_spec[i + (
size_t) 1])
3070 if(! (error || (
size_t) 1 != counter || (
size_t) 5 > strlen(addr_spec)) )
3073 buf = (
char*) api_posix_malloc(len);
3082 else { strcpy(buf,
"<"); }
3084 strcat(buf, addr_spec);
3088 if(0 >= rv) { api_posix_free((
void*) buf); }
3095 if(error) {
PRINT_ERROR(
"Creating name-addr construct failed"); }
3117 unsigned long int res;
3119 if(1 != sscanf(lines,
"%lu", &res)) { res = 0; }
3140 rv = api_posix_snprintf(l, 11,
"%lu", l_raw);
3141 if(0 > rv || 11 <= rv)
3173 static const char* months[12] = {
"JAN",
"FEB",
"MAR",
"APR",
"MAY",
"JUN",
3174 "JUL",
"AUG",
"SEP",
"OCT",
"NOV",
"DEC" };
3175 static const unsigned int dom[12] = { 31U, 29U, 31U, 30U, 31U, 30U,
3176 31U, 31U, 30U, 31U, 30U, 31U };
3184 unsigned int zh = 0;
3185 unsigned int zm = 0;
3188 unsigned int month = 13U;
3190 unsigned int hour = 0;
3191 unsigned int minute = 0;
3192 unsigned int second = 0;
3198 printf(
"------------------------------------------------------------\n");
3199 printf(
"Timestamp: %s\n", timestamp);
3203 p = strchr(timestamp, (
int)
',');
3204 if(NULL == p) { p = timestamp; }
else { ++p; }
3207 rv = sscanf(p,
"%u %3c %u%n", &day, m, &year, &pos);
3208 if(3 != rv) {
PRINT_ERROR(
"Invalid date in timestamp"); }
3214 if(50U > year) { year += 2000U; }
else { year += 1900U; }
3217 m[0] = (char) toupper((
int) m[0]);
3218 m[1] = (char) toupper((
int) m[1]);
3219 m[2] = (char) toupper((
int) m[2]);
3221 for(i = 0; i < 12; ++i)
3223 if(!strcmp(months[i], m)) { month = i + 1U;
break; }
3225 if(13U <= month) {
PRINT_ERROR(
"Invalid month in timestamp"); }
3229 if(1U > day || dom[i] < day) { month = 13U; }
3232 if(2U == month && 29U == day)
3235 if(!enc_check_leap_year(year)) { month = 13U; }
3249 rv = sscanf(p,
"%u : %u%n", &hour, &minute, &pos);
3257 q = strchr(p, (
int)
':');
3258 if(NULL != q) { p = q; }
3259 rv = sscanf(p,
": %u%n", &second, &pos);
3260 if(1 == rv) { p += pos; }
3261 rv = sscanf(p,
"%5s", z);
3270 if(23U < hour || 59U < minute || 60U < second)
3277 if(
'+' == z[0] ||
'-' == z[0])
3279 for(i = 1; i < 5; ++i)
3285 zh = ((
unsigned int) z[1] - 0x30) * 10U;
3286 zh += ((
unsigned int) z[2] - 0x30);
3287 zm = ((
unsigned int) z[3] - 0x30) * 10U;
3288 zm += ((
unsigned int) z[4] - 0x30);
3289 if(59U < zm) { zone = 0; }
3297 zone = (int) (zh * 60U + zm);
3298 if(
'-' == z[0]) { zone *= -1; }
3304 if(!strcmp(
"GMT", z)) { zone = 0; }
3305 else if(!strcmp(
"UT", z)) { zone = 0; }
3306 else if(!strcmp(
"EDT", z)) { zone = -4 * 60; }
3307 else if(!strcmp(
"EST", z)) { zone = -5 * 60; }
3308 else if(!strcmp(
"CDT", z)) { zone = -5 * 60; }
3309 else if(!strcmp(
"CST", z)) { zone = -6 * 60; }
3310 else if(!strcmp(
"MDT", z)) { zone = -6 * 60; }
3311 else if(!strcmp(
"MST", z)) { zone = -7 * 60; }
3312 else if(!strcmp(
"PDT", z)) { zone = -7 * 60; }
3313 else if(!strcmp(
"PST", z)) { zone = -8 * 60; }
3314 else if(!strcmp(
"Z", z)) { zone = 0; }
3318 PRINT_ERROR(
"Decode unknown timezone in timestamp as UTC");
3323 printf(
"Decoded : %04u-%02u-%02u %02u:%02u:%02u %+d minutes\n",
3324 year, month, day, hour, minute, second, zone);
3336 enc_encode_posix_timestamp(&res, year, month, day, hour, minute, second,
3362 api_posix_time_t ts;
3363 api_posix_struct_tm t_data;
3364 api_posix_struct_tm* t;
3374 PRINT_ERROR(
"Warning: time_t overflow while converting timestamp");
3377 else { ts = (api_posix_time_t) pts; }
3384 t = api_posix_localtime_r(&ts, &t_data);
3388 ign = api_posix_snprintf(isodate, 20,
"%04d-%02d-%02d %02d:%02d:%02d",
3389 t->tm_year + 1900, t->tm_mon + 1, t->tm_mday,
3390 t->tm_hour, t->tm_min, t->tm_sec);
3394 if(0 > res) {
PRINT_ERROR(
"Timestamp conversion failed"); }
3415 api_posix_time_t ts;
3416 api_posix_struct_tm t_data;
3417 api_posix_struct_tm* t;
3424 api_posix_time(&ts);
3425 if((api_posix_time_t) 0 > ts) { res = -1; }
3433 t = api_posix_gmtime_r(&ts, &t_data);
3437 ign = api_posix_snprintf(isodate, 21,
"%04d-%02d-%02dT%02d:%02d:%02dZ",
3438 t->tm_year + 1900, t->tm_mon + 1, t->tm_mday,
3439 t->tm_hour, t->tm_min, t->tm_sec);
3444 if(0 > res) {
PRINT_ERROR(
"ISO 8601 date request failed"); }
3475 unsigned int minute;
3476 unsigned int second;
3479 rv = sscanf(isodate,
"%u-%u-%uT%u:%u:%uZ", &year, &month, &mday,
3480 &hour, &minute, &second);
3481 if(6 != rv) {
PRINT_ERROR(
"ISO 8601 timestamp has invalid format"); }
3484 if(1970U <= year && 9999U >= year
3485 && 1U <= month && 12U >= month
3486 && 1U <= mday && 31U >= mday
3487 && 23U >= hour && 59U >= minute && 59U >= second) { res = 0; }
3493 res = enc_encode_posix_timestamp(pts, year, month, mday,
3494 hour, minute, second, 0);
3522 static const char* months[12] = {
"Jan",
"Feb",
"Mar",
"Apr",
"May",
"Jun",
3523 "Jul",
"Aug",
"Sep",
"Oct",
"Nov",
"Dec" };
3533 rv = sscanf(isodate,
"%u-%u-%u", &year, &month, &mday);
3534 if(3 != rv) {
PRINT_ERROR(
"ISO 8601 timestamp has invalid format"); }
3537 if(1900U <= year && 9999U >= year
3538 && 1U <= month && 12U >= month
3539 && 1U <= mday && 31U >= mday) { res = 0; }
3544 buf = (
char*) api_posix_malloc(len);
3545 if(NULL == buf) { res = -1; }
3548 api_posix_snprintf(buf, 50,
"%u %s %04u %02d:%02d:%02d -0000",
3549 mday, months[--month], year, 0, 0, 0);
3550 *ts = (
const char*) buf;
3583 PRINT_ERROR(
"Value of CORE_ANUM_T_MAX is too large");
3587 rv = api_posix_snprintf(result, 17,
"%lu", (
unsigned long int) wm);
3588 if(rv > 0 && rv <= 16)
3631 if(0 < len && 20 >= len)
3637 c = (
unsigned char) wm[len-- - 1];
3658 if(0 > res) {
PRINT_ERROR(
"Article number conversion failed"); }
3687 if(2 == api_posix_snprintf(result, 3,
"%02X", octet)) { res = 0; }
3691 if(res) { strcpy(result,
"XX"); }
3719 if(65 <= c && 90 >= c)
3722 if(90 < c) { c = 65 - 1 + (c - 90); }
3726 else if(97 <= c && 122 >= c)
3729 if(122 < c) { c = 97 - 1 + (c - 122); }
3733 if(modified) {
data[i] = (char) c; }
3763 size_t len_out = BXX0_BASE64_ENCODE_LEN_OUT(len) + 1U;
3764 size_t len_out_orig = len_out;
3765 unsigned char* out = (
unsigned char*) api_posix_malloc(len_out);
3767 if(NULL == out) {
return -1; }
3771 const unsigned char* in = (
const unsigned char*)
data;
3772 signed char rv = bxx0_base64_encode(out, &len_out, in, &len, 0);
3774 if(0 > rv || 0U != len)
3776 api_posix_free((
void*) out);
3781 out[len_out_orig - len_out] = 0;
3782 *enc = (
const char*) out;
3819 len = strlen(mailbox);
3821 if((
size_t) 3 <= len)
3831 for(i = len; i; --i)
3833 ii = i - (size_t) 1;
3834 if(!state &&
'>' == mailbox[ii])
3840 if(1U == state &&
'@' == mailbox[ii]) { ++state;
continue; }
3841 if(2U == state &&
'<' == mailbox[ii])
3850 if((!state || 3U <= state) && e > s + 2)
3853 len = (size_t) (e - s);
3854 res = (
char*) api_posix_malloc(len + (
size_t) 1);
3857 memcpy((
void*) res, (
void*) s, len); res[len] = 0;
3861 while(res[i] &&
'@' != res[i])
3863 if(enc_check_dotatom(&res[i])) { state = 1;
break; }
3868 if(!i ||
'@' != res[i]) { state = 1; }
3870 else {
if(!res[++i]) { state = 1; } }
3886 if(enc_check_dotatom(&res[i]))
3897 api_posix_free((
void*) res);
3906 if(NULL == res) {
PRINT_ERROR(
"Invalid e-mail address"); }
3929 while((c = (
int) s[i++]))
3931 if(!(0 <= c && 127 >= c)) { res = -1; }
3955 if(!(65 <= c && 90 >= c) && !(97 <= c && 122 >= c)) { res = -1; }
3978 if(!(48 <= c && 57 >= c)) { res = -1; }
4007 while((c = (
int) s[i++]))
4009 if(!(9 == c || (32 <= c && 126 >= c))) { res = -1; }
4035 if(!(9 == c || (32 <= c && 126 >= c))) { s[i] =
'?'; }
4066 if(9 == c || 32 == c)
4068 len = strlen(&s[i + (
size_t) 1]);
4070 memmove((
void*) &s[i], (
void*) &s[i + (
size_t) 1], ++len);
4083 if(!start && !i) { error = 1; }
4084 else if(start && start + (
size_t) 1 == i) { error = 1; }
4088 if(
'+' != s[i] &&
'-' != s[i] &&
'_' != s[i] &&
',' != s[i])
4094 if(!error &&
',' == s[i])
4097 if(!s[i + (
size_t) 1]) { error = 1; }
4103 PRINT_ERROR(
"Invalid entry in distribution list removed");
4104 p = strchr(&s[i + (
size_t) 1], (
int)
',');
4106 if(NULL == p) { s[i] = 0; }
4110 if(!start) { p += 1; }
4113 memmove((
void*) &s[start], (
void*) p, ++len);
4143 return enc_uc_check_cesu8(s, 1);
4161 char* res = (
char*) api_posix_malloc(strlen(s) * (size_t) 3);
4162 const char rc[3] = { (char) 0xEF, (
char) 0xBF, (char) 0xBD };
4168 size_t remaining = 0;
4169 unsigned long int mbc = 0;
4175 while((c = (
int) s[i++]))
4180 if((c & 0xC0) == 0x80) {
continue; }
4181 else { multibyte = 0; }
4186 if(!(0 <= c && 127 >= c)) { multibyte = 1; }
4187 else { res[ri++] = (char) c; }
4198 if((c & 0xE0) == 0xC0) { len = 2; }
4199 else if((c & 0xF0) == 0xE0) { len = 3; }
4200 else if((c & 0xF8) == 0xF0) { len = 4; }
4204 res[ri++] = rc[0]; res[ri++] = rc[1]; res[ri++] = rc[2];
4209 case 2: mbc |= (
unsigned long int) (c & 0x1F) << 6;
break;
4210 case 3: mbc |= (
unsigned long int) (c & 0x0F) << 12;
break;
4211 case 4: mbc |= (
unsigned long int) (c & 0x07) << 18;
break;
4214 remaining = len - (size_t) 1;
4218 if((c & 0xC0) != 0x80)
4221 res[ri++] = rc[0]; res[ri++] = rc[1]; res[ri++] = rc[2];
4222 if(0 <= c && 127 >= c) { res[ri++] = (char) c; }
4228 mbc |= (
unsigned long int) (c & 0x3F) << remaining * (size_t) 6;
4230 if(!remaining && !error)
4237 if(0x000080UL > mbc)
4247 res[ri++] = s[i - (size_t) 2];
4248 res[ri++] = s[i - (size_t) 1];
4255 || (0x00D800UL <= mbc && 0x00DFFFUL >= mbc))
4265 res[ri++] = s[i - (size_t) 3];
4266 res[ri++] = s[i - (size_t) 2];
4267 res[ri++] = s[i - (size_t) 1];
4273 if(0x010000UL > mbc || 0x10FFFFUL < mbc)
4283 res[ri++] = s[i - (size_t) 4];
4284 res[ri++] = s[i - (size_t) 3];
4285 res[ri++] = s[i - (size_t) 2];
4286 res[ri++] = s[i - (size_t) 1];
4293 api_posix_free((
void*) res);
4314 PRINT_ERROR(
"UTF-8 data still invalid after repair (bug)");
4315 api_posix_free((
void*) res);
4369 if(NULL == strpbrk(wm,
"\x5C[]"))
4377 len = strlen(&wm[i]);
4379 buf = (
char*) api_posix_malloc(len * (
size_t) 3 + (size_t) 3);
4380 if(NULL == buf) {
break; }
4390 if((
size_t) 1 < bi) { store = 1; }
4395 if(NULL != strchr(
".()*+?{|^$", (
int) wm[i]))
4461 if(!store) { api_posix_free((
void*) buf); }
4464 if(INT_MAX == res) { error = 1; }
4473 if(NULL == p) { error = 1; }
4477 (*obj)[res].negate = negate;
4478 (*obj)[res].ere = buf;
4484 api_posix_free((
void*) buf);
4495 if(error || !eod || 0 >= res)
4497 PRINT_ERROR(
"Failed to convert RFC 3977 wildmat");
4520 if(NULL != obj && NULL != *obj)
4522 for(i = 0; i < num; ++i)
4524 api_posix_free((
void*) (*obj)[i].ere);
4526 api_posix_free((
void*) *obj);
4560 const char* res = NULL;
4568 long int rc_ucp = ENC_RC;
4576 p = (
char*) api_posix_malloc((
size_t) 1);
4577 if(NULL != p) { p[0] = 0; res = p; }
4588 if(bi + (
size_t) 4 + (size_t) 1 >= len)
4591 if(!len) { len = 64; }
4592 p = (
char*) api_posix_realloc((
void*) buf, len *= (size_t) 2);
4595 api_posix_free((
void*) buf);
4602 if(bi && i && 0x0A == (
int) s[i] && 0x0D == (
int) s[i - (
size_t) 1])
4605 buf[bi - (size_t) 1] = 0x0A;
4607 else if(i && 0x0A != (
int) s[i] && 0x0D == (int) s[i - (
size_t) 1])
4619 else if(0x0A == (
int) s[i])
4633 else { buf[bi++] = s[i]; }
4644 PRINT_ERROR(
"Invalid CR control character(s) detected"
4645 " while decoding canonical format");
4650 PRINT_ERROR(
"Invalid LF control character(s) detected"
4651 " while decoding canonical format");
4681 const char* res = NULL;
4693 p = (
char*) api_posix_malloc((
size_t) 1);
4694 if(NULL != p) { p[0] = 0; res = p; }
4701 if(bi + (
size_t) 4 >= len)
4704 if(!len) { len = 64; }
4705 p = (
char*) api_posix_realloc((
void*) buf, len *= (size_t) 2);
4708 api_posix_free((
void*) buf);
4715 if(0x0A == (
int) s[i])
4721 else if(0x0D == (
int) s[i])
4723 PRINT_ERROR(
"Invalid CR control character deleted"
4724 " while converting to canonical format");
4726 else { buf[bi++] = s[i]; }
4734 if(0x0A != (
int) buf[bi - (
size_t) 1])
4779 const char* res = NULL;
4786 long int rc_ucp = ENC_RC;
4787 char rc_utf8[5] = { 0 };
4795 PRINT_ERROR(
"Convert unsupported ISO 8859 character set as US-ASCII");
4802 p = (
char*) api_posix_malloc(++len);
4803 if(NULL == p) {
break; }
4804 for(i = 0; i < len; ++i)
4807 if((
unsigned char) 127 < (
unsigned char) p[i]) { p[i] =
'?'; }
4845 res = enc_8bit_convert_to_utf8(charset, s);
4850 res = enc_iso2022jp_convert_to_utf8(s);
4855 res = enc_uc_convert_nsutf_to_utf8(s,
"UTF-7");
4860 res = enc_uc_convert_nsutf_to_utf8(s,
"CESU-8");
4889 if(res != tmp && res != s) { api_posix_free((
void*) res); }
4897 tmp = enc_uc_normalize_to_nfc(res);
4898 if(res != tmp && res != s) { api_posix_free((
void*) res); }
4908 ucp = enc_uc_decode_utf8(res, &i);
4909 if(-1L == ucp) {
break; }
4910 if(enc_uc_check_control(ucp)) { cc_flag = 1;
break; }
4920 len *= strlen(rc_utf8);
4921 p = (
char*) api_posix_malloc(++len);
4924 if(s != res) { api_posix_free((
void*) res); }
4932 ucp = enc_uc_decode_utf8(res, &i);
4933 if(-1L == ucp) {
break; }
4934 if(enc_uc_check_control(ucp))
4947 if(s != res) { api_posix_free((
void*) res); }
4950 PRINT_ERROR(
"Unwanted control characters detected and replaced");
4992 const char** cs_iana)
4994 const char* res = NULL;
5007 p = (
char*) api_posix_malloc(++len);
5012 ucp = enc_uc_decode_utf8(s, &i);
5013 if(-1L == ucp) {
break; }
5015 if(256L <= ucp) { error = 1;
break; }
5016 else { p[ii++] = (char) (
unsigned char) ucp; }
5019 if(error) { api_posix_free((
void*) p); }
5025 if(NULL != cs_iana) { *cs_iana =
"ISO-8859-1"; }
5091 static const char error_msg[] =
"[Error]";
5092 static const char folding[] =
"\n ";
5095 size_t rbuf_len = 0;
5097 const char* body = NULL;
5098 const char* body_tmp = NULL;
5099 const char* cs_iana =
"UTF-8";
5109 char enc_word[1001];
5112 unsigned int dh, dl;
5118 #if !ENC_MIME_HEADER_FOLD_ASCII_LINES
5127 if((
size_t) 25 < pl)
5135 rem = (size_t) 76 - pl;
5141 && NULL == strstr(b,
"=?") && NULL == strstr(b,
"?="))
5152 PRINT_ERROR(
"MIME: Encoding of header field failed");
5153 p = (
char*) api_posix_malloc(strlen(error_msg) + (size_t) 1);
5154 if(NULL != p) { strcpy(p, error_msg); }
5160 body_tmp = enc_uc_normalize_to_nfc(b);
5162 if(NULL == body_tmp) { res = -1; }
5175 if(NULL == body) { body = body_tmp; }
5182 cs_iana =
"US-ASCII";
5190 if(!body[i] ||
' ' == body[i])
5195 if(
' ' == body[i + (
size_t) 1]
5196 || (
char) 0x09 == body[i + (
size_t) 1])
5202 enc_last = enc_flag; enc_flag = 0;
5204 for(ii = start; ii <= end; ++ii)
5206 enc_word[ei++] = body[ii];
5207 if(128U <= (
unsigned int) body[ii]) { enc_flag = 1;
break; }
5208 if(
'=' == (
unsigned int) body[ii])
5210 if((ii < end &&
'?' == body[ii + (
size_t) 1])
5211 || (ii > start &&
'?' == body[ii - (
size_t) 1]))
5218 if(enc_split) { enc_flag = 1; }
5222 #if !ENC_MIME_HEADER_FOLD_ASCII_LINES
5225 strcpy(enc_word,
"=?");
5226 strcat(enc_word, cs_iana);
5227 strcat(enc_word,
"?Q?");
5229 if(enc_last && !enc_split)
5232 strcat(enc_word,
"_");
5234 ei = strlen(enc_word);
5235 for(ii = start; ii <= end; ++ii)
5238 if(
ENC_CS_UTF_8 == cs && 0x80 != ((
int) body[ii] & 0xC0))
5246 gcpsl = iii * (size_t) 3;
5248 if(!body[ii + iii]) { eod = 1; }
5252 if(!body[ii + iii]) {
break; }
5253 ucp = enc_uc_decode_utf8(&body[ii], &iii);
5259 enc_uc_lookup_cdc(ucp, &cdc);
5264 if(!cdc.ccc) {
continue; }
5281 if((
size_t) (75 - 12) < gcpsl)
5285 "Combining character sequence too long");
5290 enc_word[ei++] =
'=';
5291 enc_word[ei++] =
'3';
5292 enc_word[ei++] =
'F';
5293 ii += iii - (size_t) 1;
5301 else if(first && ((
size_t) (rem - 2) - gcpsl < ei))
5305 else if((
size_t) (75 - 2) - gcpsl < ei)
5313 if(uc_split) { --ii; }
5317 if( (
'0' <= body[ii] &&
'9' >= body[ii])
5318 || (
'A' <= body[ii] &&
'Z' >= body[ii])
5319 || (
'a' <= body[ii] &&
'z' >= body[ii])
5320 ||
'!' == body[ii] ||
'*' == body[ii] ||
'+' == body[ii]
5321 ||
'-' == body[ii] ||
'/' == body[ii] )
5324 enc_word[ei++] = body[ii];
5329 enc_word[ei++] =
'=';
5330 dh = (
unsigned int) (
unsigned char) body[ii] / 16U;
5331 if(10U > dh) { enc_word[ei++] = (char) (48U + dh); }
5332 else { enc_word[ei++] = (char) (65U + dh - 10U); }
5333 dl = (
unsigned int) (
unsigned char) body[ii] % 16U;
5334 if(10U > dl) { enc_word[ei++] = (char) (48U + dl); }
5335 else { enc_word[ei++] = (char) (65U + dl - 10U); }
5343 if(uc_split || (
size_t) (75 - 3 - 2) < ei)
5355 else { enc_split = 0; }
5358 enc_word[ei++] =
'?';
5359 enc_word[ei++] =
'=';
5365 word_len = strlen(enc_word) + (size_t) 1;
5366 if((
size_t) 998 < word_len)
5373 if(word_len && (word_len > rem)
5375 && !(no_ec && !enc_flag && (word_len < rem + (
size_t) 922))
5382 PRINT_ERROR(
"MIME: Encoded-word too long for first line");
5386 else if(word_len > rem)
5388 memmove((
void*) &enc_word[strlen(folding)], (
void*) enc_word,
5391 memcpy((
void*) enc_word, (
void*) folding, strlen(folding));
5392 word_len += strlen(folding);
5395 #if !ENC_MIME_HEADER_FOLD_ASCII_LINES
5397 if(!enc_flag) { no_ec = 1; }
5407 if(init) { init = 0; --word_len; }
5410 memmove((
void*) &enc_word[1], (
void*) enc_word, word_len);
5416 while(ri + word_len >= rbuf_len)
5418 if(!rbuf_len) { rbuf_len = 128; }
5419 p = api_posix_realloc((
void*) rbuf, rbuf_len *= (
size_t) 2);
5420 if(NULL == p) { res = -1;
break; }
5423 if(-1 == res) {
break; }
5425 memcpy((
void*) &rbuf[ri], (
void*) enc_word, word_len);
5427 if(rem < word_len) { rem = 0; }
5428 else { rem -= word_len; }
5431 start = i + (size_t) 1;
5436 if(body != body_tmp) { api_posix_free((
void*) body); }
5437 if(body_tmp != b) { api_posix_free((
void*) body_tmp); }
5439 if(NULL != rbuf) { rbuf[ri] = 0; }
5446 *r = (
const char*) rbuf;
5456 api_posix_free((
void*) rbuf);
5508 size_t rbuf_len = 0;
5520 size_t word_trailing_space = 0;
5526 target = strchr(&b[i], (
int)
'=');
5530 target = strchr(&b[i], 0x0A);
5533 target = strchr(&b[i], 0x0D);
5534 if(NULL == target) { res = 1; }
5544 (
' ' == b[i + nbuf_len] || (
const char) 0x09 == b[i + nbuf_len]))
5553 if(
'=' == target[0])
5555 if(
'?' == target[1])
5558 p = strchr(&target[2], (
int)
'?');
5562 p2 = strchr(&target[2], (
int)
'*');
5563 if(NULL == p2) { p2 = p; }
5564 else if(p < p2) { p2 = p; }
5565 charset = enc_mime_get_charset(&target[2],
5566 (
size_t) (p2 - &target[2]));
5570 encoding = (char) toupper((
int) p[1]);
5573 PRINT_ERROR(
"MIME: Syntax error in encoded-word");
5579 p = strchr(target, (
int)
'?');
5585 "Too many fields in encoded-word");
5595 wbuf = enc_mime_decode_q(charset,
5602 wbuf = enc_mime_decode_b(charset,
5624 while( ri && (
' ' == rbuf[ri - (
size_t) 1] ||
5625 0x09 == (
int) rbuf[ri - (
size_t) 1]) )
5629 ri += word_trailing_space;
5634 nbuf_len = strlen(nbuf);
5635 i += (size_t) (&p[2] - &b[i]);
5637 word_trailing_space = 0;
5643 if(
' ' != nbuf[ii]) {
break; }
5644 else { ++word_trailing_space; }
5653 p = strchr(nbuf, (
int)
' ');
5654 p2 = strchr(nbuf, 0x09);
5655 if(NULL != p2 && p2 < p) { p = p2; }
5656 if(NULL == p) { nbuf_len = strlen(nbuf); }
5657 else { nbuf_len = (size_t) (p - nbuf); }
5669 while(ri + nbuf_len >= rbuf_len)
5671 if(!rbuf_len) { rbuf_len = 128; }
5672 if(API_POSIX_SIZE_MAX / (
size_t) 2 < rbuf_len) { res = -1;
break; }
5673 p = (
char*) api_posix_realloc((
void*) rbuf, rbuf_len *= (size_t) 2);
5674 if(NULL == p) { res = -1;
break; }
5679 memcpy((
void*) &rbuf[ri], (
void*) nbuf, nbuf_len);
5681 if(NULL != wbuf) { api_posix_free((
void*) wbuf); }
5684 if(NULL != rbuf) { len = ri; rbuf[len] = 0; }
5692 if(0x0A == (
int) rbuf[ri] || 0x0D == (
int) rbuf[ri])
5701 if(API_POSIX_SIZE_MAX / (
size_t) 3 <= len) { res = -1; }
5705 rbuf_len = len * (size_t) 3 + (
size_t) 1;
5706 p = (
char*) api_posix_realloc((
void*) rbuf, rbuf_len);
5707 if(NULL == p) { res = -1; }
5715 if(0x0A == (
int) rbuf[ri] || 0x0D == (int) rbuf[ri])
5717 memmove((
void*) &rbuf[ri + (
size_t) 2], (
void*) &rbuf[ri],
5718 rbuf_len - (ri + (
size_t) 2));
5719 rbuf[ri++] = (
unsigned char) 0xEF;
5720 rbuf[ri++] = (
unsigned char) 0xBF;
5721 rbuf[ri] = (
unsigned char) 0xBD;
5727 "Unwanted CR and/or LF detected in header field");
5737 *r = (
const char*) rbuf;
5747 api_posix_free((
void*) rbuf);
5806 api_posix_locale_t loc_ctype_posix = 0;
5808 struct mime_parameter** parray = NULL;
5809 size_t ppsize =
sizeof(
struct mime_parameter*);
5810 struct mime_parameter* pdata;
5811 size_t psize =
sizeof(
struct mime_parameter);
5812 const char* first_end;
5815 const char* p_start;
5817 const char* p_eq_sign;
5818 const char* p_asterisk;
5824 struct mime_parameter** tmp;
5827 unsigned int sec_num;
5840 loc_ctype_posix = api_posix_newlocale(API_POSIX_LC_CTYPE_MASK,
"POSIX",
5841 (api_posix_locale_t) 0);
5842 if((api_posix_locale_t) 0 == loc_ctype_posix)
5849 if(NULL == strchr(b, (
int)
'*')) { *r = b; res = 1; }
5854 printf(
"---------------\n");
5855 printf(
"Header field body : %s\n", b);
5858 first_end = strchr(b, (
int)
';');
5859 if(NULL == first_end) { *r = b; res = 1; }
5863 parray = (
struct mime_parameter**) api_posix_malloc(ppsize);
5876 if(
' ' == *p) { ++p; }
5878 p_end = strchr(p, (
int)
';');
5881 p_end = p + strlen(p);
5883 if(
' ' == *(p_end - 1)) { --p_end; }
5887 p_eq_sign = strchr(p, (
int)
'=');
5888 if(NULL == p_eq_sign) {
break; }
5889 if(p_end < p_eq_sign) {
break; }
5891 p_asterisk = strchr(p, (
int)
'*');
5892 if(NULL != p_asterisk && p_eq_sign > p_asterisk)
5895 rv = sscanf(p_asterisk,
" * %u", &sec_num);
5901 sscanf(p_asterisk,
" %c", &ext_mark);
5906 sscanf(p_asterisk,
" * %*u %c", &ext_mark);
5909 else { p_asterisk = p_eq_sign; }
5910 alen = (size_t) (p_asterisk - p);
5911 if(alen &&
' ' == p[alen - (
size_t) 1])
5919 PRINT_ERROR(
"MIME: Parameter attribute too long");
5923 p_start = p_eq_sign + 1;
5925 if(!sec_num &&
'*' == ext_mark)
5927 q = strchr(p_start, 0x27);
5930 PRINT_ERROR(
"MIME: Parameter charset field missing");
5934 clen = (size_t) (q - p_start);
5941 q = strchr(p_start, 0x27);
5944 PRINT_ERROR(
"MIME: Parameter language field missing");
5946 else { p_start = q + 1; }
5952 if(api_posix_strncasecmp_l(p,
"Charset", alen, loc_ctype_posix)
5953 && api_posix_strncasecmp_l(p,
"Format", alen, loc_ctype_posix)
5954 && api_posix_strncasecmp_l(p,
"DelSp", alen, loc_ctype_posix)
5955 && api_posix_strncasecmp_l(p,
"InsLine", alen, loc_ctype_posix)
5956 && api_posix_strncasecmp_l(p,
"Boundary", alen, loc_ctype_posix)
5964 tmp = (
struct mime_parameter**)
5965 api_posix_realloc(parray,
5966 ppsize +=
sizeof(
struct mime_parameter*));
5969 PRINT_ERROR(
"MIME: Parameter memory allocation failed");
5974 pdata = (
struct mime_parameter*) api_posix_malloc(psize);
5977 PRINT_ERROR(
"MIME: Parameter memory allocation failed");
5980 strncpy(pdata->attribute, p, alen);
5981 pdata->attribute[alen] = 0;
5982 pdata->attribute_len = alen;
5983 pdata->section = sec_num;
5984 strncpy(pdata->charset, p_cs, clen);
5985 pdata->charset[clen] = 0;
5986 pdata->value_start = p_start;
5987 pdata->value_end = p_end;
5991 printf(
"Index : %u / Section: %u (%s): ", (
unsigned int) i,
5992 sec_num, pdata->attribute);
5993 if(strlen(pdata->charset))
5995 printf(
"[Charset: %s] ", pdata->charset);
5997 for(
size_t iii = 0; (size_t) (p_end - p_start) > iii; ++iii)
5999 printf(
"%c", pdata->value_start[iii]);
6010 rbuf_len = (size_t) (first_end - b);
6012 rbuf = (
char*) api_posix_malloc(rbuf_len + (
size_t) 3);
6016 strncpy(rbuf, b, rbuf_len);
6022 while(NULL != (q = strchr(rbuf, (
int)
' ')))
6024 memmove((
void*) q, (
void*) (q + 1),
6025 strlen(q + 1) + (
size_t) 1);
6026 if(rbuf_len) { --rbuf_len; }
6030 rbuf_len += (size_t) 3;
6034 if(NULL == parray[i])
6041 if(!parray[i]->valid) {
continue; }
6042 if(parray[i]->section) {
continue; }
6044 rbuf[ri++] =
';'; rbuf[ri++] =
' ';
6049 para_charset = NULL;
6053 if(rewind) { rewind = 0; ii = 0; }
6054 if(!parray[ii]->valid) {
continue; }
6055 if(sec_num != parray[ii]->section) {
continue; }
6056 else if(!strcmp(parray[i]->attribute,
6057 parray[ii]->attribute))
6063 alen = parray[ii]->attribute_len + (size_t) 1;
6067 len += (size_t) (parray[ii]->value_end
6068 - parray[ii]->value_start);
6071 while(ri + len >= rbuf_len)
6073 tmp2 = api_posix_realloc((
void*) rbuf,
6074 rbuf_len *= (
size_t) 2);
6078 " for result buffer failed");
6082 else { rbuf = tmp2; }
6088 strncpy(&rbuf[ri], parray[ii]->attribute, alen);
6089 rbuf[ri + alen - (size_t) 1] =
'=';
6094 if(!sec_num) { para_charset = parray[ii]->charset; }
6097 if(NULL != para_charset)
6100 if((
size_t) 0 == strlen(para_charset))
6102 para_charset=
"US-ASCII";
6105 tmp2 = api_posix_malloc(len + (
size_t) 1);
6108 strncpy(tmp2, parray[ii]->value_start, len);
6110 tmp3 = enc_mime_decode_parameter(tmp2,
6114 len2 = strlen(tmp3);
6118 api_posix_free((
void*) tmp3);
6123 strcpy(&rbuf[ri], tmp3);
6127 api_posix_free((
void*) tmp2);
6132 strncpy(&rbuf[ri], parray[ii]->value_start, len);
6136 api_posix_free((
void*) tmp3);
6137 parray[ii]->valid = 0;
6143 while(!error && (NULL != parray[++ii] || rewind));
6144 parray[i]->valid = 0;
6146 while(!error && NULL != parray[++i]);
6150 while(NULL != parray[i]) { api_posix_free((
void*) parray[i++]); }
6151 api_posix_free((
void*) parray);
6155 if(error) { res = -1; }
6158 if((api_posix_locale_t) 0 != loc_ctype_posix)
6160 api_posix_freelocale(loc_ctype_posix);
6164 if(0 > res) { *r = NULL; }
6169 printf(
"Result: %s\n", rbuf);
6170 printf(
"---------------\n");
6173 }
else { api_posix_free((
void*) rbuf); }
6227 char fmt[ENC_FMT_BUFLEN];
6229 size_t bo_len, bo_len_valid;
6230 int trailing_sp = 0;
6233 ct->
type = ENC_CT_UNKNOWN;
6234 ct->
subtype = ENC_CTS_UNKNOWN;
6243 printf(
"Content-Type: Not specified\n");
6252 len = strlen(hf_body);
6253 body = (
char*) api_posix_malloc(len + (
size_t) 1);
6257 for(i = 0; i < len; ++i) { body[i] = (char) toupper((
int) hf_body[i]); }
6261 printf(
"Content-Type: %s\n", body);
6264 if(!strncmp(
"TEXT", body, 4))
6268 if(!strncmp(
"TEXT/PLAIN", body, 10))
6272 for(i = 0; i < len; ++i)
6274 if(!strncmp(
"FORMAT", &body[i], 6))
6277 ii = i + (size_t) 6;
6280 if(
'=' != body[ii] &&
' ' != body[ii]) {
break; }
6283 for(i = 0; i < ENC_FMT_BUFLEN; ++i)
6286 ||
';' == body[ii + i] ||
' ' == body[ii + i])
6290 else { fmt[i] = body[ii + i]; }
6292 fmt[ENC_FMT_BUFLEN - (size_t) 1] = 0;
6293 if(!strncmp(
"FLOWED", fmt, 6))
6295 ct->
flags |= ENC_CT_FLAG_FLOWED;
6300 if(ct->
flags & ENC_CT_FLAG_FLOWED)
6303 for(i = 0; i < len; ++i)
6305 if(!strncmp(
"DELSP", &body[i], 5))
6308 ii = i + (size_t) 5;
6311 if(
'=' != body[ii] &&
' ' != body[ii]) {
break; }
6314 for(i = 0; i < ENC_FMT_BUFLEN; ++i)
6317 ||
';' == body[ii + i] ||
' ' == body[ii + i])
6321 else { fmt[i] = body[ii + i]; }
6323 fmt[ENC_FMT_BUFLEN - (size_t) 1] = 0;
6324 if(!strncmp(
"YES", fmt, 3))
6326 ct->
flags |= ENC_CT_FLAG_DELSP;
6332 for(i = 0; i < len; ++i)
6334 if(!strncmp(
"INSLINE", &body[i], 7))
6337 ii = i + (size_t) 7;
6340 if(
'=' != body[ii] &&
' ' != body[ii]) {
break; }
6343 for(i = 0; i < ENC_FMT_BUFLEN; ++i)
6346 ||
';' == body[ii + i] ||
' ' == body[ii + i])
6350 else { fmt[i] = body[ii + i]; }
6352 fmt[ENC_FMT_BUFLEN - (size_t) 1] = 0;
6353 if(!strncmp(
"YES", fmt, 3))
6355 ct->
flags |= ENC_CT_FLAG_INSLINE;
6363 for(i = 0; i < len; ++i)
6365 if(!strncmp(
"CHARSET", &body[i], 7))
6368 ii = i + (size_t) 7;
6371 if(
'=' != body[ii] &&
' ' != body[ii]) {
break; }
6377 ||
';' == body[ii + i] ||
' ' == body[ii + i])
6381 else { cs[i] = body[ii + i]; }
6384 ct->
charset = enc_mime_get_charset(cs, strlen(cs));
6390 else if(!strncmp(
"IMAGE", body, 5))
6395 else if(!strncmp(
"AUDIO", body, 5))
6400 else if(!strncmp(
"VIDEO", body, 5))
6405 else if(!strncmp(
"MESSAGE/RFC822", body, 14))
6411 else if(!strncmp(
"MULTIPART", body, 9))
6415 if(!strncmp(
"MULTIPART/ALTERNATIVE", body, 21))
6419 else if(!strncmp(
"MULTIPART/DIGEST", body, 16))
6425 for(i = 0; i < len; ++i)
6427 if(!strncmp(
"BOUNDARY", &body[i], 8))
6430 ii = i + (size_t) 8;
6431 if(
'=' != hf_body[ii++])
6434 "Missing multipart boundary parameter value");
6441 if(!hf_body[ii + i] ||
';' == hf_body[ii + i])
6445 else { bo[i] = hf_body[ii + i]; }
6449 bo_len = strlen(bo);
6450 bo_len_valid = strspn(bo,
6452 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
6455 if (bo_len_valid != bo_len)
6457 PRINT_ERROR(
"MIME: Invalid multipart boundary parameter");
6458 if(bo_len_valid &&
' ' != bo[bo_len_valid - (
size_t) 1])
6461 bo[bo_len_valid] = 0;
6462 bo_len = bo_len_valid;
6474 if(
' ' != bo[bo_len - (
size_t) 1]) {
break; }
6482 "from multipart boundary parameter");
6490 api_posix_free((
void*) body);
6528 const char not_supported[]
6529 =
"ENC: MIME: Unsupported content transfer encoding: ";
6536 res = ENC_CTE_UNKNOWN;
6537 len = strlen(hf_body);
6541 PRINT_ERROR(
"MIME: Name of content transfer encoding too long");
6546 for(i = 0; i < len; ++i)
6548 buf[i] = (char) toupper((
int) hf_body[i]);
6555 if(!strcmp(buf,
"QUOTED-PRINTABLE")) { res =
ENC_CTE_Q; }
6556 if(!strcmp(buf,
"BASE64")) { res =
ENC_CTE_B; }
6558 if(!strcmp(buf,
"7-BIT"))
6561 "Invalid content transfer encoding 7-bit accepted as 7bit");
6564 if(!strcmp(buf,
"8-BIT"))
6567 "Invalid content transfer encoding 8-bit accepted as 8bit");
6571 if(ENC_CTE_UNKNOWN == res)
6573 l = strlen(not_supported) + len;
6574 p = (
char*) api_posix_malloc(++l);
6577 strcpy(p, not_supported);
6578 strncat(p, buf, len);
6580 api_posix_free((
void*) p);
6608 api_posix_locale_t loc_ctype_posix;
6610 const char* fn_para =
"FILENAME=";
6618 *type = ENC_CD_UNKNOWN;
6622 loc_ctype_posix = api_posix_newlocale(API_POSIX_LC_CTYPE_MASK,
"POSIX",
6623 (api_posix_locale_t) 0);
6624 if((api_posix_locale_t) 0 == loc_ctype_posix)
6631 if(!api_posix_strncasecmp_l(hf_body,
"inline", strlen(
"inline"),
6634 *type = ENC_CD_INLINE;
6636 else if(!api_posix_strncasecmp_l(hf_body,
"attachment",
6637 strlen(
"attachment"),
6640 *type = ENC_CD_ATTACHMENT;
6642 api_posix_freelocale(loc_ctype_posix);
6646 len = strlen(hf_body);
6647 body = (
char*) api_posix_malloc(len + (
size_t) 1);
6651 for(i = 0; i < len; ++i) { body[i] = (char) toupper((
int) hf_body[i]); }
6654 p = strstr(body, fn_para);
6657 p += strlen(fn_para);
6659 if(NULL != q) { len = (size_t) (q - p); }
6660 else { len = strlen(p); }
6662 buf = (
char*) malloc(len + (
size_t) 1);
6665 i = (size_t) (p - body);
6666 strncpy(buf, &hf_body[i], len);
6669 p = strrchr(buf,
'/');
6670 if(NULL != p) { ++p; memmove(buf, p, strlen(p) + (
size_t) 1); }
6675 p = strpbrk(buf,
"~|\x5C");
6679 "Filename in Content-Disposition rejected");
6681 else { *filename = buf; }
6685 api_posix_free((
void*) body);
6708 size_t len = strlen(entity);
6709 const char* p = entity;
6710 const char* buf = NULL;
6713 api_posix_mode_t perm = API_POSIX_S_IRUSR | API_POSIX_S_IWUSR |
6714 API_POSIX_S_IRGRP | API_POSIX_S_IWGRP |
6715 API_POSIX_S_IROTH | API_POSIX_S_IWOTH;
6722 buf = enc_mime_decode_qp(entity, &entity[len], 0, &len);
6728 buf = enc_mime_decode_base64(entity, &entity[len], &len);
6740 PRINT_ERROR(
"MIME: Content transfer encoding not supported");
6749 API_POSIX_O_WRONLY | API_POSIX_O_CREAT
6750 | API_POSIX_O_TRUNC, perm);
6792 const char* res = NULL;
6793 size_t len = strlen(s);
6800 res = enc_mime_decode_q(charset, s, &s[len], 0);
6805 res = enc_mime_decode_b(charset, s, &s[len]);
6817 PRINT_ERROR(
"MIME: Content transfer encoding not supported");
6845 unsigned int insline)
6847 const char* quote_mark;
6859 int insert_crlf = 0;
6882 case 0: { quote_mark =
">";
break; }
6883 case 1: { quote_mark =
"> ";
break; }
6886 PRINT_ERROR(
"Quoting style configuration not supported");
6908 if(API_POSIX_INT_MAX <= qd) {
break; }
6912 if(-1 == qdepth) { qdepth = (int) qd; }
6915 if((
int) qd != qdepth)
6918 " (format=flowed)");
6924 if(
' ' == s[i]) { ++i; }
6929 if(i && 0x0A == (
int) s[i])
6931 if(0x0D != (
int) s[i - (
size_t) 1])
6935 " (format=flowed)");
6938 else { end = i - (size_t) 1; }
6943 if(!s[++i]) { end = i; }
6947 if(llen &&
' ' == s[end - (
size_t) 1])
6950 if(!((
size_t) 3 == llen
6951 &&
'-' == s[start] &&
'-' == s[start + (
size_t) 1]))
6955 if(delsp) { --llen; --end; }
6959 while(pi + llen + (
size_t) 1 >= plen)
6961 if(!plen) { plen = 128; }
6962 p = (
char*) api_posix_realloc((
void*) para, plen *= (size_t) 2);
6971 if(error) {
break; }
6973 strncpy(¶[pi], &s[start], llen);
6977 if(error) {
break; }
6980 if(pflowed && !llen) { pell = 1; };
6985 llen = (size_t) qdepth * strlen(quote_mark);
7001 if(!para[pi]) { abort = 1; }
7005 if(
' ' == para[pi]) { check = 1; }
7008 && 0xADU == (
unsigned int) (
unsigned char) para[pi]
7009 && 0xC2U == (
unsigned int)
7010 (
unsigned char) para[pi - (
size_t) 1])
7016 && 0x8BU == (
unsigned int) (
unsigned char) para[pi]
7017 && 0x80U == (
unsigned int)
7018 (
unsigned char) para[pi - (
size_t) 1]
7019 && 0xE2U == (
unsigned int)
7020 (
unsigned char) para[pi - (
size_t) 2])
7029 llimit = (size_t) 72;
7030 if(1 == qdepth) { llimit = (size_t) 74; }
7031 else if(2 == qdepth) { llimit = (size_t) 76; }
7032 else if(3 <= qdepth) { llimit = (size_t) 78; }
7034 if(llimit - (
size_t) 20
7035 <= (size_t) qdepth * strlen(quote_mark))
7037 llimit = (size_t) 20;
7041 llimit -= (size_t) qdepth * strlen(quote_mark);
7044 ustring_len = pi - start;
7048 if(pi &&
' ' == para[pi - (
size_t) 1]) { --ustring_len; }
7050 if(llimit < enc_uc_get_glyph_count(¶[start], ustring_len))
7053 if(last_space) { pi = last_space; }
7058 if(pell) { insert_crlf = 1; }
7066 if(pell) { insert_crlf = 1; }
7068 else { last_space = pi; }
7072 if(start < pi &&
' ' == para[pi - (
size_t) 1])
7074 end = pi - (size_t) 1;
7077 llen += end - start;
7088 if(insert_crlf) { llen += (size_t) 2; }
7090 while(bi + llen + (
size_t) 1 >= len)
7092 if(!len) { len = 256; }
7093 p = (
char*) api_posix_realloc((
void*) buf, len *= (size_t) 2);
7102 if(error) {
break; }
7104 for(ii = 0; ii < (size_t) qdepth; ++ii)
7106 strncpy(&buf[bi], quote_mark, strlen(quote_mark));
7107 bi += strlen(quote_mark);
7110 strncpy(&buf[bi], ¶[start], end - start);
7113 buf[bi++] = (char) 0x0D; buf[bi++] = (char) 0x0A;
7119 buf[bi++] = (char) 0x0D; buf[bi++] = (char) 0x0A;
7122 if(error) {
break; }
7124 api_posix_free((
void*) para);
7127 PRINT_ERROR(
"MIME: Decoding of format=flowed content failed");
7128 api_posix_free((
void*) buf);
7131 else if(NULL != buf)
7166 PRINT_ERROR(
"Parsing encapsulated message aborted");
7171 array[res].
start = s;
7177 if(res) { *mpe = array; }
7215 if(!b_len || (
size_t) 70 < b_len)
7217 PRINT_ERROR(
"Invalid MIME multipart boundary delimiter");
7221 PRINT_ERROR(
"Value of ENC_BO_BUFLEN must be at least 75");
7226 strncpy(&boundary[2], b, 71);
7227 b_len += (size_t) 2;
7236 if((
size_t) 2 <= i) { end = i - (size_t) 2; }
7239 if(!strncmp(&s[i], boundary, b_len)) { match = 1; }
7243 if(!s[i]) {
break; }
7244 else if((
char) 0x0D == s[i++])
7246 if((
char) 0x0A == s[i++]) {
break; }
7253 if(!preamble && end >
start)
7256 e_len = end -
start;
7258 api_posix_realloc(array, (res + (
size_t) 1)
7270 array[res++].
len = e_len;
7282 if(res) { *mpe = array; }
7313 unsigned char c = 0;
7319 p = strchr(q, (
int)
'%');
7324 if((
size_t) 3 > strlen(p)) { res = -1;
break; }
7326 v = enc_hex_decode_nibble(p[1]);
7327 if(0 > v) { invalid = 1; }
7330 c = (
unsigned char) (v * 16);
7331 v = enc_hex_decode_nibble(p[2]);
7332 if(0 > v) { invalid = 1; }
7333 else { c += (
unsigned char) v; }
7336 if(invalid) { res = -1;
break; }
7343 if(!p[0] ||
';' == p[0]) { p[0] =
'_'; }
7345 len = strlen(&p[3]);
7346 memmove((
void*) &p[1], (
void*) &p[3], ++
len);
7351 if(-1 == res) {
PRINT_ERROR(
"Percent decoding of URI failed"); }
7392 const char* res = NULL;
7393 const char* gen_delims =
7395 const char* sub_delims =
7397 const char* unreserved =
7398 "abcdefghijklmnopqrstuvwxyz"
7399 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
7408 size_t commercial_at = 0;
7409 unsigned int nibble;
7416 if(NULL == strchr(unreserved, (
int) s[i])) { process = 1;
break; }
7419 if(!process) { res = s; }
7423 buf = (
char*) api_posix_malloc(strlen(s) * (size_t) 3 + (
size_t) 1);
7440 if(
'/' == s[i]) { encode = 0; }
7441 else if(NULL == strchr(unreserved, (
int) s[i]))
7455 if(
' ' == s[i]) { error = 1; }
7456 else if(
'%' == s[i]) { encode = 1; }
7457 else if(
'@' == s[i])
7466 else if(NULL != strchr(gen_delims, (
int) s[i]))
7471 && NULL != strchr(sub_delims, (
int) s[i]))
7480 PRINT_ERROR(
"Invalid URI scheme for percent encoding");
7485 if(error) {
break; }
7486 if(!encode) { buf[bi++] = s[i]; }
7492 nibble = ((
unsigned int) s[i] & 0xF0U) >> 4;
7493 if(10U > nibble) { buf[bi] = 0x30; }
7494 else { buf[bi] = 0x41; nibble -= 10U; }
7495 buf[bi++] += (char) nibble;
7497 nibble = (
unsigned int) s[i] & 0x0FU;
7498 if(10 > nibble) { buf[bi] = 0x30; }
7499 else { buf[bi] = 0x41; nibble -= 10U; }
7500 buf[bi++] += (char) nibble;
7507 if(!error) { res = buf; }
7516 PRINT_ERROR(
"Missing \"@\" in URI with scheme \"mailto\"");
7523 api_posix_free((
void*) buf);
/*
 * Search for the string search_s inside the string s, beginning at byte
 * offset start_pos of s.
 *
 * Both strings are normalized with enc_uc_normalize_to_nfd() and then mapped
 * code point by code point through enc_uc_lookup_cf() before the comparison
 * is done with strstr(), so the match is made on the processed copies and
 * not on the raw input bytes.
 * NOTE(review): presumably this gives a case-insensitive, normalization-
 * insensitive match on UTF-8 data -- confirm against the helpers.
 *
 * Parameters:
 *   s          String to search in
 *   start_pos  Byte offset in s at which the search starts
 *   search_s   String to search for (an empty string is rejected)
 *   found_pos  Receives start_pos plus the computed match offset
 *   found_len  Receives the computed match length
 *
 * NOTE(review): the return value convention is not documented here -- verify
 * against the callers.
 */
7563 int enc_uc_search(
const char* s,
size_t start_pos,
const char* search_s,
7564 size_t* found_pos,
size_t* found_len)
/* Stage flags; each later stage is gated on the result of an earlier one */
7567 int ok = 0, ok2 = 0, ok3 = 0, ok4 = 0, ok5 = 0;
7568 size_t search_s_len;
7573 struct uc_cdc ucp_attr;
/* Holds up to three code points per lookup; -1L marks an unused slot */
7574 long int mapping[3];
7576 size_t match_pos = 0, match_len = 0;
7577 size_t tmp_pos = 0, end_pos = 0;
7578 const char* s_nfd = NULL;
7579 const char* search_s_nfd = NULL;
7580 const char* s_cf = NULL;
7581 const char* search_s_cf = NULL;
/* Work buffers are sized as (input length * mem_factor + 1) bytes */
7595 const size_t mem_factor = 6;
/*
 * Inspect the first code point of s (decoded from index 0, not start_pos).
 * The search is aborted if it cannot be decoded or if its ccc attribute is
 * nonzero.
 * NOTE(review): ccc is presumably the canonical combining class, i.e. the
 * string must not begin with a combining mark -- confirm.
 */
7599 i = 0; ucp = enc_uc_decode_utf8(s, &i);
7600 if(-1L == ucp) {
goto error; }
7601 enc_uc_lookup_cdc(ucp, &ucp_attr);
7602 if(ucp_attr.ccc) {
goto error; }
/* Normalize the part of s that starts at start_pos */
7603 s_nfd = enc_uc_normalize_to_nfd(&s[start_pos]);
7604 if(NULL == s_nfd) {
goto error; }
/* Normalize the search string */
7608 search_s_nfd = enc_uc_normalize_to_nfd(search_s);
7609 if(NULL == search_s_nfd) {
goto error; }
/* An empty search string is treated as an error */
7612 search_s_len = strlen(search_s_nfd);
7613 if(!search_s_len) {
goto error; }
/*
 * Wrap-around test for the buffer size computed below.
 * NOTE(review): a wrapped product can still be larger than search_s_len, so
 * this comparison may not detect every overflow -- confirm.
 */
7614 if(search_s_len * mem_factor + (
size_t) 1 < search_s_len)
/* Buffer for the mapped copy of the search string */
7621 p = (
char*) api_posix_malloc(search_s_len * mem_factor + (
size_t) 1);
7622 if(NULL == p) {
PRINT_ERROR(
"Memory allocation failed"); }
/* Map the search string code point by code point; stop on decode failure */
7629 ucp = enc_uc_decode_utf8(search_s_nfd, &i);
7630 if(-1L == ucp) {
break; }
7633 enc_uc_lookup_cf(ucp, mapping);
/* Process at most three mapped code points, stopping at the first -1L */
7634 for(j = 0; (size_t) 3 > j; ++j)
7636 if(-1L == mapping[j]) {
break; }
/* Normalize the mapped search string again; on failure the buffer is freed */
7647 q = enc_uc_normalize_to_nfd(p);
7648 if(NULL == q) {
enc_free((
void*) p); }
/* An identical pointer means the normalizer returned its input unchanged */
7651 if(p == q) { search_s_cf = p; }
/* Length in bytes of the processed search string */
7657 match_len = strlen(search_s_cf);
/* Same processing for the normalized haystack */
7666 s_len = strlen(s_nfd);
7667 if(s_len * mem_factor + (
size_t) 1 < s_len)
7674 p = (
char*) api_posix_malloc(s_len * mem_factor + (
size_t) 1);
7675 if(NULL == p) {
PRINT_ERROR(
"Memory allocation failed"); }
7682 ucp = enc_uc_decode_utf8(s_nfd, &i);
7683 if(-1L == ucp) {
break; }
7686 enc_uc_lookup_cf(ucp, mapping);
7687 for(j = 0; (size_t) 3 > j; ++j)
7689 if(-1L == mapping[j]) {
break; }
7700 q = enc_uc_normalize_to_nfd(p);
7701 if(NULL == q) {
enc_free((
void*) p); }
/*
 * For the haystack, a length change caused by the second normalization is
 * reported as an internal bug.
 * NOTE(review): presumably because the offsets computed below rely on the
 * mapped buffer keeping its length -- confirm.
 */
7704 if(strlen(p) != strlen(q))
7718 PRINT_ERROR(
"Case folding failed, length changed (bug)");
7722 if(p == q) { s_cf = p; }
/* Byte-wise search on the processed strings */
7738 p = strstr(s_cf, search_s_cf);
/* Match offset relative to the processed haystack */
7741 match_pos = (size_t) (p - s_cf);
/*
 * Walk the normalized haystack once more with the same mapping.
 * NOTE(review): presumably this translates the match offsets in the
 * processed data back to offsets in s_nfd -- confirm.
 */
7766 ucp = enc_uc_decode_utf8(s_nfd, &i);
7767 if(-1L == ucp) {
break; }
7770 enc_uc_lookup_cf(ucp, mapping);
7771 for(j = 0; (size_t) 3 > j; ++j)
7773 if(-1L == mapping[j]) {
break; }
/* True when the mapped output index has reached the end of the match */
7784 if(ok4 && (bi == match_pos + match_len))
/* Continue only if a non-empty range [tmp_pos, end_pos) was found */
7793 if(ok5 && tmp_pos < end_pos)
/* Copy the first end_pos bytes of the normalized haystack and terminate */
7795 p = (
char*) api_posix_malloc(end_pos + (
size_t) 1);
7796 if(NULL == p) {
PRINT_ERROR(
"Memory allocation failed"); }
7799 strncpy(p, s_nfd, end_pos); p[end_pos] = 0;
/* Normalize the copied prefix with enc_uc_normalize_to_nfc() */
7800 q = enc_uc_normalize_to_nfc(p);
/* Free the result only if it is a separate buffer */
7804 if(p != q) { api_posix_free((
void*) q); }
/* Arithmetically the same as: end_pos = j */
7807 end_pos -= (end_pos - j);
7810 q = enc_uc_normalize_to_nfc(p);
7814 if(p != q) { api_posix_free((
void*) q); }
/* Arithmetically the same as: tmp_pos = j */
7817 tmp_pos -= (tmp_pos - j);
/* Report the result; the position is offset by start_pos */
7818 *found_pos = start_pos + tmp_pos;
7819 *found_len = end_pos - tmp_pos;
7825 api_posix_free((
void*) p);
/* Release the processed copies */
7830 api_posix_free((
void*) search_s_cf);
7831 api_posix_free((
void*) s_cf);
/* The normalized strings are freed only if they are separate buffers */
7834 if(search_s != search_s_nfd) { api_posix_free((
void*) search_s_nfd); }
7835 if(&s[start_pos] != s_nfd) { api_posix_free((
void*) s_nfd); }