encoding.h
1 #ifndef ENCODING_H
2 #define ENCODING_H 1
3 
4 
5 /* ========================================================================== */
6 /* Include files */
7 
8 #include <limits.h>
9 
10 #include "conf.h"
11 #include "core.h"
12 #include "unicode.h"
13 
14 
15 /*! \addtogroup ENCODING */
16 /*! @{ */
17 
18 
19 /* ========================================================================== */
20 /* Data types */
21 
22 /*! \brief IDs for supported MIME content types */
24 {
25  ENC_CT_UNKNOWN,
26  ENC_CT_TEXT, /*!< Text */
27  ENC_CT_IMAGE, /*!< Picture */
28  ENC_CT_AUDIO, /*!< Audio */
29  ENC_CT_VIDEO, /*!< Video */
30  ENC_CT_MULTIPART, /*!< Content consists of multiple parts */
31  ENC_CT_MESSAGE, /*!< Content consists of an encapsulated message */
32  ENC_CT_APPLICATION /*!< Content for unknown application */
33 };
34 
35 /*! \brief IDs for supported MIME content subtypes */
37 {
38  ENC_CTS_UNKNOWN,
39  ENC_CTS_PLAIN, /*!< Plain without enrichment */
40  ENC_CTS_MIXED, /*!< Independent parts with a particular order */
41  ENC_CTS_ALTERNATIVE, /*!< Different representations of same content */
42  ENC_CTS_DIGEST, /*!< Default media type message/rfc822 */
43  ENC_CTS_RFC822, /*!< Encapsulated message */
44  ENC_CTS_OCTETSTREAM /*!< Raw octet stream */
45 };
46 
47 /*! \brief IDs for supported MIME content transfer encodings */
49 {
50  ENC_CTE_UNKNOWN,
51  ENC_CTE_7BIT, /*!< ASCII text */
52  ENC_CTE_8BIT, /*!< Raw non ASCII text */
53  ENC_CTE_BIN, /*!< Arbitrary binary data */
54  ENC_CTE_Q, /*!< MIME quoted-printable */
55  ENC_CTE_B /*!< MIME base64 */
56 };
57 
58 /*! \brief IDs for supported MIME character sets */
60 {
61  ENC_CS_UNKNOWN,
62  ENC_CS_ASCII, /*!< ANSI X3.4 */
63  ENC_CS_ISO8859_1, /*!< ISO 8859-1 */
64  ENC_CS_ISO8859_2, /*!< ISO 8859-2 */
65  ENC_CS_ISO8859_3, /*!< ISO 8859-3 */
66  ENC_CS_ISO8859_4, /*!< ISO 8859-4 */
67  ENC_CS_ISO8859_5, /*!< ISO 8859-5 */
68  ENC_CS_ISO8859_6, /*!< ISO 8859-6 */
69  ENC_CS_ISO8859_7, /*!< ISO 8859-7 */
70  ENC_CS_ISO8859_8, /*!< ISO 8859-8 */
71  ENC_CS_ISO8859_9, /*!< ISO 8859-9 */
72  ENC_CS_ISO8859_10, /*!< ISO 8859-10 */
73  ENC_CS_ISO8859_11, /*!< ISO 8859-11 */
74  ENC_CS_ISO8859_13, /*!< ISO 8859-13 */
75  ENC_CS_ISO8859_14, /*!< ISO 8859-14 */
76  ENC_CS_ISO8859_15, /*!< ISO 8859-15 */
77  ENC_CS_ISO8859_16, /*!< ISO 8859-16 */
78  ENC_CS_ISO8859_X, /*!< Generic fallback (handle only ASCII characters) */
79  ENC_CS_MACINTOSH, /*!< Mac Roman */
80  ENC_CS_KOI8R, /*!< Kod Obmena Informatsiey 8 bit (Russian) */
81  ENC_CS_KOI8U, /*!< Kod Obmena Informatsiey 8 bit (Ukrainian) */
82  ENC_CS_WINDOWS_1250, /*!< Windows codepage 1250 */
83  ENC_CS_WINDOWS_1251, /*!< Windows codepage 1251 */
84  ENC_CS_WINDOWS_1252, /*!< Windows codepage 1252 */
85  ENC_CS_WINDOWS_1253, /*!< Windows codepage 1253 */
86  ENC_CS_WINDOWS_1254, /*!< Windows codepage 1254 */
87  ENC_CS_WINDOWS_1255, /*!< Windows codepage 1255 */
88  ENC_CS_WINDOWS_1256, /*!< Windows codepage 1256 */
89  ENC_CS_WINDOWS_1257, /*!< Windows codepage 1257 */
90  ENC_CS_WINDOWS_1258, /*!< Windows codepage 1258 */
91  ENC_CS_IBM437, /*!< IBM codepage 437 */
92  ENC_CS_IBM775, /*!< IBM codepage 775 */
93  ENC_CS_IBM850, /*!< IBM codepage 850 */
94  ENC_CS_IBM852, /*!< IBM codepage 852 */
95  ENC_CS_IBM858, /*!< IBM codepage 858 */
96  ENC_CS_ISO2022_JP, /*!< ISO 2022-JP */
97  ENC_CS_CESU_8, /*!< Compatibility Encoding Scheme for UTF-16 */
98  ENC_CS_UTF_7, /*!< UCS Transformation Format 7 bit */
99  ENC_CS_UTF_8, /*!< UCS Transformation Format 8 bit */
100  ENC_CS_UTF_16BE /*!< UCS Transformation Format 16 bit big endian */
101 };
102 
103 /*! \brief IDs for supported MIME content disposition */
105 {
106  ENC_CD_UNKNOWN,
107  ENC_CD_INLINE,
108  ENC_CD_ATTACHMENT
109 };
110 
111 /*! \brief MIME content type information */
113 {
114  enum enc_mime_ct_type type; /*!< Content type ID */
115  enum enc_mime_ct_subtype subtype; /*!< Content subtype ID */
116  enum enc_mime_cs charset; /*!< Character set ID */
117  /*! Content type flags (use \c ENC_CT_FLAG_* constants) */
118  unsigned int flags;
119 };
120 
121 /*! \brief Locations of MIME multipart entities */
123 {
124  const char* start; /*!< Pointer to start of entity */
125  size_t len; /*!< Length of entity */
126 };
127 
128 /*! \brief URI schemes */
130 {
131  ENC_URI_SCHEME_INVALID,
132  ENC_URI_SCHEME_HTTP, /*!< Hyper Text Transfer Protocol */
133  ENC_URI_SCHEME_FTP, /*!< File Transfer Protocol */
134  ENC_URI_SCHEME_NEWS, /*!< News group or article */
135  ENC_URI_SCHEME_MAILTO /*!< E-mail */
136 };
137 
138 /*! \brief Wildmat array element (for RFC 3977 wildmat-pattern) */
140 {
141  int negate;
142  const char* ere;
143 };
144 
145 
146 /* ========================================================================== */
147 /* Constants */
148 
149 /*! \brief Buffer size for content transfer encoding name strings */
150 #define ENC_CTE_BUFLEN (size_t) 32
151 
152 /*! \brief Buffer size for character set name strings */
153 #define ENC_CS_BUFLEN (size_t) 32
154 
155 /*! \name Content type flags according to RFC 3676
156  *
157  * The flags can be bitwise ORed together.
158  *
159  * \note The parameter "InsLine" is experimental (not defined in RFC 3676).
160  */
161 /*! @{ */
162 #define ENC_CT_FLAG_FLOWED 0x01U
163 #define ENC_CT_FLAG_DELSP 0x02U
164 #define ENC_CT_FLAG_INSLINE 0x04U
165 /*! @} */
166 
167 /*! \brief Buffer size for multipart boundary strings
168  *
169  * RFC 2046 limits the length of the boundary delimiter to 70 characters.
170  * There are always two hyphens before the boundary delimiter.
171  * At the end of the multipart body, there are 2 hyphens after the boundary
172  * delimiter.
173  * Finally we need space for the NUL character to terminate the string.
174  * Result: 70 + 2 + 2 + 1 = 75
175  */
176 #define ENC_BO_BUFLEN (size_t) 75
177 
178 /*! \brief Delimiter string to print between article header and body parts */
179 #define ENC_DELIMITER \
180  "________________________________________" \
181  "_______________________________________|" "\n" \
182  " " \
183  " |" "\n"
184 
185 /*! @} */
186 
187 
188 /* ========================================================================== */
189 /* Function prototypes */
190 
191 const char* enc_create_name_addr(const char*, size_t);
192 unsigned long int enc_lines_decode(const char*);
193 void enc_convert_lines_to_string(char*, unsigned long int);
194 core_time_t enc_timestamp_decode(const char*);
196 int enc_get_iso8601_utc(char*);
197 int enc_convert_iso8601_to_posix(core_time_t*, const char*);
198 int enc_convert_iso8601_to_timestamp(const char**, const char*);
199 int enc_convert_anum_to_ascii(char[17], size_t*, core_anum_t);
200 int enc_convert_ascii_to_anum(core_anum_t*, const char*, int);
201 int enc_convert_octet_to_hex(char*, unsigned int);
202 void enc_rot13(char*);
203 const char* enc_extract_addr_spec(const char*);
204 int enc_ascii_check(const char*);
205 int enc_ascii_check_alpha(const char*);
206 int enc_ascii_check_digit(const char*);
207 int enc_ascii_check_printable(const char*);
210 int enc_uc_check_utf8(const char*);
211 const char* enc_uc_repair_utf8(const char*);
212 void enc_uc_encode_utf8(char*, size_t*, long int*, size_t*);
213 int enc_create_wildmat(struct enc_wm_pattern**, const char*);
214 void enc_destroy_wildmat(struct enc_wm_pattern**, int);
215 const char* enc_convert_canonical_to_posix(const char*, int, int);
216 const char* enc_convert_posix_to_canonical(const char*);
217 const char* enc_convert_to_utf8_nfc(enum enc_mime_cs, const char*);
218 const char* enc_convert_to_8bit(enum enc_mime_cs*, const char*, const char**);
219 int enc_mime_encode_base64(const char**, const char*, size_t);
220 int enc_mime_word_encode(const char**, const char*, size_t);
221 int enc_mime_word_decode(const char**, const char*);
222 int enc_mime_para_decode(const char**, const char*, int);
223 void enc_mime_get_ct(struct enc_mime_ct*, const char*, char*);
224 enum enc_mime_cte enc_mime_get_cte(const char*);
225 void enc_mime_get_cd(const char*, enum enc_mime_cd*, const char**);
226 int enc_mime_save_to_file(const char*, enum enc_mime_cte, const char*);
227 const char* enc_mime_decode(enum enc_mime_cte, enum enc_mime_cs, const char*);
228 const char* enc_mime_flowed_decode(const char*, unsigned int, unsigned int);
229 size_t enc_mime_message(const char*, size_t, struct enc_mime_mpe**);
230 size_t enc_mime_multipart(const char*, const char*, struct enc_mime_mpe**);
231 int enc_percent_decode(char*, int);
232 const char* enc_uri_percent_encode(const char*, enum enc_uri_scheme);
233 int enc_uc_search(const char*, size_t, const char*, size_t*, size_t*);
234 void enc_free(void*);
235 
236 
237 #endif /* ENCODING_H */
238 
239 /* EOF */
ENC_CS_ISO8859_14
Definition: encoding.h:75
enc_get_iso8601_utc
int enc_get_iso8601_utc(char *)
Get current UTC date in ISO 8601 conformant format.
Definition: encoding.c:4395
ENC_CS_ISO8859_5
Definition: encoding.h:67
enc_ascii_convert_distribution
void enc_ascii_convert_distribution(char *)
Convert body of distribution header field.
Definition: encoding.c:5074
ENC_CS_KOI8U
Definition: encoding.h:81
enc_mime_ct::subtype
enum enc_mime_ct_subtype subtype
Definition: encoding.h:115
ENC_CS_IBM850
Definition: encoding.h:93
ENC_URI_SCHEME_HTTP
Definition: encoding.h:132
ENC_CT_VIDEO
Definition: encoding.h:29
ENC_CS_ASCII
Definition: encoding.h:62
ENC_CTE_BIN
Definition: encoding.h:53
enc_mime_mpe
Locations of MIME multipart entities.
Definition: encoding.h:122
enc_free
void enc_free(void *)
Free an object allocated by encoding module.
Definition: encoding.c:8868
ENC_CS_IBM775
Definition: encoding.h:92
enc_convert_to_utf8_nfc
const char * enc_convert_to_utf8_nfc(enum enc_mime_cs, const char *)
Convert string from supported character set to Unicode (UTF-8 NFC)
Definition: encoding.c:5788
core_anum_t
#define core_anum_t
Article number data type (value zero is always reserved)
Definition: core.h:24
enc_mime_cs
enc_mime_cs
IDs for supported MIME character sets.
Definition: encoding.h:59
enc_uri_percent_encode
const char * enc_uri_percent_encode(const char *, enum enc_uri_scheme)
Percent encoding for URI content.
Definition: encoding.c:8402
ENC_URI_SCHEME_NEWS
Definition: encoding.h:134
ENC_CT_MULTIPART
Definition: encoding.h:30
enc_mime_flowed_decode
const char * enc_mime_flowed_decode(const char *, unsigned int, unsigned int)
Decode MIME "text/plain" content with "format=flowed" parameter.
Definition: encoding.c:7856
core_time_t
unsigned long int core_time_t
Time in seconds since the epoch (in terms of POSIX.1)
Definition: core.h:54
enc_mime_save_to_file
int enc_mime_save_to_file(const char *, enum enc_mime_cte, const char *)
Decode MIME content transfer encoding and save to file.
Definition: encoding.c:7717
enc_rot13
void enc_rot13(char *)
Encode or decode data with ROT13 algorithm.
Definition: encoding.c:4692
ENC_CS_ISO8859_4
Definition: encoding.h:66
enc_mime_word_decode
int enc_mime_word_decode(const char **, const char *)
Decode header field containing potential MIME encoded-word tokens.
Definition: encoding.c:6518
ENC_CS_ISO8859_8
Definition: encoding.h:70
ENC_CS_ISO8859_10
Definition: encoding.h:72
enc_mime_get_cte
enum enc_mime_cte enc_mime_get_cte(const char *)
Decode content transfer encoding description.
Definition: encoding.c:7536
ENC_CS_WINDOWS_1256
Definition: encoding.h:88
enc_ascii_check
int enc_ascii_check(const char *)
Verify ASCII encoding.
Definition: encoding.c:4944
ENC_CTE_8BIT
Definition: encoding.h:52
enc_convert_ascii_to_anum
int enc_convert_ascii_to_anum(core_anum_t *, const char *, int)
Convert number from ASCII to numerical format.
Definition: encoding.c:4604
ENC_CS_KOI8R
Definition: encoding.h:80
enc_percent_decode
int enc_percent_decode(char *, int)
Percent decoder.
Definition: encoding.c:8318
ENC_CS_ISO8859_X
Definition: encoding.h:78
ENC_CS_ISO8859_16
Definition: encoding.h:77
enc_mime_ct::type
enum enc_mime_ct_type type
Definition: encoding.h:114
ENC_CT_APPLICATION
Definition: encoding.h:32
enc_mime_mpe::start
const char * start
Definition: encoding.h:124
ENC_CTE_Q
Definition: encoding.h:54
enc_convert_lines_to_string
void enc_convert_lines_to_string(char *, unsigned long int)
Convert number of lines to string.
Definition: encoding.c:4119
ENC_CS_WINDOWS_1255
Definition: encoding.h:87
enc_mime_mpe::len
size_t len
Definition: encoding.h:125
enc_wm_pattern
Wildmat array element (for RFC 3977 wildmat-pattern)
Definition: encoding.h:139
ENC_CS_ISO8859_6
Definition: encoding.h:68
ENC_CS_ISO8859_7
Definition: encoding.h:69
enc_convert_posix_to_iso8601
int enc_convert_posix_to_iso8601(char *, core_time_t)
Convert POSIX timestamp to ISO 8601 conformant local date and time.
Definition: encoding.c:4342
enc_extract_addr_spec
const char * enc_extract_addr_spec(const char *)
Extract addr-spec token from RFC 5322 mailbox.
Definition: encoding.c:4830
enc_ascii_check_alpha
int enc_ascii_check_alpha(const char *)
Check for ASCII alphabetic characters.
Definition: encoding.c:4972
enc_mime_ct_subtype
enc_mime_ct_subtype
IDs for supported MIME content subtypes.
Definition: encoding.h:36
enc_uc_encode_utf8
void enc_uc_encode_utf8(char *, size_t *, long int *, size_t *)
Encode Unicode codepoints to UTF-8.
Definition: encoding.c:1008
enc_timestamp_decode
core_time_t enc_timestamp_decode(const char *)
Decode canonical timestamp to POSIX time (seconds since epoch)
Definition: encoding.c:4154
enc_ascii_check_printable
int enc_ascii_check_printable(const char *)
Check for printable ASCII characters.
Definition: encoding.c:5022
ENC_CS_ISO2022_JP
Definition: encoding.h:96
ENC_CS_WINDOWS_1257
Definition: encoding.h:89
ENC_CT_AUDIO
Definition: encoding.h:28
enc_ascii_check_digit
int enc_ascii_check_digit(const char *)
Check for ASCII digit characters.
Definition: encoding.c:4995
enc_mime_message
size_t enc_mime_message(const char *, size_t, struct enc_mime_mpe **)
Extract MIME encapsulated message.
Definition: encoding.c:8168
ENC_CS_ISO8859_13
Definition: encoding.h:74
enc_mime_para_decode
int enc_mime_para_decode(const char **, const char *, int)
Decode header field containing potential MIME parameters.
Definition: encoding.c:6817
ENC_CS_WINDOWS_1253
Definition: encoding.h:85
enc_uc_check_utf8
int enc_uc_check_utf8(const char *)
Verify UTF-8 encoding.
Definition: encoding.c:5162
enc_mime_decode
const char * enc_mime_decode(enum enc_mime_cte, enum enc_mime_cs, const char *)
Decode MIME text content to UTF-8 NFC.
Definition: encoding.c:7801
enc_convert_anum_to_ascii
int enc_convert_anum_to_ascii(char[17], size_t *, core_anum_t)
Convert article number from numerical format to ASCII.
Definition: encoding.c:4558
enc_mime_ct_type
enc_mime_ct_type
IDs for supported MIME content types.
Definition: encoding.h:23
ENC_CTS_MIXED
Definition: encoding.h:40
enc_mime_ct::charset
enum enc_mime_cs charset
Definition: encoding.h:116
ENC_URI_SCHEME_MAILTO
Definition: encoding.h:135
ENC_CTS_PLAIN
Definition: encoding.h:39
ENC_CTS_DIGEST
Definition: encoding.h:42
enc_destroy_wildmat
void enc_destroy_wildmat(struct enc_wm_pattern **, int)
Destroy wildmat pattern array.
Definition: encoding.c:5537
enc_convert_octet_to_hex
int enc_convert_octet_to_hex(char *, unsigned int)
Convert octet to hexadecimal (ASCII) format.
Definition: encoding.c:4664
ENC_CS_WINDOWS_1258
Definition: encoding.h:90
ENC_CTE_7BIT
Definition: encoding.h:51
ENC_CTS_OCTETSTREAM
Definition: encoding.h:44
ENC_CTS_RFC822
Definition: encoding.h:43
ENC_URI_SCHEME_FTP
Definition: encoding.h:133
ENC_CS_UTF_7
Definition: encoding.h:98
ENC_CS_UTF_16BE
Definition: encoding.h:100
ENC_CS_MACINTOSH
Definition: encoding.h:79
enc_mime_ct
MIME content type information.
Definition: encoding.h:112
enc_ascii_convert_to_printable
void enc_ascii_convert_to_printable(char *)
Convert to printable ASCII format.
Definition: encoding.c:5049
enc_convert_posix_to_canonical
const char * enc_convert_posix_to_canonical(const char *)
Convert from local (POSIX) to canonical (RFC 822) form.
Definition: encoding.c:5695
enc_uri_scheme
enc_uri_scheme
URI schemes.
Definition: encoding.h:129
ENC_CS_ISO8859_9
Definition: encoding.h:71
ENC_CS_IBM852
Definition: encoding.h:94
ENC_CS_WINDOWS_1251
Definition: encoding.h:83
ENC_CS_WINDOWS_1252
Definition: encoding.h:84
ENC_CS_ISO8859_11
Definition: encoding.h:73
enc_create_wildmat
int enc_create_wildmat(struct enc_wm_pattern **, const char *)
Create wildmat pattern array.
Definition: encoding.c:5371
enc_convert_iso8601_to_timestamp
int enc_convert_iso8601_to_timestamp(const char **, const char *)
Convert ISO 8601 conformant date to canonical timestamp.
Definition: encoding.c:4503
ENC_CTS_ALTERNATIVE
Definition: encoding.h:41
enc_uc_repair_utf8
const char * enc_uc_repair_utf8(const char *)
Repair UTF-8 encoding.
Definition: encoding.c:5181
ENC_CS_IBM858
Definition: encoding.h:95
ENC_CS_ISO8859_15
Definition: encoding.h:76
ENC_CS_WINDOWS_1254
Definition: encoding.h:86
ENC_CT_MESSAGE
Definition: encoding.h:31
ENC_CT_IMAGE
Definition: encoding.h:27
ENC_CS_ISO8859_3
Definition: encoding.h:65
ENC_CS_CESU_8
Definition: encoding.h:97
enc_mime_encode_base64
int enc_mime_encode_base64(const char **, const char *, size_t)
Encode binary data to base64.
Definition: encoding.c:4744
enc_convert_canonical_to_posix
const char * enc_convert_canonical_to_posix(const char *, int, int)
Convert from canonical (RFC 822) to local (POSIX) form.
Definition: encoding.c:5579
enc_create_name_addr
const char * enc_create_name_addr(const char *, size_t)
Create a "name-addr" construct according to RFC 5322.
Definition: encoding.c:3952
enc_lines_decode
unsigned long int enc_lines_decode(const char *)
Decode number of lines.
Definition: encoding.c:4098
ENC_CTE_B
Definition: encoding.h:55
enc_mime_get_cd
void enc_mime_get_cd(const char *, enum enc_mime_cd *, const char **)
Decode content disposition.
Definition: encoding.c:7619
ENC_CS_ISO8859_2
Definition: encoding.h:64
enc_mime_ct::flags
unsigned int flags
Definition: encoding.h:118
ENC_CS_WINDOWS_1250
Definition: encoding.h:82
ENC_CS_UTF_8
Definition: encoding.h:99
enc_mime_cd
enc_mime_cd
IDs for supported MIME content disposition.
Definition: encoding.h:104
ENC_CS_ISO8859_1
Definition: encoding.h:63
enc_mime_multipart
size_t enc_mime_multipart(const char *, const char *, struct enc_mime_mpe **)
Parse MIME multipart content.
Definition: encoding.c:8210
enc_mime_word_encode
int enc_mime_word_encode(const char **, const char *, size_t)
Encode header field body using MIME encoded-word tokens.
Definition: encoding.c:6103
enc_mime_get_ct
void enc_mime_get_ct(struct enc_mime_ct *, const char *, char *)
Decode MIME "Content-Type" header field.
Definition: encoding.c:7235
ENC_CS_IBM437
Definition: encoding.h:91
enc_mime_cte
enc_mime_cte
IDs for supported MIME content transfer encodings.
Definition: encoding.h:48
enc_convert_to_8bit
const char * enc_convert_to_8bit(enum enc_mime_cs *, const char *, const char **)
Convert string from Unicode (UTF-8 NFC) to an 8bit character set.
Definition: encoding.c:6005
enc_convert_iso8601_to_posix
int enc_convert_iso8601_to_posix(core_time_t *, const char *)
Convert ISO 8601 conformant UTC date and time to POSIX timestamp.
Definition: encoding.c:4450
ENC_CT_TEXT
Definition: encoding.h:26

Generated at 2024-04-27 using  doxygen