The functions in this group should be conformant to the following standards: ANSI X3.4, ISO 2022, ISO 8601, ISO 8859, ISO 10646, RFC 1468, RFC 2045, RFC 2046, RFC 2047, RFC 2049, RFC 2152, RFC 2183, RFC 2231, RFC 2646, RFC 3629, RFC 3676, RFC 5198, RFC 5536, RFC 6657, POSIX.1-1996, Unicode 14.0.0. More...
Classes | |
struct | enc_mime_ct |
MIME content type information. More... | |
struct | enc_mime_mpe |
Locations of MIME multipart entities. More... | |
struct | enc_wm_pattern |
Wildmat array element (for RFC 3977 wildmat-pattern) More... | |
Macros | |
#define | MAIN_ERR_PREFIX "ENC: " |
Message prefix for ENCODING module. | |
#define | ENC_UC_NORM_DEBUG 0 |
#define | ENC_MIME_PARA_LENGTH_MAX (size_t) 127 |
Maximum length of MIME parameter attribute tokens. | |
#define | ENC_MIME_HEADER_FOLD_ASCII_LINES 1 |
MIME word encoder folding behaviour. More... | |
#define | ENC_UA "\xEF\xBF\xBD" /* U+FFFD */ |
#define | ENC_RC 0xFFFDL /* U+FFFD */ |
#define | ENC_UC_DECOMPOSITION_BUFSIZE (size_t) 16 |
#define | ENC_HDR_BUFSIZE (size_t) 998 |
#define | ENC_FMT_BUFLEN (size_t) 7 |
#define | ENC_CTE_BUFLEN (size_t) 32 |
Buffer size for content transfer encoding name strings. | |
#define | ENC_CS_BUFLEN (size_t) 32 |
Buffer size for character set name strings. | |
#define | ENC_BO_BUFLEN (size_t) 75 |
Buffer size for multipart boundary strings. More... | |
#define | ENC_DELIMITER |
Delimiter string to print between article header and body parts. More... | |
Functions | |
void | enc_uc_encode_utf8 (char *buf, size_t *i, long int *dbuf, size_t *di) |
Encode Unicode codepoints to UTF-8. More... | |
const char * | enc_create_name_addr (const char *data, size_t offset) |
Create a "name-addr" construct according to RFC 5322. More... | |
unsigned long int | enc_lines_decode (const char *lines) |
Decode number of lines. More... | |
void | enc_convert_lines_to_string (char *l, unsigned long int l_raw) |
Convert number of lines to string. More... | |
core_time_t | enc_timestamp_decode (const char *timestamp) |
Decode canonical timestamp to POSIX time (seconds since epoch) More... | |
int | enc_convert_posix_to_iso8601 (char *isodate, core_time_t pts) |
Convert POSIX timestamp to ISO 8601 conformant local date and time. More... | |
int | enc_get_iso8601_utc (char *isodate) |
Get current UTC date in ISO 8601 conformant format. More... | |
int | enc_convert_iso8601_to_posix (core_time_t *pts, const char *isodate) |
Convert ISO 8601 conformant UTC date and time to POSIX timestamp. More... | |
int | enc_convert_iso8601_to_timestamp (const char **ts, const char *isodate) |
Convert ISO 8601 conformant date to canonical timestamp. More... | |
int | enc_convert_anum_to_ascii (char result[17], size_t *len, core_anum_t wm) |
Convert article number from numerical format to ASCII. More... | |
int | enc_convert_ascii_to_anum (core_anum_t *result, const char *wm, int len) |
Convert number from ASCII to numerical format. More... | |
int | enc_convert_octet_to_hex (char *result, unsigned int octet) |
Convert octet to hexadecimal (ASCII) format. More... | |
void | enc_rot13 (char *data) |
Encode or decode data with ROT13 algorithm. More... | |
int | enc_mime_encode_base64 (const char **enc, const char *data, size_t len) |
Encode binary data to base64. More... | |
const char * | enc_extract_addr_spec (const char *mailbox) |
Extract addr-spec token from RFC 5322 mailbox. More... | |
int | enc_ascii_check (const char *s) |
Verify ASCII encoding. More... | |
int | enc_ascii_check_alpha (const char *s) |
Check for ASCII alphabetic characters. More... | |
int | enc_ascii_check_digit (const char *s) |
Check for ASCII digit characters. More... | |
int | enc_ascii_check_printable (const char *s) |
Check for printable ASCII characters. More... | |
void | enc_ascii_convert_to_printable (char *s) |
Convert to printable ASCII format. More... | |
void | enc_ascii_convert_distribution (char *s) |
Convert body of distribution header field. More... | |
int | enc_uc_check_utf8 (const char *s) |
Verify UTF-8 encoding. More... | |
const char * | enc_uc_repair_utf8 (const char *s) |
Repair UTF-8 encoding. More... | |
int | enc_create_wildmat (struct enc_wm_pattern **obj, const char *wm) |
Create wildmat pattern array. More... | |
void | enc_destroy_wildmat (struct enc_wm_pattern **obj, int num) |
Destroy wildmat pattern array. More... | |
const char * | enc_convert_canonical_to_posix (const char *s, int rcr, int rlf) |
Convert from canonical (RFC 822) to local (POSIX) form. More... | |
const char * | enc_convert_posix_to_canonical (const char *s) |
Convert from local (POSIX) to canonical (RFC 822) form. More... | |
const char * | enc_convert_to_utf8_nfc (enum enc_mime_cs charset, const char *s) |
Convert string from supported character set to Unicode (UTF-8 NFC) More... | |
const char * | enc_convert_to_8bit (enum enc_mime_cs *charset, const char *s, const char **cs_iana) |
Convert string from Unicode (UTF-8 NFC) to an 8bit character set. More... | |
int | enc_mime_word_encode (const char **r, const char *b, size_t pl) |
Encode header field body using MIME encoded-word tokens. More... | |
int | enc_mime_word_decode (const char **r, const char *b) |
Decode header field containing potential MIME encoded-word tokens. More... | |
int | enc_mime_para_decode (const char **r, const char *b, int m) |
Decode header field containing potential MIME parameters. More... | |
void | enc_mime_get_ct (struct enc_mime_ct *ct, const char *hf_body, char *bo) |
Decode MIME "Content-Type" header field. More... | |
enum enc_mime_cte | enc_mime_get_cte (const char *hf_body) |
Decode content transfer encoding description. More... | |
void | enc_mime_get_cd (const char *hf_body, enum enc_mime_cd *type, const char **filename) |
Decode content disposition. More... | |
int | enc_mime_save_to_file (const char *pn, enum enc_mime_cte cte, const char *entity) |
Decode MIME content transfer encoding and save to file. More... | |
const char * | enc_mime_decode (enum enc_mime_cte cte, enum enc_mime_cs charset, const char *s) |
Decode MIME text content to UTF-8 NFC. More... | |
const char * | enc_mime_flowed_decode (const char *s, unsigned int delsp, unsigned int insline) |
Decode MIME "text/plain" content with "format=flowed" parameter. More... | |
size_t | enc_mime_message (const char *s, size_t len, struct enc_mime_mpe **mpe) |
Extract MIME encapsulated message. More... | |
size_t | enc_mime_multipart (const char *s, const char *b, struct enc_mime_mpe **mpe) |
Parse MIME multipart content. More... | |
int | enc_percent_decode (char *s, int clean) |
Percent decoder. More... | |
const char * | enc_uri_percent_encode (const char *s, enum enc_uri_scheme sch) |
Percent encoding for URI content. More... | |
int | enc_uc_search (const char *s, size_t start_pos, const char *search_s, size_t *found_pos, size_t *found_len) |
void | enc_free (void *p) |
Free an object allocated by encoding module. More... | |
The functions in this group should be conformant to the following standards: ANSI X3.4, ISO 2022, ISO 8601, ISO 8859, ISO 10646, RFC 1468, RFC 2045, RFC 2046, RFC 2047, RFC 2049, RFC 2152, RFC 2183, RFC 2231, RFC 2646, RFC 3629, RFC 3676, RFC 5198, RFC 5536, RFC 6657, POSIX.1-1996, Unicode 14.0.0.
iconv()
because on old operating systems there may be no Unicode support. And even on such old machines we don't want an external dependency from GNU iconv. iconv()
on request. #define ENC_BO_BUFLEN (size_t) 75 |
Buffer size for multipart boundary strings.
RFC 2046 limits the length of the boundary delimiter to 70 characters. There are always two hyphens before the boundary delimiter. At the end of the multipart body, there are 2 hyphens after the boundary delimiter. Finally we need space for the NUL character to terminate the string. Result: 70 + 2 + 2 + 1 = 75
Definition at line 176 of file encoding.h.
#define ENC_DELIMITER |
Delimiter string to print between article header and body parts.
Definition at line 179 of file encoding.h.
#define ENC_MIME_HEADER_FOLD_ASCII_LINES 1 |
MIME word encoder folding behaviour.
If this is defined to nonzero, all lines of RFC 2047 conformant header fields that contain MIME encoded words are folded before 76 characters. Otherwise all lines that contain no encoded-words are not folded before 998 characters.
RFC 2047 is ambiguous regarding this rule:
https://tools.ietf.org/html/rfc2047#section-2
The default value 1 is safe in any case. Please read section 2, paragraph 5 carefully before redefining this to 0!
Definition at line 73 of file encoding.c.
enum enc_mime_cs |
IDs for supported MIME character sets.
Definition at line 59 of file encoding.h.
enum enc_mime_ct_subtype |
IDs for supported MIME content subtypes.
Definition at line 36 of file encoding.h.
enum enc_mime_ct_type |
IDs for supported MIME content types.
Definition at line 23 of file encoding.h.
enum enc_mime_cte |
IDs for supported MIME content transfer encodings.
Enumerator | |
---|---|
ENC_CTE_7BIT | ASCII text |
ENC_CTE_8BIT | Raw non ASCII text |
ENC_CTE_BIN | Arbitrary binary data |
ENC_CTE_Q | MIME quoted-printable |
ENC_CTE_B | MIME base64 |
Definition at line 48 of file encoding.h.
enum enc_uri_scheme |
URI schemes.
Enumerator | |
---|---|
ENC_URI_SCHEME_HTTP | Hyper Text Transfer Protocol |
ENC_URI_SCHEME_FTP | File Transfer Protocol |
ENC_URI_SCHEME_NEWS | News group or article |
ENC_URI_SCHEME_MAILTO |
Definition at line 129 of file encoding.h.
int enc_ascii_check | ( | const char * | s | ) |
Verify ASCII encoding.
[in] | s | String to verify |
Definition at line 4944 of file encoding.c.
Referenced by core_get_signature(), core_post_article(), and core_subscribe_group().
int enc_ascii_check_alpha | ( | const char * | s | ) |
Check for ASCII alphabetic characters.
[in] | s | Pointer to single character |
Locale independent check based on ASCII.
Definition at line 4972 of file encoding.c.
Referenced by enc_ascii_convert_distribution().
int enc_ascii_check_digit | ( | const char * | s | ) |
Check for ASCII digit characters.
[in] | s | Pointer to single character |
Locale independent check based on ASCII.
Definition at line 4995 of file encoding.c.
Referenced by enc_ascii_convert_distribution(), and enc_convert_ascii_to_anum().
int enc_ascii_check_printable | ( | const char * | s | ) |
Check for printable ASCII characters.
[in] | s | String to check |
HT (9) and SPACE (32, 0x20) inside s are treated as "printable" to make this function suitable to check header field bodies according to RFC 5322.
Definition at line 5022 of file encoding.c.
Referenced by enc_mime_word_encode(), and ext_download_file().
void enc_ascii_convert_distribution | ( | char * | s | ) |
Convert body of distribution header field.
[in] | s | String with unfolded body to convert |
This function processes s in-place. The result will always be shorter than or the same length as the original data.
Every element of dist-list
that contains invalid characters is removed.
Definition at line 5074 of file encoding.c.
References enc_ascii_check_alpha(), enc_ascii_check_digit(), and PRINT_ERROR.
void enc_ascii_convert_to_printable | ( | char * | s | ) |
Convert to printable ASCII format.
[in] | s | String to convert |
This function should be used to repair a string in-place after the function enc_ascii_check_printable() has reported an error.
Every invalid byte is replaced with '?'.
Definition at line 5049 of file encoding.c.
int enc_convert_anum_to_ascii | ( | char | result[17], |
size_t * | len, | ||
core_anum_t | wm | ||
) |
Convert article number from numerical format to ASCII.
[out] | result | Pointer to result string buffer (Size: 17 bytes) |
[out] | len | Pointer to length of result string (Maximum value: 16) |
[in] | wm | Article number (watermark) to convert |
RFC 3977 allows max. 16 digits.
Definition at line 4558 of file encoding.c.
References CORE_ANUM_T_MAX, and PRINT_ERROR.
int enc_convert_ascii_to_anum | ( | core_anum_t * | result, |
const char * | wm, | ||
int | len | ||
) |
Convert number from ASCII to numerical format.
[out] | result | Pointer to result |
[in] | wm | Article number (watermark) string to convert |
[in] | len | Length of string wm |
Max. 20 digits are supported, sufficient for 64-bit article numbers. RFC 3977 allows max. 16 digits.
This function correctly processes leading zeros and does not use standard library functions with locale dependent behaviour.
Definition at line 4604 of file encoding.c.
References enc_ascii_check_digit(), NNTP_ANUM_T_MAX, and PRINT_ERROR.
const char* enc_convert_canonical_to_posix | ( | const char * | s, |
int | rcr, | ||
int | rlf | ||
) |
Convert from canonical (RFC 822) to local (POSIX) form.
[in] | s | String to convert |
[in] | rcr | Replace invalid CR control characters if nonzero |
[in] | rlf | Replace invalid LF control characters if nonzero |
According to RFC 822 and RFC 2049 this function accepts plain text article content in canonical form and converts the CRLF line breaks to local (POSIX, single LF) form.
ENC_RC
can be inserted as replacement for CR or/and LF by setting rcr or/and rlf respectively to a nonzero value. On success the caller is responsible to free the allocated memory.
Definition at line 5579 of file encoding.c.
Referenced by core_convert_canonical_to_posix().
int enc_convert_iso8601_to_posix | ( | core_time_t * | pts, |
const char * | isodate | ||
) |
Convert ISO 8601 conformant UTC date and time to POSIX timestamp.
[out] | pts | Seconds since epoch (as defined by POSIX.1) |
[in] | isodate | Buffer for date string (at least 20 characters) |
YYYY-MM-DDTHH-MM-SSZ
format (UTC). Definition at line 4450 of file encoding.c.
References PRINT_ERROR.
int enc_convert_iso8601_to_timestamp | ( | const char ** | ts, |
const char * | isodate | ||
) |
Convert ISO 8601 conformant date to canonical timestamp.
[out] | ts | Pointer to canonical timestamp as defined by RFC 5322 |
[in] | isodate | ISO 8601 date string (exactly 10 characters) |
YYYY-MM-DD
format (only date, time is not supported). Definition at line 4503 of file encoding.c.
References PRINT_ERROR.
void enc_convert_lines_to_string | ( | char * | l, |
unsigned long int | l_raw | ||
) |
Convert number of lines to string.
[out] | l | Pointer to result buffer (at least 11 characters large) |
[in] | l_raw | Number of lines |
"Error"
is returned. Definition at line 4119 of file encoding.c.
int enc_convert_octet_to_hex | ( | char * | result, |
unsigned int | octet | ||
) |
Convert octet to hexadecimal (ASCII) format.
[out] | result | Pointer to result |
[in] | octet | Octet to convert |
Exactly 3 bytes are written to the buffer pointed to by result . If octet is smaller than 16, a leading zero is created. On error, the result "XX" is generated. The result is always a zero terminated string.
Definition at line 4664 of file encoding.c.
const char* enc_convert_posix_to_canonical | ( | const char * | s | ) |
Convert from local (POSIX) to canonical (RFC 822) form.
[in] | s | String to convert |
According to RFC 822 and RFC 2049 this function accepts plain text article content in local (POSIX) form and converts the single LF line breaks to canonical (CRLF) form.
According to RFC 2045 and RFC 2046 single CR characters are deleted.
On success the caller is responsible to free the allocated memory.
Definition at line 5695 of file encoding.c.
References PRINT_ERROR.
Referenced by core_convert_posix_to_canonical().
int enc_convert_posix_to_iso8601 | ( | char * | isodate, |
core_time_t | pts | ||
) |
Convert POSIX timestamp to ISO 8601 conformant local date and time.
[out] | isodate | Buffer for date string (at least 20 characters) |
[in] | pts | Seconds since epoch (as defined by POSIX.1) |
ISO 8601 allows to omit the 'T' character between the date and time fields if there is no risk of confusing a date and time of day representation. This is the case here => We omit the 'T' for better human readability
time_t
implementations will overflow). Definition at line 4342 of file encoding.c.
References PRINT_ERROR.
const char* enc_convert_to_8bit | ( | enum enc_mime_cs * | charset, |
const char * | s, | ||
const char ** | cs_iana | ||
) |
Convert string from Unicode (UTF-8 NFC) to an 8bit character set.
[out] | charset | Pointer to character set of result (or NULL ) |
[in] | s | Unicode string to convert in UTF-8 NFC format |
[out] | cs_iana | Pointer to IANA charset name of result (or NULL ) |
According to RFC 2046 the following rules are applied:
If NULL
is passed as parameter charset or cs_iana , this indicates that the caller is not interested in this information. The corresponding data is discarded in this case.
Definition at line 6005 of file encoding.c.
Referenced by core_post_article().
const char* enc_convert_to_utf8_nfc | ( | enum enc_mime_cs | charset, |
const char * | s | ||
) |
Convert string from supported character set to Unicode (UTF-8 NFC)
[in] | charset | Character set of string s |
[in] | s | String to convert |
According to RFC 2049 the following rules are applied:
According to RFC 3629 the following rules are applied:
Definition at line 5788 of file encoding.c.
Referenced by core_post_article().
const char* enc_create_name_addr | ( | const char * | data, |
size_t | offset | ||
) |
Create a "name-addr" construct according to RFC 5322.
This function is intended to create the "From" and "Reply-To" header fields.
[in] | data | Input data |
[in] | offset | Folding offset, e.g. sizeof ("From: ") |
The input data must have the following format: name
<addr-spec>
.
addr-spec
construct is not allowed to contain comments or quoted strings. Both parts, name
and <addr-spec>
must fit on a single header line of 998 characters. Note that offset adds to the length of name
.name
must be a Unicode identifier corresponding to addr-spec
. If it contains non-ASCII characters, it is converted to a valid display-name
token. The result will be folded according to RFC 2047.
On success the caller is responsible to free the memory allocated for the result.
Definition at line 3952 of file encoding.c.
int enc_create_wildmat | ( | struct enc_wm_pattern ** | obj, |
const char * | wm | ||
) |
Create wildmat pattern array.
[out] | obj | Pointer to wildmat pattern array |
[in] | wm | RFC 3977 conformant wildmat |
This function splits a RFC 3977 conformant wildmat
into its elements of type wildmat-pattern
. Every wildmat-pattern
is converted to a POSIX extended regular expression and stored together with a negation flag (that is set if the wildmat-pattern
was preceded by an exclamation mark) in the array obj .
On success the caller is responsible to free the memory allocated for the resulting array with the function enc_destroy_wildmat() .
NULL
was written to obj) Definition at line 5371 of file encoding.c.
References enc_destroy_wildmat(), enc_uc_check_utf8(), and PRINT_ERROR.
void enc_destroy_wildmat | ( | struct enc_wm_pattern ** | obj, |
int | num | ||
) |
Destroy wildmat pattern array.
[in,out] | obj | Pointer to wildmat pattern array |
[in] | num | Number of elements in array |
NULL
is written to the location pointed to by obj after releasing the memory allocated for the array.
Definition at line 5537 of file encoding.c.
Referenced by enc_create_wildmat().
const char* enc_extract_addr_spec | ( | const char * | mailbox | ) |
Extract addr-spec token from RFC 5322 mailbox.
[in] | mailbox | RFC 5322 mailbox |
addr-spec
token! name-addr
token because it is ignored anyway. On success a pointer to the result buffer is returned. The caller is responsible to free the memory allocated for this buffer.
addr-spec
token Definition at line 4830 of file encoding.c.
void enc_free | ( | void * | p | ) |
Free an object allocated by encoding module.
Use this function to release dynamic memory that was allocated by the encoding module.
[in] | p | Pointer to object |
Release the memory for the object pointed to by p.
NULL
and no operation is performed in this case. Definition at line 8868 of file encoding.c.
Referenced by core_get_cancel_key(), core_get_cancel_lock(), core_get_msgid(), and core_post_article().
int enc_get_iso8601_utc | ( | char * | isodate | ) |
Get current UTC date in ISO 8601 conformant format.
[out] | isodate | Buffer for date string (at least 21 characters) |
The date is written to isodate in YYYY-MM-DDTHH-MM-SSZ
format.
time_t
implementations will overflow). Definition at line 4395 of file encoding.c.
unsigned long int enc_lines_decode | ( | const char * | lines | ) |
Decode number of lines.
[in] | lines | Number of lines |
lines must be a RFC 5536 conformant body of the (now obsolete) "Lines" header field.
Definition at line 4098 of file encoding.c.
const char* enc_mime_decode | ( | enum enc_mime_cte | cte, |
enum enc_mime_cs | charset, | ||
const char * | s | ||
) |
Decode MIME text content to UTF-8 NFC.
[in] | cte | MIME content transfer encoding |
[in] | charset | MIME character set |
[in] | s | MIME encoded data |
According to RFC 2049 all transfer encodings not defined in MIME 1.0 are rejected.
Definition at line 7801 of file encoding.c.
References ENC_CTE_Q.
int enc_mime_encode_base64 | ( | const char ** | enc, |
const char * | data, | ||
size_t | len | ||
) |
Encode binary data to base64.
[out] | enc | Pointer to result (zero terminated string) |
[in] | data | Data to encode |
[in] | len | Data length |
If len is zero, data is not dereferenced and the result will be an empty string.
On error, nothing is written to enc .
On success a pointer to the result buffer will be written to enc . The caller is responsible to free the memory allocated for this buffer.
Definition at line 4744 of file encoding.c.
References data.
Referenced by core_get_cancel_key(), core_get_cancel_lock(), core_get_msgid(), and digest_randomart().
const char* enc_mime_flowed_decode | ( | const char * | s, |
unsigned int | delsp, | ||
unsigned int | insline | ||
) |
Decode MIME "text/plain" content with "format=flowed" parameter.
[in] | s | MIME encoded data in canonical form |
[in] | delsp | Delete spaces at EOL if nonzero |
[in] | insline | Add empty line separator after paragraphs if nonzero |
Definition at line 7856 of file encoding.c.
References CONF_QUOTESTYLE, config, and PRINT_ERROR.
void enc_mime_get_cd | ( | const char * | hf_body, |
enum enc_mime_cd * | type, | ||
const char ** | filename | ||
) |
Decode content disposition.
[in] | hf_body | Body of Content-Disposition header field |
[out] | type | Pointer to content disposition type ID |
[out] | filename | Pointer to filename |
The field body hf_body must be unfolded and preprocessed (parameters must be already decoded according to RFC 2231). The value for the filename parameter must be already converted to UTF-8.
If a filename parameter is present, a new memory block is allocated for filename . Otherwise NULL
is returned.
Definition at line 7619 of file encoding.c.
void enc_mime_get_ct | ( | struct enc_mime_ct * | ct, |
const char * | hf_body, | ||
char * | bo | ||
) |
Decode MIME "Content-Type" header field.
[out] | ct | Pointer to result structure |
[in] | hf_body | Header field body that contains the MIME content type |
[out] | bo | Pointer to buffer for multipart boundary delimiter |
The header field body hf_body is decoded and content IDs are written to the structure pointed to by ct .
The buffer for the boundary string used in messages with content type "multipart" must be allocated by the caller with a size of at least ENC_BO_BUFLEN and a pointer to the start of this buffer must be passed as bo parameter. It is allowed to pass NULL
for bo if the caller is not interested in the boundary string.
According to RFC 2045 the following rules are applied:
According to RFC 2046 the following rules are applied:
According to RFC 3676 the following rules are applied:
The experimental parameter "InsLine" set to "yes" adds an empty line separator after every paragraph that ends with an empty line. This allows to declare single lines as paragraphs, e.g. for Smartphones, without losing the separation to the following text (or creating double empty line separation in compatibility view).
ENC_xxx_UNKNOWN
IDs are returned. Definition at line 7235 of file encoding.c.
enum enc_mime_cte enc_mime_get_cte | ( | const char * | hf_body | ) |
Decode content transfer encoding description.
[in] | hf_body | MIME content transfer encoding description string |
This function checks whether the string hf_body represents a supported content transfer encoding and returns the corresponding ID for it. According to RFC 2047 the content transfer encoding is treated case-insensitive.
NULL
. This is treated as an error and the return value will indicate an unknown transfer encoding. ENC_CTE_UNKNOWN
on error Definition at line 7536 of file encoding.c.
References ENC_CTE_BIN, and ENC_CTE_BUFLEN.
size_t enc_mime_message | ( | const char * | s, |
size_t | len, | ||
struct enc_mime_mpe ** | mpe | ||
) |
Extract MIME encapsulated message.
[in] | s | MIME encapsulated message |
[in] | len | Length of encapsulated message |
[out] | mpe | MIME multipart entity locations |
On success a pointer to the result array is written to mpe . The caller is responsible to free the memory allocated for this array.
Definition at line 8168 of file encoding.c.
References enc_mime_mpe::len, PRINT_ERROR, and enc_mime_mpe::start.
size_t enc_mime_multipart | ( | const char * | s, |
const char * | b, | ||
struct enc_mime_mpe ** | mpe | ||
) |
Parse MIME multipart content.
[in] | s | MIME encoded multipart data |
[in] | b | MIME boundary delimiter |
[out] | mpe | MIME multipart entity locations |
On success a pointer to the result array is written to mpe . The caller is responsible to free the memory allocated for this array.
Definition at line 8210 of file encoding.c.
References ENC_BO_BUFLEN, enc_mime_mpe::len, PRINT_ERROR, and enc_mime_mpe::start.
int enc_mime_para_decode | ( | const char ** | r, |
const char * | b, | ||
int | m | ||
) |
Decode header field containing potential MIME parameters.
[out] | r | Pointer to result string pointer |
[in] | b | Prepared header field body that contains potential parameters |
[in] | m | Operating mode (see description below) |
The parameter m enables special processing if set to a nonzero value. m should be set to 1 for the Content-Type
header field.
quoted-string
tokens. Whitespace must already be merged into the semantically equivalent single SP (and removed completely before semicolons and around equal signs) by the caller. According to RFC 2231 the following rules are applied:
quoted-string
tokens. Mixing sections of both types is allowed => quoted-string
tokens must already be decoded in b by the caller. According to RFC 3629 the following rules are applied:
On success, the address of the result buffer is written to the location pointed to by r (this may be the same as b if there is nothing to do). The caller is responsible to free the potentially allocated memory. On error NULL
is written to the location pointed to by r .
Definition at line 6817 of file encoding.c.
References ENC_MIME_PARA_LENGTH_MAX, and PRINT_ERROR.
int enc_mime_save_to_file | ( | const char * | pn, |
enum enc_mime_cte | cte, | ||
const char * | entity | ||
) |
Decode MIME content transfer encoding and save to file.
[in] | pn | Pathname of file |
[in] | cte | MIME content transfer encoding |
[in] | entity | MIME entity body |
According to RFC 2049 all transfer encodings not defined in MIME 1.0 are rejected.
Definition at line 7717 of file encoding.c.
References ENC_CTE_Q.
int enc_mime_word_decode | ( | const char ** | r, |
const char * | b | ||
) |
Decode header field containing potential MIME encoded-word
tokens.
[out] | r | Pointer to result string pointer |
[in] | b | Header field body that contains potential encoded-words |
The header field body b must be unfolded before calling this function.
According to RFC 2047 the following rules are applied:
According to RFC 3629 the following rules are applied:
On success, the address of the result buffer is written to the location pointed to by r (this may be the same as b if there is nothing to do). The caller is responsible to free the potentially allocated memory. On error NULL
is written to the location pointed to by r .
Definition at line 6518 of file encoding.c.
int enc_mime_word_encode | ( | const char ** | r, |
const char * | b, | ||
size_t | pl | ||
) |
Encode header field body using MIME encoded-word
tokens.
This function uses quoted-printable encoding.
[out] | r | Pointer to result string pointer |
[in] | b | Header field body that contains potential Unicode data |
[in] | pl | Length of header field prefix (Length limit: 25) |
The header field body b must be verified by the caller to be valid UTF-8 (this function will do the normalization to NFC). The CRLF termination must be removed before calling this function.
The length pl must include the header field name, the colon and any potential white space not included in b .
According to RFC 5536 the following rules are applied:
According to RFC 2047 the following rules are applied:
force_unicode
option in configfile). force_unicode
option in configfile). According to RFC 5198 the following rules are applied:
On success, the address of the result buffer is written to the location pointed to by r (this may be the same as b if there is nothing to do). The caller is responsible to free the potentially allocated memory. On error NULL
is written to the location pointed to by r .
Definition at line 6103 of file encoding.c.
References enc_ascii_check_printable(), ENC_CS_UTF_8, enc_uc_check_utf8(), and PRINT_ERROR.
int enc_percent_decode | ( | char * | s, |
int | clean | ||
) |
Percent decoder.
[in] | s | String to decode (URI or MIME parameter value) |
[in] | clean | Replace NUL and ';' with '_' if nonzero |
If s is NULL
no operation is performed and success is returned.
Definition at line 8318 of file encoding.c.
References enc_mime_mpe::len.
void enc_rot13 | ( | char * | data | ) |
Encode or decode data with ROT13 algorithm.
[in] | data | Pointer to buffer with Data to encode/decode |
Any character that is not a latin ASCII character in the ranges A..Z and a..z will stay unchanged.
No memory is allocated. The operation is executed in the buffer pointed to by data .
Definition at line 4692 of file encoding.c.
References data.
core_time_t enc_timestamp_decode | ( | const char * | timestamp | ) |
Decode canonical timestamp to POSIX time (seconds since epoch)
According to RFC 5322 all military timezones should be treated as UTC because there was an error in RFC 822 => We do so and accept "Z" as valid because it means UTC
[in] | timestamp | RFC 5536 conformant timestamp string |
Definition at line 4154 of file encoding.c.
References PRINT_ERROR.
int enc_uc_check_utf8 | ( | const char * | s | ) |
Verify UTF-8 encoding.
[in] | s | String to verify |
According to RFC 3629 the following rules are applied:
Definition at line 5162 of file encoding.c.
Referenced by core_get_signature(), enc_create_wildmat(), enc_mime_word_encode(), and enc_uc_repair_utf8().
void enc_uc_encode_utf8 | ( | char * | buf, |
size_t * | i, | ||
long int * | dbuf, | ||
size_t * | di | ||
) |
Encode Unicode codepoints to UTF-8.
[out] | buf | Encoded UTF-8 string |
[in,out] | i | Current index in buf |
[in] | dbuf | Codepoint buffer |
[in,out] | di | Number of codepoints in dbuf |
On success, the start index of the next codepoint is written to the location pointed to by i and zero is written to the location pointed to by di .
Definition at line 1008 of file encoding.c.
References data, and PRINT_ERROR.
const char* enc_uc_repair_utf8 | ( | const char * | s | ) |
Repair UTF-8 encoding.
[in] | s | String to repair |
Invalid UTF-8 sequences and invalid codepoints are replaced with U+FFFD.
NULL
on error Definition at line 5181 of file encoding.c.
References enc_uc_check_utf8(), and PRINT_ERROR.
const char* enc_uri_percent_encode | ( | const char * | s, |
enum enc_uri_scheme | sch | ||
) |
Percent encoding for URI content.
[in] | s | URI body to encode |
[in] | sch | URI scheme |
Passing NULL
for parameter s is allowed and treated as error.
Generic URI syntax is defined in RFC 3986.
The scheme "ftp" is defined in RFC 1738.
The scheme "http" is defined in RFC 7230.
The scheme "mailto" is defined in RFC 6068.
The scheme "news" is defined in RFC 5538.
The following characters are percent encoded:
Definition at line 8402 of file encoding.c.
References ENC_URI_SCHEME_FTP, ENC_URI_SCHEME_HTTP, ENC_URI_SCHEME_MAILTO, ENC_URI_SCHEME_NEWS, and PRINT_ERROR.