Commit | Line | Data |
67d7b5ef |
1 | #ifndef ENCODE_H |
2 | #define ENCODE_H |
3 | |
4 | #ifndef U8 |
85982a32 |
5 | /* |
6 | A tad devious this: |
7 | perl normally has a #define for U8 - if that isn't present then we |
8 | typedef it - leaving it not #defined so we can do data parts without |
67d7b5ef |
9 | getting extern references to the code parts |
85982a32 |
10 | */ |
67d7b5ef |
11 | typedef unsigned char U8; |
12 | #endif |
13 | |
14 | typedef struct encpage_s encpage_t; |
15 | |
67d7b5ef |
16 | struct encpage_s |
17 | { |
85982a32 |
18 | /* fields ordered to pack nicely on 32-bit machines */ |
19 | const U8 *seq; /* Packed output sequences we generate |
20 | if we match */ |
21 | encpage_t *next; /* Page to go to if we match */ |
22 | U8 min; /* Min value of octet to match this entry */ |
23 | U8 max; /* Max value of octet to match this entry */ |
24 | U8 dlen; /* destination length - |
25 | size of entries in seq */ |
26 | U8 slen; /* source length - |
27 | number of source octets needed */ |
67d7b5ef |
28 | }; |
29 | |
30 | /* |
85982a32 |
31 | At any point in a translation there is a page pointer which points |
32 | at an array of the above structures. |
33 | |
34 | Basic operation : |
35 | get octet from source stream. |
36 | if (octet >= min && octet <= max) { |
37 | if slen is 0 then we cannot represent this character. |
38 | if we have less than slen octets (including this one) then |
39 | we have a partial character. |
40 | otherwise |
41 | copy dlen octets from seq + dlen*(octet-min) to output |
42 | (dlen may be zero if we don't know yet.) |
43 | load page pointer with next to continue. |
44 | (if slen is one this is the end of a character) |
45 | get next octet. |
46 | } |
47 | else { |
48 | increment the page pointer to look at next slot in the array |
49 | } |
50 | |
51 | arrays SHALL be constructed so there is an entry which matches |
52 | ..0xFF at the end, and either maps it or indicates no |
53 | representation. |
54 | |
55 | if MSB of slen is set then mapping is an approximate "FALLBACK" entry. |
67d7b5ef |
56 | |
57 | */ |
58 | |
59 | |
60 | typedef struct encode_s encode_t; |
61 | struct encode_s |
62 | { |
85982a32 |
63 | encpage_t *t_utf8; /* Starting table for translation from |
64 | the encoding to UTF-8 form */ |
65 | encpage_t *f_utf8; /* Starting table for translation |
66 | from UTF-8 to the encoding */ |
67 | const U8 *rep; /* Replacement character in this encoding |
68 | e.g. "?" */ |
69 | int replen; /* Number of octets in rep */ |
70 | U8 min_el; /* Minimum octets to represent a character */ |
71 | U8 max_el; /* Maximum octets to represent a character */ |
72 | const char *name[2]; /* name(s) of this encoding */ |
67d7b5ef |
73 | }; |
74 | |
75 | #ifdef U8 |
76 | /* See comment at top of file for deviousness */ |
77 | |
78 | extern int do_encode(encpage_t *enc, const U8 *src, STRLEN *slen, |
79 | U8 *dst, STRLEN dlen, STRLEN *dout, int approx); |
80 | |
81 | extern void Encode_DefineEncoding(encode_t *enc); |
82 | |
85982a32 |
83 | #endif /* U8 */ |
67d7b5ef |
84 | |
85 | #define ENCODE_NOSPACE 1 |
86 | #define ENCODE_PARTIAL 2 |
87 | #define ENCODE_NOREP 3 |
88 | #define ENCODE_FALLBACK 4 |
85982a32 |
89 | |
90 | #define FBCHAR_UTF8 "\xEF\xBF\xBD" |
91 | |
92 | #define ENCODE_DIE_ON_ERR 0x0001 /* croaks immediately */ |
93 | #define ENCODE_WARN_ON_ERR 0x0002 /* warn on error; may proceed */ |
94 | #define ENCODE_RETURN_ON_ERR 0x0004 /* immediately returns on NOREP */ |
95 | #define ENCODE_LEAVE_SRC 0x0008 /* $src updated unless set */ |
96 | #define ENCODE_PERLQQ 0x0100 /* perlqq fallback string */ |
af1f55d9 |
97 | #define ENCODE_HTMLCREF 0x0200 /* HTML character ref. fb mode */ |
98 | #define ENCODE_XMLCREF 0x0400 /* XML character ref. fb mode */ |
85982a32 |
99 | |
100 | #define ENCODE_FB_DEFAULT 0x0000 |
101 | #define ENCODE_FB_CROAK 0x0001 |
102 | #define ENCODE_FB_QUIET ENCODE_RETURN_ON_ERR |
103 | #define ENCODE_FB_WARN (ENCODE_RETURN_ON_ERR|ENCODE_WARN_ON_ERR) |
104 | #define ENCODE_FB_PERLQQ ENCODE_PERLQQ |
af1f55d9 |
105 | #define ENCODE_FB_HTMLCREF ENCODE_HTMLCREF |
106 | #define ENCODE_FB_XMLCREF ENCODE_XMLCREF |
85982a32 |
107 | |
108 | #endif /* ENCODE_H */ |