Commit | Line | Data |
2f2b4ff2 |
1 | #ifndef ENCODE_H |
2 | #define ENCODE_H |
b1e7e56f |
3 | |
017e2add |
4 | #ifndef U8 |
b1e7e56f |
5 | /* A tad devious, this: |
6 | perl normally has a #define for U8 - if that isn't present |
7 | then we typedef it instead. A typedef does not make U8 a preprocessor macro, |
8 | so the later "#ifdef U8" test stays false and we can do data parts without |
9 | getting extern references to the code parts. */ |
017e2add |
10 | typedef unsigned char U8; |
11 | #endif |
12 | |
13 | typedef struct encpage_s encpage_t; |
14 | |
b1e7e56f |
15 | |
017e2add |
16 | struct encpage_s |
17 | { |
b1e7e56f |
18 | /* One slot of a translation page (a page is an array of these slots). Fields ordered to pack nicely on 32-bit machines */ |
19 | const U8 *seq; /* Packed output sequences emitted on a match: dlen octets per source octet value, found at seq + dlen*(octet-min) */ |
20 | encpage_t *next; /* Page to continue with after a match */ |
21 | U8 min; /* Lowest octet value that matches this entry */ |
22 | U8 max; /* Highest octet value that matches this entry (NOTE(review): appears to be inclusive, since the last entry must match up to 0xFF - confirm) */ |
23 | U8 dlen; /* Destination length - size of each entry in seq; may be zero if the output is not known yet */ |
24 | U8 slen; /* Source length - number of source octets needed; 0 means the character cannot be represented, MSB set marks an approximate "FALLBACK" entry */ |
017e2add |
25 | }; |
26 | |
b1e7e56f |
27 | /* |
28 | At any point in a translation there is a page pointer which points at an array |
29 | of the above structures. |
30 | |
31 | Basic operation : |
32 | get octet from source stream. |
33 | if (octet >= min && octet <= max) { |
34 | if slen is 0 then we cannot represent this character. |
35 | if we have fewer than slen octets (including this one) then we have a partial character. |
36 | otherwise |
37 | copy dlen octets from seq + dlen*(octet-min) to output |
38 | (dlen may be zero if we don't know yet.) |
39 | load page pointer with next to continue. |
40 | (if slen is one this is the end of a character) |
41 | get next octet. |
42 | } |
43 | else { |
44 | increment the page pointer to look at next slot in the array |
45 | } |
46 | |
47 | arrays SHALL be constructed so there is an entry which matches ..0xFF at the end, |
48 | and either maps it or indicates no representation. |
49 | |
50 | if MSB of slen is set then mapping is an approximate "FALLBACK" entry. |
51 | |
52 | */ |
53 | |
54 | |
017e2add |
55 | typedef struct encode_s encode_t; |
56 | struct encode_s |
57 | { /* Describes one encoding: its two translation tables, its replacement sequence, its character size limits and its name(s) */ |
b1e7e56f |
58 | encpage_t *t_utf8; /* Starting page for translating from this encoding to UTF-8 form */ |
59 | encpage_t *f_utf8; /* Starting page for translating from UTF-8 to this encoding */ |
60 | const U8 *rep; /* Replacement character expressed in this encoding, e.g. "?" */ |
61 | int replen; /* Number of octets in the replacement character rep */ |
62 | U8 min_el; /* Minimum number of octets needed to represent a character */ |
63 | U8 max_el; /* Maximum number of octets needed to represent a character */ |
64 | const char *name[2]; /* Name(s) of this encoding; room for two pointers (NOTE(review): how an unused slot is marked is not visible here - confirm) */ |
017e2add |
65 | }; |
66 | |
2f2b4ff2 |
67 | #ifdef U8 |
b1e7e56f |
68 | /* See comment at top of file for deviousness: these prototypes are only compiled when U8 is a preprocessor macro, not when this header had to typedef it. NOTE(review): STRLEN is not declared in this header and presumably comes from perl's own headers - confirm. */ |
69 | /* do_encode - NOTE(review): judging by the parameter names only, enc is the starting page, src/slen the source buffer and an in/out length, dst/dlen the destination buffer and its size, dout receives the output length, and approx allows "FALLBACK" entries; the int result is presumably 0 or one of the ENCODE_* codes. Confirm against the definition. */ |
2f2b4ff2 |
70 | extern int do_encode(encpage_t *enc, const U8 *src, STRLEN *slen, |
9b37254d |
71 | U8 *dst, STRLEN dlen, STRLEN *dout, int approx); |
2f2b4ff2 |
72 | /* Encode_DefineEncoding - NOTE(review): presumably registers the given encode_t with the Encode machinery; the definition is not visible here - confirm. */ |
73 | extern void Encode_DefineEncoding(encode_t *enc); |
b1e7e56f |
74 | |
2f2b4ff2 |
75 | #endif |
76 | |
9b37254d |
77 | #define ENCODE_NOSPACE 1 /* NOTE(review): these four look like status codes for do_encode - confirm. Presumably: destination buffer has no space left */ |
78 | #define ENCODE_PARTIAL 2 /* presumably: source ended in a partial character - TODO confirm */ |
79 | #define ENCODE_NOREP 3 /* presumably: character has no representation - TODO confirm */ |
80 | #define ENCODE_FALLBACK 4 /* presumably: only an approximate "FALLBACK" mapping was available - TODO confirm */ |
2f2b4ff2 |
81 | #endif |