[p5sagit/p5-mst-13.2.git] / ext / Encode / encode.h

#ifndef ENCODE_H
#define ENCODE_H

#if !defined(U8)
/* Fallback definition of U8 for builds that do not see perl's own one.
   perl normally supplies U8 as a #define; when that is absent we provide a
   typedef instead.  Because a typedef is invisible to the preprocessor, the
   later "#ifdef U8" test in this header stays false, so the data parts can
   be used without picking up extern references to the code parts.
 */
typedef unsigned char U8;
#endif

/* One slot of a translation page: describes a range of source octets, what
   to emit when an octet falls in that range, and which page to use next. */
typedef struct encpage_s
{
 /* Two pointers first, then four single-octet fields, so the struct packs
    nicely on 32-bit machines. */
 const U8          *seq;   /* packed output sequences emitted on a match */
 struct encpage_s  *next;  /* page to continue with after a match */
 U8                min;    /* min value of octet that matches this entry */
 U8                max;    /* max value of octet that matches this entry */
 U8                dlen;   /* destination length: size of the entries in seq */
 U8                slen;   /* source length: number of source octets needed */
} encpage_t;

/*
   At any point in a translation there is a page pointer which points at an array
   of the above structures.

   Basic operation :
   get octet from source stream.
   if (octet >= min && octet <= max) {
      if slen is 0 then we cannot represent this character.
      if we have less than slen octets (including this one) then we have a partial character.
      otherwise
       copy dlen octets from seq + dlen*(octet-min) to output
       (dlen may be zero if we don't know yet.)
       load page pointer with next to continue.
       (if slen is one this is end of a character)
       get next octet.
   }
   else {
      increment the page pointer to look at next slot in the array
   }

   arrays SHALL be constructed so there is an entry which matches ..0xFF at the end,
   and either maps it or indicates no representation.

   if MSB of slen is set then mapping is an approximate "FALLBACK" entry.

*/


/* Describes one encoding: its starting translation tables to and from
   UTF-8, its replacement character, its character-length bounds and its
   name(s). */
typedef struct encode_s
{
 encpage_t  *t_utf8;    /* first table for translating the encoding to UTF-8 */
 encpage_t  *f_utf8;    /* first table for translating UTF-8 to the encoding */
 const U8   *rep;       /* replacement character in this encoding, e.g. "?" */
 int        replen;     /* octet count of the replacement character */
 U8         min_el;     /* fewest octets needed to represent a character */
 U8         max_el;     /* most octets needed to represent a character */
 const char *name[2];   /* name(s) of this encoding */
} encode_t;

#if defined(U8)
/* Code-part declarations: only visible when U8 is a preprocessor macro.
   See the comment at the top of the file for the deviousness involved. */

/* Translation entry point.
   NOTE(review): the parameter roles below are inferred from the names
   only - confirm against the implementation.
     enc    - presumably the starting page of the table to translate with
     src    - source octets
     slen   - presumably source length in, octets consumed out
     dst    - destination buffer
     dlen   - presumably the capacity of dst
     dout   - presumably receives the number of octets written to dst
     approx - presumably permits approximate "FALLBACK" entries
   The int result is presumably 0 or one of the ENCODE_* codes. */
extern int do_encode(encpage_t *enc,
                     const U8 *src,
                     STRLEN *slen,
                     U8 *dst,
                     STRLEN dlen,
                     STRLEN *dout,
                     int approx);

/* NOTE(review): presumably registers the encoding described by enc -
   confirm against the implementation. */
extern void Encode_DefineEncoding(encode_t *enc);

#endif

/* NOTE(review): the meaning of these codes is not spelled out in this
   header.  From the names they look like do_encode() result codes (out of
   destination space, partial character, no representation, approximate
   "FALLBACK" mapping used) - confirm against the implementation. */
#define ENCODE_NOSPACE  1
#define ENCODE_PARTIAL  2
#define ENCODE_NOREP    3
#define ENCODE_FALLBACK 4
#endif
Commit	Line	Data
2f2b4ff2	1	#ifndef ENCODE_H
2f2b4ff2	2	#define ENCODE_H
b1e7e56f	3
017e2add	4	#ifndef U8
b1e7e56f	5	/* A tad devious this:
	6	perl normally has a #define for U8 - if that isn't present
	7	then we typedef it - leaving it #ifndef so we can do data parts without
	8	getting extern references to the code parts
	9	*/
017e2add	10	typedef unsigned char U8;
	11	#endif
	12
	13	typedef struct encpage_s encpage_t;
	14
b1e7e56f	15
017e2add	16	struct encpage_s
017e2add	17	{
b1e7e56f	18	/* fields ordered to pack nicely on 32-bit machines */
	19	const U8 *seq; /* Packed output sequences we generate if we match */
	20	encpage_t *next; /* Page to go to if we match */
	21	U8 min; /* Min value of octet to match this entry */
	22	U8 max; /* Max value of octet to match this entry */
	23	U8 dlen; /* destination length - size of entries in seq */
	24	U8 slen; /* source length - number of source octets needed */
017e2add	25	};
017e2add	26
b1e7e56f	27	/*
	28	At any point in a translation there is a page pointer which points at an array
	29	of the above structures.
	30
	31	Basic operation :
	32	get octet from source stream.
	33	if (octet >= min && octet < max) {
	34	if slen is 0 then we cannot represent this character.
	35	if we have less than slen octets (including this one) then we have a partial character.
	36	otherwise
	37	copy dlen octets from seq + dlen*(octet-min) to output
	38	(dlen may be zero if we don't know yet.)
	39	load page pointer with next to continue.
	40	(is slen is one this is end of a character)
	41	get next octet.
	42	}
	43	else {
	44	increment the page pointer to look at next slot in the array
	45	}
	46
	47	arrays SHALL be constructed so there is an entry which matches ..0xFF at the end,
	48	and either maps it or indicates no representation.
	49
	50	if MSB of slen is set then mapping is an approximate "FALLBACK" entry.
	51
	52	*/
	53
	54
017e2add	55	typedef struct encode_s encode_t;
	56	struct encode_s
	57	{
b1e7e56f	58	encpage_t *t_utf8; /* Starting table for translation from the encoding to UTF-8 form */
	59	encpage_t *f_utf8; /* Starting table for translation from UTF-8 to the encoding */
	60	const U8 *rep; /* Replacement character in this encoding e.g. "?" */
	61	int replen; /* Number of octets to represent replacement character */
	62	U8 min_el; /* Minimum octets to represent a character */
	63	U8 max_el; /* Maximum octets to represent a character */
	64	const char *name[2]; /* name(s) of this encoding */
017e2add	65	};
017e2add	66
2f2b4ff2	67	#ifdef U8
b1e7e56f	68	/* See comment at top of file for deviousness */
b1e7e56f	69
2f2b4ff2	70	extern int do_encode(encpage_t *enc, const U8 *src, STRLEN *slen,
9b37254d	71	U8 *dst, STRLEN dlen, STRLEN *dout, int approx);
2f2b4ff2	72
2f2b4ff2	73	extern void Encode_DefineEncoding(encode_t *enc);
b1e7e56f	74
2f2b4ff2	75	#endif
2f2b4ff2	76
9b37254d	77	#define ENCODE_NOSPACE 1
	78	#define ENCODE_PARTIAL 2
	79	#define ENCODE_NOREP 3
	80	#define ENCODE_FALLBACK 4
2f2b4ff2	81	#endif