Jun Inoue
jun.l****@gmail*****
2005年 8月 31日 (水) 19:10:02 JST
prime.scm の動作を追ってるときに、write/ss が欲しくなったので書きまし た。 = の siod 互換追加も <s>わざわざ外すのがだるかったから</s> 含めてあ ります。ちなみに shiro さんの syntax + apply の話からの類推で bug ではな く extension と見なしました。 read/ss を実装してないのでテストは肉眼で確認するしかありません。一時ファ イルにダンプして文字列として読み込めばいいっぽいですが、やってません。 それと、以前 apply パッチに含めたテストですが、最後のは eq? であってま す。R5RS には "The resulting list is always newly allocated, except that it shares structure with the last list argument." とあって、SRFI-38 にも あるように、この "shares structure" というのは eq? という意味だと思われ ます。 -- Jun Inoue jun.l****@gmail***** -------------- next part -------------- diff -ur sigscheme/debug.c ../.r5rs/sigscheme/debug.c --- sigscheme/debug.c 2005-08-29 22:53:36.000000000 -0700 +++ ../.r5rs/sigscheme/debug.c 2005-08-31 02:55:22.000000000 -0700 @@ -51,13 +51,47 @@ UNKNOWN }; +#if SCM_USE_SRFI38 +typedef size_t hashval_t; +typedef struct { + ScmObj key; + int datum; +} hash_entry; + +typedef struct { + size_t size; /* capacity; MUST be a power of 2 */ + size_t used; /* population */ + hash_entry *ents; +} hash_table; + +typedef struct { + hash_table seen; /* a table of seen objects */ + int next_index; /* the next index to use for #N# */ +} write_ss_context; +#endif + /*======================================= File Local Macro Declarations =======================================*/ +#if SCM_USE_SRFI38 +#define INTERESTINGP(obj) \ + (CONSP(obj) \ + || (STRINGP(obj) && SCM_STRING_LEN(obj)) \ + || VECTORP(obj)) +#define SCM_INVALID NULL +#define OCCUPIED(ent) (!EQ((ent)->key, SCM_INVALID)) +#define HASH_EMPTY(table) (!(table).used) +#define DEFINING_DATUM (-1) +#define GET_DEFINDEX(x) ((unsigned)(x) >> 1) +#endif /*======================================= Variable Declarations =======================================*/ +#if SCM_USE_SRFI38 +/* list of shared structures in the object we're writing out */ +static write_ss_context *write_ss_ctx; +#endif /*======================================= File Local Function Declarations @@ -70,6 +104,13 @@ static void print_port(FILE *f, ScmObj port, enum OutputType otype); static void print_etc(FILE *f, ScmObj obj, enum OutputType otype); +#if SCM_USE_SRFI38 +static void hash_grow(hash_table *tab); +static hash_entry *hash_lookup(hash_table *tab, ScmObj key, int insert); +static void write_ss_scan(ScmObj obj, write_ss_context *ctx); +static int get_shared_index(ScmObj obj); +#endif + /*======================================= Function Implementations =======================================*/ @@ -107,6 +148,21 @@ static void print_ScmObj_internal(FILE *f, ScmObj obj, enum OutputType otype) { +#if SCM_USE_SRFI38 + if (INTERESTINGP(obj)) { + int index = get_shared_index(obj); + if (index > 0) { + /* defined datum */ + fprintf(f, "#%d#", index); + return; + } + if (index < 0) { + /* defining datum, with the new index negated */ + fprintf(f, "#%d=", -index); + /* Print it; the next time it'll be defined. */ + } + } +#endif switch (SCM_TYPE(obj)) { case ScmInt: fprintf(f, "%d", SCM_INT_VALUE(obj)); @@ -229,42 +285,54 @@ static void print_list(FILE *f, ScmObj list, enum OutputType otype) { ScmObj car = SCM_NULL; - ScmObj cdr = SCM_NULL; - ScmObj tmp = SCM_NULL; +#if SCM_USE_SRFI38 + int index; + int necessary_close_parens = 1; + cheap_recursion: +#endif /* print left parenthesis */ fprintf(f, "("); - /* get car and cdr */ - car = CAR(list); - cdr = CDR(list); - - /* print car */ - print_ScmObj_internal(f, car, otype); - if (!NULLP(cdr)) - fprintf(f, " "); - - /* print else for-each */ - for (tmp = cdr; ; tmp = CDR(tmp)) { - if (CONSP(tmp)) { - print_ScmObj_internal(f, CAR(tmp), otype); - if (NULLP(CDR(tmp))) { - fprintf(f, ")"); - return; - } else { - if (!NULLP(CDR(tmp))) - fprintf(f, " "); - } - } else { - if (!NULLP(tmp)) { - fprintf(f, ". "); - print_ScmObj_internal(f, tmp, otype); - } - - fprintf(f, ")"); - return; + for (;;) { + car = CAR(list); + print_ScmObj_internal(f, car, otype); + list = CDR(list); + if (!CONSP(list)) + break; + fputs(" ", f); + +#if SCM_USE_SRFI38 + /* See if the next pair is shared. Note that the case + * where the first pair is shared is handled in + * print_ScmObj_internal(). */ + index = get_shared_index(list); + if (index > 0) { + /* defined datum */ + fprintf(f, ". #%d#", index); + goto close_parens_and_return; } + if (index < 0) { + /* defining datum, with the new index negated */ + fprintf(f, ". #%d=", -index); + necessary_close_parens++; + goto cheap_recursion; + } +#endif + } + + /* last item */ + if (!NULLP(list)) { + fputs(" . ", f); + /* Callee takes care of shared data. */ + print_ScmObj_internal(f, list, otype); } + +#if SCM_USE_SRFI38 + close_parens_and_return: + while (necessary_close_parens--) +#endif + fputc(')', f); } static void print_vector(FILE *f, ScmObj vec, enum OutputType otype) @@ -331,3 +399,157 @@ else if (EQ(obj, SCM_UNDEF)) fprintf(f, "#<undef>"); } + +#if SCM_USE_SRFI38 + +static void hash_grow(hash_table *tab) +{ + size_t old_size = tab->size; + size_t new_size = old_size * 2; + size_t i; + hash_entry *old_ents = tab->ents; + hash_entry *new_ent; + + tab->ents = calloc(new_size, sizeof(hash_entry)); + tab->size = new_size; + + for (i=0; i < old_size; i++) { + /* Don't change the last argument, or hash_lookup() will call + * us again. */ + new_ent = hash_lookup(tab, old_ents[i].key, 0); + *new_ent = old_ents[i]; + } + + free (old_ents); +} + +/** + * @return A pointer to the entry, or NULL if not found. + */ +static hash_entry *hash_lookup(hash_table *tab, ScmObj key, int insert) +{ + size_t i; + unsigned hashval; + hash_entry *ent; + + /* If we have > 32 bits, we'll discard some of them. The lower + * bits are zeroed for alignment or used for tag bits, and in the + * latter case, the tag can only take 3 values: pair, string, or + * vector. We'll drop these bits. KEYs are expected to be + * pointers into the heap, so their higher bis are probably + * uniform. I haven't confirmed either's validity, though. */ + hashval = (unsigned)key; + if (sizeof(hashval) > 4) { + hashval /= sizeof(ScmObjInternal); + hashval &= 0xffffffff; + } + + hashval *= 2654435761UL; /* golden ratio hash */ + + /* We probe linearly, since a) speed isn't a primary concern for + * SigScheme, and b) having a table of primes only for this + * purpose is probably just a waste. */ + for (i=0; i < tab->size; i++) { + ent = &(tab->ents)[(hashval + i) & (tab->size - 1)]; + if (!OCCUPIED(ent)) { + if (insert) { + /* used > size * 2/3 --> overpopulated, grow table */ + if (tab->used * 3 > tab->size * 2) { + hash_grow(tab); + return hash_lookup(tab, key, 1); + } + ent->key = key; + tab->used++; + } + return NULL; + } + if (EQ(ent->key, key)) + return ent; + } + + /* A linear probe should always find a slot. */ + abort(); +} + +/** + * Find out what non-atomic objects a structure shares within itself. + * @param obj The object in question, or a part of it. + * @param ctx Where to put the scan results. + */ +static void write_ss_scan(ScmObj obj, write_ss_context *ctx) +{ + int i; + hash_entry *ent; + /* (for-each mark-as-seen-or-return-if-familiar obj) */ + while (CONSP(obj)) { + ent = hash_lookup(&ctx->seen, obj, 1); + if (ent) { + ent->datum = DEFINING_DATUM; + return; + } + write_ss_scan(CAR(obj), ctx); + obj = CDR(obj); + } + + if (VECTORP(obj)) { + ent = hash_lookup(&ctx->seen, obj, 1); + if (ent) { + ent->datum = DEFINING_DATUM; + return; + } + for (i=0; i < SCM_VECTOR_LEN(obj); i++) + write_ss_scan(SCM_VECTOR_CREF(obj, i), ctx); + return; + } + if (STRINGP(obj) && SCM_STRING_LEN(obj)) { + ent = hash_lookup(&ctx->seen, obj, 1); + if (ent) { + ent->datum = DEFINING_DATUM; + return; + } + } +} + +/** + * @return The index for obj, if it's a defined datum. If it's a + * defining datum, allocate an index for it and return the + * *additive inverse* of the index. If obj is nondefining, + * return zero. + */ +static int get_shared_index(ScmObj obj) +{ + hash_entry *ent; + + if (write_ss_ctx) { + ent = hash_lookup(&write_ss_ctx->seen, obj, 0); + + if (ent->datum == DEFINING_DATUM) { + ent->datum = write_ss_ctx->next_index++; + return - (ent->datum); + } + return ent->datum; + } + return 0; +} + +void SigScm_WriteToPortWithSharedStructure(ScmObj port, ScmObj obj) +{ + write_ss_context ctx = {{0}}; + + ctx.next_index = 1; + ctx.seen.size = 1 << 8; /* arbitrary initial size */ + ctx.seen.ents = calloc(ctx.seen.size, sizeof(hash_entry)); + + write_ss_scan(obj, &ctx); + + /* If no structure is shared, we do a normal write. */ + if (!HASH_EMPTY(ctx.seen)) + write_ss_ctx = &ctx; + + SigScm_WriteToPort(port, obj); + + write_ss_ctx = NULL; + free(ctx.seen.ents); +} + +#endif /* SCM_USE_SRFI38 */ diff -ur sigscheme/main.c ../.r5rs/sigscheme/main.c --- sigscheme/main.c 2005-08-29 22:53:37.000000000 -0700 +++ ../.r5rs/sigscheme/main.c 2005-08-31 02:38:18.000000000 -0700 @@ -77,7 +77,11 @@ s_exp = SigScm_Read(stdin_port)) { result = ScmOp_eval(s_exp, SCM_NULL); - SigScm_DisplayToPort(stdout_port, result); +#if SCM_USE_SRFI38 + SigScm_WriteToPortWithSharedStructure(stdout_port, result); +#else + SigScm_WriteToPort(stdout_port, result); +#endif printf("\nsscm> "); } diff -ur sigscheme/operations.c ../.r5rs/sigscheme/operations.c --- sigscheme/operations.c 2005-08-29 22:53:37.000000000 -0700 +++ ../.r5rs/sigscheme/operations.c 2005-08-31 02:20:19.000000000 -0700 @@ -354,9 +354,11 @@ if CHECK_2_ARGS(args) SigScm_Error("= : Wrong number of arguments\n"); +#if !SCM_COMPAT_SIOD /* type check */ if (FALSEP(ScmOp_numberp(CAR(args)))) SigScm_ErrorObj("= : number required but got ", CAR(args)); +#endif /* Get first value */ val = SCM_INT_VALUE(CAR(args)); @@ -364,8 +366,11 @@ /* compare following value */ for (args = CDR(args); !NULLP(args); args = CDR(args)) { obj = CAR(args); + +#if !SCM_COMPAT_SIOD if (FALSEP(ScmOp_numberp(obj))) SigScm_ErrorObj("= : number required but got ", obj); +#endif if (SCM_INT_VALUE(obj) != val) { @@ -2015,6 +2020,9 @@ #if SCM_USE_SRFI8 #include "operations-srfi8.c" #endif +#if SCM_USE_SRFI38 +#include "operations-srfi38.c" +#endif #if SCM_COMPAT_SIOD #include "operations-siod.c" #endif diff -ur sigscheme/sigscheme.c ../.r5rs/sigscheme/sigscheme.c --- sigscheme/sigscheme.c 2005-08-29 22:53:37.000000000 -0700 +++ ../.r5rs/sigscheme/sigscheme.c 2005-08-31 02:19:20.000000000 -0700 @@ -319,6 +319,12 @@ =======================================================================*/ Scm_RegisterFuncRawListTailRec("receive", ScmOp_SRFI8_receive); #endif +#if SCM_USE_SRFI38 + /*======================================================================= + SRFI-8 Procedure + =======================================================================*/ + Scm_RegisterFuncEvaledList("write-with-shared-structure", ScmOp_SRFI38_write_with_shared_structure); +#endif #if SCM_COMPAT_SIOD /*======================================================================= diff -ur sigscheme/sigscheme.h ../.r5rs/sigscheme/sigscheme.h --- sigscheme/sigscheme.h 2005-08-29 22:53:37.000000000 -0700 +++ ../.r5rs/sigscheme/sigscheme.h 2005-08-31 02:23:59.000000000 -0700 @@ -66,8 +66,9 @@ Macro Declarations =======================================*/ #define SCM_USE_EUCJP 1 /* use EUC-JP as internal encoding */ -#define SCM_USE_SRFI1 0 /* use SRFI-1 procedures writtein in C */ -#define SCM_USE_SRFI8 1 /* use SRFI-8 receive procedure writtein in C */ +#define SCM_USE_SRFI1 0 /* use SRFI-1 procedures written in C */ +#define SCM_USE_SRFI8 1 /* use SRFI-8 receive procedure written in C */ +#define SCM_USE_SRFI38 1 /* use SRFI-38 write/ss written in C */ #define SCM_USE_NONSTD_FEATURES 1 /* use Non-R5RS standard features */ #define SCM_COMPAT_SIOD 1 /* use SIOD compatible features */ #define SCM_COMPAT_SIOD_BUGS 1 /* emulate the buggy behaviors of SIOD */ @@ -333,6 +334,9 @@ void SigScm_Display(ScmObj obj); void SigScm_WriteToPort(ScmObj port, ScmObj obj); void SigScm_DisplayToPort(ScmObj port, ScmObj obj); +#if SCM_USE_SRFI38 +void SigScm_WriteToPortWithSharedStructure(ScmObj port, ScmObj obj); +#endif #if SCM_USE_SRFI1 /* operations-srfi1.c */ @@ -348,6 +352,9 @@ /* operations-srfi8.c */ ScmObj ScmOp_SRFI8_receive(ScmObj args, ScmObj *envp); #endif +#if SCM_USE_SRFI38 +ScmObj ScmOp_SRFI38_write_with_shared_structure(ScmObj arg, ScmObj env); +#endif #if SCM_COMPAT_SIOD /* operations-siod.c */ ScmObj ScmOp_symbol_boundp(ScmObj obj); diff -ur sigscheme/test/test-list.scm ../.r5rs/sigscheme/test/test-list.scm --- sigscheme/test/test-list.scm 2005-08-21 04:48:30.000000000 -0700 +++ ../.r5rs/sigscheme/test/test-list.scm 2005-08-31 02:20:30.000000000 -0700 @@ -53,7 +53,7 @@ (define z '(why)) (assert-equal? "append test4" '(n o d o car why . ta) (append w x y () z 'ta)) (assert-equal? "append test5" '(n o) w) ; test non-destructiveness -(assert-equal? "append test6" x (cdr (append '((Calpis hosi-)) x))) ; share last +(assert-eq? "append test6" x (cdr (append '((Calpis hosi-)) x))) ; share last ; reverse (assert-equal? "reverse test1" '(c b a) (reverse '(a b c)))