From 0b38bc996c4c7e2693931bbd5103c7772b56b4bd Mon Sep 17 00:00:00 2001 From: Kaz Kylheku Date: Mon, 31 Jul 2017 17:33:59 -0700 Subject: txr-015 2009-10-15 --- ChangeLog | 430 ++++++++++++++ Makefile | 28 +- dep.mk | 9 + depend.txr | 11 + extract.h | 37 -- extract.l | 760 ------------------------- extract.y | 1850 ------------------------------------------------------------ gc.c | 43 +- gc.h | 4 +- lib.c | 571 ++++++++----------- lib.h | 57 +- match.c | 1643 +++++++++++++++++++++++++++++++++++++++++++++++++++++ match.h | 27 + parser.h | 36 ++ parser.l | 523 +++++++++++++++++ parser.y | 593 +++++++++++++++++++ regex.c | 17 +- stream.c | 641 +++++++++++++++++++++ stream.h | 48 ++ txr.1 | 419 +++++++++++++- txr.c | 336 +++++++++++ txr.h | 33 ++ unwind.c | 235 +++++++- unwind.h | 92 ++- 24 files changed, 5369 insertions(+), 3074 deletions(-) create mode 100644 dep.mk create mode 100644 depend.txr delete mode 100644 extract.h delete mode 100644 extract.l delete mode 100644 extract.y create mode 100644 match.c create mode 100644 match.h create mode 100644 parser.h create mode 100644 parser.l create mode 100644 parser.y create mode 100644 stream.c create mode 100644 stream.h create mode 100644 txr.c create mode 100644 txr.h diff --git a/ChangeLog b/ChangeLog index 2461f007..25531be5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,433 @@ +2009-10-14 Kaz Kylheku + + Version 015 + + Code restructuring. + + Corruption bugfix in gc-debugging code. + + The nil symbol more properly implemented. + + Semantics change: collect treated as a failed match if it + does not collect anything. + + Bugfix in function argument reconciliation: must only + be done for unbound parameters. + + New @(local) directive (synonym of forget) for expressing + local variables in functions. + + Quasi-literals: backquote-delimited literals that contain interpolated + variables. Useful in next, output, bind and function calls. + + Hygiene: some implementation-inserted syntax tree elements + are now in their own namespace so they can't clash with user-defined + constructs. + + Rewritten streams implementation. + + Exception handling: try/catch/finally. + + Exceptions used internally and externally. + + File errors are mapped to exceptions now. + + Hash bang (#!) scripting supported. + + New -f paramater, allowing entire query to be specified + as argument rather than from a file or stdin. + + * txr.c: (version): Bump to 014. + * txr.1: Bump version to 014. More documentation about + exceptions. + +2009-10-14 Kaz Kylheku + + Support for hash bang execution, and embedding query + in a command line argument. + + * txr.c (remove_hash_bang_line): New function. + (main): Added -f option. Initialize and gc-protect yyin_stream, and + use it in all places where yyin was previously set up. + Diagnose when -a, -D and -f are wrongly clumped with other options. + Remove the first line of the query if it starts with #!. + * parser.h (yyin): Declaration removed. + (yyin_stream): Declared. + * parser.l (YY_INPUT): Macro defined. + (yyin_stream): New global. + * stream.c (string_in_get_line, string_in_get_char): Bugfix: + wrong length function used. + (string_in_ops): Bugfix: wrong get_char function wired in. + (get_char): New function. + * stream.h (get_char): Declared. + * txr.1: -f option documented. + +2009-10-14 Kaz Kylheku + + * lib.c (obj_print, obj_pprint): Print # + syntax if an object has a bad type code; do not just return + without printing anything. + +2009-10-14 Kaz Kylheku + + Code cleanup and documentation. + + * txr.1: Start documenting quasiliterals, exception handling and + nothrow in next and output. + * parser.y (catch_clauses_opt): Add missing empty production, so that + a try block doesn't have to have a finally clause. + * lib.h (or2, or3, or4): New macros. + * match.c (match_files): Allow output and next forms which just + have one argument that is nothrow, as documented. + * stream.c common_vformat, string_out_vcformat, string_out_vcformat, + make_string_output_stream, make_dir_stream, close_stream, get_line, + vformat, vcformat, format, cformat, put_string, put_cstring, + put_char): Switch to new style type assertions. + +2009-10-13 Kaz Kylheku + + New syntax for next and output directives, taking advantage + of quasi-literals. Non-throwing behavior can be specified in + both using nothrow. The old syntax is supported, and has + the old semantics (non-throwing). Hence, the test cases + pass again without modification. + + File open errors thrown as file_error type. + + * lib.c (nothrow, file_error): New symbol globals. + (obj_init): New symbols interned. + * lib.h (nothrow, file_error): Declared. + * match.c (file_err): New function. + (eval_form): Bugfix: if input is nil, or an atom other than a symbol, + return the value hoisted into a cons. A nil return strictly means, + unbound variable. + (match_files): Support new syntax for next and and output. + Throw open errors as file_err. + * parser.l (grammar): Change how OUTPUT is returned to the + style similar to DEFINE, so interior forms can be parsed. + * parser.y (grammar): Fix up output_clause with new syntax. + * unwind.c (uw_throw): Do not abort on unhandled file_error, + but terminate with a failed status. + (uw_init): Register file_error as a subtype of error exception. + +2009-10-13 Kaz Kylheku + + First cut at working try/catch/finally implementation. + + * lib.c (try, catch, finally): New symbol globals. + (obj_init): New symbols interned. + * lib.h (try, catch, finally: Declared. + * parser.y (TRY, CATCH, FINALLY): New tokens. + (try_clause, catch_clauses_opt): New nonterminal grammar symbols. + * parser.l (yybadtoken): TRY, CATCH and FINALLY handled. + (grammar): New cases for try, catch and finally. + * unwind.h (struct uw_catch): New member called visible. + (uw_continue): New parameter added. + (uw_exception_subtype_p): Declared. + (uw_catch_begin): Macro rewritten to use switch logic + around setjmp. + (uw_do_unwind, uw_catch, uw_unwind): New macros. + (uw_catch_end): Rewritten to close switch, and automatically + continue the unwinding if the block is entered as an unwind. + * unwind.c (uw_unwind_to_exit_point): Exception catching + frames made invisible via new flag prior to control passing to them. + longjmp code 2 introduced for distinguishing a catch from + an unwind. Visibility flag is checked and invisible frames + are skipped. + (uw_push_catch): cont member of the unwind frame initialized to zero. + (exception_subtype_p): Renamed to uw_exception_subtype_p, changed + to extern. Fixed wrong order of arguments to assoc. + (uw_throw): Honor visibility flag: do not consider invisible + catch frames. + (uw_register_subtype): sup/sub mixup bugfix. + (uw_continue): Takes extra argument: the continuation frame + that (re)establishes the exit point for the unwinding. + This allows nested unwinding action to take place in a finally, + and then to continue to the original exit point. + * match.c (match_files): Handling for try directive added. + +2009-10-13 Kaz Kylheku + + * parser.l (yybadtoken): Bugfix: added missing LITCHAR case. + * unwind.h (internal_error): Fixed broken macro. + * match.c (match_line, match_files): sem_error bugfix: used %a instead + of ~a. + (match_files): Wrap block handler in compound statement, otherwise the + macroexpansion declares a variable in the middle of a statement, which + is a gcc extension to C90 (or a C99 feature, + but we aren't using C99). + +2009-10-08 Kaz Kylheku + + Exception handling for query errors. + Verbose logging decoupled from yyerror functions. + Superior object-oriented formatting used for cleaner code. + + * lib.c (query_error): New symbol global. + (obj_init): New symbol interned. + * lib.h (query_error): Declared. + * match.c (output_produced): Variable changed to external linkage. + (debugf, debuglf, debuglcf, sem_error): New static functions. + (dest_bind, match_line, match_files): Regtargetted away from + the yyerrorf and yyerrorlf functions to use debugf, + debuglf, debuglcf for logging and sem_error for throwing + query errors as exceptions. + * parser.h (spec_file_str): New global declared. + * parser.l (yyerror): Calls yyerrorf instead of yyerrorlf; + lets yyerrorf increment error count. + (yyerrorf): Loses level argument. + (yyerrorlf): Function removed. + (yybadtoken): Retargetted from yyerrorlf to yyerrorf. + (grammar): yyerrorf call fixed up. + * txr.c (spec_file_str): New global defined. + (main): Protects new global against gc, and initializes it. + * unwind.c (uw_throw): If an unhandled exception is of + type query_error, it results in an exit rather than abort. + The false string is conditionally printed. + (uw_init): Register query_error as subtype of error. + +2009-10-08 Kaz Kylheku + + Exception handling framework implemented. + + * lib.c (cobj_t, error, type_error, internal_err, numeric_err, + range_err): New symbol globals. + (prog_string): New string global. + (code2type): New static function. + (typeof): Rewritten using code2type. + (type_check, type_check2): New static functions. + (car, cdr, list, plus, minus, length_str, chr_p, chr_str, + chr_str_set, apply, funcall, funcall1, funcall2, + vec_get_fill, vecref_l, lazy_stream_cons): Checks and + assertions rewritten using new functions and macros. + (obj_init): prog_string protected from gc. + New symbols interned. + (init): uw_init() call moved after obj_init() because + it needs stable symbols. + * lib.h (cobj_t, error, type_error, internal_err, numeric_err, + range_err, prog_string, type_check, type_check2): Declared. + * match.c (dump_var, complex_snarf, complex_close): abort + calls rewritten to use exception handling. + * regex.c (nfa_all_states, nfa_closure, nfa_move): Likewise. + * stream.c (string_out_vcformat): Bugfix: fill index not updated. + (make_string_output_stream): Bugfix: initial buffer not null terminated. + (get_string_from_stream): New function. + * stream.h (get_string_from_stream): Declared. + * txr.c (main): Some error prints turned to throws. + * unwind.c (unwind_to_exit_point): Supports UW_CATCH frames, + whose finalization logic has to be invoked during unwinding, + and as target exit points. + (uw_init): Installs exception symbols into + subtyping hirearchy. + (uw_push_catch, exception_subtype_p, uw_throw, uw_throwf, + uw_errorf, uw_throwcf, uw_errorcf, type_mismatch, + uw_register_subtype, uw_continue): New functions. + (exception_subtypes): New static global. + * unwind.h (noreturn): New macro, conditionally defined on __GNUC__. + (enum uw_frtype): New member, UW_CATCH. + (struct uw_catch): New struct type. + (union uw_frame): New member, ca. + (uw_push_catch, exception_subtype_p, uw_throw, uw_throwf, + uw_errorf, uw_throwcf, uw_errorcf, type_mismatch, + uw_register_subtype, uw_continue): New functions declared. + (uw_catch_begin, uw_catch_end, internal_error, type_assert, + bug_unless, numeric_assert, range_bug_unless): New macros. + +2009-10-07 Kaz Kylheku + + Rewritten streams implementation. + + * stream.h, stream.c: New files. + * Makefile (OBJS): New object file stream.o. + * dep.mk: Dependencies updated. + * gc.c (finalize): STREAM case removed. Call destroy only if not null. + (mark_obj): STREAM case removed. + * lib.c (push, pop): New functions. + (equal): STREAM case removed. + (sub_str): Allow from parameter to be nil, defaulting to zero. + (stdio_line_read, stdio_line_write, stdio_close, stdio_line_stream, + pipe_close, pipe_line_stream, dirent_read, dirent_close, + dirent_stream, stream_get, stream_pushback, stream_put, + stream_close): Functions removed. + (stream_ops dirent_stream_ops, stdio_line_stream_ops, + struct stream_ops, pipe_line_stream_op): Static structs removed. + (lazy_stream_func, lazy_stream_cons): Retargetted to new streams. + (cobj_print_op): Likewise. + (init): Disables and restores GC, instead of doing it in obj_init. + (obj_print): Retargetted to new streams. + (obj_pprint): New function. + (obj_init): Does not manipulate gc_state any more, moved to init. + Call to stream_init added. + (d, snarf): Retargetted to new streams. + (snarf_line): Removed, now appears in stream.c, retargetted + to new streams. + * lib.h (enum type): STREAM removed. + (struct stream, struct stream_ops): Removed. + (struct cobj_ops): Retargetted to new streams. + (union obj): sm member removed. + (push, pop, obj_pprint): Declared. + (stdio_line_stream, pipe_line_stream, dirent_stream, stream_get, + stream_pushback, stream_put, stream_close, snarf_line): Removed. + (cobj_print_op, dump, snarf): Modified. + * match.c (dump_bindings, complex_snarf): Retargetted to new streams. + * txr.c (main): format used to dump bindings and specs in verbose mode. + +2009-10-07 Kaz Kylheku + + Implemented quasi-literals: string literals which may + contain variables to be interpolated. + + Also, took care of a hygiene problem with respect to some + parser-generated forms, which must be invisible to the user. + + * Makefile (LEX_DB_FLAGS): New variable; helpful + in generating a lexical analyzer with debug tracing. + * parser.l (nesting, closechar): Static variables removed. + (char_esc): Add \` escape for quasi-literals. + (stack): New %option, to generate a scanner which has + a start condition stack. + (QSILIT): New start condition. + (grammar): Refactored to use start condition stacks. + Quasi-literal lexical analysis added. + * parser.y (lit_char_helper): New function, for factoring out + some common logic between string literals and quasi literals. + (quasilit, quasi_item, quasi_items): New grammar symbols and + production rules. + (strlit): Rule shortened with new helper function. + Bugfix: error case assigns nil to $$. + (chrlist): Bugfix: error case assigns nil to $$. + (LITCHAR): Added to %prec table to fix shift-reduce problem. + (expr): Production now can generate a quasilit. + * lib.c (quasi): New symbol global. + (obj_init): Intern quasi as "$quasi", so the user can + make a function called quasi. Also, var and regex are now interned + with the names "$var" and "$regex" for the same reason. + * lib.h (quasi): Declared. + * match.c (eval_form): Rewritten with recursive processing + to handle deeply embedded variables, as well as quasi-strings. + (subst_vars): Handles quasi-strings. + (match_files): Function calls now use eval_form for function + argument evaluation, except of course in the special case that if an + argument is a symbol, it may be unbound. + +2009-10-06 Kaz Kylheku + + * match.c (match_files): No error message for merging to + a symbol which is already bound; the existing behavior + is to destructively update the binding, which is useful, + and so the error is pointless. + +2009-10-06 Kaz Kylheku + + Introduce local as synonym to forget. It does exactly the + same thing; a previous binding is forgotten. This spelling + is nicer for functions. + * lib.h (local): Declared. + * lib.c (local): Defined. + (obj_init): New symbol interned. + +2009-10-06 Kaz Kylheku + + Bugfix: function parameter reconciliation (after function call + completes) must only consider the unbound parameters. + Otherwise false mismatches result if the function destructively + manipulated some bindings of bound parameters. + E.g. @(define foo (a)) is called as @(foo "bar") and internally + it rebinds bound parameter a to "baz". This situation is + not a mismatch. The rebinding is thrown away. + + * match.c (match_files): When processing a function call, + keep an alist which associates arguments and unbound parameters. + Then, after the function call, process the alist, rather + than the full parameter list. + +2009-10-06 Kaz Kylheku + + Semantics change: collect fails if it does not collect + anything. Non-failing behavior can be obtained by + wrapping with @(maybe) (but no such workaround for coll yet). + + * match.c (match_line): Return nil if coll collected nothing. + (match_files): Return nil if collect collected nothing. + + +2009-10-06 Kaz Kylheku + + Bugfix: nil must be on the list of interned symbols. + + * lib.c (sym_name): Function removed. This was like + symbol_name but did not accept nil. + (intern): Use symbol_name instead of sym_name, allowing + nil to be on the list of interned symbols. + (obj_init): Add nil to interned_syms list. + (nil_string): Changed from "NIL" to "nil". + * match.c (dest_bind): Treat nil as a value, not a symbol. + (match_files): Treat nil as a value when it's + a function argument. + +2009-10-06 Kaz Kylheku + + * gc.c (more): Bugfix: free_tail was incorectly calculated, + thereby destroying the validity of the FIFO recycling algorithm + used when GC debugging is enabled. This showed up as mysterious + assertions and crashes. + (mark_obj): Do not abort if a free object is marked. + (mark_mem_region): Renamed bottom and top variables to low + and high. The naming was confusing inverted relative + to that in the caller. + (sweep): Abort if somehow a block is free and marked reachable. + +2009-10-06 Kaz Kylheku + + * match.c (match_files): Fixed nonexitent symbol warning for merge + directive (complained about wrong symbol). + +2009-10-05 Kaz Kylheku + + Refactoring matching code. + + * lib.h (cobj_ops): New function pointer, mark. + * gc.c (mark_obj): For a COBJ type, call the mark function + if the pointer is non-null. + (gc_mark): New public function, wrapper that calls + the private mark_obj. Implementations of mark for COBJ + objects will need to call this. + * gc.h (mark_obj): Declared. + * regex.c (regex_obj_ops): Explicitly initialize mark function pointer + to null. + +2009-10-05 Kaz Kylheku + + Code restructuring. + + * Makefile (match.o): New object file. + (depend): New rule for generating dep.mk, using txr. + (lib.o, lex.yy.o, regex.o, y.tab.o unwind.o, txr.o, match.o, gc.o): + Dependency rules removed. + * dep.mk: New make include file; captures dependencies. Generated + by new depend rule in Makefile, using txr. + * depend.txr: Txr query to generate dependencies. + * extract.y: File renamed to parser.y + (output_produced): Variable removed, + moved into new file match.c. + (dump_shell_string, dump_shell_string, dump_var, dump_bindings, depth, + weird_merge, map_leaf_lists, dest_bind, eval_form, match_line, + format_field, subs_vars, complex_open, complex_open_failed, + complex_close, complex_snarf, robust_length, bind_car, bind_cdr, + extract_vars, extract_bindings, do_output_line, do_output, + match_files, extract): Functions removed, added to match.c. + (struct fpip): Definition removed, added to match.c + (, , , , , + "gc.h", "unwind.h"): Unneeded headers removed. + * match.c: New file. + * extract.l: Renamed to parser.l. + * extract.h: Renamed to parser.h. + (opt_loglevel, opt_nobindings, opt_arraydims, version, progname): + Declarations moved to txr.h. + (extract): Dclaration moved to match.h. + * txr.h, match.h: New headers. + * gc.h (opt_gc_debug): Moved to txr.h. + 2009-10-03 Kaz Kylheku Version 014 diff --git a/Makefile b/Makefile index d4fd87d4..b36dc41f 100644 --- a/Makefile +++ b/Makefile @@ -25,40 +25,36 @@ # Test data in the tests/ directory is in the public domain, # unless it contains notices to the contrary. + OPT_FLAGS := -O2 LANG_FLAGS := -ansi -D_GNU_SOURCE DIAG_FLAGS := -Wall DBG_FLAGS := -g +LEX_DBG_FLAGS := TXR_DBG_OPTS := --gc-debug LEXLIB := fl CFLAGS := $(LANG_FLAGS) $(DIAG_FLAGS) $(OPT_FLAGS) $(DBG_FLAGS) -txr: lex.yy.o y.tab.o lib.o regex.o gc.o unwind.o +OBJS := txr.o lex.yy.o y.tab.o match.o lib.o regex.o gc.o unwind.o stream.o +txr: $(OBJS) $(CC) $(CFLAGS) -o $@ $^ -l$(LEXLIB) -lex.yy.o y.tab.o: y.tab.h extract.h lib.h gc.h - -y.tab.o: regex.h - -lib.o: lib.h gc.h - -regex.o: regex.h lib.h gc.h +-include dep.mk -gc.o: gc.h lib.h gc.h +lex.yy.c: parser.l + $(LEX) $(LEX_DBG_FLAGS) $< -unwind.o: unwind.h lib.h - -lex.yy.c: extract.l - $(LEX) $< - -y.tab.c y.tab.h: extract.y +y.tab.c y.tab.h: parser.y if $(YACC) -v -d $< ; then true ; else rm $@ ; false ; fi clean: - rm -f txr lex.yy.o y.tab.o lib.o regex.o gc.o unwind.o \ + rm -f txr $(OBJS) \ y.tab.c lex.yy.c y.tab.h y.output $(TESTS:.ok=.out) +depend: txr + ./txr depend.txr > dep.mk + TESTS := $(patsubst %.txr,%.ok,$(shell find tests -name '*.txr' | sort)) tests: txr $(TESTS) diff --git a/dep.mk b/dep.mk new file mode 100644 index 00000000..80e48327 --- /dev/null +++ b/dep.mk @@ -0,0 +1,9 @@ +lib.o: lib.h gc.h unwind.h stream.h +lex.yy.o: y.tab.h lib.h gc.h parser.h +regex.o: lib.h unwind.h regex.h +y.tab.o: lib.h regex.h parser.h +unwind.o: lib.h gc.h stream.h unwind.h +txr.o: lib.h stream.h gc.h unwind.h parser.h match.h txr.h +match.o: lib.h gc.h unwind.h regex.h stream.h parser.h txr.h match.h +stream.o: lib.h gc.h stream.h +gc.o: lib.h stream.h txr.h gc.h diff --git a/depend.txr b/depend.txr new file mode 100644 index 00000000..7fa2183c --- /dev/null +++ b/depend.txr @@ -0,0 +1,11 @@ +@(next)$. +@(collect) +@file.c +@(next)@file.c +@(collect) +#include "@header" +@(end) +@(output) +@file.o:@(rep) @header@(end) +@(end) +@(end) diff --git a/extract.h b/extract.h deleted file mode 100644 index cff5d432..00000000 --- a/extract.h +++ /dev/null @@ -1,37 +0,0 @@ -/* Copyright 2009 - * Kaz Kylheku - * Vancouver, Canada - * All rights reserved. - * - * BSD License: - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * 3. The name of the author may not be used to endorse or promote - * products derived from this software without specific prior - * written permission. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. - */ - -#include -long lineno; -extern int opt_loglevel; -extern int opt_nobindings; -extern int opt_arraydims; -int yyparse(void); -obj_t *get_spec(void); -int extract(obj_t *spec, obj_t *filenames, obj_t *bindings); -void yyerrorf(int level, const char *s, ...); -void yyerrorlf(int level, long spec_lineno, const char *s, ...); -void yybadtoken(int tok, const char *context); diff --git a/extract.l b/extract.l deleted file mode 100644 index ab041bb9..00000000 --- a/extract.l +++ /dev/null @@ -1,760 +0,0 @@ -/* Copyright 2009 - * Kaz Kylheku - * Vancouver, Canada - * All rights reserved. - * - * BSD License: - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * 3. The name of the author may not be used to endorse or promote - * products derived from this software without specific prior - * written permission. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. - */ - -%{ - -#include -#include -#include -#include -#include -#include -#include -#include "y.tab.h" -#include "lib.h" -#include "gc.h" -#include "extract.h" - -#define YY_NO_UNPUT - -const char *version = "014"; -const char *progname = "txr"; -const char *spec_file = "stdin"; -long lineno = 1; -int opt_loglevel = 1; /* 0 - quiet; 1 - normal; 2 - verbose */ -int opt_nobindings = 0; -int opt_arraydims = 1; - -static int nesting; -static int closechar; -static int errors; - -/* - * Can implement an emergency allocator here from a fixed storage - * pool, which sets an OOM flag. Program can check flag - * and gracefully terminate instead of aborting like this. - */ -void *oom_realloc_handler(void *old, size_t size) -{ - fprintf(stderr, "%s: out of memory\n", progname); - puts("false"); - abort(); -} - -void yyerror(const char *s) -{ - yyerrorlf(1, lineno, "%s", s); - errors++; -} - -void yyerrorf(int level, const char *s, ...) -{ - if (opt_loglevel >= level) { - va_list vl; - va_start (vl, s); - fprintf(stderr, "%s: (%s:%ld): ", progname, spec_file, lineno); - vfprintf(stderr, s, vl); - putc('\n', stderr); - va_end (vl); - } - - if (level < 2) - errors++; -} - -void yyerrorlf(int level, long spec_lineno, const char *s, ...) -{ - if (opt_loglevel >= level) { - va_list vl; - va_start (vl, s); - fprintf(stderr, "%s: (%s:%ld): ", progname, spec_file, spec_lineno); - vfprintf(stderr, s, vl); - putc('\n', stderr); - va_end (vl); - } - - if (level < 2) - errors++; -} - -void yybadtoken(int tok, const char *context) -{ - const char *problem = 0; - - switch (tok) { - case TEXT: problem = "text"; break; - case IDENT: problem = "identifier"; break; - case ALL: problem = "\"all\""; break; - case SOME: problem = "\"some\""; break; - case NONE: problem = "\"none\""; break; - case MAYBE: problem = "\"maybe\""; break; - case CASES: problem = "\"cases\""; break; - case AND: problem = "\"and\""; break; - case OR: problem = "\"or\""; break; - case END: problem = "\"end\""; break; - case COLLECT: problem = "\"collect\""; break; - case UNTIL: problem = "\"until\""; break; - case COLL: problem = "\"coll\""; break; - case OUTPUT: problem = "\"output\""; break; - case REPEAT: problem = "\"repeat\""; break; - case REP: problem = "\"rep\""; break; - case SINGLE: problem = "\"single\""; break; - case FIRST: problem = "\"first\""; break; - case LAST: problem = "\"last\""; break; - case EMPTY: problem = "\"empty\""; break; - case DEFINE: problem = "\"define\""; break; - case NUMBER: problem = "\"number\""; break; - case REGCHAR: problem = "regular expression character"; break; - } - - if (problem != 0) - if (context) - yyerrorlf(1, lineno, "misplaced %s in %s", problem, context); - else - yyerrorlf(1, lineno, "unexpected %s", problem); - else - if (context) - yyerrorlf(1, lineno, "unterminated %s", context); - else - yyerrorlf(1, lineno, "unexpected end of input"); -} - -static int char_esc(int letter) -{ - switch (letter) { - case 'a': return '\a'; - case 'b': return '\b'; - case 't': return '\t'; - case 'n': return '\n'; - case 'v': return '\v'; - case 'f': return '\f'; - case 'r': return '\r'; - case 'e': return 27; - case '"': return '"'; - case '\'': return '\''; - } - - abort(); -} - -static int num_esc(char *num) -{ - if (num[0] == 'x') { - if (strlen(num) > 3) - yyerror("too many digits in hex character escape"); - return strtol(num + 1, 0, 16); - } else { - if (strlen(num) > 3) - yyerror("too many digits in octal character escape"); - return strtol(num, 0, 8); - } -} - -%} - -TOK [a-zA-Z_][a-zA-Z0-9_]*|[+-]?[0-9]+ -ID_END [^a-zA-Z0-9_] -NUM_END [^0-9] -WS [\t ]* -HEX [0-9A-Fa-f] -OCT [0-7] - -%x SPECIAL NESTED REGEX REGCLASS STRLIT CHRLIT - -%% - -{TOK} { - long val; - char *errp; - - errno = 0; - - val = strtol(yytext, &errp, 10); - - if (nesting == 0) - BEGIN(INITIAL); - - if (*errp != 0) { - /* not a number */ - yylval.lexeme = strdup(yytext); - return IDENT; - } - - if ((val == LONG_MAX || val == LONG_MIN) - && errno == ERANGE) - yyerror("numeric overflow in token"); - - yylval.num = val; - return NUMBER; - } - - -\({WS}all{WS}\) { - BEGIN(INITIAL); - return ALL; - } - -\({WS}some{WS}\) { - BEGIN(INITIAL); - return SOME; - } - -\({WS}none{WS}\) { - BEGIN(INITIAL); - return NONE; - } - -\({WS}maybe{WS}\) { - BEGIN(INITIAL); - return MAYBE; - } - -\({WS}cases{WS}\) { - BEGIN(INITIAL); - return CASES; - } - -\({WS}and{WS}\) { - BEGIN(INITIAL); - return AND; - } - -\({WS}or{WS}\) { - BEGIN(INITIAL); - return OR; - } - -\({WS}end{WS}\) { - BEGIN(INITIAL); - return END; - } - -\({WS}collect{WS}\) { - BEGIN(INITIAL); - return COLLECT; - } - -\({WS}coll{WS}\) { - BEGIN(INITIAL); - return COLL; - } - -\({WS}until{WS}\) { - BEGIN(INITIAL); - return UNTIL; - } - -\({WS}output{WS}\) { - BEGIN(INITIAL); - return OUTPUT; - } - -\({WS}repeat{WS}\) { - BEGIN(INITIAL); - return REPEAT; - } - - -\({WS}rep{WS}\) { - BEGIN(INITIAL); - return REP; - } - -\({WS}single{WS}\) { - BEGIN(INITIAL); - return SINGLE; - } - -\({WS}first{WS}\) { - BEGIN(INITIAL); - return FIRST; - } - -\({WS}last{WS}\) { - BEGIN(INITIAL); - return LAST; - } - -\({WS}empty{WS}\) { - BEGIN(INITIAL); - return EMPTY; - } - -\({WS}define/{ID_END} { - nesting++; - closechar = ')'; - BEGIN(NESTED); - return DEFINE; - } - -\{|\( { - nesting++; - if (yytext[0] == '{') - closechar = '}'; - else - closechar = ')'; - BEGIN(NESTED); - return yytext[0]; - } - -\}|\) { - if (yytext[0] != closechar) { - yyerror("paren mismatch"); - BEGIN(INITIAL); - } else { - switch (--nesting) { - case 1: - BEGIN(SPECIAL); - break; - case 0: - BEGIN(INITIAL); - break; - } - - return yytext[0]; - } - } - -[\t ]+ { /* Eat whitespace in directive */ } - -\" { - BEGIN(STRLIT); - return '"'; - } - -\' { - BEGIN(CHRLIT); - return '\''; - } - -@ { - if (nesting == 0) { - BEGIN(INITIAL); - yylval.lexeme = strdup("@"); - return TEXT; - } - } - -\n { - lineno++; - } - -[/] { - BEGIN(REGEX); - return '/'; - } - -\. { - yylval.chr = '.'; - return '.'; - } - -[\\][abtnvfre] { - char lexeme[2]; - lexeme[0] = char_esc(yytext[1]); - lexeme[1] = 0; - yylval.lexeme = strdup(lexeme); - BEGIN(INITIAL); - return TEXT; - } - -[\\](x{HEX}+|{OCT}+) { - char lexeme[2]; - lexeme[0] = num_esc(yytext + 1); - lexeme[1] = 0; - yylval.lexeme = strdup(lexeme); - BEGIN(INITIAL); - return TEXT; - } - -. { - yyerrorf(0, "bad character in directive: '%c'", - yytext[0]); - } - -[/] { - if (nesting == 0) - BEGIN(INITIAL); - else - BEGIN(NESTED); - yylval.chr = '/'; - return '/'; - } - - -[\\][abtnvfre] { - yylval.chr = char_esc(yytext[1]); - return REGCHAR; - } - -[\\](x{HEX}+|{OCT}+) { - yylval.chr = num_esc(yytext + 1); - return REGCHAR; - } - -\n { - lineno++; - yyerror("newline in regex"); - } - -[.*?+^] { - yylval.chr = yytext[0]; - return yytext[0]; - } - - -[\[\]\-] { - yylval.chr = yytext[0]; - return yytext[0]; - } - -[()|] { - yylval.chr = yytext[0]; - return yytext[0]; - } - -[\\]. { - yylval.chr = yytext[1]; - return REGCHAR; - } - -. { - yylval.chr = yytext[0]; - return REGCHAR; - } - -[^@\n]+ { - yylval.lexeme = strdup(yytext); - return TEXT; - } - -\n { - lineno++; - return '\n'; - } - -@{WS}\* { - BEGIN(SPECIAL); - return '*'; - } - -@ { - BEGIN(SPECIAL); - } - -^@#.*\n { - /* eat whole line comment */ - lineno++; - } - -@#.* { - /* comment to end of line */ - } - -\" { - if (nesting == 0) - BEGIN(INITIAL); - else - BEGIN(NESTED); - return '"'; - } - -\' { - if (nesting == 0) - BEGIN(INITIAL); - else - BEGIN(NESTED); - return '\''; - } - -[\\][abtnvfre] { - yylval.chr = char_esc(yytext[1]); - return LITCHAR; - } - -[\\](x{HEX}+|{OCT}+) { - yylval.chr = num_esc(yytext + 1); - return LITCHAR; - } -\n { - yyerror("newline in string literal"); - lineno++; - yylval.chr = yytext[0]; - return LITCHAR; - } -\n { - yyerror("newline in character literal"); - lineno++; - yylval.chr = yytext[0]; - return LITCHAR; - } -. { - yylval.chr = yytext[0]; - return LITCHAR; - } - -%% - -void help(void) -{ - const char *text = -"\n" -"txr version %s\n" -"\n" -"copyright 2009, Kaz Kylheku \n" -"\n" -"usage:\n" -"\n" -" %s [ options ] query-file { data-file }*\n" -"\n" -"The query-file or data-file arguments may be specified as -, in which case\n" -"standard input is used. If these arguments end with a | character, then\n" -"they are treated as command pipes. Leading arguments which begin with a -\n" -"followed by one or more characters, and which are not arguments to options\n" -"are interpreted as options. The -- option indicates the end of the options.\n" -"\n" -"If no data-file arguments sare supplied, then the query itself must open a\n" -"a data source prior to attempting to make any pattern match, or it will\n" -"simply fail due to a match which has run out of data.\n" -"\n" -"options:\n" -"\n" -"-Dvar=value Pre-define variable var, with the given value.\n" -" A list value can be specified using commas.\n" -"-Dvar Predefine variable var, with empty string value.\n" -"-q Quiet: don't report errors during query matching.\n" -"-v Verbose: extra logging from matcher.\n" -"-b Don't dump list of bindings.\n" -"-a num Generate array variables up to num-dimensions.\n" -" Default is 1. Additional dimensions are fudged\n" -" by generating numeric suffixes\n" -"--help You already know!\n" -"--version Display program version\n" -"\n" -"Options that take no argument can be combined. The -q and -v options\n" -"are mutually exclusive; the right-most one dominates.\n" -"\n" - ; - fprintf(stdout, text, version, progname); -} - -void hint(void) -{ - fprintf(stderr, "%s: incorrect arguments: try --help\n", progname); -} - -int main(int argc, char **argv) -{ - obj_t *stack_bottom_0 = nil; - obj_t *spec = nil; - obj_t *bindings = nil; - int match_loglevel = opt_loglevel; - progname = argv[0] ? argv[0] : progname; - obj_t *stack_bottom_1 = nil; - - init(progname, oom_realloc_handler, &stack_bottom_0, &stack_bottom_1); - - if (argc <= 1) { - hint(); - return EXIT_FAILURE; - } - - argc--, argv++; - - while (argc > 0 && (*argv)[0] == '-') { - if (!strcmp(*argv, "--")) { - argv++, argc--; - break; - } - - if (!strcmp(*argv, "-")) - break; - - if (!strncmp(*argv, "-D", 2)) { - char *var = *argv + 2; - char *equals = strchr(var, '='); - char *has_comma = (equals != 0) ? strchr(equals, ',') : 0; - - if (has_comma) { - char *val = equals + 1; - obj_t *list = nil; - - *equals = 0; - - for (;;) { - size_t piece = strcspn(val, ","); - char comma_p = val[piece]; - - val[piece] = 0; - - list = cons(string(strdup(val)), list); - - if (!comma_p) - break; - - val += piece + 1; - } - - list = nreverse(list); - bindings = cons(cons(intern(string(strdup(var))), list), bindings); - } else if (equals) { - char *val = equals + 1; - *equals = 0; - bindings = cons(cons(intern(string(strdup(var))), - string(strdup(val))), bindings); - } else { - bindings = cons(cons(intern(string(strdup(var))), - null_string), bindings); - } - - argc--, argv++; - continue; - } - - if (!strcmp(*argv, "--version")) { - printf("%s: version %s\n", progname, version); - return 0; - } - - if (!strcmp(*argv, "--help")) { - help(); - return 0; - } - - if (!strcmp(*argv, "-a")) { - long val; - char *errp; - char opt = (*argv)[1]; - - if (argc == 1) { - fprintf(stderr, "%s: option %c needs argument\n", progname, opt); - - return EXIT_FAILURE; - } - - argv++, argc--; - - switch (opt) { - case 'a': - val = strtol(*argv, &errp, 10); - if (*errp != 0) { - fprintf(stderr, "%s: option %c needs numeric argument, not %s\n", - progname, opt, *argv); - return EXIT_FAILURE; - } - - opt_arraydims = val; - break; - } - - argv++, argc--; - continue; - } - - if (!strcmp(*argv, "--gc-debug")) { - opt_gc_debug = 1; - argv++, argc--; - continue; - } - - { - char *popt; - for (popt = (*argv)+1; *popt != 0; popt++) { - switch (*popt) { - case 'v': - match_loglevel = 2; - break; - case 'q': - match_loglevel = 0; - break; - case 'b': - opt_nobindings = 1; - break; - case '-': - fprintf(stderr, "%s: unrecognized long option: --%s\n", - progname, popt + 1); - return EXIT_FAILURE; - default: - fprintf(stderr, "%s: unrecognized option: %c\n", progname, *popt); - return EXIT_FAILURE; - } - } - - argc--, argv++; - } - } - - if (argc < 1) { - hint(); - return EXIT_FAILURE; - } - - if (strcmp(*argv, "-") != 0) { - yyin = fopen(*argv, "r"); - if (yyin == 0) { - fprintf(stderr, "%s: unable to open %s\n", progname, *argv); - return EXIT_FAILURE; - } - spec_file = *argv; - } - - argc--, argv++; - - { - int gc; - - gc = gc_state(0); - yyparse(); - gc_state(gc); - - if (errors) - return EXIT_FAILURE; - spec = get_spec(); - - - opt_loglevel = match_loglevel; - - if (opt_loglevel >= 2) { - fputs("spec:\n", stderr); - dump(spec, stderr); - - fputs("bindings:\n", stderr); - dump(bindings, stderr); - } - - { - int retval; - list_collect_decl(filenames, iter); - - while (*argv) - list_collect(iter, string(*argv++)); - - retval = extract(spec, filenames, bindings); - - return errors ? EXIT_FAILURE : retval; - } - } -} diff --git a/extract.y b/extract.y deleted file mode 100644 index 5ac61b2b..00000000 --- a/extract.y +++ /dev/null @@ -1,1850 +0,0 @@ -/* Copyright 2009 - * Kaz Kylheku - * Vancouver, Canada - * All rights reserved. - * - * BSD License: - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * 3. The name of the author may not be used to endorse or promote - * products derived from this software without specific prior - * written permission. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. - */ - -%{ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "lib.h" -#include "gc.h" -#include "unwind.h" -#include "regex.h" -#include "extract.h" - -int yylex(void); -void yyerror(const char *); - -obj_t *repeat_rep_helper(obj_t *sym, obj_t *main, obj_t *parts); -obj_t *define_transform(obj_t *define_form); - -static obj_t *parsed_spec; -static int output_produced; - -%} - -%union { - char *lexeme; - union obj *obj; - char chr; - long num; -} - -%token TEXT IDENT ALL SOME NONE MAYBE CASES AND OR END COLLECT -%token UNTIL COLL OUTPUT REPEAT REP SINGLE FIRST LAST EMPTY DEFINE -%token NUMBER -%token REGCHAR LITCHAR - -%type spec clauses clause all_clause some_clause none_clause maybe_clause -%type cases_clause collect_clause clause_parts additional_parts -%type output_clause define_clause line elems_opt elems elem var var_op -%type list exprs expr out_clauses out_clauses_opt out_clause -%type repeat_clause repeat_parts_opt o_line -%type o_elems_opt o_elems_opt2 o_elems o_elem rep_elem rep_parts_opt -%type regex regexpr regbranch -%type regterm regclass regclassterm regrange -%type strlit chrlit litchars -%type regchar -%nonassoc ALL SOME NONE MAYBE CASES AND OR END COLLECT UNTIL COLL -%nonassoc OUTPUT REPEAT REP FIRST LAST EMPTY DEFINE -%nonassoc '{' '}' '[' ']' '(' ')' -%right IDENT TEXT NUMBER -%left '|' '/' -%right '*' '?' '+' -%right '^' '.' '\\' REGCHAR - -%% - -spec : clauses { parsed_spec = $1; } - | { parsed_spec = nil; } - | error { parsed_spec = nil; - yybadtoken(yychar, 0); } - ; - -clauses : clause { $$ = cons($1, nil); } - | clause clauses { $$ = cons($1, $2); } - ; - -clause : all_clause { $$ = list(num(lineno - 1), $1, nao); } - | some_clause { $$ = list(num(lineno - 1), $1, nao); } - | none_clause { $$ = list(num(lineno - 1), $1, nao); } - | maybe_clause { $$ = list(num(lineno - 1), $1, nao); } - | cases_clause { $$ = list(num(lineno - 1), $1, nao); } - | collect_clause { $$ = list(num(lineno - 1), $1, nao); } - | define_clause { $$ = list(num(lineno - 1), - define_transform($1), nao); } - | output_clause { $$ = list(num(lineno - 1), $1, nao); } - | line { $$ = $1; } - | repeat_clause { $$ = nil; - yyerror("repeat outside of output"); } - ; - -all_clause : ALL newl clause_parts { $$ = cons(all, $3); } - | ALL newl error { $$ = nil; - yybadtoken(yychar, - "all clause"); } - | ALL newl END newl { $$ = nil; - yyerror("empty all clause"); } - - ; - -some_clause : SOME newl clause_parts { $$ = cons(some, $3); } - | SOME newl error { $$ = nil; - yybadtoken(yychar, - "some clause"); } - | SOME newl END newl { $$ = nil; - yyerror("empty some clause"); } - ; - -none_clause : NONE newl clause_parts { $$ = cons(none, $3); } - | NONE newl error { $$ = nil; - yybadtoken(yychar, - "none clause"); } - | NONE newl END newl { $$ = nil; - yyerror("empty none clause"); } - ; - -maybe_clause : MAYBE newl clause_parts { $$ = cons(maybe, $3); } - | MAYBE newl error { $$ = nil; - yybadtoken(yychar, - "maybe clause"); } - | MAYBE newl END newl { $$ = nil; - yyerror("empty maybe clause"); } - ; - -cases_clause : CASES newl clause_parts { $$ = cons(cases, $3); } - | CASES newl error { $$ = nil; - yybadtoken(yychar, - "cases clause"); } - | CASES newl END newl { $$ = nil; - yyerror("empty cases clause"); } - ; - -collect_clause : COLLECT newl clauses END newl { $$ = list(collect, $3, nao); } - | COLLECT newl clauses - UNTIL newl clauses END newl { $$ = list(collect, $3, - $6, nao); } - | COLLECT newl error { $$ = nil; - if (yychar == UNTIL || yychar == END) - yyerror("empty collect"); - else - yybadtoken(yychar, - "collect clause"); } - ; - -clause_parts : clauses additional_parts { $$ = cons($1, $2); } - ; - -additional_parts : END newl { $$ = nil; } - | AND newl clauses additional_parts { $$ = cons($3, $4); } - | OR newl clauses additional_parts { $$ = cons($3, $4); } - ; - -line : elems_opt '\n' { $$ = $1; } - ; - -elems_opt : elems { $$ = cons(num(lineno - 1), $1); } - | { $$ = nil; } - ; - -elems : elem { $$ = cons($1, nil); } - | elem elems { $$ = cons($1, $2); } - | rep_elem { $$ = nil; - yyerror("rep outside of output"); } - ; - -elem : TEXT { $$ = string($1); } - | var { $$ = $1; } - | list { $$ = $1; } - | regex { $$ = cons(regex_compile($1), $1); } - | COLL elems END { $$ = list(coll, $2, nao); } - | COLL elems - UNTIL elems END { $$ = list(coll, $2, $4, nao); } - | COLL error { $$ = nil; - yybadtoken(yychar, "coll clause"); } - ; - -define_clause : DEFINE exprs ')' newl - clauses - END newl { $$ = list(define, $2, $5, nao); } - | DEFINE ')' newl - clauses - END newl { $$ = list(define, nil, $4, nao); } - | DEFINE exprs ')' newl - END newl { $$ = list(define, $2, nao); } - | DEFINE ')' newl - END newl { $$ = list(define, nao); } - | DEFINE error { yybadtoken(yychar, "list expression"); } - | DEFINE exprs ')' newl - error { yybadtoken(yychar, "define"); } - | DEFINE ')' newl - error { yybadtoken(yychar, "define"); } - ; - -output_clause : OUTPUT o_elems '\n' - out_clauses - END newl { $$ = list(output, $4, $2, nao); } - | OUTPUT newl - out_clauses - END newl { $$ = list(output, $3, nao); } - | OUTPUT o_elems '\n' - error { $$ = nil; - yybadtoken(yychar, "output clause"); } - | OUTPUT newl - error { $$ = nil; - yybadtoken(yychar, "output clause"); } - ; - -out_clauses : out_clause { $$ = cons($1, nil); } - | out_clause out_clauses { $$ = cons($1, $2); } - ; - -out_clause : repeat_clause { $$ = list(num(lineno - 1), $1, nao); } - | o_line { $$ = $1; } - | all_clause { $$ = nil; - yyerror("match clause in output"); } - | some_clause { $$ = nil; - yyerror("match clause in output"); } - | none_clause { $$ = nil; - yyerror("match clause in output"); } - | maybe_clause { $$ = nil; - yyerror("match clause in output"); } - | cases_clause { $$ = nil; - yyerror("match clause in output"); } - | collect_clause { $$ = nil; - yyerror("match clause in output"); } - | define_clause { $$ = nil; - yyerror("match clause in output"); } - | output_clause { $$ = nil; - yyerror("match clause in output"); } - ; - -repeat_clause : REPEAT newl - out_clauses - repeat_parts_opt - END newl { $$ = repeat_rep_helper(repeat, $3, $4); } - | REPEAT newl - error { $$ = nil; - yybadtoken(yychar, "repeat clause"); } - ; - -repeat_parts_opt : SINGLE newl - out_clauses_opt - repeat_parts_opt { $$ = cons(cons(single, $3), $4); } - | FIRST newl - out_clauses_opt - repeat_parts_opt { $$ = cons(cons(frst, $3), $4); } - | LAST newl - out_clauses_opt - repeat_parts_opt { $$ = cons(cons(lst, $3), $4); } - | EMPTY newl - out_clauses_opt - repeat_parts_opt { $$ = cons(cons(empty, $3), $4); } - | /* empty */ { $$ = nil; } - ; - - -out_clauses_opt : out_clauses { $$ = $1; } - | /* empty */ { $$ = null_list; } - -o_line : o_elems_opt '\n' { $$ = $1; } - ; - -o_elems_opt : o_elems { $$ = cons(num(lineno - 1), $1); } - | { $$ = nil; } - ; - -o_elems_opt2 : o_elems { $$ = $1; } - | { $$ = null_list; } - ; - -o_elems : o_elem { $$ = cons($1, nil); } - | o_elem o_elems { $$ = cons($1, $2); } - ; - -o_elem : TEXT { $$ = string($1); } - | var { $$ = $1; } - | rep_elem { $$ = $1; } - ; - -rep_elem : REP o_elems - rep_parts_opt END { $$ = repeat_rep_helper(rep, $2, $3); } - | REP error { $$ = nil; yybadtoken(yychar, "rep clause"); } - ; - -rep_parts_opt : SINGLE o_elems_opt2 - rep_parts_opt { $$ = cons(cons(single, $2), $3); } - | FIRST o_elems_opt2 - rep_parts_opt { $$ = cons(cons(frst, $2), $3); } - | LAST o_elems_opt2 - rep_parts_opt { $$ = cons(cons(lst, $2), $3); } - | EMPTY o_elems_opt2 - rep_parts_opt { $$ = cons(cons(empty, $2), $3); } - | /* empty */ { $$ = nil; } - ; - - -/* This sucks, but factoring '*' into a nonterminal - * that generates an empty phrase causes reduce/reduce conflicts. - */ -var : IDENT { $$ = list(var, intern(string($1)), nao); } - | IDENT elem { $$ = list(var, intern(string($1)), $2, nao); } - | '{' IDENT '}' { $$ = list(var, intern(string($2)), nao); } - | '{' IDENT '}' elem { $$ = list(var, intern(string($2)), $4, nao); } - | '{' IDENT regex '}' { $$ = list(var, intern(string($2)), - nil, cons(regex_compile($3), $3), - nao); } - | '{' IDENT NUMBER '}' { $$ = list(var, intern(string($2)), - nil, num($3), nao); } - | var_op IDENT { $$ = list(var, intern(string($2)), - nil, $1, nao); } - | var_op IDENT elem { $$ = list(var, intern(string($2)), - $3, $1, nao); } - | var_op '{' IDENT '}' { $$ = list(var, intern(string($3)), - nil, $1, nao); } - | var_op '{' IDENT '}' elem { $$ = list(var, intern(string($3)), - $5, $1, nao); } - | IDENT error { $$ = nil; - yybadtoken(yychar, "variable spec"); } - | var_op error { $$ = nil; - yybadtoken(yychar, "variable spec"); } - ; - -var_op : '*' { $$ = t; } - ; - -list : '(' exprs ')' { $$ = $2; } - | '(' ')' { $$ = nil; } - | '(' error { $$ = nil; - yybadtoken(yychar, "list expression"); } - ; - -exprs : expr { $$ = cons($1, nil); } - | expr exprs { $$ = cons($1, $2); } - | expr '.' expr { $$ = cons($1, $3); } - ; - -expr : IDENT { $$ = intern(string($1)); } - | NUMBER { $$ = num($1); } - | list { $$ = $1; } - | regex { $$ = cons(regex_compile($1), $1); } - | chrlit { $$ = $1; } - | strlit { $$ = $1; } - ; - -regex : '/' regexpr '/' { $$ = $2; } - | '/' '/' { $$ = nil; } - | '/' error { $$ = nil; - yybadtoken(yychar, "regex"); } - ; - -regexpr : regbranch { $$ = $1; } - | regbranch '|' regbranch { $$ = list(list(or, $1, - $3, nao), nao); } - ; - -regbranch : regterm { $$ = cons($1, nil); } - | regterm regbranch { $$ = cons($1, $2); } - ; - -regterm : '[' regclass ']' { $$ = cons(set, $2); } - | '[' '^' regclass ']' { $$ = cons(cset, $3); } - | '.' { $$ = wild; } - | '^' { $$ = chr('^'); } - | ']' { $$ = chr(']'); } - | '-' { $$ = chr('-'); } - | regterm '*' { $$ = list(zeroplus, $1, nao); } - | regterm '+' { $$ = list(oneplus, $1, nao); } - | regterm '?' { $$ = list(optional, $1, nao); } - | REGCHAR { $$ = chr($1); } - | '(' regexpr ')' { $$ = cons(compound, $2); } - | '(' error { $$ = nil; - yybadtoken(yychar, "regex subexpression"); } - | '[' error { $$ = nil; - yybadtoken(yychar, "regex character class"); } - ; - -regclass : regclassterm { $$ = cons($1, nil); } - | regclassterm regclass { $$ = cons($1, $2); } - ; - -regclassterm : regrange { $$ = $1; } - | regchar { $$ = chr($1); } - ; - -regrange : regchar '-' regchar { $$ = cons(chr($1), chr($3)); } - -regchar : '?' { $$ = '?'; } - | '.' { $$ = '.'; } - | '*' { $$ = '*'; } - | '+' { $$ = '+'; } - | '(' { $$ = '('; } - | ')' { $$ = ')'; } - | '^' { $$ = '^'; } - | '|' { $$ = '|'; } - | REGCHAR { $$ = $1; } - ; - -newl : '\n' - | error '\n' { yyerror("newline expected after directive"); - yyerrok; } - ; - -strlit : '"' '"' { $$ = null_string; } - | '"' litchars '"' { - if ($2) { - obj_t *len = length($2), *iter, *ix; - $$ = mkustring(len); - for (iter = $2, ix = zero; - iter; - iter = cdr(iter), ix = plus(ix, one)) - { - chr_str_set($$, ix, car(iter)); - } - } else { - $$ = nil; - } - } - | '"' error { yybadtoken(yychar, "string literal"); } - ; - -chrlit : '\'' '\'' { yyerror("empty character literal"); - $$ = nil; } - | '\'' litchars '\'' { $$ = car($2); - if (cdr($2)) - yyerror("multiple characters in " - "character literal"); } - | '\'' error { yybadtoken(yychar, "character literal"); } - ; - -litchars : LITCHAR { $$ = cons(chr($1), nil); } - | LITCHAR litchars { $$ = cons(chr($1), $2); } - ; -%% - -obj_t *repeat_rep_helper(obj_t *sym, obj_t *main, obj_t *parts) -{ - obj_t *single_parts = nil; - obj_t *first_parts = nil; - obj_t *last_parts = nil; - obj_t *empty_parts = nil; - obj_t *iter; - - for (iter = parts; iter != nil; iter = cdr(iter)) { - obj_t *part = car(iter); - obj_t *sym = car(part); - obj_t *clauses = cdr(part); - - if (sym == single) - single_parts = nappend2(single_parts, clauses); - else if (sym == frst) - first_parts = nappend2(first_parts, clauses); - else if (sym == lst) - last_parts = nappend2(last_parts, clauses); - else if (sym == empty) - empty_parts = nappend2(empty_parts, clauses); - else - abort(); - } - - return list(sym, main, single_parts, first_parts, - last_parts, empty_parts, nao); -} - -obj_t *define_transform(obj_t *define_form) -{ - obj_t *sym = first(define_form); - obj_t *args = second(define_form); - - if (define_form == nil) - return nil; - - assert (sym == define); - - if (args == nil) { - yyerror("define requires arguments"); - return define_form; - } - - if (!consp(args) || !listp(cdr(args))) { - yyerror("bad define argument syntax"); - return define_form; - } else { - obj_t *name = first(args); - obj_t *params = second(args); - - if (!symbolp(name)) { - yyerror("function name must be a symbol"); - return define_form; - } - - if (!proper_listp(params)) { - yyerror("invalid function parameter list"); - return define_form; - } - - if (!all_satisfy(params, func_n1(symbolp), nil)) - yyerror("function parameters must be symbols"); - } - - return define_form; -} - -obj_t *get_spec(void) -{ - return parsed_spec; -} - -void dump_shell_string(const char *str) -{ - int ch; - - putchar('"'); - while ((ch = *str++) != 0) { - switch (ch) { - case '"': case '`': case '$': case '\\': case '\n': - putchar('\\'); - /* fallthrough */ - default: - putchar(ch); - } - } - putchar('"'); -} - -void dump_var(const char *name, char *pfx1, size_t len1, - char *pfx2, size_t len2, obj_t *value, int level) -{ - if (len1 >= 112 || len2 >= 112) - abort(); - - if (stringp(value) || chrp(value)) { - fputs(name, stdout); - fputs(pfx1, stdout); - fputs(pfx2, stdout); - putchar('='); - if (stringp(value)) { - dump_shell_string(c_str(value)); - } else { - char mini[2]; - mini[0] = c_chr(value); - mini[1] = 0; - dump_shell_string(mini); - } - putchar('\n'); - } else { - obj_t *iter; - int i; - size_t add1 = 0, add2 = 0; - - for (i = 0, iter = value; iter; iter = cdr(iter), i++) { - if (level < opt_arraydims) { - add2 = sprintf(pfx2 + len2, "[%d]", i); - add1 = 0; - } else { - add1 = sprintf(pfx1 + len1, "_%d", i); - add2 = 0; - } - - dump_var(name, pfx1, len1 + add1, pfx2, len2 + add2, car(iter), level + 1); - } - } -} - -void dump_bindings(obj_t *bindings) -{ - if (opt_loglevel >= 2) { - fputs("raw_bindings:\n", stderr); - dump(bindings, stderr); - } - - while (bindings) { - char pfx1[128], pfx2[128]; - obj_t *var = car(car(bindings)); - obj_t *value = cdr(car(bindings)); - const char *name = c_str(symbol_name(var)); - *pfx1 = 0; *pfx2 = 0; - dump_var(name, pfx1, 0, pfx2, 0, value, 0); - bindings = cdr(bindings); - } -} - -obj_t *depth(obj_t *obj) -{ - obj_t *dep = zero; - - if (obj == nil) - return one; - - if (atom(obj)) - return zero; - - while (obj) { - dep = max2(dep, depth(first(obj))); - obj = rest(obj); - } - - return plus(dep, one); -} - -obj_t *weird_merge(obj_t *left, obj_t *right) -{ - obj_t *left_depth = depth(left); - obj_t *right_depth = depth(right); - - while (lt(left_depth, right_depth) || zerop(left_depth)) { - left = cons(left, nil); - left_depth = plus(left_depth, one); - } - - while (lt(right_depth, left_depth) || zerop(right_depth)) { - right = cons(right, nil); - right_depth = plus(right_depth, one); - } - - return append2(left, right); -} - -obj_t *map_leaf_lists(obj_t *func, obj_t *list) -{ - if (atom(list)) - return list; - if (none_satisfy(list, func_n1(listp), nil)) - return funcall1(func, list); - return mapcar(bind2(func_n2(map_leaf_lists), func), list); -} - -obj_t *dest_bind(obj_t *bindings, obj_t *pattern, obj_t *value) -{ - if (nullp(pattern)) - return bindings; - - if (symbolp(pattern)) { - obj_t *existing = assoc(bindings, pattern); - if (existing) { - if (tree_find(value, cdr(existing))) - return bindings; - if (tree_find(cdr(existing), value)) - return bindings; - yyerrorf(2, "bind variable mismatch: %s", c_str(symbol_name(pattern))); - return t; - } - return cons(cons(pattern, value), bindings); - } - - if (consp(pattern)) { - obj_t *piter = pattern, *viter = value; - - while (consp(piter) && consp(viter)) - { - bindings = dest_bind(bindings, car(piter), car(viter)); - if (bindings == t) - return t; - piter = cdr(piter); - viter = cdr(viter); - } while (consp(piter) && consp(viter)); - - if (symbolp(piter)) { - bindings = dest_bind(bindings, piter, viter); - if (bindings == t) - return t; - } - } - - return bindings; -} - -obj_t *eval_form(obj_t *form, obj_t *bindings) -{ - if (symbolp(form)) - return assoc(bindings, form); - return cons(t, form); -} - -obj_t *match_line(obj_t *bindings, obj_t *specline, obj_t *dataline, - obj_t *pos, obj_t *spec_lineno, obj_t *data_lineno, - obj_t *file) -{ -#define LOG_MISMATCH(KIND) \ - yyerrorlf(2, c_num(spec_lineno), \ - "%s mismatch, position %ld (%s:%ld)", (KIND), c_num(pos), \ - c_str(file), c_num(data_lineno)); \ - yyerrorlf(2, c_num(spec_lineno), " %s", c_str(dataline)); \ - if (c_num(pos) < 77) \ - yyerrorlf(2, c_num(spec_lineno), " %*s^", (int) c_num(pos), "") - -#define LOG_MATCH(KIND, EXTENT) \ - yyerrorlf(2, c_num(spec_lineno), \ - "%s matched, position %ld-%ld (%s:%ld)", (KIND), \ - c_num(pos), c_num(EXTENT), c_str(file), \ - c_num(data_lineno)); \ - yyerrorlf(2, c_num(spec_lineno), " %s", c_str(dataline)); \ - if (c_num(EXTENT) < 77) \ - yyerrorlf(2, c_num(spec_lineno), " %*s%-*s^", (int) c_num(pos), \ - "", (int) (c_num(EXTENT) - c_num(pos)), "^") - for (;;) { - obj_t *elem; - - if (specline == nil) - break; - - elem = first(specline); - - switch (elem ? elem->t.type : 0) { - case CONS: /* directive */ - { - obj_t *directive = first(elem); - - if (directive == var) { - obj_t *sym = second(elem); - obj_t *pat = third(elem); - obj_t *modifier = fourth(elem); - obj_t *pair = assoc(bindings, sym); /* var exists already? */ - - if (pair) { - /* If the variable already has a binding, we replace - it with its value, and treat it as a string match. - The spec looks like ((var ) ...) - and it must be transformed into - ( ...) */ - if (pat) { - specline = cons(cdr(pair), cons(pat, rest(specline))); - } else if (nump(modifier)) { - obj_t *past = plus(pos, modifier); - - if (c_num(past) > c_num(length_str(dataline)) || - c_num(past) < c_num(pos)) - { - LOG_MISMATCH("fixed field size"); - return nil; - } - - if (!tree_find(trim_str(sub_str(dataline, pos, past)), - cdr(pair))) - { - LOG_MISMATCH("fixed field contents"); - return nil; - } - - LOG_MATCH("fixed field", past); - pos = past; - specline = cdr(specline); - } else { - specline = cons(cdr(pair), rest(specline)); - } - continue; - } else if (pat == nil) { /* match to end of line or with regex */ - if (consp(modifier)) { - obj_t *past = match_regex(dataline, car(modifier), pos); - if (nullp(past)) { - LOG_MISMATCH("var positive regex"); - return nil; - } - LOG_MATCH("var positive regex", past); - bindings = acons_new(bindings, sym, sub_str(dataline, pos, past)); - pos = past; - } else if (nump(modifier)) { - obj_t *past = plus(pos, modifier); - if (c_num(past) > c_num(length_str(dataline)) || - c_num(past) < c_num(pos)) - { - LOG_MISMATCH("count based var"); - return nil; - } - LOG_MATCH("count based var", past); - bindings = acons_new(bindings, sym, trim_str(sub_str(dataline, pos, past))); - pos = past; - } else { - bindings = acons_new(bindings, sym, sub_str(dataline, pos, nil)); - pos = length_str(dataline); - } - } else if (pat->t.type == STR) { - obj_t *find = search_str(dataline, pat, pos, modifier); - if (!find) { - LOG_MISMATCH("var delimiting string"); - return nil; - } - LOG_MATCH("var delimiting string", find); - bindings = acons_new(bindings, sym, sub_str(dataline, pos, find)); - pos = plus(find, length_str(pat)); - } else if (consp(pat) && typeof(first(pat)) == regex) { - obj_t *find = search_regex(dataline, first(pat), pos, modifier); - obj_t *fpos = car(find); - obj_t *flen = cdr(find); - if (!find) { - LOG_MISMATCH("var delimiting regex"); - return nil; - } - LOG_MATCH("var delimiting regex", fpos); - bindings = acons_new(bindings, sym, sub_str(dataline, pos, fpos)); - pos = plus(fpos, flen); - } else if (consp(pat) && first(pat) == var) { - /* Unbound var followed by var: the following one must be bound. */ - obj_t *second_sym = second(pat); - obj_t *next_pat = third(pat); - obj_t *pair = assoc(bindings, second_sym); /* var exists already? */ - - if (!pair) { - yyerrorlf(1, c_num(spec_lineno), "consecutive unbound variables"); - return nil; - } - - /* Re-generate a new spec with an edited version of - the element we just processed, and repeat. */ - { - obj_t *new_elem = list(var, sym, cdr(pair), modifier, nao); - - if (next_pat) - specline = cons(new_elem, cons(next_pat, rest(specline))); - else - specline = cons(new_elem, rest(specline)); - } - - continue; - } else if (consp(pat) && (consp(first(pat)) || stringp(first(pat)))) { - cons_bind (find, len, search_str(dataline, pat, pos, modifier)); - if (!find) { - LOG_MISMATCH("string"); - return nil; - } - bindings = acons_new(bindings, sym, sub_str(dataline, pos, find)); - pos = plus(find, len); - } else { - yyerrorlf(0, c_num(spec_lineno), "variable followed by invalid element"); - return nil; - } - } else if (typeof(directive) == regex) { - obj_t *past = match_regex(dataline, directive, pos); - if (nullp(past)) { - LOG_MISMATCH("regex"); - return nil; - } - LOG_MATCH("regex", past); - pos = past; - } else if (directive == coll) { - obj_t *coll_specline = second(elem); - obj_t *until_specline = third(elem); - obj_t *bindings_coll = nil; - obj_t *iter; - - for (;;) { - cons_bind (new_bindings, new_pos, - match_line(bindings, coll_specline, dataline, pos, - spec_lineno, data_lineno, file)); - - if (until_specline) { - cons_bind (until_bindings, until_pos, - match_line(bindings, until_specline, dataline, pos, - spec_lineno, data_lineno, file)); - - if (until_pos) { - (void) until_bindings; - LOG_MATCH("until", until_pos); - break; - } else { - LOG_MISMATCH("until"); - } - } - - if (new_pos) { - LOG_MATCH("coll", new_pos); - - for (iter = new_bindings; iter && iter != bindings; - iter = cdr(iter)) - { - obj_t *binding = car(iter); - obj_t *existing = assoc(bindings_coll, car(binding)); - - bindings_coll = acons_new(bindings_coll, car(binding), - cons(cdr(binding), cdr(existing))); - } - } - - if (new_pos && !equal(new_pos, pos)) { - pos = new_pos; - assert (c_num(pos) <= c_num(length_str(dataline))); - } else { - pos = plus(pos, one); - } - - if (c_num(pos) >= c_num(length_str(dataline))) - break; - } - - - if (!bindings_coll) - yyerrorlf(2, c_num(spec_lineno), "nothing was collected"); - - for (iter = bindings_coll; iter; iter = cdr(iter)) { - obj_t *pair = car(iter); - obj_t *rev = cons(car(pair), nreverse(cdr(pair))); - bindings = cons(rev, bindings); - } - } else if (consp(directive) || stringp(directive)) { - cons_bind (find, len, search_str_tree(dataline, elem, pos, nil)); - obj_t *newpos; - - if (find == nil || !equal(find, pos)) { - LOG_MISMATCH("string tree"); - return nil; - } - - newpos = plus(find, len); - LOG_MATCH("string tree", newpos); - pos = newpos; - } else { - yyerrorlf(0, c_num(spec_lineno), "unknown directive: %s", - c_str(symbol_name(directive))); - } - } - break; - case STR: - { - obj_t *find = search_str(dataline, elem, pos, nil); - obj_t *newpos; - if (find == nil || !equal(find, pos)) { - LOG_MISMATCH("string"); - return nil; - } - newpos = plus(find, length_str(elem)); - LOG_MATCH("string", newpos); - pos = newpos; - break; - } - default: - yyerrorlf(0, c_num(spec_lineno), "unsupported object in spec"); - } - - specline = cdr(specline); - } - - return cons(bindings, pos); -} - -obj_t *format_field(obj_t *string_or_list, obj_t *spec) -{ - if (!stringp(string_or_list)) - return string_or_list; - - { - obj_t *right = lt(spec, zero); - obj_t *width = if3(lt(spec, zero), neg(spec), spec); - obj_t *diff = minus(width, length_str(string_or_list)); - - if (le(diff, zero)) - return string_or_list; - - if (ge(length_str(string_or_list), width)) - return string_or_list; - - { - obj_t *padding = mkstring(diff, chr(' ')); - - return if3(right, - cat_str(list(padding, string_or_list, nao), nil), - cat_str(list(string_or_list, padding, nao), nil)); - } - } -} - -obj_t *subst_vars(obj_t *spec, obj_t *bindings) -{ - list_collect_decl(out, iter); - - while (spec) { - obj_t *elem = first(spec); - - if (consp(elem) && first(elem) == var) { - obj_t *sym = second(elem); - obj_t *pat = third(elem); - obj_t *modifier = fourth(elem); - obj_t *pair = assoc(bindings, sym); - - if (pair) { - if (pat) - spec = cons(cdr(pair), cons(pat, rest(spec))); - else if (nump(modifier)) - spec = cons(format_field(cdr(pair), modifier), rest(spec)); - else - spec = cons(cdr(pair), rest(spec)); - continue; - } - } - - list_collect(iter, elem); - spec = cdr(spec); - } - - return out; -} - -typedef struct fpip { - FILE *f; - DIR *d; - enum { fpip_fclose, fpip_pclose, fpip_closedir } close; -} fpip_t; - -fpip_t complex_open(obj_t *name, obj_t *output) -{ - fpip_t ret = { 0 }; - - const char *namestr = c_str(name); - long len = c_num(length_str(name)); - - if (len == 0) - return ret; - - if (!strcmp(namestr, "-")) { - ret.close = fpip_fclose; - ret.f = output ? stdout : stdin; - output_produced = output ? 1 : 0; - } else if (namestr[0] == '!') { - ret.close = fpip_pclose; - ret.f = popen(namestr+1, output ? "w" : "r"); - } else if (namestr[0] == '$') { - if (output) - return ret; - ret.close = fpip_closedir; - ret.d = opendir(namestr+1); - } else { - ret.close = fpip_fclose; - ret.f = fopen(namestr, output ? "w" : "r"); - } - - return ret; -} - -int complex_open_failed(fpip_t fp) -{ - return fp.f == 0 && fp.d == 0; -} - -void complex_close(fpip_t fp) -{ - if (fp.f == 0) - return; - switch (fp.close) { - case fpip_fclose: - if (fp.f != stdin && fp.f != stdout) - fclose(fp.f); - return; - case fpip_pclose: - pclose(fp.f); - return; - case fpip_closedir: - closedir(fp.d); - return; - } - - abort(); -} - -obj_t *complex_snarf(fpip_t fp, obj_t *name) -{ - switch (fp.close) { - case fpip_fclose: - return lazy_stream_cons(stdio_line_stream(fp.f, name)); - case fpip_pclose: - return lazy_stream_cons(pipe_line_stream(fp.f, name)); - case fpip_closedir: - return lazy_stream_cons(dirent_stream(fp.d, name)); - } - - abort(); -} - -obj_t *robust_length(obj_t *obj) -{ - if (obj == nil) - return zero; - if (atom(obj)) - return negone; - return length(obj); -} - -obj_t *bind_car(obj_t *bind_cons) -{ - return if3(consp(cdr(bind_cons)), - cons(car(bind_cons), car(cdr(bind_cons))), - bind_cons); -} - -obj_t *bind_cdr(obj_t *bind_cons) -{ - return if3(consp(cdr(bind_cons)), - cons(car(bind_cons), cdr(cdr(bind_cons))), - bind_cons); -} - -obj_t *extract_vars(obj_t *output_spec) -{ - list_collect_decl (vars, tai); - - if (consp(output_spec)) { - if (first(output_spec) == var) { - list_collect (tai, second(output_spec)); - } else { - for (; output_spec; output_spec = cdr(output_spec)) - list_collect_nconc(tai, extract_vars(car(output_spec))); - } - } - - return vars; -} - -obj_t *extract_bindings(obj_t *bindings, obj_t *output_spec) -{ - list_collect_decl (bindings_out, tail); - obj_t *var_list = extract_vars(output_spec); - - for (; bindings; bindings = cdr(bindings)) - if (memq(car(car(bindings)), var_list)) - list_collect(tail, car(bindings)); - - return bindings_out; -} - -void do_output_line(obj_t *bindings, obj_t *specline, - obj_t *spec_lineno, FILE *out) -{ - for (; specline; specline = rest(specline)) { - obj_t *elem = first(specline); - - switch (elem ? elem->t.type : 0) { - case CONS: - { - obj_t *directive = first(elem); - - if (directive == var) { - obj_t *str = cat_str(subst_vars(cons(elem, nil), bindings), nil); - if (str == nil) { - yyerrorlf(1, c_num(spec_lineno), "bad substitution: %s", - c_str(symbol_name(second(elem)))); - continue; - } - fputs(c_str(str), out); - } else if (directive == rep) { - obj_t *main_clauses = second(elem); - obj_t *single_clauses = third(elem); - obj_t *first_clauses = fourth(elem); - obj_t *last_clauses = fifth(elem); - obj_t *empty_clauses = sixth(elem); - obj_t *bind_cp = extract_bindings(bindings, elem); - obj_t *max_depth = reduce_left(func_n2(max2), - bind_cp, zero, - chain(list(func_n1(cdr), - func_n1(robust_length), - nao))); - - if (equal(max_depth, zero) && empty_clauses) { - do_output_line(bindings, empty_clauses, spec_lineno, out); - } else if (equal(max_depth, one) && single_clauses) { - obj_t *bind_a = mapcar(func_n1(bind_car), bind_cp); - do_output_line(bind_a, single_clauses, spec_lineno, out); - } else if (!zerop(max_depth)) { - long i; - - for (i = 0; i < c_num(max_depth); i++) { - obj_t *bind_a = mapcar(func_n1(bind_car), bind_cp); - obj_t *bind_d = mapcar(func_n1(bind_cdr), bind_cp); - - if (i == 0 && first_clauses) { - do_output_line(bind_a, first_clauses, spec_lineno, out); - } else if (i == c_num(max_depth) - 1 && last_clauses) { - do_output_line(bind_a, last_clauses, spec_lineno, out); - } else { - do_output_line(bind_a, main_clauses, spec_lineno, out); - } - - bind_cp = bind_d; - } - } - - } else { - yyerrorlf(0, c_num(spec_lineno), "unknown directive: %s", - c_str(symbol_name(directive))); - } - } - break; - case STR: - fputs(c_str(elem), out); - break; - case 0: - break; - default: - yyerrorlf(0, c_num(spec_lineno), "unsupported object in output spec"); - } - } -} - -void do_output(obj_t *bindings, obj_t *specs, FILE *out) -{ - if (equal(specs, null_list)) - return; - - for (; specs; specs = cdr(specs)) { - cons_bind (spec_lineno, specline, first(specs)); - obj_t *first_elem = first(specline); - - if (consp(first_elem)) { - obj_t *sym = first(first_elem); - - if (sym == repeat) { - obj_t *main_clauses = second(first_elem); - obj_t *single_clauses = third(first_elem); - obj_t *first_clauses = fourth(first_elem); - obj_t *last_clauses = fifth(first_elem); - obj_t *empty_clauses = sixth(first_elem); - obj_t *bind_cp = extract_bindings(bindings, first_elem); - obj_t *max_depth = reduce_left(func_n2(max2), - bind_cp, zero, - chain(list(func_n1(cdr), - func_n1(robust_length), - nao))); - - if (equal(max_depth, zero) && empty_clauses) { - do_output(bind_cp, empty_clauses, out); - } else if (equal(max_depth, one) && single_clauses) { - obj_t *bind_a = mapcar(func_n1(bind_car), bind_cp); - do_output(bind_a, single_clauses, out); - } else if (!zerop(max_depth)) { - long i; - - for (i = 0; i < c_num(max_depth); i++) { - obj_t *bind_a = mapcar(func_n1(bind_car), bind_cp); - obj_t *bind_d = mapcar(func_n1(bind_cdr), bind_cp); - - if (i == 0 && first_clauses) { - do_output(bind_a, first_clauses, out); - } else if (i == c_num(max_depth) - 1 && last_clauses) { - do_output(bind_a, last_clauses, out); - } else { - do_output(bind_a, main_clauses, out); - } - - bind_cp = bind_d; - } - } - continue; - } - } - - do_output_line(bindings, specline, spec_lineno, out); - putc('\n', out); - } -} - -obj_t *match_files(obj_t *spec, obj_t *files, - obj_t *bindings, obj_t *first_file_parsed, - obj_t *data_linenum) -{ - obj_t *data = nil; - long data_lineno = 0; - - if (listp(first_file_parsed)) { - data = first_file_parsed; - data_lineno = c_num(data_linenum); - first_file_parsed = nil; - } else if (files) { - obj_t *name = first(files); - fpip_t fp = (errno = 0, complex_open(name, nil)); - - yyerrorf(2, "opening data source %s", c_str(name)); - - if (complex_open_failed(fp)) { - if (errno != 0) - yyerrorf(2, "could not open %s: %s", c_str(name), strerror(errno)); - else - yyerrorf(2, "could not open %s", c_str(name)); - return nil; - } - - if ((data = complex_snarf(fp, name)) != nil) - data_lineno = 1; - } - - for (; spec; spec = rest(spec), data = rest(data), data_lineno++) -repeat_spec_same_data: - { - obj_t *specline = rest(first(spec)); - obj_t *dataline = first(data); - obj_t *spec_linenum = first(first(spec)); - obj_t *first_spec = first(specline); - long spec_lineno = spec_linenum ? c_num(spec_linenum) : 0; - - if (consp(first_spec)) { - obj_t *sym = first(first_spec); - - if (sym == skip) { - obj_t *max = first(rest(first_spec)); - long cmax = nump(max) ? c_num(max) : 0; - long reps = 0; - - if (rest(specline)) - yyerrorlf(1, spec_lineno, "unexpected material after skip directive"); - - if ((spec = rest(spec)) == nil) - break; - - { - uw_block_begin(nil, result); - - while (dataline && (!max || reps++ < cmax)) { - cons_bind (new_bindings, success, - match_files(spec, files, bindings, - data, num(data_lineno))); - - if (success) { - yyerrorlf(2, spec_lineno, "skip matched %s:%ld", - c_str(first(files)), data_lineno); - result = cons(new_bindings, cons(data, num(data_lineno))); - break; - } - - yyerrorlf(2, spec_lineno, "skip didn't match %s:%ld", - c_str(first(files)), data_lineno); - data = rest(data); - data_lineno++; - dataline = first(data); - } - - uw_block_end; - - if (result) - return result; - } - - yyerrorlf(2, spec_lineno, "skip failed"); - return nil; - } else if (sym == trailer) { - if (rest(specline)) - yyerrorlf(1, spec_lineno, "unexpected material after trailer directive"); - if ((spec = rest(spec)) == nil) - break; - - { - cons_bind (new_bindings, success, - match_files(spec, files, bindings, - data, num(data_lineno))); - - if (success) - return cons(new_bindings, cons(data, num(data_lineno))); - return nil; - } - } else if (sym == block) { - obj_t *name = first(rest(first_spec)); - if (rest(specline)) - yyerrorlf(1, spec_lineno, "unexpected material after block directive"); - if ((spec = rest(spec)) == nil) - break; - uw_block_begin(name, result); - result = match_files(spec, files, bindings, data, num(data_lineno)); - uw_block_end; - return result; - } else if (sym == fail || sym == accept) { - obj_t *target = first(rest(first_spec)); - - if (rest(specline)) - yyerrorlf(1, spec_lineno, "unexpected material after %s", - c_str(symbol_name(sym))); - - uw_block_return(target, - if2(sym == accept, - cons(bindings, - if3(data, cons(data, num(data_lineno)), t)))); - if (target) - yyerrorlf(1, spec_lineno, "%s: no block named %s in scope", - c_str(symbol_name(sym)), c_str(symbol_name(target))); - else - yyerrorlf(1, spec_lineno, "%s: not anonymous block in scope", - c_str(symbol_name(sym))); - - return nil; - } else if (sym == next) { - if (rest(first_spec)) - yyerrorlf(0, spec_lineno, "next takes no args"); - - if ((spec = rest(spec)) == nil) - break; - - if (rest(specline)) { - obj_t *sub = subst_vars(rest(specline), bindings); - obj_t *str = cat_str(sub, nil); - if (str == nil) { - yyerrorlf(2, spec_lineno, "bad substitution in next file spec"); - continue; - } - files = cons(str, files); - } else { - files = rest(files); - } - - /* We recursively process the file list, but the new - data position we return to the caller must be in the - original file we we were called with. Hence, we can't - make a straight tail call here. */ - { - cons_bind (new_bindings, success, - match_files(spec, files, bindings, t, nil)); - if (success) - return cons(new_bindings, - if3(data, cons(data, num(data_lineno)), t)); - return nil; - } - } else if (sym == some || sym == all || sym == none || sym == maybe || - sym == cases) - { - obj_t *specs; - obj_t *all_match = t; - obj_t *some_match = nil; - obj_t *max_line = zero; - obj_t *max_data = nil; - - for (specs = rest(first_spec); specs != nil; specs = rest(specs)) - { - obj_t *nested_spec = first(specs); - obj_t *data_linenum = num(data_lineno); - - cons_bind (new_bindings, success, - match_files(nested_spec, files, bindings, - data, data_linenum)); - - if (success) { - bindings = new_bindings; - some_match = t; - - if (success == t) { - max_data = t; - } else if (consp(success) && max_data != t) { - cons_bind (new_data, new_line, success); - if (gt(new_line, max_line)) { - max_line = new_line; - max_data = new_data; - } - } - if (sym == cases) - break; - } else { - all_match = nil; - } - } - - if (sym == all && !all_match) { - yyerrorlf(2, spec_lineno, "all: some clauses didn't match"); - return nil; - } - - if ((sym == some || sym == cases) && !some_match) { - yyerrorlf(2, spec_lineno, "some/cases: no clauses matched"); - return nil; - } - - if (sym == none && some_match) { - yyerrorlf(2, spec_lineno, "none: some clauses matched"); - return nil; - } - - /* No check for maybe, since it always succeeds. */ - - if (consp(max_data)) { - data_lineno = c_num(max_line); - data = max_data; - } else if (max_data == t) { - data = nil; - } - - if ((spec = rest(spec)) == nil) - break; - - goto repeat_spec_same_data; - } else if (sym == collect) { - obj_t *coll_spec = second(first_spec); - obj_t *until_spec = third(first_spec); - obj_t *bindings_coll = nil; - obj_t *iter; - - uw_block_begin(nil, result); - - result = t; - - while (data) { - cons_bind (new_bindings, success, - match_files(coll_spec, files, bindings, - data, num(data_lineno))); - - /* Until clause sees un-collated bindings from collect. */ - if (until_spec) - { - cons_bind (discarded_bindings, success, - match_files(until_spec, files, new_bindings, - data, num(data_lineno))); - - if (success) { - (void) discarded_bindings; - break; - } - } - - if (success) { - yyerrorlf(2, spec_lineno, "collect matched %s:%ld", - c_str(first(files)), data_lineno); - - for (iter = new_bindings; iter && iter != bindings; - iter = cdr(iter)) - { - obj_t *binding = car(iter); - obj_t *existing = assoc(bindings_coll, car(binding)); - - bindings_coll = acons_new(bindings_coll, car(binding), - cons(cdr(binding), cdr(existing))); - } - } - - if (success) { - if (consp(success)) { - yyerrorlf(2, spec_lineno, - "collect advancing from line %ld to %ld", - data_lineno, c_num(cdr(success))); - data = car(success); - data_lineno = c_num(cdr(success)); - } else { - yyerrorlf(2, spec_lineno, "collect consumed entire file"); - data = nil; - break; - } - } else { - data = rest(data); - data_lineno++; - } - } - - uw_block_end; - - if (!result) { - yyerrorlf(2, spec_lineno, "collect explicitly failed"); - return nil; - } - - if (!bindings_coll) - yyerrorlf(2, spec_lineno, "nothing was collected"); - - for (iter = bindings_coll; iter; iter = cdr(iter)) { - obj_t *pair = car(iter); - obj_t *rev = cons(car(pair), nreverse(cdr(pair))); - bindings = cons(rev, bindings); - } - - if ((spec = rest(spec)) == nil) - break; - - goto repeat_spec_same_data; - } else if (sym == flattn) { - obj_t *iter; - - for (iter = rest(first_spec); iter; iter = rest(iter)) { - obj_t *sym = first(iter); - - if (!symbolp(sym)) { - yyerrorlf(1, spec_lineno, "non-symbol in flatten directive"); - continue; - } else { - obj_t *existing = assoc(bindings, sym); - - if (existing) - *cdr_l(existing) = flatten(cdr(existing)); - } - } - - if ((spec = rest(spec)) == nil) - break; - - goto repeat_spec_same_data; - } else if (sym == forget) { - bindings = alist_remove(bindings, rest(first_spec)); - - if ((spec = rest(spec)) == nil) - break; - - goto repeat_spec_same_data; - } else if (sym == mrge) { - obj_t *target = first(rest(first_spec)); - obj_t *args = rest(rest(first_spec)); - obj_t *exists = assoc(bindings, target); - obj_t *merged = nil; - - if (!target || !symbolp(target)) - yyerrorlf(1, spec_lineno, "bad merge directive"); - - if (exists) - yyerrorlf(1, spec_lineno, "merge: symbol %s already bound", - c_str(symbol_name(target))); - - for (; args; args = rest(args)) { - obj_t *other_sym = first(args); - - if (other_sym) { - obj_t *other_lookup = assoc(bindings, other_sym); - - if (!symbolp(other_sym)) - yyerrorlf(1, spec_lineno, "non-symbol in merge directive"); - else if (!other_lookup) - yyerrorlf(1, spec_lineno, "merge: nonexistent symbol %s", - c_str(symbol_name(sym))); - - if (merged) - merged = weird_merge(merged, cdr(other_lookup)); - else - merged = cdr(other_lookup); - } - } - - bindings = acons_new(bindings, target, merged); - - if ((spec = rest(spec)) == nil) - break; - - goto repeat_spec_same_data; - } else if (sym == bind) { - obj_t *args = rest(first_spec); - obj_t *pattern = first(args); - obj_t *form = second(args); - obj_t *val = eval_form(form, bindings); - - if (!val) - yyerrorlf(1, spec_lineno, "bind: unbound variable on right side"); - - bindings = dest_bind(bindings, pattern, cdr(val)); - - if (bindings == t) - return nil; - - if ((spec = rest(spec)) == nil) - break; - - goto repeat_spec_same_data; - } else if (sym == cat) { - obj_t *iter; - - for (iter = rest(first_spec); iter; iter = rest(iter)) { - obj_t *sym = first(iter); - - if (!symbolp(sym)) { - yyerrorlf(1, spec_lineno, "non-symbol in cat directive"); - continue; - } else { - obj_t *existing = assoc(bindings, sym); - obj_t *sep = nil; - - if (rest(specline)) { - obj_t *sub = subst_vars(rest(specline), bindings); - sep = cat_str(sub, nil); - } - - if (existing) - *cdr_l(existing) = cat_str(flatten(cdr(existing)), sep); - } - } - - if ((spec = rest(spec)) == nil) - break; - - goto repeat_spec_same_data; - } else if (sym == output) { - obj_t *specs = second(first_spec); - obj_t *dest_opt = third(first_spec); - obj_t *dest = dest_opt ? cat_str(subst_vars(dest_opt, bindings), nil) - : string(chk_strdup("-")); - fpip_t fp = (errno = 0, complex_open(dest, t)); - - yyerrorf(2, "opening data sink %s", c_str(dest)); - - if (complex_open_failed(fp)) { - if (errno != 0) - yyerrorf(2, "could not open %s: %s", c_str(dest), strerror(errno)); - else - yyerrorf(2, "could not open %s", c_str(dest)); - } else { - do_output(bindings, specs, fp.f); - complex_close(fp); - } - - if ((spec = rest(spec)) == nil) - break; - - goto repeat_spec_same_data; - } else if (sym == define) { - obj_t *args = second(first_spec); - obj_t *body = third(first_spec); - obj_t *name = first(args); - obj_t *params = second(args); - - if (rest(specline)) - yyerrorlf(1, spec_lineno, "unexpected material after define"); - - uw_set_func(name, cons(params, body)); - - if ((spec = rest(spec)) == nil) - break; - - goto repeat_spec_same_data; - } else { - obj_t *func = uw_get_func(sym); - - if (func) { - obj_t *args = rest(first_spec); - obj_t *params = car(func); - obj_t *body = cdr(func); - obj_t *piter, *aiter; - obj_t *bindings_cp = copy_alist(bindings); - - if (!equal(length(args), length(params))) { - yyerrorlf(1, spec_lineno, "function %s takes %ld argument(s)", - c_str(sym), c_num(length(params))); - return nil; - } - - for (piter = params, aiter = args; piter; - piter = cdr(piter), aiter = cdr(aiter)) - { - obj_t *param = car(piter); - obj_t *arg = car(aiter); - - if (symbolp(arg)) { - obj_t *existing = assoc(bindings, arg); - if (existing) { - bindings_cp = acons_new(bindings_cp, - param, - cdr(existing)); - } else { - bindings_cp = alist_remove(bindings_cp, cons(param, nil)); - } - } else { - bindings_cp = acons_new(bindings_cp, param, arg); - } - } - - { - uw_block_begin(nil, result); - uw_env_begin; - result = match_files(body, files, bindings_cp, - data, num(data_lineno)); - uw_env_end; - uw_block_end; - - if (!result) { - yyerrorlf(2, spec_lineno, "function failed"); - return nil; - } - - { - cons_bind (new_bindings, success, result); - - for (piter = params, aiter = args; piter; - piter = cdr(piter), aiter = cdr(aiter)) - { - obj_t *param = car(piter); - obj_t *arg = car(aiter); - - if (symbolp(arg)) { - obj_t *newbind = assoc(new_bindings, param); - if (newbind) { - bindings = dest_bind(bindings, arg, cdr(newbind)); - if (bindings == t) - return nil; - } - } - } - - if (consp(success)) { - yyerrorlf(2, spec_lineno, - "function matched; advancing from line %ld to %ld", - data_lineno, c_num(cdr(success))); - data = car(success); - data_lineno = c_num(cdr(success)); - } else { - yyerrorlf(2, spec_lineno, "function consumed entire file"); - data = nil; - } - } - } - - if ((spec = rest(spec)) == nil) - break; - - goto repeat_spec_same_data; - } - } - } - - if (dataline == nil) - return nil; - - { - cons_bind (new_bindings, success, - match_line(bindings, specline, dataline, zero, - spec_linenum, num(data_lineno), first(files))); - - if (nump(success) && c_num(success) < c_num(length_str(dataline))) { - yyerrorf(2, "spec only matches line to position %ld: %s", - c_num(success), c_str(dataline)); - return nil; - } - - if (!success) - return nil; - - bindings = new_bindings; - } - } - - return cons(bindings, if3(data, cons(data, num(data_lineno)), t)); -} - -int extract(obj_t *spec, obj_t *files, obj_t *predefined_bindings) -{ - cons_bind (bindings, success, match_files(spec, files, predefined_bindings, - t, nil)); - - if (!output_produced) { - if (!opt_nobindings) { - if (bindings) { - bindings = nreverse(bindings); - dump_bindings(bindings); - } - } - - if (!success) - puts("false"); - } - - return success ? 0 : EXIT_FAILURE; -} diff --git a/gc.c b/gc.c index 6bd28b16..a9c74c6f 100644 --- a/gc.c +++ b/gc.c @@ -31,6 +31,8 @@ #include #include #include "lib.h" +#include "stream.h" +#include "txr.h" #include "gc.h" #define PROT_STACK_SIZE 1024 @@ -102,13 +104,15 @@ static void more() heap_t *heap = (heap_t *) chk_malloc(sizeof *heap); obj_t *block = heap->block, *end = heap->block + HEAP_SIZE; + assert (free_list == 0); + while (block < end) { block->t.next = free_list; block->t.type = FREE; free_list = block++; } - free_tail = &block[-1].t.next; + free_tail = &heap->block[0].t.next; heap->next = heap_list; heap_list = heap; @@ -161,13 +165,11 @@ static void finalize(obj_t *obj) obj->v.vec = 0; } break; - case STREAM: - stream_close(obj); - break; case LCONS: break; case COBJ: - obj->co.ops->destroy(obj); + if (obj->co.ops->destroy) + obj->co.ops->destroy(obj); break; default: assert (0 && "corrupt type field"); @@ -224,9 +226,6 @@ static void mark_obj(obj_t *obj) mark_obj(obj->v.vec[i]); } break; - case STREAM: - mark_obj(obj->sm.label_pushback); - break; case LCONS: mark_obj(obj->lc.car); mark_obj(obj->lc.cdr); @@ -234,6 +233,8 @@ static void mark_obj(obj_t *obj) break; case COBJ: mark_obj(obj->co.cls); + if (obj->co.ops->mark) + obj->co.ops->mark(obj); break; default: assert (0 && "corrupt type field"); @@ -253,22 +254,22 @@ static int in_heap(obj_t *ptr) return 0; } -static void mark_mem_region(obj_t **bottom, obj_t **top) +static void mark_mem_region(obj_t **low, obj_t **high) { - if (bottom > top) { - obj_t **tmp = top; - top = bottom; - bottom = tmp; + if (low > high) { + obj_t **tmp = high; + high = low; + low = tmp; } - while (bottom < top) { - obj_t *maybe_obj = *bottom; + while (low < high) { + obj_t *maybe_obj = *low; if (in_heap(maybe_obj)) { type_t t = maybe_obj->t.type; if ((t & FREE) == 0) mark_obj(maybe_obj); } - bottom++; + low++; } } @@ -300,6 +301,9 @@ static void sweep(void) block < end; block++) { + if ((block->t.type & (REACHABLE | FREE)) == (REACHABLE | FREE)) + abort(); + if (block->t.type & REACHABLE) { block->t.type &= ~REACHABLE; continue; @@ -310,7 +314,7 @@ static void sweep(void) if (0 && dbg) { fprintf(stderr, "%s: finalizing: ", progname); - obj_print(block, stderr); + obj_print(block, std_error); putc('\n', stderr); } finalize(block); @@ -356,6 +360,11 @@ void gc_init(obj_t **stack_bottom) gc_stack_bottom = stack_bottom; } +void gc_mark(obj_t *obj) +{ + mark_obj(obj); +} + /* * Useful functions for gdb'ing. */ diff --git a/gc.h b/gc.h index 4fe8033f..664b079c 100644 --- a/gc.h +++ b/gc.h @@ -23,9 +23,6 @@ * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. */ - -extern int opt_gc_debug; - void gc_init(obj_t **stack_bottom); obj_t *prot1(obj_t **loc); void rel1(obj_t **loc); @@ -34,3 +31,4 @@ void release(obj_t **, ...); obj_t *make_obj(void); void gc(void); int gc_state(int); +void gc_mark(obj_t *); diff --git a/lib.c b/lib.c index bce4d088..afad3fe2 100644 --- a/lib.c +++ b/lib.c @@ -36,6 +36,7 @@ #include "lib.h" #include "gc.h" #include "unwind.h" +#include "stream.h" #define max(a, b) ((a) > (b) ? (a) : (b)) #define min(a, b) ((a) < (b) ? (a) : (b)) @@ -43,12 +44,15 @@ obj_t *interned_syms; obj_t *null, *t, *cons_t, *str_t, *chr_t, *num_t, *sym_t, *fun_t, *vec_t; -obj_t *stream_t, *lcons_t, *var, *regex, *set, *cset, *wild, *oneplus; -obj_t *zeroplus, *optional, *compound, *or; +obj_t *stream_t, *lcons_t, *cobj_t, *var, *regex, *set, *cset, *wild, *oneplus; +obj_t *zeroplus, *optional, *compound, *or, *quasi; obj_t *skip, *trailer, *block, *next, *fail, *accept; obj_t *all, *some, *none, *maybe, *cases, *collect, *until, *coll; obj_t *define, *output, *single, *frst, *lst, *empty, *repeat, *rep; -obj_t *flattn, *forget, *mrge, *bind, *cat, *dir; +obj_t *flattn, *forget, *local, *mrge, *bind, *cat, *dir; +obj_t *try, *catch, *finally, *nothrow; +obj_t *error, *type_error, *internal_err, *numeric_err, *range_err; +obj_t *query_error, *file_error; obj_t *zero, *one, *two, *negone, *maxint, *minint; obj_t *null_string; @@ -59,6 +63,8 @@ obj_t *identity_f; obj_t *equal_f; const char *progname; +obj_t *prog_string; + void *(*oom_realloc)(void *, size_t); @@ -75,11 +81,9 @@ static obj_t *identity_tramp(obj_t *env, obj_t *obj) static obj_t *equal_tramp(obj_t *env, obj_t *, obj_t *); -obj_t *typeof(obj_t *obj) +static obj_t *code2type(int code) { - if (obj == nil) - return null; - switch (obj->t.type) { + switch (code) { case CONS: return cons_t; case STR: return str_t; case CHR: return chr_t; @@ -87,11 +91,39 @@ obj_t *typeof(obj_t *obj) case SYM: return sym_t; case FUN: return fun_t; case VEC: return vec_t; - case STREAM: return stream_t; case LCONS: return lcons_t; - case COBJ: return obj->co.cls; + case COBJ: return cobj_t; } - assert (0 && "corrupt type field"); + return nil; +} + +obj_t *typeof(obj_t *obj) +{ + if (obj == nil) { + return null; + } else if (obj->t.type == COBJ) { + return obj->co.cls; + } else { + obj_t *type = code2type(obj->t.type); + if (!type) + internal_error("corrupt type field"); + return type; + } +} + +obj_t *type_check(obj_t *obj, int type) +{ + if (!obj || obj->t.type != type) + type_mismatch("~s is not of type ~s", obj, code2type(type), nao); + return t; +} + +obj_t *type_check2(obj_t *obj, int t1, int t2) +{ + if (!obj || (obj->t.type != t1 && obj->t.type != t2)) + type_mismatch("~s is not of type ~s or ~s", obj, + code2type(t1), code2type(t2), nao); + return t; } obj_t *car(obj_t *cons) @@ -110,7 +142,7 @@ obj_t *car(obj_t *cons) return cons->lc.car; } default: - assert (0 && "corrupt type field"); + type_mismatch("~s is not a cons", cons, nao); } } @@ -130,7 +162,7 @@ obj_t *cdr(obj_t *cons) return cons->lc.cdr; } default: - assert (0 && "corrupt type field"); + type_mismatch("~s is not a cons", cons, nao); } } @@ -143,7 +175,7 @@ obj_t **car_l(obj_t *cons) funcall1(cons->lc.func, cons); return &cons->lc.car; default: - assert (0 && "corrupt type field"); + type_mismatch("~s is not a cons", cons, nao); } } @@ -156,7 +188,7 @@ obj_t **cdr_l(obj_t *cons) funcall1(cons->lc.func, cons); return &cons->lc.cdr; default: - assert (0 && "corrupt type field"); + type_mismatch("~s is not a cons", cons, nao); } } @@ -202,6 +234,18 @@ obj_t **tail(obj_t *cons) return cdr_l(cons); } +obj_t *pop(obj_t **plist) +{ + obj_t *ret = car(*plist); + *plist = cdr(*plist); + return ret; +} + +obj_t *push(obj_t *val, obj_t **plist) +{ + return *plist = cons(val, *plist); +} + obj_t *copy_list(obj_t *list) { list_collect_decl (out, tail); @@ -407,16 +451,13 @@ obj_t *equal(obj_t *left, obj_t *right) return t; } return nil; - case STREAM: - return nil; /* Different stream objects never equal. */ case COBJ: if (right->t.type == COBJ) return left->co.ops->equal(left, right); return nil; } - assert (0 && "notreached"); - return nil; + internal_error("unhandled case in equal function"); } static obj_t *equal_tramp(obj_t *env, obj_t *left, obj_t *right) @@ -473,7 +514,7 @@ obj_t *list(obj_t *first, ...) do { *ptr++ = next; if (ptr == array + 32) - abort(); + internal_error("runaway arguments in list function"); next = va_arg(vl, obj_t *); } while (next != nao); @@ -536,7 +577,7 @@ obj_t *num(long val) long c_num(obj_t *num) { - assert (num && num->t.type == NUM); + type_check(num, NUM); return num->n.val; } @@ -550,8 +591,8 @@ obj_t *plus(obj_t *anum, obj_t *bnum) long a = c_num(anum); long b = c_num(bnum); - assert (a <= 0 || b <= 0 || LONG_MAX - b >= a); - assert (a >= 0 || b >= 0 || LONG_MIN - b >= a); + numeric_assert (a <= 0 || b <= 0 || LONG_MAX - b >= a); + numeric_assert (a >= 0 || b >= 0 || LONG_MIN - b >= a); return num(a + b); } @@ -561,9 +602,9 @@ obj_t *minus(obj_t *anum, obj_t *bnum) long a = c_num(anum); long b = c_num(bnum); - assert (b != LONG_MIN || LONG_MIN == -LONG_MAX); - assert (a <= 0 || -b <= 0 || LONG_MAX + b >= a); - assert (a >= 0 || -b >= 0 || LONG_MIN + b >= a); + numeric_assert (b != LONG_MIN || LONG_MIN == -LONG_MAX); + numeric_assert (a <= 0 || -b <= 0 || LONG_MAX + b >= a); + numeric_assert (a >= 0 || -b >= 0 || LONG_MIN + b >= a); return num(a - b); } @@ -659,7 +700,7 @@ obj_t *stringp(obj_t *str) obj_t *length_str(obj_t *str) { - assert (str && str->t.type == STR); + type_check (str, STR); if (!str->st.len) str->st.len = num(strlen(str->st.str)); return str->st.len; @@ -667,7 +708,7 @@ obj_t *length_str(obj_t *str) const char *c_str(obj_t *obj) { - assert (obj); + type_check2(obj, STR, SYM); switch (obj->t.type) { case STR: @@ -721,7 +762,7 @@ obj_t *sub_str(obj_t *str_in, obj_t *from_num, obj_t *to_num) { const char *str = c_str(str_in); size_t len = c_num(length_str(str_in)); - long from = c_num(from_num); + long from = from_num ? c_num(from_num) : 0; long to = to_num ? c_num(to_num) : len; if (to < 0) @@ -838,7 +879,7 @@ obj_t *chrp(obj_t *chr) int c_chr(obj_t *chr) { - assert (chr && chr->t.type == CHR); + type_check(chr, CHR); return chr->ch.ch; } @@ -848,7 +889,7 @@ obj_t *chr_str(obj_t *str, obj_t *index) long i = c_num(index); const char *s = c_str(str); - assert (i < l); + bug_unless (i < l); return chr(s[i]); } @@ -859,17 +900,18 @@ obj_t *chr_str_set(obj_t *str, obj_t *index, obj_t *chr) long i = c_num(index); char *s = str->st.str; - assert (i < l); + bug_unless (i < l); s[i] = c_chr(chr); return chr; } -obj_t *sym_name(obj_t *sym) +obj_t *symbol_name(obj_t *sym) { - assert (sym && sym->t.type == SYM); - return sym->s.name; + if (sym) + type_check(sym, SYM); + return sym ? sym->s.name : nil_string; } obj_t *make_sym(obj_t *name) @@ -887,7 +929,7 @@ obj_t *intern(obj_t *str) for (iter = interned_syms; iter != nil; iter = cdr(iter)) { obj_t *sym = car(iter); - if (equal(sym_name(sym), str)) + if (equal(symbol_name(sym), str)) return sym; } @@ -900,12 +942,6 @@ obj_t *symbolp(obj_t *sym) return (sym == nil || sym->s.type == SYM) ? t : nil; } -obj_t *symbol_name(obj_t *sym) -{ - assert (sym == nil || sym->t.type == SYM); - return sym ? sym->s.name : nil_string; -} - obj_t *func_f0(obj_t *env, obj_t *(*fun)(obj_t *)) { obj_t *obj = make_obj(); @@ -1010,8 +1046,10 @@ obj_t *apply(obj_t *fun, obj_t *arglist) { obj_t *arg[4], **p = arg; - assert (fun && fun->f.type == FUN); - assert (arglist == nil || consp(arglist)); + type_check (fun, FUN); + + type_assert (listp(arglist), + ("apply arglist ~s is not a list", arglist, nao)); *p++ = car(arglist); arglist = cdr(arglist); *p++ = car(arglist); arglist = cdr(arglist); @@ -1040,15 +1078,15 @@ obj_t *apply(obj_t *fun, obj_t *arglist) case N4: return fun->f.f.n4(arg[0], arg[1], arg[2], arg[3]); case FINTERP: - abort(); + internal_error("unsupported function type"); } - assert (0 && "bad functype"); + internal_error("corrupt function type field"); } obj_t *funcall(obj_t *fun) { - assert (fun && fun->f.type == FUN); + type_check(fun, FUN); switch (fun->f.functype) { case F0: @@ -1062,7 +1100,7 @@ obj_t *funcall(obj_t *fun) obj_t *funcall1(obj_t *fun, obj_t *arg) { - assert (fun && fun->f.type == FUN); + type_check(fun, FUN); switch (fun->f.functype) { case F1: @@ -1076,7 +1114,7 @@ obj_t *funcall1(obj_t *fun, obj_t *arg) obj_t *funcall2(obj_t *fun, obj_t *arg1, obj_t *arg2) { - assert (fun && fun->f.type == FUN); + type_check(fun, FUN); switch (fun->f.functype) { case F2: @@ -1147,13 +1185,13 @@ obj_t *vector(obj_t *alloc) obj_t *vec_get_fill(obj_t *vec) { - assert (vec && vec->v.type == VEC); + type_check(vec, VEC); return vec->v.vec[vec_fill]; } obj_t *vec_set_fill(obj_t *vec, obj_t *fill) { - assert (vec && vec->v.type == VEC); + type_check(vec, VEC); { long new_fill = c_num(fill); @@ -1185,8 +1223,8 @@ obj_t *vec_set_fill(obj_t *vec, obj_t *fill) obj_t **vecref_l(obj_t *vec, obj_t *ind) { - assert (vec && vec->v.type == VEC); - assert (c_num(ind) < c_num(vec->v.vec[vec_fill])); + type_check(vec, VEC); + range_bug_unless (c_num(ind) < c_num(vec->v.vec[vec_fill])); return vec->v.vec + c_num(ind); } @@ -1198,160 +1236,6 @@ obj_t *vec_push(obj_t *vec, obj_t *item) return fill; } - -static obj_t *stdio_line_read(struct stream *sm) -{ - if (sm->handle == 0) { - return nil; - } else { - char *line = snarf_line((FILE *) sm->handle); - - if (!line) - return nil; - - return string(line); - } -} - -static obj_t *stdio_line_write(struct stream *sm, obj_t *obj) -{ - assert (obj->t.type == STR); - if (sm->handle == 0) - return nil; - if (fputs(c_str(obj), (FILE *) sm->handle) == EOF) - return nil; - if (putc('\n', (FILE *) sm->handle) == EOF) - return nil; - return t; -} - -static obj_t *stdio_close(struct stream *sm) -{ - FILE *f = (FILE *) sm->handle; - - if (f != 0 && f != stdin && f != stdout) { - fclose((FILE *) sm->handle); - sm->handle = 0; - return t; - } - return nil; -} - -static struct stream_ops stdio_line_stream_ops = { - stdio_line_read, stdio_line_write, stdio_close -}; - -obj_t *stdio_line_stream(FILE *f, obj_t *label) -{ - obj_t *sm = make_obj(); - sm->sm.type = STREAM; - sm->sm.handle = f; - sm->sm.ops = &stdio_line_stream_ops; - sm->sm.label_pushback = label; - assert (atom(label)); - return sm; -} - -static obj_t *pipe_close(struct stream *sm) -{ - if (sm->handle != 0) { - pclose((FILE *) sm->handle); - sm->handle = 0; - return t; - } - return nil; -} - -static struct stream_ops pipe_line_stream_ops = { - stdio_line_read, stdio_line_write, pipe_close -}; - -obj_t *pipe_line_stream(FILE *f, obj_t *label) -{ - obj_t *sm = make_obj(); - sm->sm.type = STREAM; - sm->sm.handle = f; - sm->sm.ops = &pipe_line_stream_ops; - sm->sm.label_pushback = label; - assert (atom(label)); - return sm; -} - -obj_t *dirent_read(struct stream *sm) -{ - if (sm->handle == 0) { - return nil; - } else { - for (;;) { - struct dirent *e = readdir(sm->handle); - if (!e) - return nil; - if (!strcmp(e->d_name, ".") || !strcmp(e->d_name, "..")) - continue; - return string(chk_strdup(e->d_name)); - } - } -} - -obj_t *dirent_close(struct stream *sm) -{ - if (sm->handle != 0) { - closedir((DIR *) sm->handle); - sm->handle = 0; - return t; - } - - return nil; -} - -static struct stream_ops dirent_stream_ops = { - dirent_read, 0, dirent_close -}; - -obj_t *dirent_stream(DIR *d, obj_t *label) -{ - obj_t *sm = make_obj(); - sm->sm.type = STREAM; - sm->sm.handle = d; - sm->sm.ops = &dirent_stream_ops; - sm->sm.label_pushback = label; - assert (atom(label)); - return sm; -} - -obj_t *stream_get(obj_t *sm) -{ - assert (sm->sm.type == STREAM); - - if (consp(sm->sm.label_pushback)) { - obj_t *ret = car(sm->sm.label_pushback); - sm->sm.label_pushback = cdr(sm->sm.label_pushback); - return ret; - } - - return sm->sm.ops->read(&sm->sm); -} - -obj_t *stream_pushback(obj_t *sm, obj_t *obj) -{ - assert (sm->sm.type == STREAM); - sm->sm.label_pushback = cons(obj, sm->sm.label_pushback); - return obj; -} - -obj_t *stream_put(obj_t *sm, obj_t *obj) -{ - assert (sm->sm.type == STREAM); - return sm->sm.ops->write(&sm->sm, obj); -} - -obj_t *stream_close(obj_t *sm) -{ - assert (sm->sm.type == STREAM); - return sm->sm.ops->close(&sm->sm); -} - - static obj_t *make_lazycons(obj_t *func) { obj_t *obj = make_obj(); @@ -1361,36 +1245,36 @@ static obj_t *make_lazycons(obj_t *func) return obj; } -static obj_t *lazy_stream_func(obj_t *stream, obj_t *lcons) +static obj_t *lazy_stream_func(obj_t *env, obj_t *lcons) { - obj_t *next = stream_get(stream); - obj_t *ahead = stream_get(stream); + obj_t *stream = car(env); + obj_t *next = cdr(env) ? pop(cdr_l(env)) : get_line(stream); + obj_t *ahead = get_line(stream); lcons->lc.car = next; lcons->lc.cdr = if2(ahead, make_lazycons(lcons->lc.func)); lcons->lc.func = nil; if (!next || !ahead) - stream_close(stream); + close_stream(stream); if (ahead) - stream_pushback(stream, ahead); + push(ahead, cdr_l(env)); return next; } obj_t *lazy_stream_cons(obj_t *stream) { - obj_t *first = stream_get(stream); + obj_t *first = get_line(stream); if (!first) { - stream_close(stream); + close_stream(stream); return nil; } - stream_pushback(stream, first); - - return make_lazycons(func_f1(stream, lazy_stream_func)); + return make_lazycons(func_f1(cons(stream, cons(first, nil)), + lazy_stream_func)); } obj_t *cobj(void *handle, obj_t *cls_sym, struct cobj_ops *ops) @@ -1403,11 +1287,11 @@ obj_t *cobj(void *handle, obj_t *cls_sym, struct cobj_ops *ops) return obj; } -void cobj_print_op(obj_t *obj, FILE *out) +void cobj_print_op(obj_t *obj, obj_t *out) { - fprintf(out, "#<"); + put_cstring(out, "#<"); obj_print(obj->co.cls, out); - fprintf(out, ": %p>", obj->co.handle); + cformat(out, ": %p>", obj->co.handle); } obj_t *assoc(obj_t *list, obj_t *key) @@ -1567,8 +1451,6 @@ obj_t *sort(obj_t *list, obj_t *lessfun, obj_t *keyfun) static void obj_init(void) { - int gc_save = gc_state(0); - /* * No need to GC-protect the convenience variables which hold the interned * symbols, because the interned_syms list holds a reference to all the @@ -1579,7 +1461,9 @@ static void obj_init(void) &two, &negone, &maxint, &minint, &null_string, &nil_string, &null_list, &equal_f, - &identity_f, 0); + &identity_f, &prog_string, 0); + + nil_string = string(strdup("nil")); null = intern(string(strdup("null"))); t = intern(string(strdup("t"))); @@ -1592,8 +1476,9 @@ static void obj_init(void) vec_t = intern(string(strdup("vec"))); stream_t = intern(string(strdup("stream"))); lcons_t = intern(string(strdup("lcons"))); - var = intern(string(strdup("var"))); - regex = intern(string(strdup("regex"))); + cobj_t = intern(string(strdup("cobj"))); + var = intern(string(strdup("$var"))); + regex = intern(string(strdup("$regex"))); set = intern(string(strdup("set"))); cset = intern(string(strdup("cset"))); wild = intern(string(strdup("wild"))); @@ -1602,6 +1487,7 @@ static void obj_init(void) optional = intern(string(strdup("?"))); compound = intern(string(strdup("compound"))); or = intern(string(strdup("or"))); + quasi = intern(string(strdup("$quasi"))); skip = intern(string(strdup("skip"))); trailer = intern(string(strdup("trailer"))); block = intern(string(strdup("block"))); @@ -1626,10 +1512,24 @@ static void obj_init(void) rep = intern(string(strdup("rep"))); flattn = intern(string(strdup("flatten"))); forget = intern(string(strdup("forget"))); + local = intern(string(strdup("local"))); mrge = intern(string(strdup("merge"))); bind = intern(string(strdup("bind"))); cat = intern(string(strdup("cat"))); dir = intern(string(strdup("dir"))); + try = intern(string(strdup("try"))); + catch = intern(string(strdup("catch"))); + finally = intern(string(strdup("finally"))); + nothrow = intern(string(strdup("nothrow"))); + error = intern(string(strdup("error"))); + type_error = intern(string(strdup("type_error"))); + internal_err = intern(string(strdup("internal_error"))); + numeric_err = intern(string(strdup("numeric_error"))); + range_err = intern(string(strdup("range_error"))); + query_error = intern(string(strdup("query_error"))); + file_error = intern(string(strdup("file_error"))); + + interned_syms = cons(nil, interned_syms); zero = num(0); one = num(1); @@ -1639,20 +1539,18 @@ static void obj_init(void) minint = num(LONG_MIN); null_string = string(strdup("")); - nil_string = string(strdup("NIL")); null_list = cons(nil, nil); equal_f = func_f2(nil, equal_tramp); identity_f = func_f1(nil, identity_tramp); - - gc_state(gc_save); + prog_string = string(strdup(progname)); } -void obj_print(obj_t *obj, FILE *out) +void obj_print(obj_t *obj, obj_t *out) { if (obj == nil) { - fputs("nil", out); + put_cstring(out, "nil"); return; } @@ -1661,108 +1559,161 @@ void obj_print(obj_t *obj, FILE *out) case LCONS: { obj_t *iter; - putc('(', out); + put_cchar(out, '('); for (iter = obj; consp(iter); iter = cdr(iter)) { obj_print(car(iter), out); if (nullp(cdr(iter))) { - putc(')', out); + put_cchar(out, ')'); } else if (consp(cdr(iter))) { - putc(' ', out); + put_cchar(out, ' '); } else { - fputs(" . ", out); + put_cstring(out, " . "); obj_print(cdr(iter), out); - putc(')', out); + put_cchar(out, ')'); } } } - break; + return; case STR: { const char *ptr; - putc('"', out); + put_cchar(out, '"'); for (ptr = obj->st.str; *ptr; ptr++) { switch (*ptr) { - case '\a': fputs("\\a", out); break; - case '\b': fputs("\\b", out); break; - case '\t': fputs("\\t", out); break; - case '\n': fputs("\\n", out); break; - case '\v': fputs("\\v", out); break; - case '\f': fputs("\\f", out); break; - case '\r': fputs("\\r", out); break; - case '"': fputs("\\\"", out); break; - case '\\': fputs("\\\\", out); break; - case 27: fputs("\\e", out); break; + case '\a': put_cstring(out, "\\a"); break; + case '\b': put_cstring(out, "\\b"); break; + case '\t': put_cstring(out, "\\t"); break; + case '\n': put_cstring(out, "\\n"); break; + case '\v': put_cstring(out, "\\v"); break; + case '\f': put_cstring(out, "\\f"); break; + case '\r': put_cstring(out, "\\r"); break; + case '"': put_cstring(out, "\\\""); break; + case '\\': put_cstring(out, "\\\\"); break; + case 27: put_cstring(out, "\\e"); break; default: if (isprint(*ptr)) - putc(*ptr, out); + put_cchar(out, *ptr); else - fprintf(out, "\\%03o", (int) *ptr); + cformat(out, "\\%03o", (int) *ptr); } } - putc('"', out); + put_cchar(out, '"'); } - break; + return; case CHR: { int ch = obj->ch.ch; - putc('\'', out); + put_cchar(out, '\''); switch (ch) { - case '\a': fputs("\\a", out); break; - case '\b': fputs("\\b", out); break; - case '\t': fputs("\\t", out); break; - case '\n': fputs("\\n", out); break; - case '\v': fputs("\\v", out); break; - case '\f': fputs("\\f", out); break; - case '\r': fputs("\\r", out); break; - case '"': fputs("\\\"", out); break; - case '\\': fputs("\\\\", out); break; - case 27: fputs("\\e", out); break; + case '\a': put_cstring(out, "\\a"); break; + case '\b': put_cstring(out, "\\b"); break; + case '\t': put_cstring(out, "\\t"); break; + case '\n': put_cstring(out, "\\n"); break; + case '\v': put_cstring(out, "\\v"); break; + case '\f': put_cstring(out, "\\f"); break; + case '\r': put_cstring(out, "\\r"); break; + case '"': put_cstring(out, "\\\""); break; + case '\\': put_cstring(out, "\\\\"); break; + case 27: put_cstring(out, "\\e"); break; default: if (isprint(ch)) - putc(ch, out); + put_cchar(out, ch); else - fprintf(out, "\\%03o", ch); + cformat(out, "\\%03o", ch); } - putc('\'', out); + put_cchar(out, '\''); } - break; + return; case NUM: - fprintf(out, "%ld", c_num(obj)); - break; + cformat(out, "%ld", c_num(obj)); + return; case SYM: - fputs(c_str(symbol_name(obj)), out); - break; + put_string(out, symbol_name(obj)); + return; case FUN: - fprintf(out, "#", (int) obj->f.functype); - break; + cformat(out, "#", (int) obj->f.functype); + return; case VEC: { long i, fill = c_num(obj->v.vec[vec_fill]); - fputs("#(", out); + put_cstring(out, "#("); for (i = 0; i < fill; i++) { obj_print(obj->v.vec[i], out); if (i < fill - 1) - putc(' ', out); + put_cchar(out, ' '); } - putc(')', out); + put_cchar(out, ')'); } - break; - case STREAM: - fprintf(out, "#co.ops->print(obj, out); + return; + } + + cformat(out, "#", (void *) obj); +} + +void obj_pprint(obj_t *obj, obj_t *out) +{ + if (obj == nil) { + put_cstring(out, "nil"); + return; + } + + switch (obj->t.type) { + case CONS: + case LCONS: { obj_t *iter; - /* skip stream pushback items to find label */ - for (iter = obj->sm.label_pushback; consp(iter); iter = cdr(iter)) - ; - obj_print(iter, out); + put_cchar(out, '('); + for (iter = obj; consp(iter); iter = cdr(iter)) { + obj_pprint(car(iter), out); + if (nullp(cdr(iter))) { + put_cchar(out, ')'); + } else if (consp(cdr(iter))) { + put_cchar(out, ' '); + } else { + put_cstring(out, " . "); + obj_pprint(cdr(iter), out); + put_cchar(out, ')'); + } + } } - fprintf(out, ", %p>", (void *) obj->sm.handle); - break; + return; + case STR: + put_string(out, obj); + return; + case CHR: + put_char(out, obj); + return; + case NUM: + cformat(out, "%ld", c_num(obj)); + return; + case SYM: + put_string(out, symbol_name(obj)); + return; + case FUN: + cformat(out, "#", (int) obj->f.functype); + return; + case VEC: + { + long i, fill = c_num(obj->v.vec[vec_fill]); + put_cstring(out, "#("); + for (i = 0; i < fill; i++) { + obj_pprint(obj->v.vec[i], out); + if (i < fill - 1) + put_cchar(out, ' '); + } + put_cchar(out, ')'); + } + return; case COBJ: obj->co.ops->print(obj, out); - break; + return; } + + cformat(out, "#", (void *) obj); } void init(const char *pn, void *(*oom)(void *, size_t), @@ -1771,6 +1722,7 @@ void init(const char *pn, void *(*oom)(void *, size_t), int growsdown; obj_t *local_bottom = nil; progname = pn; + int gc_save = gc_state(0); /* If the local_bottom variable has a smaller address than either of the two possible top variables from @@ -1785,14 +1737,17 @@ void init(const char *pn, void *(*oom)(void *, size_t), ? max(maybe_bottom_0, maybe_bottom_1) : min(maybe_bottom_0, maybe_bottom_1)); - uw_init(); obj_init(); + uw_init(); + stream_init(); + + gc_state(gc_save); } -void dump(obj_t *obj, FILE *out) +void dump(obj_t *obj, obj_t *out) { obj_print(obj, out); - putc('\n', out); + put_cchar(out, '\n'); } /* @@ -1802,48 +1757,16 @@ void dump(obj_t *obj, FILE *out) */ void d(obj_t *obj) { - dump(obj, stdout); -} - -char *snarf_line(FILE *in) -{ - const size_t min_size = 512; - size_t size = 0; - size_t fill = 0; - char *buf = 0; - - for (;;) { - int ch = getc(in); - - if (ch == EOF && buf == 0) - break; - - if (fill >= size) { - size_t newsize = size ? size * 2 : min_size; - buf = chk_realloc(buf, newsize); - size = newsize; - } - - if (ch == '\n' || ch == EOF) { - buf[fill++] = 0; - break; - } - buf[fill++] = ch; - } - - if (buf) - buf = chk_realloc(buf, fill); - - return buf; + dump(obj, std_output); } -obj_t *snarf(FILE *in) +obj_t *snarf(obj_t *in) { list_collect_decl (list, iter); - char *str; + obj_t *str; - while ((str = snarf_line(in)) != 0) - list_collect (iter, string(str)); + while ((str = get_line(in)) != 0) + list_collect (iter, str); return list; } diff --git a/lib.h b/lib.h index 20cb2a77..1ecb577d 100644 --- a/lib.h +++ b/lib.h @@ -25,7 +25,7 @@ */ typedef enum type { - CONS = 1, STR, CHR, NUM, SYM, FUN, VEC, STREAM, LCONS, COBJ + CONS = 1, STR, CHR, NUM, SYM, FUN, VEC, LCONS, COBJ } type_t; typedef enum functype @@ -99,19 +99,6 @@ struct vec { obj_t **vec; }; -struct stream { - type_t type; - void *handle; - struct stream_ops *ops; - obj_t *label_pushback; /* label-terminated pushback stack */ -}; - -struct stream_ops { - obj_t *(*read)(struct stream *); - obj_t *(*write)(struct stream *, obj_t *); - obj_t *(*close)(struct stream *); -}; - /* * Lazy cons. When initially constructed, acts as a promise. The car and cdr * cache pointers are nil, and func points to a function. The job of the @@ -135,8 +122,9 @@ struct cobj { struct cobj_ops { obj_t *(*equal)(obj_t *self, obj_t *other); - void (*print)(obj_t *self, FILE *); + void (*print)(obj_t *self, obj_t *stream); void (*destroy)(obj_t *self); + void (*mark)(obj_t *self); }; union obj { @@ -148,7 +136,6 @@ union obj { struct sym s; struct func f; struct vec v; - struct stream sm; struct lazy_cons lc; struct cobj co; }; @@ -157,24 +144,31 @@ extern obj_t *interned_syms; extern obj_t *t, *cons_t, *str_t, *chr_t, *num_t, *sym_t, *fun_t, *vec_t; extern obj_t *stream_t, *lcons_t, *var, *regex, *set, *cset, *wild, *oneplus; -extern obj_t *zeroplus, *optional, *compound, *or; +extern obj_t *zeroplus, *optional, *compound, *or, *quasi; extern obj_t *skip, *trailer, *block, *next, *fail, *accept; extern obj_t *all, *some, *none, *maybe, *cases, *collect, *until, *coll; extern obj_t *define, *output, *single, *frst, *lst, *empty, *repeat, *rep; -extern obj_t *flattn, *forget, *mrge, *bind, *cat, *dir; +extern obj_t *flattn, *forget, *local, *mrge, *bind, *cat, *dir; +extern obj_t *try, *catch, *finally, *nothrow; +extern obj_t *error, *type_error, *internal_err, *numeric_err, *range_err; +extern obj_t *query_error, *file_error; extern obj_t *zero, *one, *two, *negone, *maxint, *minint; extern obj_t *null_string; -extern obj_t *null_list; /* (NIL) */ +extern obj_t *null_list; /* (nil) */ extern obj_t *identity_f; extern obj_t *equal_f; extern const char *progname; +extern obj_t *prog_string; + extern void *(*oom_realloc)(void *, size_t); obj_t *identity(obj_t *obj); obj_t *typeof(obj_t *obj); +obj_t *type_check(obj_t *obj, int); +obj_t *type_check2(obj_t *obj, int, int); obj_t *car(obj_t *cons); obj_t *cdr(obj_t *cons); obj_t **car_l(obj_t *cons); @@ -187,6 +181,8 @@ obj_t *fourth(obj_t *cons); obj_t *fifth(obj_t *cons); obj_t *sixth(obj_t *cons); obj_t **tail(obj_t *cons); +obj_t *pop(obj_t **plist); +obj_t *push(obj_t *val, obj_t **plist); obj_t *copy_list(obj_t *list); obj_t *nreverse(obj_t *in); obj_t *reverse(obj_t *in); @@ -275,16 +271,9 @@ obj_t *vec_get_fill(obj_t *vec); obj_t *vec_set_fill(obj_t *vec, obj_t *fill); obj_t **vecref_l(obj_t *vec, obj_t *ind); obj_t *vec_push(obj_t *vec, obj_t *item); -obj_t *stdio_line_stream(FILE *f, obj_t *label); -obj_t *pipe_line_stream(FILE *f, obj_t *label); -obj_t *dirent_stream(DIR *d, obj_t *label); -obj_t *stream_get(obj_t *sm); -obj_t *stream_pushback(obj_t *sm, obj_t *obj); -obj_t *stream_put(obj_t *sm, obj_t *obj); -obj_t *stream_close(obj_t *sm); obj_t *lazy_stream_cons(obj_t *stream); obj_t *cobj(void *handle, obj_t *cls_sym, struct cobj_ops *ops); -void cobj_print_op(obj_t *, FILE *); /* Print function for struct cobj_ops */ +void cobj_print_op(obj_t *, obj_t *); /* Default function for struct cobj_ops */ obj_t *assoc(obj_t *list, obj_t *key); obj_t *acons_new(obj_t *list, obj_t *key, obj_t *value); obj_t *alist_remove(obj_t *list, obj_t *keys); @@ -295,12 +284,12 @@ obj_t *mapcar(obj_t *fun, obj_t *list); obj_t *mappend(obj_t *fun, obj_t *list); obj_t *sort(obj_t *list, obj_t *lessfun, obj_t *keyfun); -void obj_print(obj_t *obj, FILE *); +void obj_print(obj_t *obj, obj_t *stream); +void obj_pprint(obj_t *obj, obj_t *stream); void init(const char *progname, void *(*oom_realloc)(void *, size_t), obj_t **maybe_bottom_0, obj_t **maybe_bottom_1); -void dump(obj_t *obj, FILE *); -char *snarf_line(FILE *in); -obj_t *snarf(FILE *in); +void dump(obj_t *obj, obj_t *stream); +obj_t *snarf(obj_t *in); obj_t *match(obj_t *spec, obj_t *data); #define nil ((obj_t *) 0) @@ -313,6 +302,12 @@ obj_t *match(obj_t *spec, obj_t *data); #define if3(a, b, c) ((a) ? (b) : (c)) +#define or2(a, b) ((a) ? (a) : (b)) + +#define or3(a, b, c) or2(a, or2(b, c)) + +#define or4(a, b, c, d) or2(a, or3(b, c, d)) + #define list_collect_decl(OUT, PTAIL) \ obj_t *OUT = nil, **PTAIL = &OUT diff --git a/match.c b/match.c new file mode 100644 index 00000000..d14df9a3 --- /dev/null +++ b/match.c @@ -0,0 +1,1643 @@ +/* Copyright 2009 + * Kaz Kylheku + * Vancouver, Canada + * All rights reserved. + * + * BSD License: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. The name of the author may not be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "lib.h" +#include "gc.h" +#include "unwind.h" +#include "regex.h" +#include "stream.h" +#include "parser.h" +#include "txr.h" +#include "match.h" + +int output_produced; + +static void debugf(const char *fmt, ...) +{ + if (opt_loglevel >= 2) { + va_list vl; + va_start (vl, fmt); + format(std_error, "~a: ", prog_string, nao); + vformat(std_error, fmt, vl); + put_cchar(std_error, '\n'); + va_end (vl); + } +} + +static void debuglf(obj_t *line, const char *fmt, ...) +{ + if (opt_loglevel >= 2) { + va_list vl; + va_start (vl, fmt); + format(std_error, "~a: (~a:~a) ", prog_string, spec_file_str, line, nao); + vformat(std_error, fmt, vl); + put_cchar(std_error, '\n'); + va_end (vl); + } +} + +static void debuglcf(obj_t *line, const char *fmt, ...) +{ + if (opt_loglevel >= 2) { + va_list vl; + va_start (vl, fmt); + format(std_error, "~a: (~a:~a)", prog_string, spec_file_str, line, nao); + vcformat(std_error, fmt, vl); + put_cchar(std_error, '\n'); + va_end (vl); + } +} + +static void sem_error(obj_t *line, const char *fmt, ...) +{ + va_list vl; + obj_t *stream = make_string_output_stream(); + + va_start (vl, fmt); + format(stream, "~a: ", prog_string, nao); + if (line) + format(stream, "(~a:~a) ", spec_file_str, line, nao); + (void) vformat(stream, fmt, vl); + va_end (vl); + + uw_throw(query_error, get_string_from_stream(stream)); + abort(); +} + +static void file_err(obj_t *line, const char *fmt, ...) +{ + va_list vl; + obj_t *stream = make_string_output_stream(); + + va_start (vl, fmt); + format(stream, "~a: ", prog_string, nao); + if (line) + format(stream, "(~a:~a) ", spec_file_str, line, nao); + (void) vformat(stream, fmt, vl); + va_end (vl); + + uw_throw(file_error, get_string_from_stream(stream)); + abort(); +} + + +void dump_shell_string(const char *str) +{ + int ch; + + putchar('"'); + while ((ch = *str++) != 0) { + switch (ch) { + case '"': case '`': case '$': case '\\': case '\n': + putchar('\\'); + /* fallthrough */ + default: + putchar(ch); + } + } + putchar('"'); +} + +void dump_var(const char *name, char *pfx1, size_t len1, + char *pfx2, size_t len2, obj_t *value, int level) +{ + if (len1 >= 112 || len2 >= 112) + internal_error("too much depth in bindings"); + + if (stringp(value) || chrp(value)) { + fputs(name, stdout); + fputs(pfx1, stdout); + fputs(pfx2, stdout); + putchar('='); + if (stringp(value)) { + dump_shell_string(c_str(value)); + } else { + char mini[2]; + mini[0] = c_chr(value); + mini[1] = 0; + dump_shell_string(mini); + } + putchar('\n'); + } else { + obj_t *iter; + int i; + size_t add1 = 0, add2 = 0; + + for (i = 0, iter = value; iter; iter = cdr(iter), i++) { + if (level < opt_arraydims) { + add2 = sprintf(pfx2 + len2, "[%d]", i); + add1 = 0; + } else { + add1 = sprintf(pfx1 + len1, "_%d", i); + add2 = 0; + } + + dump_var(name, pfx1, len1 + add1, pfx2, len2 + add2, car(iter), level + 1); + } + } +} + +void dump_bindings(obj_t *bindings) +{ + if (opt_loglevel >= 2) { + fputs("raw_bindings:\n", stderr); + dump(bindings, std_error); + } + + while (bindings) { + char pfx1[128], pfx2[128]; + obj_t *var = car(car(bindings)); + obj_t *value = cdr(car(bindings)); + const char *name = c_str(symbol_name(var)); + *pfx1 = 0; *pfx2 = 0; + dump_var(name, pfx1, 0, pfx2, 0, value, 0); + bindings = cdr(bindings); + } +} + +obj_t *depth(obj_t *obj) +{ + obj_t *dep = zero; + + if (obj == nil) + return one; + + if (atom(obj)) + return zero; + + while (obj) { + dep = max2(dep, depth(first(obj))); + obj = rest(obj); + } + + return plus(dep, one); +} + +obj_t *weird_merge(obj_t *left, obj_t *right) +{ + obj_t *left_depth = depth(left); + obj_t *right_depth = depth(right); + + while (lt(left_depth, right_depth) || zerop(left_depth)) { + left = cons(left, nil); + left_depth = plus(left_depth, one); + } + + while (lt(right_depth, left_depth) || zerop(right_depth)) { + right = cons(right, nil); + right_depth = plus(right_depth, one); + } + + return append2(left, right); +} + +obj_t *map_leaf_lists(obj_t *func, obj_t *list) +{ + if (atom(list)) + return list; + if (none_satisfy(list, func_n1(listp), nil)) + return funcall1(func, list); + return mapcar(bind2(func_n2(map_leaf_lists), func), list); +} + +obj_t *dest_bind(obj_t *bindings, obj_t *pattern, obj_t *value) +{ + if (nullp(pattern)) + return bindings; + + if (symbolp(pattern)) { + obj_t *existing = assoc(bindings, pattern); + if (existing) { + if (tree_find(value, cdr(existing))) + return bindings; + if (tree_find(cdr(existing), value)) + return bindings; + debugf("bind variable mismatch: ~a", pattern, nao); + return t; + } + return cons(cons(pattern, value), bindings); + } + + if (consp(pattern)) { + obj_t *piter = pattern, *viter = value; + + while (consp(piter) && consp(viter)) + { + bindings = dest_bind(bindings, car(piter), car(viter)); + if (bindings == t) + return t; + piter = cdr(piter); + viter = cdr(viter); + } while (consp(piter) && consp(viter)); + + if (symbolp(piter)) { + bindings = dest_bind(bindings, piter, viter); + if (bindings == t) + return t; + } + } + + return bindings; +} + +obj_t *match_line(obj_t *bindings, obj_t *specline, obj_t *dataline, + obj_t *pos, obj_t *spec_lineno, obj_t *data_lineno, + obj_t *file) +{ +#define LOG_MISMATCH(KIND) \ + debuglf(spec_lineno, KIND " mismatch, position ~a (~a:~a)", pos, \ + file, data_lineno, nao); \ + debuglf(spec_lineno, " ~a", dataline, nao); \ + if (c_num(pos) < 77) \ + debuglcf(spec_lineno, " %*s^", (int) c_num(pos), "") + +#define LOG_MATCH(KIND, EXTENT) \ + debuglf(spec_lineno, KIND " matched, position ~a-~a (~a:~a)", \ + pos, EXTENT, file, data_lineno, nao); \ + debuglf(spec_lineno, " ~a", dataline, nao); \ + if (c_num(EXTENT) < 77) \ + debuglcf(spec_lineno, " %*s%-*s^", (int) c_num(pos), \ + "", (int) (c_num(EXTENT) - c_num(pos)), "^") + + for (;;) { + obj_t *elem; + + if (specline == nil) + break; + + elem = first(specline); + + switch (elem ? elem->t.type : 0) { + case CONS: /* directive */ + { + obj_t *directive = first(elem); + + if (directive == var) { + obj_t *sym = second(elem); + obj_t *pat = third(elem); + obj_t *modifier = fourth(elem); + obj_t *pair = assoc(bindings, sym); /* var exists already? */ + + if (pair) { + /* If the variable already has a binding, we replace + it with its value, and treat it as a string match. + The spec looks like ((var ) ...) + and it must be transformed into + ( ...) */ + if (pat) { + specline = cons(cdr(pair), cons(pat, rest(specline))); + } else if (nump(modifier)) { + obj_t *past = plus(pos, modifier); + + if (c_num(past) > c_num(length_str(dataline)) || + c_num(past) < c_num(pos)) + { + LOG_MISMATCH("fixed field size"); + return nil; + } + + if (!tree_find(trim_str(sub_str(dataline, pos, past)), + cdr(pair))) + { + LOG_MISMATCH("fixed field contents"); + return nil; + } + + LOG_MATCH("fixed field", past); + pos = past; + specline = cdr(specline); + } else { + specline = cons(cdr(pair), rest(specline)); + } + continue; + } else if (pat == nil) { /* match to end of line or with regex */ + if (consp(modifier)) { + obj_t *past = match_regex(dataline, car(modifier), pos); + if (nullp(past)) { + LOG_MISMATCH("var positive regex"); + return nil; + } + LOG_MATCH("var positive regex", past); + bindings = acons_new(bindings, sym, sub_str(dataline, pos, past)); + pos = past; + } else if (nump(modifier)) { + obj_t *past = plus(pos, modifier); + if (c_num(past) > c_num(length_str(dataline)) || + c_num(past) < c_num(pos)) + { + LOG_MISMATCH("count based var"); + return nil; + } + LOG_MATCH("count based var", past); + bindings = acons_new(bindings, sym, trim_str(sub_str(dataline, pos, past))); + pos = past; + } else { + bindings = acons_new(bindings, sym, sub_str(dataline, pos, nil)); + pos = length_str(dataline); + } + } else if (pat->t.type == STR) { + obj_t *find = search_str(dataline, pat, pos, modifier); + if (!find) { + LOG_MISMATCH("var delimiting string"); + return nil; + } + LOG_MATCH("var delimiting string", find); + bindings = acons_new(bindings, sym, sub_str(dataline, pos, find)); + pos = plus(find, length_str(pat)); + } else if (consp(pat) && typeof(first(pat)) == regex) { + obj_t *find = search_regex(dataline, first(pat), pos, modifier); + obj_t *fpos = car(find); + obj_t *flen = cdr(find); + if (!find) { + LOG_MISMATCH("var delimiting regex"); + return nil; + } + LOG_MATCH("var delimiting regex", fpos); + bindings = acons_new(bindings, sym, sub_str(dataline, pos, fpos)); + pos = plus(fpos, flen); + } else if (consp(pat) && first(pat) == var) { + /* Unbound var followed by var: the following one must be bound. */ + obj_t *second_sym = second(pat); + obj_t *next_pat = third(pat); + obj_t *pair = assoc(bindings, second_sym); /* var exists already? */ + + if (!pair) + sem_error(spec_lineno, "consecutive unbound variables", nao); + + /* Re-generate a new spec with an edited version of + the element we just processed, and repeat. */ + { + obj_t *new_elem = list(var, sym, cdr(pair), modifier, nao); + + if (next_pat) + specline = cons(new_elem, cons(next_pat, rest(specline))); + else + specline = cons(new_elem, rest(specline)); + } + + continue; + } else if (consp(pat) && (consp(first(pat)) || stringp(first(pat)))) { + cons_bind (find, len, search_str(dataline, pat, pos, modifier)); + if (!find) { + LOG_MISMATCH("string"); + return nil; + } + bindings = acons_new(bindings, sym, sub_str(dataline, pos, find)); + pos = plus(find, len); + } else { + sem_error(spec_lineno, + "variable followed by invalid element", nao); + } + } else if (typeof(directive) == regex) { + obj_t *past = match_regex(dataline, directive, pos); + if (nullp(past)) { + LOG_MISMATCH("regex"); + return nil; + } + LOG_MATCH("regex", past); + pos = past; + } else if (directive == coll) { + obj_t *coll_specline = second(elem); + obj_t *until_specline = third(elem); + obj_t *bindings_coll = nil; + obj_t *iter; + + for (;;) { + cons_bind (new_bindings, new_pos, + match_line(bindings, coll_specline, dataline, pos, + spec_lineno, data_lineno, file)); + + if (until_specline) { + cons_bind (until_bindings, until_pos, + match_line(bindings, until_specline, dataline, pos, + spec_lineno, data_lineno, file)); + + if (until_pos) { + (void) until_bindings; + LOG_MATCH("until", until_pos); + break; + } else { + LOG_MISMATCH("until"); + } + } + + if (new_pos) { + LOG_MATCH("coll", new_pos); + + for (iter = new_bindings; iter && iter != bindings; + iter = cdr(iter)) + { + obj_t *binding = car(iter); + obj_t *existing = assoc(bindings_coll, car(binding)); + + bindings_coll = acons_new(bindings_coll, car(binding), + cons(cdr(binding), cdr(existing))); + } + } + + if (new_pos && !equal(new_pos, pos)) { + pos = new_pos; + assert (c_num(pos) <= c_num(length_str(dataline))); + } else { + pos = plus(pos, one); + } + + if (c_num(pos) >= c_num(length_str(dataline))) + break; + } + + + if (!bindings_coll) { + debuglf(spec_lineno, "nothing was collected", nao); + return nil; + } + + for (iter = bindings_coll; iter; iter = cdr(iter)) { + obj_t *pair = car(iter); + obj_t *rev = cons(car(pair), nreverse(cdr(pair))); + bindings = cons(rev, bindings); + } + } else if (consp(directive) || stringp(directive)) { + cons_bind (find, len, search_str_tree(dataline, elem, pos, nil)); + obj_t *newpos; + + if (find == nil || !equal(find, pos)) { + LOG_MISMATCH("string tree"); + return nil; + } + + newpos = plus(find, len); + LOG_MATCH("string tree", newpos); + pos = newpos; + } else { + sem_error(spec_lineno, "unknown directive: ~a", directive, nao); + } + } + break; + case STR: + { + obj_t *find = search_str(dataline, elem, pos, nil); + obj_t *newpos; + if (find == nil || !equal(find, pos)) { + LOG_MISMATCH("string"); + return nil; + } + newpos = plus(find, length_str(elem)); + LOG_MATCH("string", newpos); + pos = newpos; + break; + } + default: + sem_error(spec_lineno, "unsupported object in spec: ~s", elem, nao); + } + + specline = cdr(specline); + } + + return cons(bindings, pos); +} + +obj_t *format_field(obj_t *string_or_list, obj_t *spec) +{ + if (!stringp(string_or_list)) + return string_or_list; + + { + obj_t *right = lt(spec, zero); + obj_t *width = if3(lt(spec, zero), neg(spec), spec); + obj_t *diff = minus(width, length_str(string_or_list)); + + if (le(diff, zero)) + return string_or_list; + + if (ge(length_str(string_or_list), width)) + return string_or_list; + + { + obj_t *padding = mkstring(diff, chr(' ')); + + return if3(right, + cat_str(list(padding, string_or_list, nao), nil), + cat_str(list(string_or_list, padding, nao), nil)); + } + } +} + +obj_t *subst_vars(obj_t *spec, obj_t *bindings) +{ + list_collect_decl(out, iter); + + while (spec) { + obj_t *elem = first(spec); + + if (consp(elem)) { + if (first(elem) == var) { + obj_t *sym = second(elem); + obj_t *pat = third(elem); + obj_t *modifier = fourth(elem); + obj_t *pair = assoc(bindings, sym); + + if (pair) { + if (pat) + spec = cons(cdr(pair), cons(pat, rest(spec))); + else if (nump(modifier)) + spec = cons(format_field(cdr(pair), modifier), rest(spec)); + else + spec = cons(cdr(pair), rest(spec)); + continue; + } + } else if (first(elem) == quasi) { + obj_t *nested = subst_vars(rest(elem), bindings); + list_collect_append(iter, nested); + spec = cdr(spec); + continue; + } else { + obj_t *nested = subst_vars(elem, bindings); + list_collect_append(iter, nested); + spec = cdr(spec); + continue; + } + } + + list_collect(iter, elem); + spec = cdr(spec); + } + + return out; +} + +obj_t *eval_form(obj_t *form, obj_t *bindings) +{ + if (!form) + return cons(t, form); + else if (symbolp(form)) + return assoc(bindings, form); + else if (consp(form)) { + if (car(form) == quasi) { + return cons(t, cat_str(subst_vars(rest(form), bindings), nil)); + } else { + obj_t *subforms = mapcar(bind2other(func_n2(eval_form), bindings), form); + + if (all_satisfy(subforms, identity_f, nil)) + return cons(t, mapcar(func_n1(cdr), subforms)); + return nil; + } + } if (stringp(form)) { + return cons(t, form); + } + + return cons(t, form); +} + +typedef struct fpip { + FILE *f; + DIR *d; + enum { fpip_fclose, fpip_pclose, fpip_closedir } close; +} fpip_t; + +fpip_t complex_open(obj_t *name, obj_t *output) +{ + fpip_t ret = { 0 }; + + const char *namestr = c_str(name); + long len = c_num(length_str(name)); + + if (len == 0) + return ret; + + if (!strcmp(namestr, "-")) { + ret.close = fpip_fclose; + ret.f = output ? stdout : stdin; + output_produced = output ? 1 : 0; + } else if (namestr[0] == '!') { + ret.close = fpip_pclose; + ret.f = popen(namestr+1, output ? "w" : "r"); + } else if (namestr[0] == '$') { + if (output) + return ret; + ret.close = fpip_closedir; + ret.d = opendir(namestr+1); + } else { + ret.close = fpip_fclose; + ret.f = fopen(namestr, output ? "w" : "r"); + } + + return ret; +} + +int complex_open_failed(fpip_t fp) +{ + return fp.f == 0 && fp.d == 0; +} + +void complex_close(fpip_t fp) +{ + if (fp.f == 0) + return; + switch (fp.close) { + case fpip_fclose: + if (fp.f != stdin && fp.f != stdout) + fclose(fp.f); + return; + case fpip_pclose: + pclose(fp.f); + return; + case fpip_closedir: + closedir(fp.d); + return; + } + + internal_error("bad input source type code"); +} + +obj_t *complex_snarf(fpip_t fp, obj_t *name) +{ + switch (fp.close) { + case fpip_fclose: + return lazy_stream_cons(make_stdio_stream(fp.f, t, nil)); + case fpip_pclose: + return lazy_stream_cons(make_pipe_stream(fp.f, t, nil)); + case fpip_closedir: + return lazy_stream_cons(make_dir_stream(fp.d)); + } + + internal_error("bad input source type"); +} + +obj_t *robust_length(obj_t *obj) +{ + if (obj == nil) + return zero; + if (atom(obj)) + return negone; + return length(obj); +} + +obj_t *bind_car(obj_t *bind_cons) +{ + return if3(consp(cdr(bind_cons)), + cons(car(bind_cons), car(cdr(bind_cons))), + bind_cons); +} + +obj_t *bind_cdr(obj_t *bind_cons) +{ + return if3(consp(cdr(bind_cons)), + cons(car(bind_cons), cdr(cdr(bind_cons))), + bind_cons); +} + +obj_t *extract_vars(obj_t *output_spec) +{ + list_collect_decl (vars, tai); + + if (consp(output_spec)) { + if (first(output_spec) == var) { + list_collect (tai, second(output_spec)); + } else { + for (; output_spec; output_spec = cdr(output_spec)) + list_collect_nconc(tai, extract_vars(car(output_spec))); + } + } + + return vars; +} + +obj_t *extract_bindings(obj_t *bindings, obj_t *output_spec) +{ + list_collect_decl (bindings_out, tail); + obj_t *var_list = extract_vars(output_spec); + + for (; bindings; bindings = cdr(bindings)) + if (memq(car(car(bindings)), var_list)) + list_collect(tail, car(bindings)); + + return bindings_out; +} + +void do_output_line(obj_t *bindings, obj_t *specline, + obj_t *spec_lineno, FILE *out) +{ + for (; specline; specline = rest(specline)) { + obj_t *elem = first(specline); + + switch (elem ? elem->t.type : 0) { + case CONS: + { + obj_t *directive = first(elem); + + if (directive == var) { + obj_t *str = cat_str(subst_vars(cons(elem, nil), bindings), nil); + if (str == nil) + sem_error(spec_lineno, "bad substitution: ~a", second(elem), nao); + fputs(c_str(str), out); + } else if (directive == rep) { + obj_t *main_clauses = second(elem); + obj_t *single_clauses = third(elem); + obj_t *first_clauses = fourth(elem); + obj_t *last_clauses = fifth(elem); + obj_t *empty_clauses = sixth(elem); + obj_t *bind_cp = extract_bindings(bindings, elem); + obj_t *max_depth = reduce_left(func_n2(max2), + bind_cp, zero, + chain(list(func_n1(cdr), + func_n1(robust_length), + nao))); + + if (equal(max_depth, zero) && empty_clauses) { + do_output_line(bindings, empty_clauses, spec_lineno, out); + } else if (equal(max_depth, one) && single_clauses) { + obj_t *bind_a = mapcar(func_n1(bind_car), bind_cp); + do_output_line(bind_a, single_clauses, spec_lineno, out); + } else if (!zerop(max_depth)) { + long i; + + for (i = 0; i < c_num(max_depth); i++) { + obj_t *bind_a = mapcar(func_n1(bind_car), bind_cp); + obj_t *bind_d = mapcar(func_n1(bind_cdr), bind_cp); + + if (i == 0 && first_clauses) { + do_output_line(bind_a, first_clauses, spec_lineno, out); + } else if (i == c_num(max_depth) - 1 && last_clauses) { + do_output_line(bind_a, last_clauses, spec_lineno, out); + } else { + do_output_line(bind_a, main_clauses, spec_lineno, out); + } + + bind_cp = bind_d; + } + } + + } else { + sem_error(spec_lineno, "unknown directive: ~a", directive, nao); + } + } + break; + case STR: + fputs(c_str(elem), out); + break; + case 0: + break; + default: + sem_error(spec_lineno, "unsupported object in output spec: ~s", elem); + } + } +} + +void do_output(obj_t *bindings, obj_t *specs, FILE *out) +{ + if (equal(specs, null_list)) + return; + + for (; specs; specs = cdr(specs)) { + cons_bind (spec_lineno, specline, first(specs)); + obj_t *first_elem = first(specline); + + if (consp(first_elem)) { + obj_t *sym = first(first_elem); + + if (sym == repeat) { + obj_t *main_clauses = second(first_elem); + obj_t *single_clauses = third(first_elem); + obj_t *first_clauses = fourth(first_elem); + obj_t *last_clauses = fifth(first_elem); + obj_t *empty_clauses = sixth(first_elem); + obj_t *bind_cp = extract_bindings(bindings, first_elem); + obj_t *max_depth = reduce_left(func_n2(max2), + bind_cp, zero, + chain(list(func_n1(cdr), + func_n1(robust_length), + nao))); + + if (equal(max_depth, zero) && empty_clauses) { + do_output(bind_cp, empty_clauses, out); + } else if (equal(max_depth, one) && single_clauses) { + obj_t *bind_a = mapcar(func_n1(bind_car), bind_cp); + do_output(bind_a, single_clauses, out); + } else if (!zerop(max_depth)) { + long i; + + for (i = 0; i < c_num(max_depth); i++) { + obj_t *bind_a = mapcar(func_n1(bind_car), bind_cp); + obj_t *bind_d = mapcar(func_n1(bind_cdr), bind_cp); + + if (i == 0 && first_clauses) { + do_output(bind_a, first_clauses, out); + } else if (i == c_num(max_depth) - 1 && last_clauses) { + do_output(bind_a, last_clauses, out); + } else { + do_output(bind_a, main_clauses, out); + } + + bind_cp = bind_d; + } + } + continue; + } + } + + do_output_line(bindings, specline, spec_lineno, out); + putc('\n', out); + } +} + +obj_t *match_files(obj_t *spec, obj_t *files, + obj_t *bindings, obj_t *first_file_parsed, + obj_t *data_linenum) +{ + obj_t *data = nil; + long data_lineno = 0; + + if (listp(first_file_parsed)) { + data = first_file_parsed; + data_lineno = c_num(data_linenum); + first_file_parsed = nil; + } else if (files) { + obj_t *spec = first(files); + obj_t *name = consp(spec) ? cdr(spec) : spec; + fpip_t fp = (errno = 0, complex_open(name, nil)); + + debugf("opening data source ~a", name, nao); + + if (complex_open_failed(fp)) { + if (consp(spec) && car(spec) == nothrow) { + debugf("could not open ~a: treating as failed match due to nothrow", + name, nao); + return nil; + } else if (errno != 0) + file_err(nil, "could not open ~a (error ~a/~a)", name, + num(errno), string(strdup(strerror(errno))), nao); + else + file_err(nil, "could not open ~a", name, nao); + return nil; + } + + if ((data = complex_snarf(fp, name)) != nil) + data_lineno = 1; + } + + for (; spec; spec = rest(spec), data = rest(data), data_lineno++) +repeat_spec_same_data: + { + obj_t *specline = rest(first(spec)); + obj_t *dataline = first(data); + obj_t *spec_linenum = first(first(spec)); + obj_t *first_spec = first(specline); + + if (consp(first_spec)) { + obj_t *sym = first(first_spec); + + if (sym == skip) { + obj_t *max = first(rest(first_spec)); + long cmax = nump(max) ? c_num(max) : 0; + long reps = 0; + + if (rest(specline)) + sem_error(spec_linenum, + "unexpected material after skip directive", nao); + + if ((spec = rest(spec)) == nil) + break; + + { + uw_block_begin(nil, result); + + while (dataline && (!max || reps++ < cmax)) { + cons_bind (new_bindings, success, + match_files(spec, files, bindings, + data, num(data_lineno))); + + if (success) { + debuglf(spec_linenum, "skip matched ~a:~a", first(files), + num(data_lineno), nao); + result = cons(new_bindings, cons(data, num(data_lineno))); + break; + } + + debuglf(spec_linenum, "skip didn't match ~a:~a", first(files), + num(data_lineno), nao); + data = rest(data); + data_lineno++; + dataline = first(data); + } + + uw_block_end; + + if (result) + return result; + } + + debuglf(spec_linenum, "skip failed", nao); + return nil; + } else if (sym == trailer) { + if (rest(specline)) + sem_error(spec_linenum, + "unexpected material after trailer directive", nao); + + if ((spec = rest(spec)) == nil) + break; + + { + cons_bind (new_bindings, success, + match_files(spec, files, bindings, + data, num(data_lineno))); + + if (success) + return cons(new_bindings, cons(data, num(data_lineno))); + return nil; + } + } else if (sym == block) { + obj_t *name = first(rest(first_spec)); + if (rest(specline)) + sem_error(spec_linenum, + "unexpected material after block directive", nao); + if ((spec = rest(spec)) == nil) + break; + { + uw_block_begin(name, result); + result = match_files(spec, files, bindings, data, num(data_lineno)); + uw_block_end; + return result; + } + } else if (sym == fail || sym == accept) { + obj_t *target = first(rest(first_spec)); + + if (rest(specline)) + sem_error(spec_linenum, "unexpected material after ~a", sym, nao); + + uw_block_return(target, + if2(sym == accept, + cons(bindings, + if3(data, cons(data, num(data_lineno)), t)))); + /* TODO: uw_block_return could just throw this */ + if (target) + sem_error(spec_linenum, "~a: no block named ~a in scope", + sym, target, nao); + else + sem_error(spec_linenum, "%~a: no anonymous block in scope", sym, nao); + return nil; + } else if (sym == next) { + if (rest(first_spec) && rest(specline)) + sem_error(spec_linenum, + "invalid combination of old and new next syntax", nao); + + if ((spec = rest(spec)) == nil) + break; + + if (rest(first_spec)) { + obj_t *source = rest(first_spec); + + if (eq(first(source), nothrow)) + push(nil, &source); + + { + obj_t *val = eval_form(first(source), bindings); + obj_t *name = cdr(val); + + if (!val) + sem_error(spec_linenum, "next: unbound variable in form ~a", + first(source), nao); + + if (eq(second(source), nothrow)) { + if (name) { + files = cons(cons(nothrow, name), files); + } else { + files = rest(files); + if (!files) { + debuglf(spec_linenum, "next: out of arguments", nao); + return nil; + } + files = cons(cons(nothrow, first(files)), rest(files)); + } + } else { + if (name) { + files = cons(name, files); + } else { + files = rest(files); + if (!files) + sem_error(spec_linenum, "next: out of arguments", nao); + files = cons(cons(nothrow, first(files)), rest(files)); + } + } + } + } else if (rest(specline)) { + obj_t *sub = subst_vars(rest(specline), bindings); + obj_t *str = cat_str(sub, nil); + if (str == nil) { + sem_error(spec_linenum, "bad substitution in next file spec", nao); + continue; + } + files = cons(cons(nothrow, str), files); + } else { + files = rest(files); + if (!files) + sem_error(spec_linenum, "next: out of arguments", nao); + } + + /* We recursively process the file list, but the new + data position we return to the caller must be in the + original file we we were called with. Hence, we can't + make a straight tail call here. */ + { + cons_bind (new_bindings, success, + match_files(spec, files, bindings, t, nil)); + if (success) + return cons(new_bindings, + if3(data, cons(data, num(data_lineno)), t)); + return nil; + } + } else if (sym == some || sym == all || sym == none || sym == maybe || + sym == cases) + { + obj_t *specs; + obj_t *all_match = t; + obj_t *some_match = nil; + obj_t *max_line = zero; + obj_t *max_data = nil; + + for (specs = rest(first_spec); specs != nil; specs = rest(specs)) + { + obj_t *nested_spec = first(specs); + obj_t *data_linenum = num(data_lineno); + + cons_bind (new_bindings, success, + match_files(nested_spec, files, bindings, + data, data_linenum)); + + if (success) { + bindings = new_bindings; + some_match = t; + + if (success == t) { + max_data = t; + } else if (consp(success) && max_data != t) { + cons_bind (new_data, new_line, success); + if (gt(new_line, max_line)) { + max_line = new_line; + max_data = new_data; + } + } + if (sym == cases) + break; + } else { + all_match = nil; + } + } + + if (sym == all && !all_match) { + debuglf(spec_linenum, "all: some clauses didn't match", nao); + return nil; + } + + if ((sym == some || sym == cases) && !some_match) { + debuglf(spec_linenum, "some/cases: no clauses matched", nao); + return nil; + } + + if (sym == none && some_match) { + debuglf(spec_linenum, "none: some clauses matched", nao); + return nil; + } + + /* No check for maybe, since it always succeeds. */ + + if (consp(max_data)) { + data_lineno = c_num(max_line); + data = max_data; + } else if (max_data == t) { + data = nil; + } + + if ((spec = rest(spec)) == nil) + break; + + goto repeat_spec_same_data; + } else if (sym == collect) { + obj_t *coll_spec = second(first_spec); + obj_t *until_spec = third(first_spec); + obj_t *bindings_coll = nil; + obj_t *iter; + + uw_block_begin(nil, result); + + result = t; + + while (data) { + cons_bind (new_bindings, success, + match_files(coll_spec, files, bindings, + data, num(data_lineno))); + + /* Until clause sees un-collated bindings from collect. */ + if (until_spec) + { + cons_bind (discarded_bindings, success, + match_files(until_spec, files, new_bindings, + data, num(data_lineno))); + + if (success) { + (void) discarded_bindings; + break; + } + } + + if (success) { + debuglcf(spec_linenum, "collect matched %s:%ld", + first(files), data_lineno); + + for (iter = new_bindings; iter && iter != bindings; + iter = cdr(iter)) + { + obj_t *binding = car(iter); + obj_t *existing = assoc(bindings_coll, car(binding)); + + bindings_coll = acons_new(bindings_coll, car(binding), + cons(cdr(binding), cdr(existing))); + } + } + + if (success) { + if (consp(success)) { + debuglcf(spec_linenum, + "collect advancing from line d to %ld", + data_lineno, c_num(cdr(success))); + data = car(success); + data_lineno = c_num(cdr(success)); + } else { + debuglf(spec_linenum, "collect consumed entire file", nao); + data = nil; + } + } else { + data = rest(data); + data_lineno++; + } + } + + uw_block_end; + + if (!result) { + debuglf(spec_linenum, "collect explicitly failed", nao); + return nil; + } + + if (!bindings_coll) { + debuglf(spec_linenum, "nothing was collected", nao); + return nil; + } + + for (iter = bindings_coll; iter; iter = cdr(iter)) { + obj_t *pair = car(iter); + obj_t *rev = cons(car(pair), nreverse(cdr(pair))); + bindings = cons(rev, bindings); + } + + if ((spec = rest(spec)) == nil) + break; + + goto repeat_spec_same_data; + } else if (sym == flattn) { + obj_t *iter; + + for (iter = rest(first_spec); iter; iter = rest(iter)) { + obj_t *sym = first(iter); + + if (!symbolp(sym)) { + sem_error(spec_linenum, "non-symbol in flatten directive", nao); + } else { + obj_t *existing = assoc(bindings, sym); + + if (existing) + *cdr_l(existing) = flatten(cdr(existing)); + } + } + + if ((spec = rest(spec)) == nil) + break; + + goto repeat_spec_same_data; + } else if (sym == forget || sym == local) { + bindings = alist_remove(bindings, rest(first_spec)); + + if ((spec = rest(spec)) == nil) + break; + + goto repeat_spec_same_data; + } else if (sym == mrge) { + obj_t *target = first(rest(first_spec)); + obj_t *args = rest(rest(first_spec)); + obj_t *merged = nil; + + if (!target || !symbolp(target)) + sem_error(spec_linenum, "bad merge directive", nao); + + for (; args; args = rest(args)) { + obj_t *other_sym = first(args); + + if (other_sym) { + obj_t *other_lookup = assoc(bindings, other_sym); + + if (!symbolp(other_sym)) + sem_error(spec_linenum, "non-symbol in merge directive", nao); + else if (!other_lookup) + sem_error(spec_linenum, "merge: nonexistent symbol ~a", + other_sym, nao); + + if (merged) + merged = weird_merge(merged, cdr(other_lookup)); + else + merged = cdr(other_lookup); + } + } + + bindings = acons_new(bindings, target, merged); + + if ((spec = rest(spec)) == nil) + break; + + goto repeat_spec_same_data; + } else if (sym == bind) { + obj_t *args = rest(first_spec); + obj_t *pattern = first(args); + obj_t *form = second(args); + obj_t *val = eval_form(form, bindings); + + if (!val) + sem_error(spec_linenum, "bind: unbound variable on right side", nao); + + bindings = dest_bind(bindings, pattern, cdr(val)); + + if (bindings == t) + return nil; + + if ((spec = rest(spec)) == nil) + break; + + goto repeat_spec_same_data; + } else if (sym == cat) { + obj_t *iter; + + for (iter = rest(first_spec); iter; iter = rest(iter)) { + obj_t *sym = first(iter); + + if (!symbolp(sym)) { + sem_error(spec_linenum, "non-symbol in cat directive", nao); + } else { + obj_t *existing = assoc(bindings, sym); + obj_t *sep = nil; + + if (rest(specline)) { + obj_t *sub = subst_vars(rest(specline), bindings); + sep = cat_str(sub, nil); + } + + if (existing) + *cdr_l(existing) = cat_str(flatten(cdr(existing)), sep); + } + } + + if ((spec = rest(spec)) == nil) + break; + + goto repeat_spec_same_data; + } else if (sym == output) { + obj_t *specs = second(first_spec); + obj_t *old_style_dest = third(first_spec); + obj_t *new_style_dest = fourth(first_spec); + obj_t *nt = nil; + obj_t *dest; + + if (old_style_dest) { + dest = cat_str(subst_vars(old_style_dest, bindings), nil); + } else { + if (eq(first(new_style_dest), nothrow)) + push(nil, &new_style_dest); + + { + obj_t *form = first(new_style_dest); + obj_t *val = eval_form(form, bindings); + + if (!val) + sem_error(spec_linenum, "output: unbound variable in form ~a", + form, nao); + + nt = eq(second(new_style_dest), nothrow); + dest = or2(cdr(val), string(strdup("-"))); + } + } + + fpip_t fp = (errno = 0, complex_open(dest, t)); + + debugf("opening data sink ~a", dest, nao); + + if (complex_open_failed(fp)) { + if (nt) { + debugf("could not open ~a: treating as failed match due to nothrow", + dest, nao); + return nil; + } else if (errno != 0) { + file_err(nil, "could not open ~a (error ~a/~a)", dest, + num(errno), string(strdup(strerror(errno))), nao); + } else { + file_err(nil, "could not open ~a", dest, nao); + } + } else { + do_output(bindings, specs, fp.f); + complex_close(fp); + } + + if ((spec = rest(spec)) == nil) + break; + + goto repeat_spec_same_data; + } else if (sym == define) { + obj_t *args = second(first_spec); + obj_t *body = third(first_spec); + obj_t *name = first(args); + obj_t *params = second(args); + + if (rest(specline)) + sem_error(spec_linenum, "unexpected material after define", nao); + + uw_set_func(name, cons(params, body)); + + if ((spec = rest(spec)) == nil) + break; + + goto repeat_spec_same_data; + } else if (sym == try) { + obj_t *catch_syms = second(first_spec); + obj_t *try_clause = third(first_spec); + obj_t *catch_fin = fourth(first_spec); + obj_t *finally_clause = nil; + + { + uw_block_begin(nil, result); + uw_catch_begin(catch_syms, exsym, exception); + + { + result = match_files(try_clause, files, bindings, + data, num(data_lineno)); + uw_do_unwind; + } + + uw_catch(exsym, exception) { + { + obj_t *iter; + + for (iter = catch_fin; iter; iter = cdr(iter)) { + obj_t *clause = car(iter); + obj_t *matches = second(clause); + obj_t *body = third(clause); + + if (first(clause) == catch) { + obj_t *match; + for (match = matches; match; match = cdr(match)) + if (uw_exception_subtype_p(exsym, car(match))) + break; + if (match) { + cons_bind (new_bindings, success, + match_files(body, files, bindings, + data, num(data_lineno))); + if (success) { + bindings = new_bindings; + result = t; /* catch succeeded, so try succeeds */ + if (consp(success)) { + data = car(success); + data_lineno = c_num(cdr(success)); + } else { + data = nil; + } + } + break; + } + } else if (car(clause) == finally) { + finally_clause = body; + } + } + } + uw_do_unwind; + } + + uw_unwind { + obj_t *iter; + + /* result may be t, from catch above. */ + if (consp(result)) { + /* We process it before finally, as part of the unwinding, so + finally can accumulate more bindings over top of any bindings + produced by the main clause. */ + cons_bind (new_bindings, success, result); + if (consp(success)) { + data = car(success); + data_lineno = c_num(cdr(success)); + } else { + data = nil; + } + bindings = new_bindings; + } + + if (!finally_clause) { + for (iter = catch_fin; iter; iter = cdr(iter)) { + obj_t *clause = car(iter); + if (first(clause) == finally) { + finally_clause = third(clause); + break; + } + } + } + + if (finally_clause) { + cons_bind (new_bindings, success, + match_files(finally_clause, files, bindings, + data, num(data_lineno))); + if (success) { + bindings = new_bindings; + result = t; /* finally succeeds, so try block succeeds */ + if (consp(success)) { + data = car(success); + data_lineno = c_num(cdr(success)); + } else { + data = nil; + } + } + } + } + + uw_catch_end; + uw_block_end; + + if (!result) + return nil; + + if ((spec = rest(spec)) == nil) + break; + + goto repeat_spec_same_data; + } + } else { + obj_t *func = uw_get_func(sym); + + if (func) { + obj_t *args = rest(first_spec); + obj_t *params = car(func); + obj_t *ub_p_a_pairs = nil; + obj_t *body = cdr(func); + obj_t *piter, *aiter; + obj_t *bindings_cp = copy_alist(bindings); + + if (!equal(length(args), length(params))) + sem_error(spec_linenum, "function ~a takes ~a argument(s)", + sym, length(params), nao); + + for (piter = params, aiter = args; piter; + piter = cdr(piter), aiter = cdr(aiter)) + { + obj_t *param = car(piter); + obj_t *arg = car(aiter); + + if (arg && symbolp(arg)) { + obj_t *val = eval_form(arg, bindings); + if (val) { + bindings_cp = acons_new(bindings_cp, + param, + cdr(val)); + } else { + bindings_cp = alist_remove(bindings_cp, cons(param, nil)); + ub_p_a_pairs = cons(cons(param, arg), ub_p_a_pairs); + } + } else { + obj_t *val = eval_form(arg, bindings); + if (!val) + sem_error(spec_linenum, + "unbound variable in function argument form", nao); + bindings_cp = acons_new(bindings_cp, param, cdr(val)); + } + } + + { + uw_block_begin(nil, result); + uw_env_begin; + result = match_files(body, files, bindings_cp, + data, num(data_lineno)); + uw_env_end; + uw_block_end; + + if (!result) { + debuglf(spec_linenum, "function failed", nao); + return nil; + } + + { + cons_bind (new_bindings, success, result); + + for (piter = ub_p_a_pairs; piter; piter = cdr(aiter)) + { + cons_bind (param, arg, car(piter)); + + if (symbolp(arg)) { + obj_t *newbind = assoc(new_bindings, param); + if (newbind) { + bindings = dest_bind(bindings, arg, cdr(newbind)); + if (bindings == t) { + debuglf(spec_linenum, "binding mismatch on ~a " + "when returning from ~a", arg, sym, nao); + return nil; + } + } + } + } + + if (consp(success)) { + debuglcf(spec_linenum, + "function matched; advancing from line %ld to %ld", + data_lineno, c_num(cdr(success))); + data = car(success); + data_lineno = c_num(cdr(success)); + } else { + debuglf(spec_linenum, "function consumed entire file", nao); + data = nil; + } + } + } + + if ((spec = rest(spec)) == nil) + break; + + goto repeat_spec_same_data; + } + } + } + + if (dataline == nil) + return nil; + + { + cons_bind (new_bindings, success, + match_line(bindings, specline, dataline, zero, + spec_linenum, num(data_lineno), first(files))); + + if (nump(success) && c_num(success) < c_num(length_str(dataline))) { + debuglf(spec_linenum, "spec only matches line to position ~a: ~a", + success, dataline, nao); + return nil; + } + + if (!success) + return nil; + + bindings = new_bindings; + } + } + + return cons(bindings, if3(data, cons(data, num(data_lineno)), t)); +} + +int extract(obj_t *spec, obj_t *files, obj_t *predefined_bindings) +{ + cons_bind (bindings, success, match_files(spec, files, predefined_bindings, + t, nil)); + + if (!output_produced) { + if (!opt_nobindings) { + if (bindings) { + bindings = nreverse(bindings); + dump_bindings(bindings); + } + } + + if (!success) + puts("false"); + } + + return success ? 0 : EXIT_FAILURE; +} diff --git a/match.h b/match.h new file mode 100644 index 00000000..9fc2f8ed --- /dev/null +++ b/match.h @@ -0,0 +1,27 @@ +/* Copyright 2009 + * Kaz Kylheku + * Vancouver, Canada + * All rights reserved. + * + * BSD License: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. The name of the author may not be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + */ + +int extract(obj_t *spec, obj_t *filenames, obj_t *bindings); diff --git a/parser.h b/parser.h new file mode 100644 index 00000000..5d3e95cf --- /dev/null +++ b/parser.h @@ -0,0 +1,36 @@ +/* Copyright 2009 + * Kaz Kylheku + * Vancouver, Canada + * All rights reserved. + * + * BSD License: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. The name of the author may not be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + */ + +#include +long lineno; +extern int errors; +extern obj_t *yyin_stream; +extern const char *spec_file; +extern obj_t *spec_file_str; +int yyparse(void); +obj_t *get_spec(void); +void yyerrorf(const char *s, ...); +void yybadtoken(int tok, const char *context); diff --git a/parser.l b/parser.l new file mode 100644 index 00000000..7a5f0c17 --- /dev/null +++ b/parser.l @@ -0,0 +1,523 @@ +/* Copyright 2009 + * Kaz Kylheku + * Vancouver, Canada + * All rights reserved. + * + * BSD License: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. The name of the author may not be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + */ + +%{ + +#include +#include +#include +#include +#include +#include +#include +#include "y.tab.h" +#include "lib.h" +#include "gc.h" +#include "stream.h" +#include "parser.h" + +#define YY_NO_UNPUT + +#define YY_INPUT(buf, result, max_size) \ + do { \ + obj_t *c = nil; \ + int n, ch; \ + for (n = 0; n < max_size && \ + (c = get_char(yyin_stream)) && \ + (ch = c_chr(c)) != '\n'; ++n) \ + buf[n] = (char) ch; \ + if (ch == '\n') \ + buf[n++] = (char) ch; \ + result = n; \ + } while (0) + +obj_t *yyin_stream; + +long lineno = 1; +int opt_loglevel = 1; /* 0 - quiet; 1 - normal; 2 - verbose */ +int opt_nobindings = 0; +int opt_arraydims = 1; + +int errors; + +void yyerror(const char *s) +{ + yyerrorf("%s", s); +} + +void yyerrorf(const char *s, ...) +{ + if (opt_loglevel >= 1) { + va_list vl; + va_start (vl, s); + fprintf(stderr, "%s: (%s:%ld): ", progname, spec_file, lineno); + vfprintf(stderr, s, vl); + putc('\n', stderr); + va_end (vl); + } + errors++; +} + +void yybadtoken(int tok, const char *context) +{ + const char *problem = 0; + + switch (tok) { + case TEXT: problem = "text"; break; + case IDENT: problem = "identifier"; break; + case ALL: problem = "\"all\""; break; + case SOME: problem = "\"some\""; break; + case NONE: problem = "\"none\""; break; + case MAYBE: problem = "\"maybe\""; break; + case CASES: problem = "\"cases\""; break; + case AND: problem = "\"and\""; break; + case OR: problem = "\"or\""; break; + case END: problem = "\"end\""; break; + case COLLECT: problem = "\"collect\""; break; + case UNTIL: problem = "\"until\""; break; + case COLL: problem = "\"coll\""; break; + case OUTPUT: problem = "\"output\""; break; + case REPEAT: problem = "\"repeat\""; break; + case REP: problem = "\"rep\""; break; + case SINGLE: problem = "\"single\""; break; + case FIRST: problem = "\"first\""; break; + case LAST: problem = "\"last\""; break; + case EMPTY: problem = "\"empty\""; break; + case DEFINE: problem = "\"define\""; break; + case TRY: problem = "\"try\""; break; + case CATCH: problem = "\"catch\""; break; + case FINALLY: problem = "\"finally\""; break; + case NUMBER: problem = "\"number\""; break; + case REGCHAR: problem = "regular expression character"; break; + case LITCHAR: problem = "string literal character"; break; + } + + if (problem != 0) + if (context) + yyerrorf("misplaced %s in %s", problem, context); + else + yyerrorf("unexpected %s", problem); + else + if (context) + yyerrorf("unterminated %s", context); + else + yyerrorf("unexpected end of input"); +} + +static int char_esc(int letter) +{ + switch (letter) { + case 'a': return '\a'; + case 'b': return '\b'; + case 't': return '\t'; + case 'n': return '\n'; + case 'v': return '\v'; + case 'f': return '\f'; + case 'r': return '\r'; + case 'e': return 27; + case '"': return '"'; + case '\'': return '\''; + case '`': return '`'; + } + + abort(); +} + +static int num_esc(char *num) +{ + if (num[0] == 'x') { + if (strlen(num) > 3) + yyerror("too many digits in hex character escape"); + return strtol(num + 1, 0, 16); + } else { + if (strlen(num) > 3) + yyerror("too many digits in octal character escape"); + return strtol(num, 0, 8); + } +} + +%} + +%option stack + +TOK [a-zA-Z_][a-zA-Z0-9_]*|[+-]?[0-9]+ +ID_END [^a-zA-Z0-9_] +NUM_END [^0-9] +WS [\t ]* +HEX [0-9A-Fa-f] +OCT [0-7] + +%x SPECIAL NESTED REGEX REGCLASS STRLIT CHRLIT QSILIT + +%% + +{TOK} { + long val; + char *errp; + + errno = 0; + + val = strtol(yytext, &errp, 10); + + if (yy_top_state() == INITIAL + || yy_top_state() == QSILIT) + yy_pop_state(); + + if (*errp != 0) { + /* not a number */ + yylval.lexeme = strdup(yytext); + return IDENT; + } + + if ((val == LONG_MAX || val == LONG_MIN) + && errno == ERANGE) + yyerror("numeric overflow in token"); + + yylval.num = val; + return NUMBER; + } + +\({WS}all{WS}\) { + yy_pop_state(); + return ALL; + } + +\({WS}some{WS}\) { + yy_pop_state(); + return SOME; + } + +\({WS}none{WS}\) { + yy_pop_state(); + return NONE; + } + +\({WS}maybe{WS}\) { + yy_pop_state(); + return MAYBE; + } + +\({WS}cases{WS}\) { + yy_pop_state(); + return CASES; + } + +\({WS}and{WS}\) { + yy_pop_state(); + return AND; + } + +\({WS}or{WS}\) { + yy_pop_state(); + return OR; + } + +\({WS}end{WS}\) { + yy_pop_state(); + return END; + } + +\({WS}collect{WS}\) { + yy_pop_state(); + return COLLECT; + } + +\({WS}coll{WS}\) { + yy_pop_state(); + return COLL; + } + +\({WS}until{WS}\) { + yy_pop_state(); + return UNTIL; + } + +\({WS}output/{ID_END} { + yy_push_state(NESTED); + return OUTPUT; + } + +\({WS}repeat{WS}\) { + yy_pop_state(); + return REPEAT; + } + + +\({WS}rep{WS}\) { + yy_pop_state(); + return REP; + } + +\({WS}single{WS}\) { + yy_pop_state(); + return SINGLE; + } + +\({WS}first{WS}\) { + yy_pop_state(); + return FIRST; + } + +\({WS}last{WS}\) { + yy_pop_state(); + return LAST; + } + +\({WS}empty{WS}\) { + yy_pop_state(); + return EMPTY; + } + +\({WS}define/{ID_END} { + yy_push_state(NESTED); + return DEFINE; + } + +\({WS}try{WS}\) { + yy_pop_state(); + return TRY; + } + +\({WS}catch/{ID_END} { + yy_push_state(NESTED); + return CATCH; + } + +\({WS}finally{WS}\) { + yy_pop_state(); + return FINALLY; + } + +\{|\( { + yy_push_state(NESTED); + if (yy_top_state() == INITIAL + || yy_top_state() == QSILIT) + yy_pop_state(); + return yytext[0]; + } + +\}|\) { + yy_pop_state(); + if (yy_top_state() == INITIAL + || yy_top_state() == QSILIT) + yy_pop_state(); + return yytext[0]; + } + +[\t ]+ { /* Eat whitespace in directive */ } + +\" { + yy_push_state(STRLIT); + return '"'; + } + +\' { + yy_push_state(CHRLIT); + return '\''; + } + +` { + yy_push_state(QSILIT); + return '`'; + } + +@ { + yy_pop_state(); + yylval.lexeme = strdup("@"); + return TEXT; + } + +\n { + lineno++; + } + +[/] { + yy_push_state(REGEX); + return '/'; + } + +\. { + yylval.chr = '.'; + return '.'; + } + +[\\][abtnvfre] { + char lexeme[2]; + lexeme[0] = char_esc(yytext[1]); + lexeme[1] = 0; + yylval.lexeme = strdup(lexeme); + yy_pop_state(); + return TEXT; + } + +[\\](x{HEX}+|{OCT}+) { + char lexeme[2]; + lexeme[0] = num_esc(yytext + 1); + lexeme[1] = 0; + yylval.lexeme = strdup(lexeme); + yy_pop_state(); + return TEXT; + } + +. { + yyerrorf("bad character in directive: '%c'", + yytext[0]); + } + +[/] { + yy_pop_state(); + if (yy_top_state() == INITIAL + || yy_top_state() == QSILIT) + yy_pop_state(); + yylval.chr = '/'; + return '/'; + } + + +[\\][abtnvfre] { + yylval.chr = char_esc(yytext[1]); + return REGCHAR; + } + +[\\](x{HEX}+|{OCT}+) { + yylval.chr = num_esc(yytext + 1); + return REGCHAR; + } + +\n { + lineno++; + yyerror("newline in regex"); + } + +[.*?+^] { + yylval.chr = yytext[0]; + return yytext[0]; + } + + +[\[\]\-] { + yylval.chr = yytext[0]; + return yytext[0]; + } + +[()|] { + yylval.chr = yytext[0]; + return yytext[0]; + } + +[\\]. { + yylval.chr = yytext[1]; + return REGCHAR; + } + +. { + yylval.chr = yytext[0]; + return REGCHAR; + } + +[^@\n]+ { + yylval.lexeme = strdup(yytext); + return TEXT; + } + +\n { + lineno++; + return '\n'; + } + +@{WS}\* { + yy_push_state(SPECIAL); + return '*'; + } + +@ { + yy_push_state(SPECIAL); + } + +^@#.*\n { + /* eat whole line comment */ + lineno++; + } + +@#.* { + /* comment to end of line */ + } + +\" { + yy_pop_state(); + return yytext[0]; + } + +\' { + yy_pop_state(); + return yytext[0]; + } + +` { + yy_pop_state(); + return yytext[0]; + } + +[\\][abtnvfre"`'] { + yylval.chr = char_esc(yytext[1]); + return LITCHAR; + } + +[\\](x{HEX}+|{OCT}+) { + yylval.chr = num_esc(yytext + 1); + return LITCHAR; + } +\n { + yyerror("newline in string literal"); + lineno++; + yylval.chr = yytext[0]; + return LITCHAR; + } + +\n { + yyerror("newline in character literal"); + lineno++; + yylval.chr = yytext[0]; + return LITCHAR; + } + +\n { + yyerror("newline in string quasiliteral"); + lineno++; + yylval.chr = yytext[0]; + return LITCHAR; + } + +@ { + yy_push_state(SPECIAL); + } + +. { + yylval.chr = yytext[0]; + return LITCHAR; + } + +%% diff --git a/parser.y b/parser.y new file mode 100644 index 00000000..9b440919 --- /dev/null +++ b/parser.y @@ -0,0 +1,593 @@ +/* Copyright 2009 + * Kaz Kylheku + * Vancouver, Canada + * All rights reserved. + * + * BSD License: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. The name of the author may not be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + */ + +%{ + +#include +#include +#include +#include +#include "lib.h" +#include "regex.h" +#include "parser.h" + +int yylex(void); +void yyerror(const char *); + +obj_t *repeat_rep_helper(obj_t *sym, obj_t *main, obj_t *parts); +obj_t *define_transform(obj_t *define_form); +obj_t *lit_char_helper(obj_t *litchars); + +static obj_t *parsed_spec; + +%} + +%union { + char *lexeme; + union obj *obj; + char chr; + long num; +} + +%token TEXT IDENT ALL SOME NONE MAYBE CASES AND OR END COLLECT +%token UNTIL COLL OUTPUT REPEAT REP SINGLE FIRST LAST EMPTY DEFINE +%token TRY CATCH FINALLY +%token NUMBER +%token REGCHAR LITCHAR + +%type spec clauses clause all_clause some_clause none_clause maybe_clause +%type cases_clause collect_clause clause_parts additional_parts +%type output_clause define_clause try_clause catch_clauses_opt +%type line elems_opt elems elem var var_op +%type list exprs expr out_clauses out_clauses_opt out_clause +%type repeat_clause repeat_parts_opt o_line +%type o_elems_opt o_elems_opt2 o_elems o_elem rep_elem rep_parts_opt +%type regex regexpr regbranch +%type regterm regclass regclassterm regrange +%type strlit chrlit quasilit quasi_items quasi_item litchars +%type regchar +%nonassoc ALL SOME NONE MAYBE CASES AND OR END COLLECT UNTIL COLL +%nonassoc OUTPUT REPEAT REP FIRST LAST EMPTY DEFINE +%nonassoc '{' '}' '[' ']' '(' ')' +%right IDENT TEXT NUMBER +%left '|' '/' +%right '*' '?' '+' +%right '^' '.' '\\' REGCHAR LITCHAR + +%% + +spec : clauses { parsed_spec = $1; } + | { parsed_spec = nil; } + | error { parsed_spec = nil; + yybadtoken(yychar, 0); } + ; + +clauses : clause { $$ = cons($1, nil); } + | clause clauses { $$ = cons($1, $2); } + ; + +clause : all_clause { $$ = list(num(lineno - 1), $1, nao); } + | some_clause { $$ = list(num(lineno - 1), $1, nao); } + | none_clause { $$ = list(num(lineno - 1), $1, nao); } + | maybe_clause { $$ = list(num(lineno - 1), $1, nao); } + | cases_clause { $$ = list(num(lineno - 1), $1, nao); } + | collect_clause { $$ = list(num(lineno - 1), $1, nao); } + | define_clause { $$ = list(num(lineno - 1), + define_transform($1), nao); } + | try_clause { $$ = list(num(lineno - 1), $1, nao); } + | output_clause { $$ = list(num(lineno - 1), $1, nao); } + | line { $$ = $1; } + | repeat_clause { $$ = nil; + yyerror("repeat outside of output"); } + ; + +all_clause : ALL newl clause_parts { $$ = cons(all, $3); } + | ALL newl error { $$ = nil; + yybadtoken(yychar, + "all clause"); } + | ALL newl END newl { $$ = nil; + yyerror("empty all clause"); } + + ; + +some_clause : SOME newl clause_parts { $$ = cons(some, $3); } + | SOME newl error { $$ = nil; + yybadtoken(yychar, + "some clause"); } + | SOME newl END newl { $$ = nil; + yyerror("empty some clause"); } + ; + +none_clause : NONE newl clause_parts { $$ = cons(none, $3); } + | NONE newl error { $$ = nil; + yybadtoken(yychar, + "none clause"); } + | NONE newl END newl { $$ = nil; + yyerror("empty none clause"); } + ; + +maybe_clause : MAYBE newl clause_parts { $$ = cons(maybe, $3); } + | MAYBE newl error { $$ = nil; + yybadtoken(yychar, + "maybe clause"); } + | MAYBE newl END newl { $$ = nil; + yyerror("empty maybe clause"); } + ; + +cases_clause : CASES newl clause_parts { $$ = cons(cases, $3); } + | CASES newl error { $$ = nil; + yybadtoken(yychar, + "cases clause"); } + | CASES newl END newl { $$ = nil; + yyerror("empty cases clause"); } + ; + +collect_clause : COLLECT newl clauses END newl { $$ = list(collect, $3, nao); } + | COLLECT newl clauses + UNTIL newl clauses END newl { $$ = list(collect, $3, + $6, nao); } + | COLLECT newl error { $$ = nil; + if (yychar == UNTIL || yychar == END) + yyerror("empty collect"); + else + yybadtoken(yychar, + "collect clause"); } + ; + +clause_parts : clauses additional_parts { $$ = cons($1, $2); } + ; + +additional_parts : END newl { $$ = nil; } + | AND newl clauses additional_parts { $$ = cons($3, $4); } + | OR newl clauses additional_parts { $$ = cons($3, $4); } + ; + +line : elems_opt '\n' { $$ = $1; } + ; + +elems_opt : elems { $$ = cons(num(lineno - 1), $1); } + | { $$ = nil; } + ; + +elems : elem { $$ = cons($1, nil); } + | elem elems { $$ = cons($1, $2); } + | rep_elem { $$ = nil; + yyerror("rep outside of output"); } + ; + +elem : TEXT { $$ = string($1); } + | var { $$ = $1; } + | list { $$ = $1; } + | regex { $$ = cons(regex_compile($1), $1); } + | COLL elems END { $$ = list(coll, $2, nao); } + | COLL elems + UNTIL elems END { $$ = list(coll, $2, $4, nao); } + | COLL error { $$ = nil; + yybadtoken(yychar, "coll clause"); } + ; + +define_clause : DEFINE exprs ')' newl + clauses + END newl { $$ = list(define, $2, $5, nao); } + | DEFINE ')' newl + clauses + END newl { $$ = list(define, nil, $4, nao); } + | DEFINE exprs ')' newl + END newl { $$ = list(define, $2, nao); } + | DEFINE ')' newl + END newl { $$ = list(define, nao); } + | DEFINE error { yybadtoken(yychar, "list expression"); } + | DEFINE exprs ')' newl + error { yybadtoken(yychar, "define"); } + | DEFINE ')' newl + error { yybadtoken(yychar, "define"); } + ; + +try_clause : TRY newl + clauses + catch_clauses_opt + END newl { $$ = list(try, + flatten(mapcar(func_n1(second), + $4)), + $3, $4, nao); } + | TRY newl + error { $$ = nil; + yybadtoken(yychar, "try clause"); } + ; + +catch_clauses_opt : CATCH ')' newl + clauses + catch_clauses_opt { $$ = cons(list(catch, nil, $4, nao), + $5); } + | CATCH exprs ')' newl + clauses + catch_clauses_opt { $$ = cons(list(catch, $2, $5, nao), + $6); } + | FINALLY newl + clauses { $$ = cons(list(finally, nil, + $3, nao), + nil); } + | { $$ = nil; } + ; + + +output_clause : OUTPUT ')' o_elems '\n' + out_clauses + END newl { $$ = list(output, $5, $3, nao); } + | OUTPUT ')' newl + out_clauses + END newl { $$ = list(output, $4, nao); } + | OUTPUT exprs ')' newl + out_clauses + END newl { $$ = list(output, $5, nil, $2, nao); } + | OUTPUT exprs ')' o_elems '\n' + out_clauses + END newl { yyerror("invalid combination of old and " + "new syntax in output directive"); } + | OUTPUT error { yybadtoken(yychar, "list expression"); } + | OUTPUT ')' o_elems '\n' + error { $$ = nil; + yybadtoken(yychar, "output clause"); } + | OUTPUT ')' newl + error { $$ = nil; + yybadtoken(yychar, "output clause"); } + | OUTPUT exprs ')' o_elems '\n' + error { $$ = nil; + yybadtoken(yychar, "output clause"); } + | OUTPUT exprs ')' newl + error { $$ = nil; + yybadtoken(yychar, "output clause"); } + ; + +out_clauses : out_clause { $$ = cons($1, nil); } + | out_clause out_clauses { $$ = cons($1, $2); } + ; + +out_clause : repeat_clause { $$ = list(num(lineno - 1), $1, nao); } + | o_line { $$ = $1; } + | all_clause { $$ = nil; + yyerror("match clause in output"); } + | some_clause { $$ = nil; + yyerror("match clause in output"); } + | none_clause { $$ = nil; + yyerror("match clause in output"); } + | maybe_clause { $$ = nil; + yyerror("match clause in output"); } + | cases_clause { $$ = nil; + yyerror("match clause in output"); } + | collect_clause { $$ = nil; + yyerror("match clause in output"); } + | define_clause { $$ = nil; + yyerror("match clause in output"); } + + | try_clause { $$ = nil; + yyerror("match clause in output"); } + | output_clause { $$ = nil; + yyerror("match clause in output"); } + ; + +repeat_clause : REPEAT newl + out_clauses + repeat_parts_opt + END newl { $$ = repeat_rep_helper(repeat, $3, $4); } + | REPEAT newl + error { $$ = nil; + yybadtoken(yychar, "repeat clause"); } + ; + +repeat_parts_opt : SINGLE newl + out_clauses_opt + repeat_parts_opt { $$ = cons(cons(single, $3), $4); } + | FIRST newl + out_clauses_opt + repeat_parts_opt { $$ = cons(cons(frst, $3), $4); } + | LAST newl + out_clauses_opt + repeat_parts_opt { $$ = cons(cons(lst, $3), $4); } + | EMPTY newl + out_clauses_opt + repeat_parts_opt { $$ = cons(cons(empty, $3), $4); } + | /* empty */ { $$ = nil; } + ; + + +out_clauses_opt : out_clauses { $$ = $1; } + | /* empty */ { $$ = null_list; } + +o_line : o_elems_opt '\n' { $$ = $1; } + ; + +o_elems_opt : o_elems { $$ = cons(num(lineno - 1), $1); } + | { $$ = nil; } + ; + +o_elems_opt2 : o_elems { $$ = $1; } + | { $$ = null_list; } + ; + +o_elems : o_elem { $$ = cons($1, nil); } + | o_elem o_elems { $$ = cons($1, $2); } + ; + +o_elem : TEXT { $$ = string($1); } + | var { $$ = $1; } + | rep_elem { $$ = $1; } + ; + +rep_elem : REP o_elems + rep_parts_opt END { $$ = repeat_rep_helper(rep, $2, $3); } + | REP error { $$ = nil; yybadtoken(yychar, "rep clause"); } + ; + +rep_parts_opt : SINGLE o_elems_opt2 + rep_parts_opt { $$ = cons(cons(single, $2), $3); } + | FIRST o_elems_opt2 + rep_parts_opt { $$ = cons(cons(frst, $2), $3); } + | LAST o_elems_opt2 + rep_parts_opt { $$ = cons(cons(lst, $2), $3); } + | EMPTY o_elems_opt2 + rep_parts_opt { $$ = cons(cons(empty, $2), $3); } + | /* empty */ { $$ = nil; } + ; + + +/* This sucks, but factoring '*' into a nonterminal + * that generates an empty phrase causes reduce/reduce conflicts. + */ +var : IDENT { $$ = list(var, intern(string($1)), nao); } + | IDENT elem { $$ = list(var, intern(string($1)), $2, nao); } + | '{' IDENT '}' { $$ = list(var, intern(string($2)), nao); } + | '{' IDENT '}' elem { $$ = list(var, intern(string($2)), $4, nao); } + | '{' IDENT regex '}' { $$ = list(var, intern(string($2)), + nil, cons(regex_compile($3), $3), + nao); } + | '{' IDENT NUMBER '}' { $$ = list(var, intern(string($2)), + nil, num($3), nao); } + | var_op IDENT { $$ = list(var, intern(string($2)), + nil, $1, nao); } + | var_op IDENT elem { $$ = list(var, intern(string($2)), + $3, $1, nao); } + | var_op '{' IDENT '}' { $$ = list(var, intern(string($3)), + nil, $1, nao); } + | var_op '{' IDENT '}' elem { $$ = list(var, intern(string($3)), + $5, $1, nao); } + | IDENT error { $$ = nil; + yybadtoken(yychar, "variable spec"); } + | var_op error { $$ = nil; + yybadtoken(yychar, "variable spec"); } + ; + +var_op : '*' { $$ = t; } + ; + +list : '(' exprs ')' { $$ = $2; } + | '(' ')' { $$ = nil; } + | '(' error { $$ = nil; + yybadtoken(yychar, "list expression"); } + ; + +exprs : expr { $$ = cons($1, nil); } + | expr exprs { $$ = cons($1, $2); } + | expr '.' expr { $$ = cons($1, $3); } + ; + +expr : IDENT { $$ = intern(string($1)); } + | NUMBER { $$ = num($1); } + | list { $$ = $1; } + | regex { $$ = cons(regex_compile($1), $1); } + | chrlit { $$ = $1; } + | strlit { $$ = $1; } + | quasilit { $$ = $1; } + ; + +regex : '/' regexpr '/' { $$ = $2; } + | '/' '/' { $$ = nil; } + | '/' error { $$ = nil; + yybadtoken(yychar, "regex"); } + ; + +regexpr : regbranch { $$ = $1; } + | regbranch '|' regbranch { $$ = list(list(or, $1, + $3, nao), nao); } + ; + +regbranch : regterm { $$ = cons($1, nil); } + | regterm regbranch { $$ = cons($1, $2); } + ; + +regterm : '[' regclass ']' { $$ = cons(set, $2); } + | '[' '^' regclass ']' { $$ = cons(cset, $3); } + | '.' { $$ = wild; } + | '^' { $$ = chr('^'); } + | ']' { $$ = chr(']'); } + | '-' { $$ = chr('-'); } + | regterm '*' { $$ = list(zeroplus, $1, nao); } + | regterm '+' { $$ = list(oneplus, $1, nao); } + | regterm '?' { $$ = list(optional, $1, nao); } + | REGCHAR { $$ = chr($1); } + | '(' regexpr ')' { $$ = cons(compound, $2); } + | '(' error { $$ = nil; + yybadtoken(yychar, "regex subexpression"); } + | '[' error { $$ = nil; + yybadtoken(yychar, "regex character class"); } + ; + +regclass : regclassterm { $$ = cons($1, nil); } + | regclassterm regclass { $$ = cons($1, $2); } + ; + +regclassterm : regrange { $$ = $1; } + | regchar { $$ = chr($1); } + ; + +regrange : regchar '-' regchar { $$ = cons(chr($1), chr($3)); } + +regchar : '?' { $$ = '?'; } + | '.' { $$ = '.'; } + | '*' { $$ = '*'; } + | '+' { $$ = '+'; } + | '(' { $$ = '('; } + | ')' { $$ = ')'; } + | '^' { $$ = '^'; } + | '|' { $$ = '|'; } + | REGCHAR { $$ = $1; } + ; + +newl : '\n' + | error '\n' { yyerror("newline expected after directive"); + yyerrok; } + ; + +strlit : '"' '"' { $$ = null_string; } + | '"' litchars '"' { $$ = lit_char_helper($2); } + | '"' error { yybadtoken(yychar, "string literal"); } + ; + +chrlit : '\'' '\'' { yyerror("empty character literal"); } + { $$ = nil; } + | '\'' litchars '\'' { $$ = car($2); + if (cdr($2)) + yyerror("multiple characters in " + "character literal"); } + | '\'' error { $$ = nil; + yybadtoken(yychar, "character literal"); } + ; + +quasilit : '`' '`' { $$ = null_string; } + | '`' quasi_items '`' { $$ = cons(quasi, $2); } + | '`' error { $$ = nil; + yybadtoken(yychar, "string literal"); } + ; + +quasi_items : quasi_item { $$ = cons($1, nil); } + | quasi_item quasi_items { $$ = cons($1, $2); } + ; + +quasi_item : litchars { $$ = lit_char_helper($1); } + | TEXT { $$ = string($1); } + | var { $$ = $1; } + | list { $$ = $1; } + ; + +litchars : LITCHAR { $$ = cons(chr($1), nil); } + | LITCHAR litchars { $$ = cons(chr($1), $2); } + ; + + +%% + +obj_t *repeat_rep_helper(obj_t *sym, obj_t *main, obj_t *parts) +{ + obj_t *single_parts = nil; + obj_t *first_parts = nil; + obj_t *last_parts = nil; + obj_t *empty_parts = nil; + obj_t *iter; + + for (iter = parts; iter != nil; iter = cdr(iter)) { + obj_t *part = car(iter); + obj_t *sym = car(part); + obj_t *clauses = cdr(part); + + if (sym == single) + single_parts = nappend2(single_parts, clauses); + else if (sym == frst) + first_parts = nappend2(first_parts, clauses); + else if (sym == lst) + last_parts = nappend2(last_parts, clauses); + else if (sym == empty) + empty_parts = nappend2(empty_parts, clauses); + else + abort(); + } + + return list(sym, main, single_parts, first_parts, + last_parts, empty_parts, nao); +} + +obj_t *define_transform(obj_t *define_form) +{ + obj_t *sym = first(define_form); + obj_t *args = second(define_form); + + if (define_form == nil) + return nil; + + assert (sym == define); + + if (args == nil) { + yyerror("define requires arguments"); + return define_form; + } + + if (!consp(args) || !listp(cdr(args))) { + yyerror("bad define argument syntax"); + return define_form; + } else { + obj_t *name = first(args); + obj_t *params = second(args); + + if (!symbolp(name)) { + yyerror("function name must be a symbol"); + return define_form; + } + + if (!proper_listp(params)) { + yyerror("invalid function parameter list"); + return define_form; + } + + if (!all_satisfy(params, func_n1(symbolp), nil)) + yyerror("function parameters must be symbols"); + } + + return define_form; +} + +obj_t *lit_char_helper(obj_t *litchars) +{ + obj_t *ret = nil; + + if (litchars) { + obj_t *len = length(litchars), *iter, *ix; + ret = mkustring(len); + for (iter = litchars, ix = zero; + iter; + iter = cdr(iter), ix = plus(ix, one)) + { + chr_str_set(ret, ix, car(iter)); + } + } else { + ret = nil; + } + return ret; +} + +obj_t *get_spec(void) +{ + return parsed_spec; +} + diff --git a/regex.c b/regex.c index a48b3ff5..51c95bc4 100644 --- a/regex.c +++ b/regex.c @@ -28,7 +28,10 @@ #include #include #include +#include +#include #include "lib.h" +#include "unwind.h" #include "regex.h" #define NFA_SET_SIZE 512 @@ -340,7 +343,7 @@ int nfa_all_states(nfa_state_t **inout, int num, int visited) nfa_state_t *s = inout[i]; if (num >= NFA_SET_SIZE) - abort(); + internal_error("NFA set size exceeded"); switch (s->a.kind) { case nfa_accept: @@ -372,7 +375,7 @@ int nfa_all_states(nfa_state_t **inout, int num, int visited) } if (num > NFA_SET_SIZE) - abort(); + internal_error("NFA set size exceeded"); return num; } @@ -417,7 +420,7 @@ int nfa_closure(nfa_state_t **stack, nfa_state_t **in, int nin, push them on the stack, and mark them as visited. */ for (i = 0; i < nin; i++) { if (stackp >= NFA_SET_SIZE) - abort(); + internal_error("NFA set size exceeded"); in[i]->a.visited = visited; stack[stackp++] = in[i]; out[nout++] = in[i]; @@ -429,7 +432,7 @@ int nfa_closure(nfa_state_t **stack, nfa_state_t **in, int nin, nfa_state_t *top = stack[--stackp]; if (nout >= NFA_SET_SIZE) - abort(); + internal_error("NFA set size exceeded"); /* Only states of type nfa_empty are interesting. Each such state at most two epsilon transitions. */ @@ -457,7 +460,7 @@ int nfa_closure(nfa_state_t **stack, nfa_state_t **in, int nin, } if (nout > NFA_SET_SIZE) - abort(); + internal_error("NFA set size exceeded"); return nout; } @@ -497,7 +500,7 @@ int nfa_move(nfa_state_t **in, int nin, nfa_state_t **out, int ch) among a common set of leading struct members in the union. */ if (nmove >= NFA_SET_SIZE) - abort(); + internal_error("NFA set size exceeded"); out[nmove++] = s->o.trans; } @@ -580,7 +583,7 @@ static void regex_destroy(obj_t *regex) } static struct cobj_ops regex_obj_ops = { - regex_equal, cobj_print_op, regex_destroy + regex_equal, cobj_print_op, regex_destroy, 0, }; obj_t *regex_compile(obj_t *regex_sexp) diff --git a/stream.c b/stream.c new file mode 100644 index 00000000..f91ae753 --- /dev/null +++ b/stream.c @@ -0,0 +1,641 @@ +/* Copyright 2009 + * Kaz Kylheku + * Vancouver, Canada + * All rights reserved. + * + * BSD License: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. The name of the author may not be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include "lib.h" +#include "gc.h" +#include "unwind.h" +#include "stream.h" + +obj_t *std_input, *std_output, *std_error; + +struct strm_ops { + struct cobj_ops cobj_ops; + obj_t *(*put_string)(obj_t *, const char *); + obj_t *(*put_char)(obj_t *, int); + obj_t *(*get_line)(obj_t *); + obj_t *(*get_char)(obj_t *); + obj_t *(*vcformat)(obj_t *, const char *fmt, va_list vl); + obj_t *(*vformat)(obj_t *, const char *fmt, va_list vl); + obj_t *(*close)(obj_t *); +}; + +static obj_t *common_equal(obj_t *self, obj_t *other) +{ + return self == other ? t : nil; +} + +static void common_destroy(obj_t *obj) +{ + (void) close_stream(obj); +} + +obj_t *common_vformat(obj_t *stream, const char *fmt, va_list vl) +{ + int ch; + + for (; (ch = *fmt) != 0; fmt++) { + obj_t *obj; + + if (ch == '~') { + ch = *++fmt; + if (ch == 0) + abort(); + switch (ch) { + case '~': + put_cchar(stream, ch); + continue; + case 'a': + obj = va_arg(vl, obj_t *); + if (obj == nao) + abort(); + obj_pprint(obj, stream); + continue; + case 's': + obj = va_arg(vl, obj_t *); + if (obj == nao) + abort(); + obj_print(obj, stream); + continue; + default: + abort(); + } + continue; + } + + put_cchar(stream, ch); + } + + if (va_arg(vl, obj_t *) != nao) + internal_error("unterminated format argument list"); + return t; +} + +static obj_t *stdio_put_string(obj_t *stream, const char *s) +{ + FILE *f = (FILE *) stream->co.handle; + return (f && fputs(s, f) != EOF) ? t : nil; +} + +static obj_t *stdio_put_char(obj_t *stream, int ch) +{ + FILE *f = (FILE *) stream->co.handle; + return (f && putc(ch, f) != EOF) ? t : nil; +} + +static char *snarf_line(FILE *in) +{ + const size_t min_size = 512; + size_t size = 0; + size_t fill = 0; + char *buf = 0; + + for (;;) { + int ch = getc(in); + + if (ch == EOF && buf == 0) + break; + + if (fill >= size) { + size_t newsize = size ? size * 2 : min_size; + buf = chk_realloc(buf, newsize); + size = newsize; + } + + if (ch == '\n' || ch == EOF) { + buf[fill++] = 0; + break; + } + buf[fill++] = ch; + } + + if (buf) + buf = chk_realloc(buf, fill); + + return buf; +} + +static obj_t *stdio_get_line(obj_t *stream) +{ + if (stream->co.handle == 0) { + return nil; + } else { + char *line = snarf_line((FILE *) stream->co.handle); + if (!line) + return nil; + return string(line); + } +} + +obj_t *stdio_get_char(obj_t *stream) +{ + FILE *f = (FILE *) stream->co.handle; + if (f) { + int ch = getc(f); + return (ch != EOF) ? chr(ch) : nil; + } + return nil; +} + +obj_t *stdio_vcformat(obj_t *stream, const char *fmt, va_list vl) +{ + FILE *f = (FILE *) stream->co.handle; + if (f) { + int n = vfprintf(f, fmt, vl); + return (n >= 0) ? num(n) : nil; + } + return nil; +} + +static obj_t *stdio_close(obj_t *stream) +{ + + FILE *f = (FILE *) stream->co.handle; + + if (f != 0 && f != stdin && f != stdout) { + int result = fclose(f); + stream->co.handle = 0; + return result != EOF ? t : nil; + } + return nil; +} + +static struct strm_ops stdio_ops = { + { common_equal, + cobj_print_op, + common_destroy, + 0 }, + stdio_put_string, + stdio_put_char, + stdio_get_line, + stdio_get_char, + stdio_vcformat, + common_vformat, + stdio_close +}; + +static obj_t *pipe_close(obj_t *stream) +{ + FILE *f = (FILE *) stream->co.handle; + + if (f != 0) { + int result = pclose(f); + stream->co.handle = 0; + return result >= 0 ? t : nil; + } + return nil; +} + +static struct strm_ops pipe_ops = { + { common_equal, + cobj_print_op, + common_destroy, + 0 }, + stdio_put_string, + stdio_put_char, + stdio_get_line, + stdio_get_char, + stdio_vcformat, + common_vformat, + pipe_close +}; + +void string_in_stream_mark(obj_t *stream) +{ + obj_t *stuff = (obj_t *) stream->co.handle; + gc_mark(stuff); +} + +static obj_t *string_in_get_line(obj_t *stream) +{ + obj_t *pair = (obj_t *) stream->co.handle; + obj_t *string = car(pair); + obj_t *pos = cdr(pair); + + /* TODO: broken, should only scan to newline */ + if (lt(pos, length(string))) { + obj_t *result = sub_str(string, pos, nil); + *cdr_l(pair) = length_str(string); + return result; + } + + return nil; +} + +static obj_t *string_in_get_char(obj_t *stream) +{ + obj_t *pair = (obj_t *) stream->co.handle; + obj_t *string = car(pair); + obj_t *pos = cdr(pair); + + if (lt(pos, length_str(string))) { + *cdr_l(pair) = plus(pos, one); + return chr_str(string, pos); + } + + return nil; +} + +static struct strm_ops string_in_ops = { + { common_equal, + cobj_print_op, + 0, + string_in_stream_mark }, + 0, + 0, + string_in_get_line, + string_in_get_char, + 0, + 0, + 0 +}; + +struct string_output { + char *buf; + size_t size; + size_t fill; +}; + +static void string_out_stream_destroy(obj_t *stream) +{ + struct string_output *so = (struct string_output *) stream->co.handle; + + if (so) { + free(so->buf); + so->buf = 0; + free(so); + stream->co.handle = 0; + } +} + +static obj_t *string_out_put_string(obj_t *stream, const char *s) +{ + struct string_output *so = (struct string_output *) stream->co.handle; + + if (so == 0) { + return nil; + } else { + size_t len = strlen(s); + size_t old_size = so->size; + size_t required_size = len + so->fill + 1; + + if (required_size < len) + return nil; + + while (so->size <= required_size) { + so->size *= 2; + if (so->size < old_size) + return nil; + } + + so->buf = chk_realloc(so->buf, so->size); + memcpy(so->buf + so->fill, s, len + 1); + so->fill += len; + return t; + } +} + +static obj_t *string_out_put_char(obj_t *stream, int ch) +{ + char mini[2]; + mini[0] = ch; + mini[1] = 0; + return string_out_put_string(stream, mini); +} + +obj_t *string_out_vcformat(obj_t *stream, const char *fmt, va_list vl) +{ + struct string_output *so = (struct string_output *) stream->co.handle; + + if (so == 0) { + return nil; + } else { + int nchars, nchars2; + char dummy_buf[1]; + size_t old_size = so->size; + size_t required_size; + va_list vl_copy; + +#if defined va_copy + va_copy (vl_copy, vl); +#elif defined __va_copy + __va_copy (vl_copy, vl); +#else + vl_copy = vl; +#endif + + nchars = vsnprintf(dummy_buf, 0, fmt, vl_copy); + +#if defined va_copy || defined __va_copy + va_end (vl_copy); +#endif + + bug_unless (nchars >= 0); + + required_size = so->fill + nchars + 1; + + if (required_size < so->fill) + return nil; + + while (so->size <= required_size) { + so->size *= 2; + if (so->size < old_size) + return nil; + } + + so->buf = chk_realloc(so->buf, so->size); + nchars2 = vsnprintf(so->buf + so->fill, so->size-so->fill, fmt, vl); + bug_unless (nchars == nchars2); + so->fill += nchars; + return t; + } +} + +static struct strm_ops string_out_ops = { + { common_equal, + cobj_print_op, + string_out_stream_destroy, + 0 }, + string_out_put_string, + string_out_put_char, + 0, + 0, + string_out_vcformat, + common_vformat, + 0, +}; + +static obj_t *dir_get_line(obj_t *stream) +{ + DIR *handle = (DIR *) stream->co.handle; + + if (handle == 0) { + return nil; + } else { + for (;;) { + struct dirent *e = readdir(handle); + if (!e) + return nil; + if (!strcmp(e->d_name, ".") || !strcmp(e->d_name, "..")) + continue; + return string(chk_strdup(e->d_name)); + } + } +} + +static obj_t *dir_close(obj_t *stream) +{ + if (stream->co.handle != 0) { + closedir((DIR *) stream->co.handle); + stream->co.handle = 0; + return t; + } + + return nil; +} + +static struct strm_ops dir_ops = { + { common_equal, + cobj_print_op, + common_destroy, + 0 }, + 0, + 0, + dir_get_line, + 0, + 0, + 0, + dir_close +}; + + +obj_t *make_stdio_stream(FILE *handle, obj_t *input, obj_t *output) +{ + return cobj((void *) handle, stream_t, &stdio_ops.cobj_ops); +} + +obj_t *make_pipe_stream(FILE *handle, obj_t *input, obj_t *output) +{ + return cobj((void *) handle, stream_t, &pipe_ops.cobj_ops); +} + +obj_t *make_string_input_stream(obj_t *string) +{ + return cobj((void *) cons(string, zero), stream_t, &string_in_ops.cobj_ops); +} + +obj_t *make_string_output_stream(void) +{ + struct string_output *so = chk_malloc(sizeof *so); + so->size = 128; + so->buf = chk_malloc(so->size); + so->fill = 0; + so->buf[0] = 0; + return cobj((void *) so, stream_t, &string_out_ops.cobj_ops); +} + +obj_t *get_string_from_stream(obj_t *stream) +{ + type_check (stream, COBJ); + type_assert (stream->co.cls == stream_t, ("~a is not a stream", stream)); + + if (stream->co.ops == &string_out_ops.cobj_ops) { + struct string_output *so = (struct string_output *) stream->co.handle; + obj_t *out = nil; + + stream->co.handle = 0; + + if (!so) + return out; + + so->buf = chk_realloc(so->buf, so->fill + 1); + out = string(so->buf); + free(so); + return out; + } else if (stream->co.ops == &string_in_ops.cobj_ops) { + obj_t *pair = (obj_t *) stream->co.handle; + return pair ? car(pair) : nil; + } else { + abort(); /* not a string input or output stream */ + } +} + +obj_t *make_dir_stream(DIR *dir) +{ + return cobj((void *) dir, stream_t, &dir_ops.cobj_ops); +} + +obj_t *close_stream(obj_t *stream) +{ + type_check (stream, COBJ); + type_assert (stream->co.cls == stream_t, ("~a is not a stream", stream)); + + { + struct strm_ops *ops = (struct strm_ops *) stream->co.ops; + return ops->close ? ops->close(stream) : nil; + } +} + +obj_t *get_line(obj_t *stream) +{ + type_check (stream, COBJ); + type_assert (stream->co.cls == stream_t, ("~a is not a stream", stream)); + + { + struct strm_ops *ops = (struct strm_ops *) stream->co.ops; + return ops->get_line ? ops->get_line(stream) : nil; + } +} + +obj_t *get_char(obj_t *stream) +{ + type_check (stream, COBJ); + type_assert (stream->co.cls == stream_t, ("~a is not a stream", stream)); + + { + struct strm_ops *ops = (struct strm_ops *) stream->co.ops; + return ops->get_char ? ops->get_char(stream) : nil; + } +} + +obj_t *vformat(obj_t *stream, const char *str, va_list vl) +{ + type_check (stream, COBJ); + type_assert (stream->co.cls == stream_t, ("~a is not a stream", stream)); + + { + struct strm_ops *ops = (struct strm_ops *) stream->co.ops; + return ops->vformat ? ops->vformat(stream, str, vl) : nil; + } +} + +obj_t *vcformat(obj_t *stream, const char *string, va_list vl) +{ + type_check (stream, COBJ); + type_assert (stream->co.cls == stream_t, ("~a is not a stream", stream)); + + { + struct strm_ops *ops = (struct strm_ops *) stream->co.ops; + return ops->vcformat ? ops->vcformat(stream, string, vl) : nil; + } +} + +obj_t *format(obj_t *stream, const char *str, ...) +{ + type_check (stream, COBJ); + type_assert (stream->co.cls == stream_t, ("~a is not a stream", stream)); + + { + struct strm_ops *ops = (struct strm_ops *) stream->co.ops; + va_list vl; + obj_t *ret; + + va_start (vl, str); + ret = ops->vformat ? ops->vformat(stream, str, vl) : nil; + va_end (vl); + return ret; + } +} + +obj_t *cformat(obj_t *stream, const char *string, ...) +{ + type_check (stream, COBJ); + type_assert (stream->co.cls == stream_t, ("~a is not a stream", stream)); + + { + struct strm_ops *ops = (struct strm_ops *) stream->co.ops; + va_list vl; + obj_t *ret; + + va_start (vl, string); + ret = ops->vformat ? ops->vcformat(stream, string, vl) : nil; + va_end (vl); + return ret; + } +} + +obj_t *put_string(obj_t *stream, obj_t *string) +{ + type_check (stream, COBJ); + type_assert (stream->co.cls == stream_t, ("~a is not a stream", stream)); + + { + struct strm_ops *ops = (struct strm_ops *) stream->co.ops; + return ops->put_string ? ops->put_string(stream, c_str(string)) : nil; + } +} + +obj_t *put_cstring(obj_t *stream, const char *str) +{ + type_check (stream, COBJ); + type_assert (stream->co.cls == stream_t, ("~a is not a stream", stream)); + + { + struct strm_ops *ops = (struct strm_ops *) stream->co.ops; + return ops->put_string ? ops->put_string(stream, str) : nil; + } +} + +obj_t *put_char(obj_t *stream, obj_t *ch) +{ + type_check (stream, COBJ); + type_assert (stream->co.cls == stream_t, ("~a is not a stream", stream)); + + { + struct strm_ops *ops = (struct strm_ops *) stream->co.ops; + return ops->put_char ? ops->put_char(stream, c_chr(ch)) : nil; + } +} + +obj_t *put_cchar(obj_t *stream, int ch) +{ + type_check (stream, COBJ); + type_assert (stream->co.cls == stream_t, ("~a is not a stream", stream)); + + { + struct strm_ops *ops = (struct strm_ops *) stream->co.ops; + return ops->put_char ? ops->put_char(stream, ch) : nil; + } +} + +obj_t *put_line(obj_t *stream, obj_t *string) +{ + return (put_string(stream, string), put_cchar(stream, '\n')); +} + +void stream_init(void) +{ + protect(&std_input, &std_output, &std_error, 0); + std_input = make_stdio_stream(stdin, t, nil); + std_output = make_stdio_stream(stdout, nil, t); + std_error = make_stdio_stream(stderr, nil, t); +} diff --git a/stream.h b/stream.h new file mode 100644 index 00000000..2be353f3 --- /dev/null +++ b/stream.h @@ -0,0 +1,48 @@ +/* Copyright 2009 + * Kaz Kylheku + * Vancouver, Canada + * All rights reserved. + * + * BSD License: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. The name of the author may not be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + */ + +extern obj_t *std_input, *std_output, *std_error; + +obj_t *make_stdio_stream(FILE *, obj_t *input, obj_t *output); +obj_t *make_pipe_stream(FILE *, obj_t *input, obj_t *output); +obj_t *make_string_input_stream(obj_t *); +obj_t *make_string_output_stream(void); +obj_t *get_string_from_stream(obj_t *); +obj_t *make_dir_stream(DIR *); +obj_t *close_stream(obj_t *); +obj_t *get_line(obj_t *); +obj_t *get_char(obj_t *); +obj_t *vformat(obj_t *stream, const char *string, va_list); /* nao-terminated */ +obj_t *vcformat(obj_t *stream, const char *string, va_list); /* printf-style */ +obj_t *format(obj_t *stream, const char *string, ...); +obj_t *cformat(obj_t *stream, const char *string, ...); +obj_t *put_string(obj_t *stream, obj_t *string); +obj_t *put_line(obj_t *stream, obj_t *string); +obj_t *put_cstring(obj_t *stream, const char *); +obj_t *put_char(obj_t *stream, obj_t *ch); +obj_t *put_cchar(obj_t *stream, int ch); + +void stream_init(void); diff --git a/txr.1 b/txr.1 index e1a67248..cbc6887a 100644 --- a/txr.1 +++ b/txr.1 @@ -1,4 +1,4 @@ -.\"Copyright (C) 2009, Kaz Kylheku . +5\"Copyright (C) 2009, Kaz Kylheku . .\"All rights reserved. .\" .\"BSD License: @@ -21,7 +21,7 @@ .\"IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED .\"WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. -.TH txr 1 2009-09-09 "txr v. 014" "Text Extraction Utility" +.TH txr 1 2009-10-14 "txr v. 015" "Text Extraction Utility" .SH NAME txr \- text extractor .SH SYNOPSIS @@ -76,8 +76,8 @@ from their subqueries in special ways. .SH ARGUMENTS AND OPTIONS -Options other than -D may be combined together into a single argument. -The -v and -q options are mutually exclusive. The one which occurs +Options other than -D, -a and -f may be combined together into a single +argument. The -v and -q options are mutually exclusive. The one which occurs in the rightmost position in the argument list dominates. .IP -Dvar=value @@ -135,6 +135,38 @@ reported as: The leftmost bracketed index is the most major index. That is to say, the dimension order is: NAME_m_m+1_..._n[1][2]...[m-1]. +.IP -f query +Specifies the query in the form of a command line argument. If this option is +used, the query-file argument is omitted. The first non-option argument, +if there is one, now specifies the first input source rather than a query. +Queries specified as arguments must properly end in a newline, as if they +were read from a text file, thus -f "@a" is not a properly formed query. + +Example: + + # read two lines "1" and "2" from standard input, + # binding them to variables a and b. Standard + # input is specified as - and the data + # comes from shell "here document" redirection. + + txr -f "@a + @b + " - < twoline.txr + #!/usr/bin/txr + @a + @b + [Ctrl-D] + $ chmod a+x twoline.txr + $ ./twoline.txr - + 1 + 2 + [Ctrl-D] + a=1 + b=2 + .SS Text Query material which is not escaped by the special character @ is @@ -601,9 +656,9 @@ The general syntax of a directive is: @EXPR where expr is a parenthesized list of subexpressions. A subexpression -is an symbol, number, string literal, character literal, regular expression, or -a parenthesized expression. So, examples of syntactically valid directives -are: +is an symbol, number, string literal, character literal, quasiliteral, regular +expression, or a parenthesized expression. So, examples of syntactically valid +directives are: @(banana) @@ -615,6 +670,8 @@ are: @(a /[a-z]*/ b) + @(_ `@file.txt`) + A symbol is lexically the same thing as a variable and the same rules apply. Tokens that look like numbers are treated as numbers. @@ -623,6 +680,15 @@ respectively, and may not span multiple lines. Character literals must contain exactly one character. Character and numeric escapes may be used within literals to escape the quotes, and to denote control characters. +Quasiliterals are similar to string literals, except that they may +contain variable references denoted by the usual @ syntax. The quasiliteral +represents a string formed by substituting the values of those variables +into the literal template. If a is bound to "apple" and b to "banana", +the quasiliteral `one@a and two @{b}s` represents the string +"one apple and two bananas". A backquote escaped by a backslash represents +itself, and two consecutive @ characters code for a literal @. +There is no \e@ escape. + Some directives are involved in structuring the overall syntax of the query. There are syntactic constraints that depend on the directive. For instance the @@ -699,6 +765,13 @@ Terminate the processing of a block, as if it were a successful match. What bindings emerge may depend on the kind of block: collect has special semantics. Blocks are discussed in the section BLOCKS below. +.IP @(try) +Indicates the start of a try block, which is related to exception +handling, discussed in the EXCEPTIONS section below. + +.IP @(catch), @(finally) +Special clauses within @(try). See EXCEPTIONS below. + .IP @(flatten) Normalizes a set of specified variables to one-dimensional lists. Those variables which have scalar value are reduced to lists of that value. @@ -733,23 +806,51 @@ produces repeated text within one line. .SS The Next Directive -The next directive comes in two forms. It can occur by itself as the -only element in a query line: +The next directive comes in two forms, one of which is obsolescent +syntax. This directive indicates that the remainder of the query. + +In the first form, it can occur by itself as the only element in a query line, +with, or without arguments: @(next) + @(next SOURCE) + @(next SOURCE nothrow) + +The lone @(next) without arguments switches to the next file in the +argument list which was passed to the +.B txr +utility. If SOURCE is given, it must be text-valued expression which denotes an +input source; it may be a string literal, quasiliteral or a variable. +For instance, if variable A contains the text "data", then + + @(next A) + +means switch to the file called "data", and -Or it may be followed by material, which may contain variables. -All of the variables must be bound. For example: + @(next `@A.txt`) + +means to switch to the file "data.txt". + +If the input source cannot be opened for whatever reason, +.B txr +throws an exception (see EXCEPTIONS below). An unhandled exception will +terminate the program. Often, such a drastic measure is inconvenient; +if @(next) is invoked with the nothrow keyword, then if the input +source cannot be opened, the situation is treated as a simple +match failure. + +In the obsolescent second form, @(next) is followed by material on the same +line, which may contain variables. All of the variables must be bound. For +example: @(next)/path/to/@foo.txt -Both forms indicate that the remainder of the query applies -to a new file. The lone @(next) switches to the next file in the -argument list which was passed to the +The trailing material specifies gives the input source. +The nothrow behavior is implicit in this form. The syntax will +disappear in some future version of .B txr -utility. The second form diverts the remainder of the query to a file whose -name is given by the trailing material, after variable substitutions are -performed. +. + Note that "remainder of the query" refers to the subquery in which the next directive appears, not necessarily the entire query. @@ -760,7 +861,7 @@ After the @(end) which terminates the @(some), the "abc" is matched in the current file. @(some) - @(next)foo.txt + @(next "foo.txt") xyz@suffix @(end) abc @@ -1845,6 +1946,14 @@ usual printing of the variable bindings or the word false. The syntax of the @(output) directive is: + @(output [ DESTINATION ] [ nothrow ]) + . + . one or more output directives or lines + . + @(end) + +An obsolescent syntax is also supported: + @(output)...optional destination... . . one or more output directives or lines @@ -1853,7 +1962,16 @@ The syntax of the @(output) directive is: The optional destination is a filename, the special name, - which redirects to standard output, or a shell command preceded by the ! symbol. -Variables are substituted in the directive. +In the first form, the destination may be specified as a variable +which holds text, a string literal or a quasiliteral + +In the second obsolescent form, the material to the right of @(output) +is query text which may contain variables. + +The new syntax throws an exception if the output destination +cannot be opened, unless the nothrow keyword is present, in which +case the situation is treated as a match failure. The old syntax throws an +exception. .SS Output Text @@ -2025,6 +2143,269 @@ spaces each one, except the last which has no space. If the list has exactly one item, then the @(last) applies to it instead of the main clause: it is produced with no trailing space. +.SH EXCEPTIONS + +The exceptions mechanism in +.B txr +is disciplined way for representing and handling abnormal situations that may +occur during query processing, such as using an unbound variable, or attempting +to open a nonexistent file. + +An exception is a situation in the query which stops the query and +demands handling. If handling is not provided for that exception, +the execution of the program is terminated. + +An exception is always identified by a symbol, which is its type. Types are +organized in a subtype-supertype hierarchy. For instance, the file_error +exception type is a subtype of the error type. This means that a file error is +a kind of error. An exception handling block which catches exceptions of type +error will catch exceptions of type file_error, but a block which catches +file_error will not catch all exceptions of type error. A query_error is a kind +of error, but not a kind of file_error. The symbol t is the supertype +of every type: every exception type is considered to be a kind of t. +(Mnemonic: t stands for type, as in any type). + +Exceptions are handled using @(catch) clauses within a @(try) directive. + +In addition to being useful for exception handling, the @(try) directive +also provides unwind protection by means of a @(finally) clause, +which specifies query material to be executed unconditionally when +the try clause terminates, no matter how it terminates. + +.SS The Try Directive + +The general syntax of the try directive is + + @(try) + ... main clause, required ... + ... optional catch clauses ... + ... optional finally clause + @(end) + +A catch clause looks like: + + @(catch TYPE) + . + . + . + +and also the this form, equivalent to @(catch (t)): + + @(catch) + . + . + . + +which catches all exceptions. + +A finally clause looks like: + + @(finally) + ... + . + . + +None of the clauses may be empty. + +A try clause is surrounded by an implicit anonymous block (see BLOCKS section +above). So for instance, the following is a no-op (an operation with no effect, +other than successful execution): + + @(try) + @(accept) + @(end) + +The @(accept) causes a successful termination of the implicit anonymous block. +Execution resumes with query lines or directives which follow, if any. + +Try clauses and blocks interact. For instance, a block accept from within +a try clause invokes a finally. + + Query: @(block foo) + @ (try) + @ (accept foo) + @ (finally) + @ (output) + bye! + @ (end) + @ (end) + + Output: bye! + +How this works: the try block's main clause is @(accept foo). This causes +the enclosing block named foo to terminate, as a successful match. +Since the try is nested within this block, it too must terminate +in order for the block to terminate. But the try has a finally clause, +which executes unconditionally, no matter how the try block +terminates. The finally clause performs some output, which is seen. + +.SH The Finally Clause + +A try directive can terminate in one of three ways. The main clause +may match successfully, and possibly yield some new variable bindings. +The main clause may fail to match. Or the main clause may be terminated +by a non-local control transfer, like an exception being thrown or a block +return (like the block foo example in the previous section). + +No matter how the try clause terminates, the finally clause is processed. + +Now, the finally clause is itself a query which binds variables, which leads to +the question: what happens to such variables? What if the finally block fails +as a query? Another question is: what if a finally clause itself initiates a +control transfer? Answers follow. + +Firstly, a finally clause will contribute variable bindings only if the main +clause terminates normally (either as a successful or failed match). +If the main clause successfully matches, then the finally block continues +matching at the next position in the data, and contributes bindings. +If the main clause fails, then the finally block matches at the +same position. + +The overall try directive succeeds as a match if either the main clause +or the finally clause succeed. If both fail, then the try directive is +a failed match. The subquery in which it is located fails, et cetera. + +Example: + + Query: @(try) + @a + @(finally) + @b + @(end) + @c + + Data: 1 + 2 + 3 + + Output: a=1 + b=2 + c=3 + +In this example, the main clause of the try captures line "1" of the data as +variable a, then the finally clause captures "2" as b, and then the +query continues with the @c variable after try block, and captures "3". + + +Example: + + Query: @(try) + hello @a + @(finally) + @b + @(end) + @c + + Data: 1 + 2 + + Output: b=1 + c=2 + +In this example, the main clause of the try fails to match, because +the input is not prefixed with "hello ". However, the finally clause +matches, binding b to "1". This means that the try block is a successful +match, and so processing continues with @c which captures "2". + +When finally clauses are processed during a non-local return, +they have no externally visible effect if they do not bind variables. +However, their execution makes itself known if they perform side effects, +such as output. + +A finally clause guards only the main clause and the catch clauses. It does not +guard itself. Once the finally clause is executing, the try block is no +longer guarded. This means if a nonlocal transfer, such as a block accept +or exception, is initiated within the finally clause, it will not re-execute +the finally clause. The finally clause is simply abandoned. + +The disestablishment of blocks and try clauses is properly interleaved +with the execution of finally clauses. This means that all surrounding +exit points are visible in a finally clause, even if the finally clause +is being invoked as part of a transfer to a distant exit point. +The finally clause can make a control transfer to an exit point which +is more near than the original one, thereby "hijacking" the control +transfer. Also, the anonymous block established by the try directive +is visible in the finally clause. + +Example: + +@(try) +@ (try) +@ (next "nonexistent-file") +@ (finally) +@ (accept) +@ (end) +@(catch file_error) +@ (output) +file error caught +@ (end) +@(end) + +In this example, the @(next) directive throws an exception of type file_error, +because the given file does not exist. The exit point for this exception is the +@(catch file_error) clause in the outer-most try block. The inner block is +not eligible because it contains no catch clauses at all. However, the inner +try block has a finally clause, and so during the processing of this +exception which is headed for the @(catch file_error), the finally +clause performs an anonymous accept. The exit point for the accept +is the anonymous block surrounding the inner try. So the original +transfer to the catch clause is forgotten. The inner try terminates +sucessfully, and since it constitutes the main clause of the outer try, +that also terminates sucessfully. The "file error caught" message is +never printed. + +.SS Catch Clauses + +Catch clauses establish a try block as a potential exit point for +an exception-induced control transfer (called a ``throw''). + +A catch clause specifies an optional list of symbols which represent +the exception types which it catches. The catch clause will catch +exceptions which are a subtype of any one of those exception types. + +If a try block has more than one catch clause which can match a given +exception, the first one will be invoked. + +The exception protection of a try block does not extend over the +catch clauses. Once a catch clause is being executed, if it throws +an exception, that exception will not re-enter any catch within the +same try block, even if it matches one. + +Catches are processed prior to finally. + +When a catch is invoked, it is of course understood that the main clause did +not terminate normally, and so the main clause could not have produced any +bindings. + +So the success or failure of the try block depends on the behavior of the catch +clause or the finally, if there is one. If either of them succeed, then the try block is considered a successful match. + +Example: + + Query: @(try) + @ (next "nonexistent-file") + @ x + @ (catch file_error) + @a + @(finally) + @b + @(end) + @c + + Data: 1 + 2 + 3 + + Output: a=1 + b=2 + c=3 + +Here, the try block's main clause is terminated abruptly by a file_error +exception from the @(next) directive. This is handled by the +catch clause, which binds variable a to the input line "1". +Then the finally clause executes, binding b to "2". The try block +then terminates successfully, and so @c takes "3". + .SH NOTES ON FALSE The reason for printing the word diff --git a/txr.c b/txr.c new file mode 100644 index 00000000..de4d81a2 --- /dev/null +++ b/txr.c @@ -0,0 +1,336 @@ +/* Copyright 2009 + * Kaz Kylheku + * Vancouver, Canada + * All rights reserved. + * + * BSD License: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. The name of the author may not be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include "lib.h" +#include "stream.h" +#include "gc.h" +#include "unwind.h" +#include "parser.h" +#include "match.h" +#include "txr.h" + +const char *version = "015"; +const char *progname = "txr"; +const char *spec_file = "stdin"; +obj_t *spec_file_str; + +/* + * Can implement an emergency allocator here from a fixed storage + * pool, which sets an OOM flag. Program can check flag + * and gracefully terminate instead of aborting like this. + */ +void *oom_realloc_handler(void *old, size_t size) +{ + fprintf(stderr, "%s: out of memory\n", progname); + puts("false"); + abort(); +} + +void help(void) +{ + const char *text = +"\n" +"txr version %s\n" +"\n" +"copyright 2009, Kaz Kylheku \n" +"\n" +"usage:\n" +"\n" +" %s [ options ] query-file { data-file }*\n" +"\n" +"The query-file or data-file arguments may be specified as -, in which case\n" +"standard input is used. If these arguments end with a | character, then\n" +"they are treated as command pipes. Leading arguments which begin with a -\n" +"followed by one or more characters, and which are not arguments to options\n" +"are interpreted as options. The -- option indicates the end of the options.\n" +"\n" +"If no data-file arguments sare supplied, then the query itself must open a\n" +"a data source prior to attempting to make any pattern match, or it will\n" +"simply fail due to a match which has run out of data.\n" +"\n" +"options:\n" +"\n" +"-Dvar=value Pre-define variable var, with the given value.\n" +" A list value can be specified using commas.\n" +"-Dvar Predefine variable var, with empty string value.\n" +"-q Quiet: don't report errors during query matching.\n" +"-v Verbose: extra logging from matcher.\n" +"-b Don't dump list of bindings.\n" +"-a num Generate array variables up to num-dimensions.\n" +" Default is 1. Additional dimensions are fudged\n" +" by generating numeric suffixes\n" +"-f query Specify the query text as an argument.\n" +" The query-file argument is omitted in this case.\n" +"--help You already know!\n" +"--version Display program version\n" +"\n" +"Options that take no argument can be combined. The -q and -v options\n" +"are mutually exclusive; the right-most one dominates.\n" +"\n" + ; + fprintf(stdout, text, version, progname); +} + +void hint(void) +{ + fprintf(stderr, "%s: incorrect arguments: try --help\n", progname); +} + +obj_t *remove_hash_bang_line(obj_t *spec) +{ + if (!consp(spec)) + return spec; + + { + obj_t *shbang = string(strdup("#!")); + obj_t *firstline = first(spec); + obj_t *items = rest(firstline); + + if (stringp(first(items))) { + obj_t *twochars = sub_str(first(items), zero, two); + if (equal(twochars, shbang)) + return rest(spec); + } + + return spec; + } +} + +int main(int argc, char **argv) +{ + obj_t *stack_bottom_0 = nil; + obj_t *specstring = nil; + obj_t *spec = nil; + obj_t *bindings = nil; + int match_loglevel = opt_loglevel; + progname = argv[0] ? argv[0] : progname; + obj_t *stack_bottom_1 = nil; + + init(progname, oom_realloc_handler, &stack_bottom_0, &stack_bottom_1); + + protect(&spec_file_str, 0); + spec_file_str = string(strdup(spec_file)); + + yyin_stream = std_input; + protect(&yyin_stream, 0); + + if (argc <= 1) { + hint(); + return EXIT_FAILURE; + } + + argc--, argv++; + + while (argc > 0 && (*argv)[0] == '-') { + if (!strcmp(*argv, "--")) { + argv++, argc--; + break; + } + + if (!strcmp(*argv, "-")) + break; + + if (!strncmp(*argv, "-D", 2)) { + char *var = *argv + 2; + char *equals = strchr(var, '='); + char *has_comma = (equals != 0) ? strchr(equals, ',') : 0; + + if (has_comma) { + char *val = equals + 1; + obj_t *list = nil; + + *equals = 0; + + for (;;) { + size_t piece = strcspn(val, ","); + char comma_p = val[piece]; + + val[piece] = 0; + + list = cons(string(strdup(val)), list); + + if (!comma_p) + break; + + val += piece + 1; + } + + list = nreverse(list); + bindings = cons(cons(intern(string(strdup(var))), list), bindings); + } else if (equals) { + char *val = equals + 1; + *equals = 0; + bindings = cons(cons(intern(string(strdup(var))), + string(strdup(val))), bindings); + } else { + bindings = cons(cons(intern(string(strdup(var))), + null_string), bindings); + } + + argc--, argv++; + continue; + } + + if (!strcmp(*argv, "--version")) { + printf("%s: version %s\n", progname, version); + return 0; + } + + if (!strcmp(*argv, "--help")) { + help(); + return 0; + } + + if (!strcmp(*argv, "-a") || !strcmp(*argv, "-f")) { + long val; + char *errp; + char opt = (*argv)[1]; + + if (argc == 1) { + fprintf(stderr, "%s: option %c needs argument\n", progname, opt); + + return EXIT_FAILURE; + } + + argv++, argc--; + + switch (opt) { + case 'a': + val = strtol(*argv, &errp, 10); + if (*errp != 0) { + fprintf(stderr, "%s: option %c needs numeric argument, not %s\n", + progname, opt, *argv); + return EXIT_FAILURE; + } + + opt_arraydims = val; + break; + case 'f': + specstring = string(strdup(*argv)); + break; + } + + argv++, argc--; + continue; + } + + if (!strcmp(*argv, "--gc-debug")) { + opt_gc_debug = 1; + argv++, argc--; + continue; + } + + { + char *popt; + for (popt = (*argv)+1; *popt != 0; popt++) { + switch (*popt) { + case 'v': + match_loglevel = 2; + break; + case 'q': + match_loglevel = 0; + break; + case 'b': + opt_nobindings = 1; + break; + case 'a': + case 'f': + case 'D': + fprintf(stderr, "%s: option -%c does not clump\n", progname, *popt); + return EXIT_FAILURE; + case '-': + fprintf(stderr, "%s: unrecognized long option: --%s\n", + progname, popt + 1); + return EXIT_FAILURE; + default: + fprintf(stderr, "%s: unrecognized option: %c\n", progname, *popt); + return EXIT_FAILURE; + } + } + + argc--, argv++; + } + } + + if (specstring) { + spec_file = "cmdline"; + yyin_stream = make_string_input_stream(specstring); + } else { + if (argc < 1) { + hint(); + return EXIT_FAILURE; + } + + if (strcmp(*argv, "-") != 0) { + FILE *in = fopen(*argv, "r"); + if (in == 0) { + uw_errorcf("%s: unable to open %s", progname, *argv); + fprintf(stderr, "%s: unable to open %s\n", progname, *argv); + return EXIT_FAILURE; + } + yyin_stream = make_stdio_stream(in, t, nil); + spec_file = *argv; + spec_file_str = string(strdup(spec_file)); + } + argc--, argv++; + } + + { + int gc = gc_state(0); + yyparse(); + gc_state(gc); + + if (errors) + return EXIT_FAILURE; + spec = remove_hash_bang_line(get_spec()); + + opt_loglevel = match_loglevel; + + if (opt_loglevel >= 2) { + format(std_error, "spec:\n~s\n", spec, nao); + format(std_error, "bindings:\n~s\n", bindings, nao); + } + + { + int retval; + list_collect_decl(filenames, iter); + + while (*argv) + list_collect(iter, string(*argv++)); + + retval = extract(spec, filenames, bindings); + + return errors ? EXIT_FAILURE : retval; + } + } +} diff --git a/txr.h b/txr.h new file mode 100644 index 00000000..51b4990f --- /dev/null +++ b/txr.h @@ -0,0 +1,33 @@ +/* Copyright 2009 + * Kaz Kylheku + * Vancouver, Canada + * All rights reserved. + * + * BSD License: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. The name of the author may not be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + */ + +extern int opt_loglevel; +extern int opt_nobindings; +extern int opt_arraydims; +extern int opt_gc_debug; +extern const char *version; +extern const char *progname; +extern int output_produced; diff --git a/unwind.c b/unwind.c index eb1490d4..c3df021e 100644 --- a/unwind.c +++ b/unwind.c @@ -29,8 +29,11 @@ #include #include #include +#include #include "lib.h" #include "gc.h" +#include "stream.h" +#include "txr.h" #include "unwind.h" static uw_frame_t *uw_stack; @@ -39,8 +42,39 @@ static uw_frame_t toplevel_env; static void uw_unwind_to_exit_point() { - while (uw_stack && uw_stack != uw_exit_point) - uw_stack = uw_stack->uw.up; + assert (uw_exit_point); + + for (; uw_stack && uw_stack != uw_exit_point; uw_stack = uw_stack->uw.up) { + switch (uw_stack->uw.type) { + case UW_CATCH: + /* If a catch block is not visible, do + not run its unwind stuff. This + would cause infinite loops if + unwind blocks trigger a nonlocal exit. */ + if (!uw_stack->ca.visible) + continue; + /* Catches catch everything, so that they + can do "finally" or "unwind protect" logic. + If a catch is invoked with a nil exception + and symbol, it must excecute the + mandatory clean-up code and then + continue the unwinding by calling uw_continue, + passing it the ca.cont value. */ + uw_stack->ca.sym = nil; + uw_stack->ca.exception = nil; + uw_stack->ca.cont = uw_exit_point; + /* This catch frame is no longer + visible. If the unwind section + throws something, it cannot + be caught in the same frame. */ + uw_stack->ca.visible = 0; + /* 1 means unwind only. */ + longjmp(uw_stack->ca.jb, 1); + abort(); + default: + break; + } + } if (!uw_stack) abort(); @@ -50,19 +84,21 @@ static void uw_unwind_to_exit_point() switch (uw_stack->uw.type) { case UW_BLOCK: longjmp(uw_stack->bl.jb, 1); - break; + abort(); case UW_ENV: /* env frame cannot be exit point */ abort(); + case UW_CATCH: + /* Catch frame is no longer visible. + If a catch or unwind throw something, + it cannot go back to the same catch. */ + uw_stack->ca.visible = 0; + /* 2 means actual catch, not just unwind */ + longjmp(uw_stack->ca.jb, 2); default: abort(); } } -void uw_init(void) -{ - protect(&toplevel_env.ev.func_bindings, 0); -} - void uw_push_block(uw_frame_t *fr, obj_t *tag) { fr->bl.type = UW_BLOCK; @@ -135,3 +171,186 @@ obj_t *uw_block_return(obj_t *tag, obj_t *result) uw_unwind_to_exit_point(); abort(); } + +void uw_push_catch(uw_frame_t *fr, obj_t *matches) +{ + fr->ca.type = UW_CATCH; + fr->ca.matches = matches; + fr->ca.exception = nil; + fr->ca.cont = 0; + fr->ca.visible = 1; + fr->ca.up = uw_stack; + uw_stack = fr; +} + +static obj_t *exception_subtypes; + +obj_t *uw_exception_subtype_p(obj_t *sub, obj_t *sup) +{ + if (sub == nil || sup == t || sub == sup) { + return t; + } else { + obj_t *entry = assoc(exception_subtypes, sub); + return memq(sup, cdr(entry)) ? t : nil; + } +} + +obj_t *uw_throw(obj_t *sym, obj_t *exception) +{ + uw_frame_t *ex; + + for (ex = uw_stack; ex != 0; ex = ex->uw.up) { + if (ex->uw.type == UW_CATCH && ex->ca.visible) { + /* The some_satisfy would require us to + cons up a function; we want to + avoid consing in exception handling, if we can. */ + obj_t *matches = ex->ca.matches; + obj_t *match; + for (match = matches; match; match = cdr(match)) + if (uw_exception_subtype_p(sym, car(match))) + break; + if (match) + break; + } + } + + if (ex == 0) { + if (opt_loglevel >= 1) { + format(std_error, "~a: unhandled exception of type ~a:\n", + prog_string, sym, nao); + format(std_error, "~a\n", exception, nao); + } + if (uw_exception_subtype_p(sym, query_error) || + uw_exception_subtype_p(sym, file_error)) { + if (!output_produced) + put_cstring(std_output, "false\n"); + exit(EXIT_FAILURE); + } + abort(); + } + + ex->ca.sym = sym; + ex->ca.exception = exception; + uw_exit_point = ex; + uw_unwind_to_exit_point(); + abort(); +} + +obj_t *uw_throwf(obj_t *sym, const char *fmt, ...) +{ + va_list vl; + obj_t *stream = make_string_output_stream(); + + va_start (vl, fmt); + (void) vformat(stream, fmt, vl); + va_end (vl); + + uw_throw(sym, get_string_from_stream(stream)); + abort(); +} + +obj_t *uw_errorf(const char *fmt, ...) +{ + va_list vl; + obj_t *stream = make_string_output_stream(); + + va_start (vl, fmt); + (void) vformat(stream, fmt, vl); + va_end (vl); + + uw_throw(error, get_string_from_stream(stream)); + abort(); +} + +obj_t *uw_throwcf(obj_t *sym, const char *fmt, ...) +{ + va_list vl; + obj_t *stream = make_string_output_stream(); + + va_start (vl, fmt); + (void) vcformat(stream, fmt, vl); + va_end (vl); + + uw_throw(sym, get_string_from_stream(stream)); + abort(); +} + +obj_t *uw_errorcf(const char *fmt, ...) +{ + va_list vl; + obj_t *stream = make_string_output_stream(); + + va_start (vl, fmt); + (void) vcformat(stream, fmt, vl); + va_end (vl); + + uw_throw(error, get_string_from_stream(stream)); + abort(); +} + +obj_t *type_mismatch(const char *fmt, ...) +{ + va_list vl; + obj_t *stream = make_string_output_stream(); + + va_start (vl, fmt); + (void) vformat(stream, fmt, vl); + va_end (vl); + + uw_throw(type_error, get_string_from_stream(stream)); + abort(); +} + +void uw_register_subtype(obj_t *sub, obj_t *sup) +{ + obj_t *t_entry = assoc(exception_subtypes, t); + obj_t *sub_entry = assoc(exception_subtypes, sub); + obj_t *sup_entry = assoc(exception_subtypes, sup); + + assert (t_entry != 0); + + if (sub == nil) + return; + + if (sub == t) { + if (sup == t) + return; + abort(); + } + + /* If sup symbol not registered, then we make it + an immediate subtype of t. */ + if (!sup_entry) { + sup_entry = cons(sup, t_entry); + exception_subtypes = cons(sup_entry, exception_subtypes); + } + + /* If sub already registered, we delete that + registration. */ + if (sub_entry) { + exception_subtypes = alist_remove1(exception_subtypes, sub); + } + + /* Register sub as an immediate subtype of sup. */ + sub_entry = cons(sub, sup_entry); + exception_subtypes = cons(sub_entry, exception_subtypes); +} + +void uw_continue(uw_frame_t *current, uw_frame_t *cont) +{ + uw_pop_frame(current); + uw_exit_point = cont; + uw_unwind_to_exit_point(); +} + +void uw_init(void) +{ + protect(&toplevel_env.ev.func_bindings, &exception_subtypes, 0); + exception_subtypes = cons(cons(t, cons(t, nil)), exception_subtypes); + uw_register_subtype(type_error, error); + uw_register_subtype(internal_err, error); + uw_register_subtype(numeric_err, error); + uw_register_subtype(range_err, error); + uw_register_subtype(query_error, error); + uw_register_subtype(file_error, error); +} diff --git a/unwind.h b/unwind.h index 1cd0792a..574794b2 100644 --- a/unwind.h +++ b/unwind.h @@ -24,10 +24,16 @@ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. */ +#ifdef __GNUC__ +#define noreturn __attribute__((noreturn)) +#else +#define noreturn +#endif + typedef union uw_frame uw_frame_t; typedef enum uw_frtype uw_frtype_t; -enum uw_frtype { UW_BLOCK, UW_ENV }; +enum uw_frtype { UW_BLOCK, UW_ENV, UW_CATCH }; struct uw_common { uw_frame_t *up; @@ -48,20 +54,42 @@ struct uw_dynamic_env { obj_t *func_bindings; }; +struct uw_catch { + uw_frame_t *up; + uw_frtype_t type; + obj_t *matches; + obj_t *sym; + obj_t *exception; + uw_frame_t *cont; + int visible; + jmp_buf jb; +}; + union uw_frame { struct uw_common uw; struct uw_block bl; struct uw_dynamic_env ev; + struct uw_catch ca; }; -void uw_init(void); void uw_push_block(uw_frame_t *, obj_t *tag); void uw_push_env(uw_frame_t *); obj_t *uw_get_func(obj_t *sym); obj_t *uw_set_func(obj_t *sym, obj_t *value); obj_t *uw_block_return(obj_t *tag, obj_t *result); +void uw_push_catch(uw_frame_t *, obj_t *matches); +noreturn obj_t *uw_throw(obj_t *sym, obj_t *exception); +noreturn obj_t *uw_throwf(obj_t *sym, const char *fmt, ...); +noreturn obj_t *uw_errorf(const char *fmt, ...); +noreturn obj_t *uw_throwcf(obj_t *sym, const char *fmt, ...); +noreturn obj_t *uw_errorcf(const char *fmt, ...); +void uw_register_subtype(obj_t *sub, obj_t *super); +obj_t *uw_exception_subtype_p(obj_t *sub, obj_t *sup); +void uw_continue(uw_frame_t *curr, uw_frame_t *target); void uw_pop_frame(uw_frame_t *); +void uw_init(void); +noreturn obj_t *type_mismatch(const char *, ...); #define uw_block_begin(TAG, RESULTVAR) \ obj_t *RESULTVAR = nil; \ @@ -85,3 +113,63 @@ void uw_pop_frame(uw_frame_t *); #define uw_env_end \ uw_pop_frame(&uw_env); \ } + +#define uw_catch_begin(MATCHES, SYMVAR, \ + EXCVAR) \ + obj_t *SYMVAR = nil; \ + obj_t *EXCVAR = nil; \ + { \ + uw_frame_t uw_catch; \ + uw_push_catch(&uw_catch, MATCHES); \ + switch (setjmp(uw_catch.ca.jb)) { \ + case 0: + +#define uw_do_unwind \ + goto uw_unwind_label + +#define uw_catch(SYMVAR, EXCVAR) \ + break; \ + case 2: \ + EXCVAR = uw_catch.ca.exception; \ + SYMVAR = uw_catch.ca.sym; \ + +#define uw_unwind \ + break; \ + uw_unwind_label: \ + case 1: + +#define uw_catch_end \ + default: \ + break; \ + } \ + if (uw_catch.ca.cont) \ + uw_continue(&uw_catch, \ + uw_catch.ca.cont); \ + uw_pop_frame(&uw_catch); \ + } + +#define internal_error(STR) \ + uw_throwcf(internal_err, \ + "%s:%d %s", __FILE__, \ + __LINE__, STR) + +#define type_assert(EXPR, ARGS) \ + if (!(EXPR)) type_mismatch ARGS + +#define bug_unless(EXPR) \ + if (!(EXPR)) \ + internal_error("assertion " \ + #EXPR \ + " failed") + +#define numeric_assert(EXPR) \ + if (!(EXPR)) \ + uw_throwcf(numeric_err, "%s", \ + "assertion " #EXPR \ + " failed") + +#define range_bug_unless(EXPR) \ + if (!(EXPR)) \ + uw_throwcf(range_err, "%s", \ + "assertion" #EXPR \ + " failed") -- cgit v1.2.3