forked from howerj/libforth
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathlibforth.c
2837 lines (2514 loc) · 97.3 KB
/
libforth.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/**
# libforth.c.md
@file libforth.c
@author Richard James Howe.
@copyright Copyright 2015,2016,2017 Richard James Howe.
@license MIT
@email howe.r.j.89@gmail.com
@brief A FORTH library, written in a literate style.
@todo Fix the special 'literal' word, moving it outside register area
@todo Add 'parse', removing scanf/fscanf
@todo The special case of base = 0 causes problems, this should be
removed.
@todo Add THROW/CATCH as virtual machine instructions, remove
RESTART and current error handling scheme.
@todo Add u.rc to the interpreter
## License
The MIT License (MIT)
Copyright (c) 2016 Richard James Howe
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.
## Introduction
This file implements the core Forth interpreter, it is written in portable
C99. The file contains a virtual machine that can interpret threaded Forth
code and a simple compiler for the virtual machine, which is one of its
instructions. The interpreter can be embedded in another application and
there should be no problem instantiating multiple instances of the
interpreter.
For more information about Forth see:
* <https://en.wikipedia.org/wiki/Forth_%28programming_language%29>
* Thinking Forth by Leo Brodie
* Starting Forth by Leo Brodie
A glossary of words for FIG FORTH 79:
* <http://www.dwheeler.com/6502/fig-forth-glossary.txt>
And the more recent and widespread standard for ANS Forth:
* <http://lars.nocrew.org/dpans/dpans.htm>
The antecedent of this interpreter:
* <http://www.ioccc.org/1992/buzzard.2.c>
cxxforth, a literate Forth written in C++
* <https://github.com/kristopherjohnson/cxxforth>
Jones Forth, a literate Forth written in x86 assembly:
* <https://rwmj.wordpress.com/2010/08/07/jonesforth-git-repository/>
* <https://github.com/AlexandreAbreu/jonesforth> (backup)
Another portable Forth interpreter of written in C:
* http://www.softsynth.com/pforth/
* https://github.com/philburk/pforth
A Forth processor:
* <http://www.excamera.com/sphinx/fpga-j1.html>
And my Forth processor based on this one:
* <https://github.com/howerj/fyp>
The repository should also contain:
* "readme.md" : a Forth manual, and generic project information
* "forth.fth" : basic Forth routines and startup code
* "libforth.h" : The header contains the API documentation
The structure of this file is as follows:
1) Headers and configuration macros
2) Enumerations and constants
3) Helping functions for the compiler
4) API related functions and Initialization code
5) The Forth virtual machine itself
6) An example main function called **main_forth** and support functions
Each section will be explained in detail as it is encountered.
An attempt has been made to make this document flow, as both a source
code document and as a description of how the Forth kernel works.
This is helped by the fact that the program is small and compact
without being written in obfuscated C. It is, as mentioned, compact,
and can be difficult to understand regardless of code quality. Some
of the semantics of Forth will not be familiar to C programmers.
A basic understanding of how to use Forth would help as this document is
meant to describe how a Forth implementation works and not as an
introduction to the language. A quote about the language from Wikipedia
best sums the language up:
"Forth is an imperative stack-based computer programming language
and programming environment.
Language features include structured programming, reflection (the
ability to modify the program structure during program execution),
concatenative programming (functions are composed with juxtaposition)
and extensibility (the programmer can create new commands).
...
A procedural programming language without type checking, Forth features
both interactive execution of commands (making it suitable as a shell
for systems that lack a more formal operating system) and the ability
to compile sequences of commands for later execution."
Forth has a philosophy like most languages, one of simplicity, compactness
and of trying only to solve the problem at hand, even going as far as to try
to simplify the problem or replace the problem (which may span multiple
domains, not just software) with a simpler one. This is often not
a realistic way of tackling things and Forth has fallen out of
favor, it is nonetheless an interesting language which can be
implemented and understood by a single programmer (another integral part
of the Forth philosophy).
The core of the concept of the language - simplicity I would say - is
achieved by the following:
1) The language uses Reverse Polish Notation to enter expressions and parsing
is simplified to the extreme with space delimited words and numbers being
the most complex terms. This means a abstract syntax tree does not need to
be constructed and terms can be executed as soon as they are parsed. The
*parser* can described in only a handful of lines of C.
2) The language uses concatenation of Forth words (called functions in
other language) to create new words, this allows for small programs to
be created and encourages *factoring* definitions into smaller words.
3) The language is untyped.
4) Forth functions, or words, take their arguments implicitly and return
variables implicitly via a variable stack which the programmer explicitly
interacts with. A comparison of two languages behavior best illustrates the
point, we will define a function in C and in Forth that simply doubles a
number. In C this would be:
int double_number(int x)
{
return x << 1;
}
And in Forth it would be:
: 2* 1 lshift ;
No types are needed, and the arguments and the return values are not
stated, unlike in C. Although this has the advantage of brevity, it is now
up to the programmer to manages those variables.
5) The input and output facilities are set up and used implicitly as well.
Input is taken from **stdin** and output goes to **stdout**, by default.
Words that deal with I/O uses these file steams internally.
6) Error handling is traditionally non existent or limited.
7) This point is not a property of the language, but part of the way the
Forth programmer must program. The programmer must make their factored word
definitions *flow*. Instead of reordering the contents of the stack for
each word, words should be made so that the reordering does not have to
take place (ie. Manually performing the job of a optimizing compile another
common theme in Forth, this time with memory reordering).
The implicit behavior relating to argument passing and I/O really reduce
program size, the type of implicit behavior built into a language can
really define what that language is good for. For example AWK is naturally
good for processing text, thanks in large part to sensible defaults for how
text is split up into lines and records, and how input and output is
already set up for the programmer.
An example of this succinctness in AWK is the following program, which
can be typed in at the command line. It will read from the standard
input if no files are given, and print any lines longer than eighty characters
along with the line number of that line:
awk '{line++}length > 80 {printf "%04u: %s\n", line, $0}' file.txt ...
For more information about AWK see:
* <http://www.grymoire.com/Unix/Awk.html>
* <https://en.wikipedia.org/wiki/AWK>
* <http://www.pement.org/awk/awk1line.txt>
Forth likewise can achieve succinctness and brevity because of its implicit
behavior.
Naturally we try to adhere to Forth philosophy, but also to Unix philosophy
(which most Forths do not do), this is described later on.
Glossary of Terms:
VM - Virtual Machine
Cell - The Virtual Machines natural Word Size, on a 32 bit
machine the Cell will be 32 bits wide
Word - In Forth a Word refers to a function, and not the
usual meaning of an integer that is the same size as
the machines underlying word size, this can cause confusion
API - Application Program Interface
interpreter - as in byte code interpreter, synonymous with virtual
machine.
REPL - Read-Evaluate-Print-Loop, this Forth actually provides
something more like a "REL", or Read-Evaluate-Loop (as printing
has to be done explicitly), but the interpreter is interactive
which is the important point
RPN - Reverse Polish Notation (see
<https://en.wikipedia.org/wiki/Reverse_Polish_notation>).
The Forth interpreter uses RPN to enter expressions.
The stack - Forth implementations have at least two stacks, one for
storing variables and another for control flow and temporary
variables, when the term *stack* is used on its own and with
no other context it refers to the *variable stack* and not
the *return stack*. This *variable stack* is used for
passing parameters into and return values to functions.
Return stack - Most programming languages have a call stack, C has one
but not one that the programmer can directly access, in
Forth manipulating the return stack is often used.
factor - factoring is splitting words into smaller words that
perform a specific function. To say a word is a natural
factor of another word is to say that it makes sense to take
some functionality of the word to be factored and to create
a new word that encapsulates that functionality. Forth
encourages heavy factoring of definitions.
Command mode - This mode executes both compiling words and immediate
words as they are encountered
Compile mode - This mode executes immediate words as they are
encountered, but compiling words are compiled into the
dictionary.
Primitive - A word whose instruction is built into the VM.
**/
/**
## Headers and configurations macros
**/
/**
This file implements a Forth library, so a Forth interpreter can be embedded
in another application, as such a subset of the functions in this file are
exported, and are documented in the *libforth.h* header
**/
#include "libforth.h"
/**
We try to make good use of the C library as even microcontrollers have enough
space for a reasonable implementation of it, although it might require some
setup. The only time allocations are explicitly done is when the virtual
machine image is initialized, after this the VM does not allocate any
more memory.
**/
#include <assert.h>
#include <stdbool.h>
#include <stdarg.h>
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <signal.h>
#include <stdlib.h>
#include <string.h>
#include <setjmp.h>
#include <time.h>
/**
Traditionally Forth implementations were the only program running on the
(micro)computer, running on processors orders of magnitude slower than
this one, as such checks to make sure memory access was in bounds did not
make sense and the implementation had to have access to the entire machines
limited memory.
To aide debugging and to help ensure correctness the **ck** macro, a wrapper
around the function **check_bounds**, is called for most memory accesses that
the virtual machine makes.
**/
#ifndef NDEBUG
/**
@brief This is a wrapper around **check_bounds**, so we do not have to keep
typing in the line number, as so the name is shorter (and hence the checks
are out of the way visually when reading the code).
@param C expression to bounds check
@return check index
**/
#define ck(C) check_bounds(o, &on_error, (C), __LINE__, o->core_size)
/**
@brief This is a wrapper around **check_bounds**, so we do not have to keep
typing in the line number, as so the name is shorter (and hence the checks
are out of the way visually when reading the code). This will check
character pointers instead of cell pointers, like **ck** does.
@param C expression to bounds check
@return checked character index
**/
#define ckchar(C) check_bounds(o, &on_error, (C), __LINE__, \
o->core_size * sizeof(forth_cell_t))
/**
@brief This is a wrapper around **check_depth**, to make checking the depth
short and simple.
@param DEPTH current depth of the stack
**/
#define cd(DEPTH) check_depth(o, &on_error, S, (DEPTH), __LINE__)
/**
@brief This macro makes sure any dictionary pointers never cross into
the stack area.
@param DPTR a index into the dictionary
@return checked index
**/
#define dic(DPTR) check_dictionary(o, &on_error, (DPTR))
/**
@brief This macro wraps up the tracing function, which we may want to remove.
@param ENV forth environment
@param INSTRUCTION instruction being executed
@param STK stack pointer
@param TOP current top of stack to print out
**/
#define TRACE(ENV,INSTRUCTION,STK,TOP) trace(ENV,INSTRUCTION,STK,TOP)
#else
/**
The following are defined only if we remove the checking and
the debug code.
**/
#define ck(C) (C)
#define ckchar(C) (C)
#define cd(DEPTH) ((void)DEPTH)
#define dic(DPTR) check_dictionary(o, &on_error, (DPTR))
#define TRACE(ENV, INSTRUCTION, STK, TOP)
#endif
/**
@brief When we are reading input to be parsed we need a space to hold that
input, the offset to this area is into a field called **m** in **struct forth**,
defined later, the offset is a multiple of cells and not chars.
**/
#define STRING_OFFSET (32u)
/**
@brief This defines the maximum length of a Forth words name, that is the
string that represents a Forth word.
**/
#define MAXIMUM_WORD_LENGTH (32u)
/**
@brief The minimum stack size of both the variable and return stack, the stack
size should not be made smaller than this otherwise the built in code and
code in *forth.fth* will not work.
**/
#define MINIMUM_STACK_SIZE (64u)
/**
@brief The start of the dictionary is after the registers and the
**STRING_OFFSET**, this is the area where Forth definitions are placed.
**/
#define DICTIONARY_START (STRING_OFFSET+MAXIMUM_WORD_LENGTH)
/**
Later we will encounter a field called **CODE**, a field in every Word
definition and is always present in the Words header. This field contains
multiple values at different bit offsets, only the lower 16 bits of this
cell are ever used. The next macros are helper to extract information from
the **CODE** field.
**/
/**
@brief The bit offset for word length start.
**/
#define WORD_LENGTH_OFFSET (8)
/**
@brief The bit offset for the bit that determines whether a word is a
compiling, or an immediate word.
**/
#define COMPILING_BIT_OFFSET (15)
/**
@brief This is the bit that determines whether a word is a compiling word
(the bit is set) or an immediate word (the bit is cleared).
**/
#define COMPILING_BIT (1u << COMPILING_BIT_OFFSET)
/**
@brief The lower 5-bits of the upper word are used for the word length
**/
#define WORD_MASK (0x1f)
/**
@brief **WORD_LENGTH** extracts the length of a Forth words name so we know
where it is relative to the **PWD** field of a word.
@param CODE This should be the **CODE** field of a word
**/
#define WORD_LENGTH(CODE) (((CODE) >> WORD_LENGTH_OFFSET) & WORD_MASK)
/**
@brief Offset for the word hidden bit
**/
#define WORD_HIDDEN_BIT_OFFSET (7)
/**
@brief Test if a word is a **hidden** word, one that is not in the search
order for the dictionary.
@param CODE field to test
**/
#define WORD_HIDDEN(CODE) ((CODE) & 0x80)
/**
@brief The lower 7 bits of the CODE field are used for the VM instruction,
limiting the number of instructions the virtual machine can have in it, the
higher bits are used for other purposes.
**/
#define INSTRUCTION_MASK (0x7f)
/**
@brief A mask that the VM uses to extract the instruction.
@param k This **CODE**, or a **CODE** Field of a Forth word
**/
#define instruction(k) ((k) & INSTRUCTION_MASK)
/**
@brief **VERIFY** is our assert macro that will always been defined
regardless of whether **NDEBUG** is defined.
@param X expression to verify
**/
#define VERIFY(X) do { if(!(X)) { abort(); } } while(0)
/**
@brief Errno are biased to fall in the range of -256...-511 when
the get to the Forth interpreter.
**/
#define BIAS_ERRNO (-256)
/**
@brief Signals numbers are biased to fall in the range of -512...-1024
when they get to the Forth interpreter.
**/
#define BIAS_SIGNAL (-512)
/**
## Enumerations and Constants
**/
/**
This following string is a forth program that gets called when creating a
new Forth environment, it is run before the user gets a chance to do anything.
The program is kept as small as possible, but is dependent on the virtual
machine image being set up correctly with other, basic, constants being defined
first, they will be described as they are encountered. Suffice to say,
before this program is executed the following happens:
1) The virtual machine image is initialized
2) All the virtual machine primitives are defined
3) All registers are named and some constants defined
4) **;** is defined
Of note, words such as **if**, **else**, **then**, and even comments
- **(** -, are not actually Forth primitives, there are defined in terms
of other Forth words.
The Forth interpreter is a simple loop that does the following:
Start the interpreter loop <-----------<-----------------<---.
Get a space delimited word \
Attempt to look up that word in the dictionary \
Was the word found? ^
|-Yes: |
| Are we in compile mode? |
| |-Yes: ^
| | \-Is the Word an Immediate word? |
| | |-Yes: |
| | | \-Execute the word >--------->----------------->----->.
| | \-No: |
| | \-Compile the word into the dictionary >------->----->.
| \-No: |
| \-Execute the word >------------->----------------->----->.
\-No: ^
\-Can the word be treated as a number? |
|-Yes: |
| \-Are we in compile mode? |
| |-Yes: |
| | \-Compile a literal into the dictionary >------>----->.
| \-No: |
| \-Push the number to the variable stack >------>----->.
\-No: |
\-An Error has occurred, print out an error message >---->.
As you can see, there is not too much too it, however there are still a lot
of details left out, such as how exactly the virtual machine executes words
and how this loop is formed.
A short description of the words defined in **initial_forth_program**
follows, bear in mind that they depend on the built in primitives, the
named registers being defined, as well as **state** and **;**.
here - push the current dictionary pointer
[ - immediately enter command mode
] - enter compile mode
>mark - make a hole in the dictionary and push a pointer to it
:noname - make an anonymous word definition, push token to it, the
definition is terminated by ';' like normal word definitions.
if - immediate word, begin if...else...then clause
else - immediate word, optional else clause
then - immediate word, end if...else...then clause
begin - immediate word, start a begin...until loop
until - immediate word, end begin...until loop, jump to matching
begin at run time if top of stack is zero.
')' - push a ")" character to the stack
( - begin a Forth comment, terminated by a )
rot - perform stack manipulation: x y z => y z x
-rot - perform stack manipulation: x y z => z x y
tuck - perform stack manipulation: x y => y x y
nip - perform stack manipulation: x y => y
allot - allocate space in the dictionary
bl - push the space character to the stack
space - print a space
. - print out current top of stack, followed by a space
**/
static const char *initial_forth_program =
": smudge pwd @ 1 + dup @ hidden-mask xor swap ! _exit\n"
": (;) ' _exit , 0 state ! _exit\n"
": ; immediate (;) smudge _exit\n"
": : immediate :: smudge _exit\n"
": here h @ ; \n"
": [ immediate 0 state ! ; \n"
": ] 1 state ! ; \n"
": >mark here 0 , ; \n"
": :noname immediate -1 , here dolist , ] ; \n"
": if immediate ' ?branch , >mark ; \n"
": else immediate ' branch , >mark swap dup here swap - swap ! ; \n"
": then immediate dup here swap - swap ! ; \n"
": begin immediate here ; \n"
": until immediate ' ?branch , here - , ; \n"
": ( immediate begin key ')' = until ; \n"
": rot >r swap r> swap ; \n"
": -rot rot rot ; \n"
": tuck swap over ; \n"
": nip swap drop ; \n"
": 2drop drop drop ; \n"
": allot here + h ! ; \n"
": emit _emit drop ; \n"
": space bl emit ; \n"
": evaluate 0 evaluator ; \n"
": . (.) drop space ; \n"
": ? @ . ;\n" ;
/**
@brief This is a string used in number to string conversion in
**number_printer**, which is dependent on the current base.
**/
static const char conv[] = "0123456789abcdefghijklmnopqrstuvwxzy";
/**
@brief int to **char\*** map for file access methods.
**/
enum fams {
FAM_WO, /**< write only */
FAM_RO, /**< read only */
FAM_RW, /**< read write */
LAST_FAM /**< marks last file access method */
};
/**
@brief These are the file access methods available for use when the virtual
machine is up and running, they are passed to the built in primitives that
deal with file input and output (such as open-file).
@note It might be worth adding more *fams*, which **fopen** can accept.
**/
static const char *fams[] = {
[FAM_WO] = "wb",
[FAM_RO] = "rb",
[FAM_RW] = "w+b",
NULL
};
/**
@brief The following are different reactions errors can take when
using **longjmp** to a previous **setjump**.
**/
enum errors
{
INITIALIZED, /**< setjmp returns zero if returning directly */
OK, /**< no error, do nothing */
FATAL, /**< fatal error, this invalidates the Forth image */
RECOVERABLE, /**< recoverable error, this will reset the interpreter */
};
/**
We can serialize the Forth virtual machine image, saving it to disk so we
can load it again later. When saving the image to disk it is important
to be able to identify the file somehow, and to identify properties of
the image.
Unfortunately each image is not portable to machines with different
cell sizes (determined by "sizeof(forth_cell_t)") and different endianess,
and it is not trivial to convert them due to implementation details.
**enum header** names all of the different fields in the header.
The first four fields (**MAGIC0**...**MAGIC3**) are magic numbers which identify
the file format, so utilities like *file* on Unix systems can differentiate
binary formats from each other.
**CELL_SIZE** is the size of the virtual machine cell used to create the image.
**VERSION** is used to both represent the version of the Forth interpreter and
the version of the file format.
**ENDIAN** is the endianess of the VM
**MAGIC7** is the last magic number.
When loading the image the magic numbers are checked as well as
compatibility between the saved image and the compiled Forth interpreter.
**/
enum header { /**< Forth header description enum */
MAGIC0, /**< Magic number used to identify file type */
MAGIC1, /**< Magic number ... */
MAGIC2, /**< Magic number ... */
MAGIC3, /**< Magic number ... */
CELL_SIZE, /**< Size of a Forth cell, or virtual machine word */
VERSION, /**< Version of the image */
ENDIAN, /**< Endianess of the interpreter */
LOG2_SIZE, /**< Log-2 of the size */
MAX_HEADER_FIELD
};
/**
The header itself, this will be copied into the **forth_t** structure on
initialization, the **ENDIAN** field is filled in then as it seems impossible
to determine the endianess of the target at compile time.
**/
static const uint8_t header[MAX_HEADER_FIELD] = {
[MAGIC0] = 0xFF,
[MAGIC1] = '4',
[MAGIC2] = 'T',
[MAGIC3] = 'H',
[CELL_SIZE] = sizeof(forth_cell_t),
[VERSION] = FORTH_CORE_VERSION,
[ENDIAN] = -1,
[LOG2_SIZE] = -1
};
/**
@brief The main structure used by the virtual machine is **forth_t**.
The structure is defined here and not in the header to hide the implementation
details it, all API functions are passed an opaque pointer to the structure
(see <https://en.wikipedia.org/wiki/Opaque_pointer>).
Only three fields are serialized to the file saved to disk:
1) **header**
2) **core_size**
3) **m**
And they are done so in that order, **core_size** and **m** are save in
whatever endianess the machine doing the saving is done in, however
**core_size** is converted to a **uint64_t** before being save to disk
so it is not of a variable size. **m** is a flexible array member
**core_size** number of members.
The **m** field is the virtual machines working memory, it has its own internal
structure which includes registers, stacks and a dictionary of defined words.
The **m** field is laid out as follows, assuming the size of the virtual
machine is 32768 cells big:
.-----------------------------------------------.
| 0-3F | 40-7BFF |7C00-7DFF|7E00-7FFF|
.-----------------------------------------------.
| Registers | Dictionary... | V stack | R stack |
.-----------------------------------------------.
V stack = The Variable Stack
R stack = The Return Stack
The dictionary has its own complex structure, and it always starts just
after the registers. It includes scratch areas for parsing words, start up
code and empty space yet to be consumed before the variable stack. The sizes
of the variable and returns stack change depending on the virtual machine
size. The structures within the dictionary will be described later on.
In the following structure, **struct forth**, values marked with a '~~'
are serialized, the serialization takes place in order. Values are written
out as they are. The size of the Forth memory core gets stored in the header,
the size must be a power of two, so its binary logarithm can be stored
in a single byte.
**/
struct forth { /**< FORTH environment */
uint8_t header[sizeof(header)]; /**< ~~ header for core file */
forth_cell_t core_size; /**< size of VM */
uint8_t *s; /**< convenience pointer for string input buffer */
forth_cell_t *S; /**< stack pointer */
forth_cell_t *vstart;/**< index into m[] where variable stack starts*/
forth_cell_t *vend; /**< index into m[] where variable stack ends*/
const struct forth_functions *calls; /**< functions for CALL instruction */
int unget; /**< single character of push back */
bool unget_set; /**< character is in the push back buffer? */
size_t line; /**< count of new lines read in */
forth_cell_t m[]; /**< ~~ Forth Virtual Machine memory */
};
/**
@brief This enumeration describes the possible actions that can be taken when an
error occurs, by setting the right register value it is possible to make errors
halt the interpreter straight away, or even to make it invalidate the core.
This does not override the behavior of the virtual machine when it detects an
error that cannot be recovered from, only when it encounters an error such
as a divide by zero or a word not being found, not when the virtual machine
executes and invalid instruction (which should never normally happen unless
something has been corrupted).
**/
enum actions_on_error
{
ERROR_RECOVER, /**< recover when an error happens, like a call to ABORT */
ERROR_HALT, /**< halt on error */
ERROR_INVALIDATE, /**< halt on error and invalid the Forth interpreter */
};
/**
@brief There are a small number of registers available to the virtual machine,
they are actually indexes into the virtual machines main memory, this is so
that the programs running on the virtual machine can access them.
There are other registers that are in use that the virtual machine cannot
access directly (such as the program counter or instruction pointer).
Some of these registers correspond directly to well known Forth concepts,
such as the dictionary and return stack pointers, others are just
implementation details.
X-Macros are an unusual but useful method of making tables
of data. We use this to store the registers name, it's address within
the virtual machine and the enumeration for it.
More information about X-Macros can be found here:
* <https://en.wikipedia.org/wiki/X_Macro>
* <http://www.drdobbs.com/cpp/the-x-macro/228700289>
* <https://stackoverflow.com/questions/6635851>
**/
#define XMACRO_REGISTERS \
X("h", DIC, 6, "dictionary pointer")\
X("r", RSTK, 7, "return stack pointer")\
X("state", STATE, 8, "interpreter state")\
X("base", BASE, 9, "base conversion variable")\
X("pwd", PWD, 10, "pointer to previous word")\
X("`source-id", SOURCE_ID, 11, "input source selector")\
X("`sin", SIN, 12, "string input pointer")\
X("`sidx", SIDX, 13, "string input index")\
X("`slen", SLEN, 14, "string input length")\
X("`start-address", START_ADDR, 15, "pointer to start of VM")\
X("`fin", FIN, 16, "file input pointer")\
X("`fout", FOUT, 17, "file output pointer")\
X("`stdin", STDIN, 18, "file pointer to stdin")\
X("`stdout", STDOUT, 19, "file pointer to stdout")\
X("`stderr", STDERR, 20, "file pointer to stderr")\
X("`argc", ARGC, 21, "argument count")\
X("`argv", ARGV, 22, "arguments")\
X("`debug", DEBUG, 23, "turn debugging on/off if enabled")\
X("`invalid", INVALID, 24, "non-zero on serious error")\
X("`top", TOP, 25, "*stored* version of top of stack")\
X("`instruction", INSTRUCTION, 26, "start up instruction")\
X("`stack-size", STACK_SIZE, 27, "size of the stacks")\
X("`error-handler", ERROR_HANDLER, 28, "actions to take on error")\
X("`handler", THROW_HANDLER, 29, "exception handler is stored here")\
X("`signal", SIGNAL_HANDLER, 30, "signal handler")\
X("`x", SCRATCH_X, 31, "scratch variable x")
/**
@brief The virtual machine registers used by the Forth virtual machine.
**/
enum registers {
#define X(NAME, ENUM, VALUE, HELP) ENUM = VALUE,
XMACRO_REGISTERS
#undef X
};
static const char *register_names[] = { /**< names of VM registers */
#define X(NAME, ENUM, VALUE, HELP) NAME,
XMACRO_REGISTERS
#undef X
NULL
};
/**
@brief The enum **input_stream** lists values of the **SOURCE_ID** register.
Input in Forth systems traditionally (tradition is a word we will keep using
here, generally in the context of programming it means justification for
cruft) came from either one of two places, the keyboard that the programmer
was typing at, interactively, or from some kind of non volatile store, such
as a floppy disk. Our C program has no portable way of interacting
directly with the keyboard, instead it could interact with a file handle
such as **stdin**, or read from a string. This is what we do in this
interpreter.
A word in Forth called **SOURCE-ID** can be used to query what the input device
currently is, the values expected are zero for interactive interpretation, or
minus one (minus one, or all bits set, is used to represent truth conditions
in most Forths, we are a bit more liberal in our definition of true) for string
input. These are the possible values that the **SOURCE_ID** register can take.
The **SOURCE-ID** word, defined in *forth.fth*, then does more processing
of this word.
Note that the meaning is slightly different in our Forth to what is meant
traditionally, just because this program is taking input from **stdin** (or
possibly another file handle), does not mean that this program is being
run interactively, it could possibly be part of a Unix pipe, which is
the reason the interpreter defaults to being as silent as possible.
**/
enum input_stream {
FILE_IN, /**< file input; this could be interactive input */
STRING_IN = -1 /**< string input */
};
/**
@brief **enum instructions** contains each virtual machine instruction, a valid
instruction is less than LAST. One of the core ideas of Forth is that
given a small set of primitives it is possible to build up a high level
language, given only these primitives it is possible to add conditional
statements, case statements, arrays and strings, even though they do not
exist as instructions here.
Most of these instructions are simple (such as; pop two items off the
variable stack, add them and push the result for **ADD**) however others are a
great deal more complex and will require paragraphs to explain fully
(such as **READ**, or how **IMMEDIATE** interacts with the virtual machines
execution).
The instruction name, enumeration and a help string, are all stored with
an X-Macro.
Some of these words are not necessary, that is they can be implemented in
Forth, but they are useful to have around when the interpreter starts
up for debugging purposes (like **pnum**).
**/
#define XMACRO_INSTRUCTIONS\
X(0, PUSH, "push", " -- u : push a literal")\
X(0, CONST, "const", " -- u : push a literal")\
X(0, RUN, "run", " -- : run a Forth word")\
X(0, DEFINE, "define", " -- : make new Forth word, set compile mode")\
X(0, IMMEDIATE, "immediate", " -- : make a Forth word immediate")\
X(0, READ, "read", " c\" xxx\" -- : read Forth word, execute it")\
X(1, LOAD, "@", "addr -- u : load a value")\
X(2, STORE, "!", "u addr -- : store a value")\
X(1, CLOAD, "c@", "c-addr -- u : load character value")\
X(2, CSTORE, "c!", "u c-addr -- : store character value")\
X(2, SUB, "-", "u1 u2 -- u3 : subtract u2 from u1 yielding u3")\
X(2, ADD, "+", "u u -- u : add two values")\
X(2, AND, "and", "u u -- u : bitwise and of two values")\
X(2, OR, "or", "u u -- u : bitwise or of two values")\
X(2, XOR, "xor", "u u -- u : bitwise exclusive or of two values")\
X(1, INV, "invert", "u -- u : invert bits of value")\
X(2, SHL, "lshift", "u1 u2 -- u3 : left shift u1 by u2")\
X(2, SHR, "rshift", "u1 u2 -- u3 : right shift u1 by u2")\
X(2, MUL, "*", "u u -- u : multiply to values")\
X(2, DIV, "/", "u1 u2 -- u3 : divide u1 by u2 yielding u3")\
X(2, ULESS, "u<", "u u -- bool : unsigned less than")\
X(2, UMORE, "u>", "u u -- bool : unsigned greater than")\
X(0, EXIT, "exit", " -- : return from a word definition")\
X(0, KEY, "key", " -- char : get one character of input")\
X(1, EMIT, "_emit", " char -- status : get one character of input")\
X(0, FROMR, "r>", " -- u, R: u -- : move from return stack")\
X(1, TOR, ">r", "u --, R: -- u : move to return stack")\
X(0, BRANCH, "branch", " -- : unconditional branch")\
X(1, QBRANCH, "?branch", "u -- : branch if u is zero")\
X(1, PNUM, "(.)", "u -- n : print a number returning an error on failure")\
X(1, COMMA, ",", "u -- : write a value into the dictionary")\
X(2, EQUAL, "=", "u u -- bool : compare two values for equality")\
X(2, SWAP, "swap", "x1 x2 -- x2 x1 : swap two values")\
X(1, DUP, "dup", "u -- u u : duplicate a value")\
X(1, DROP, "drop", "u -- : drop a value")\
X(2, OVER, "over", "x1 x2 -- x1 x2 x1 : copy over a value")\
X(0, TAIL, "tail", " -- : tail recursion")\
X(0, FIND, "find", "c\" xxx\" -- addr | 0 : find a Forth word")\
X(0, DEPTH, "depth", " -- u : get current stack depth")\
X(0, SPLOAD, "sp@", " -- addr : load current stack pointer ")\
X(0, SPSTORE, "sp!", " addr -- : modify the stack pointer")\
X(0, CLOCK, "clock", " -- u : push a time value")\
X(3, EVALUATOR, "evaluator", "c-addr u 0 | file-id 0 1 -- u : evaluate file/str")\
X(0, PSTK, ".s", " -- : print out values on the stack")\
X(1, RESTART, "restart", " error -- : restart system, cause error")\
X(0, CALL, "call", "n1...nn c -- n1...nn c : call a function")\
X(2, SYSTEM, "system", "c-addr u -- bool : execute system command")\
X(1, FCLOSE, "close-file", "file-id -- ior : close a file")\
X(3, FOPEN, "open-file", "c-addr u fam -- open a file")\
X(2, FDELETE, "delete-file", "c-addr u -- ior : delete a file")\
X(3, FREAD, "read-file", "c-addr u file-id -- u ior : write block")\
X(3, FWRITE, "write-file", "c-addr u file-id -- u ior : read block")\
X(1, FPOS, "file-position", "file-id -- u : get the file position")\
X(2, FSEEK, "reposition-file", "file-id u -- ior : reposition file")\
X(1, FFLUSH, "flush-file", "file-id -- ior : flush a file")\
X(4, FRENAME, "rename-file", "c-addr1 u1 c-addr2 u2 -- ior : rename file")\
X(0, TMPFILE, "temporary-file", "-- file-id ior : open a temporary file")\
X(1, RAISE, "raise", "signal -- bool : raise a signal")\
X(0, DATE, "date", " -- date : push the time")\
X(3, MEMMOVE, "memory-copy", " r-addr1 r-addr2 u -- : move a block of memory from r-addr2 to r-addr1")\
X(3, MEMCHR, "memory-locate", " r-addr char u -- r-addr | 0 : locate a character memory")\
X(3, MEMSET, "memory-set", " r-addr char u -- : set a block of memory")\
X(3, MEMCMP, "memory-compare", " r-addr1 r-addr2 u -- u : compare two blocks of memory")\
X(1, ALLOCATE, "allocate", " u -- r-addr ior : allocate a block of memory")\
X(1, FREE, "free", " r-addr1 -- ior : free a block of memory")\
X(2, RESIZE, "resize", " r-addr u -- r-addr ior : resize a block of memory")\
X(2, GETENV, "getenv", " c-addr u -- r-addr u : return an environment variable")\
X(0, LAST_INSTRUCTION, NULL, "")
/** // @todo Implement these instructions?
X(1, MLOAD, "m@", "raddr -- u : load a value, non-relative")\
X(2, MSTORE, "m!", "u r-addr -- u : store a value, non-relative")\
X(1, MCLOAD, "mc@", "rc-addr -- u : load character value, non-relative")\
X(2, MCSTORE, "mc!", "u rc-addr -- : store character value, non-relative")\
*/
/**
@brief All of the instructions that can be used by the Forth virtual machine.
**/
enum instructions {
#define X(STACK, ENUM, STRING, HELP) ENUM,
XMACRO_INSTRUCTIONS
#undef X
};
/**
So that we can compile programs we need ways of referring to the basic
programming constructs provided by the virtual machine, theses words are
fed into the C function **compile** in a process described later.
**LAST_INSTRUCTION** is not an instruction, but only a marker of the last
enumeration used in **enum instructions**, so it does not get a name.
**/
static const char *instruction_names[] = { /**< instructions with names */
#define X(STACK, ENUM, STRING, HELP) STRING,
XMACRO_INSTRUCTIONS
#undef X
};
/**
This contains an array of values that are the minimum number of values
needed on the stack before a word can execute.
**/
static const int stack_bounds[] = { /**< number stack variables needed*/
#define X(STACK, ENUM, STRING, HELP) STACK,
XMACRO_INSTRUCTIONS
#undef X
};
/**
This X-Macro contains a list of constants that will be available to the
Forth interpreter.
**/
#define X_MACRO_CONSTANTS\
X("dictionary-start", DICTIONARY_START, "start of dictionary")\
X("r/o", FAM_RO, "read only file access method")\
X("r/w", FAM_RW, "read/write file access method")\
X("w/o", FAM_WO, "write only file access method")\
X("size", sizeof(forth_cell_t), "size of forth cell in bytes")\
X("#tib", MAXIMUM_WORD_LENGTH * sizeof(forth_cell_t), "")\
X("tib", STRING_OFFSET * sizeof(forth_cell_t), "")\
X("SIGABRT", -SIGABRT+BIAS_SIGNAL, "SIGABRT value")\
X("SIGFPE", -SIGFPE +BIAS_SIGNAL, "SIGFPE value")\
X("SIGILL", -SIGILL +BIAS_SIGNAL, "SIGILL value")\
X("SIGINT", -SIGINT +BIAS_SIGNAL, "SIGINT value")\
X("SIGSEGV", -SIGSEGV+BIAS_SIGNAL, "SIGSEGV value")\
X("SIGTERM", -SIGTERM+BIAS_SIGNAL, "SIGTERM value")\
X("bias-signal", BIAS_SIGNAL, "bias added to signals")\
X("bias-errno", BIAS_ERRNO, "bias added to errnos")\
X("instruction-mask", INSTRUCTION_MASK, "instruction mask for CODE field")\
X("word-mask", WORD_MASK, "word length mask for CODE field")\
X("hidden-bit", WORD_HIDDEN_BIT_OFFSET, "hide bit in CODE field")\
X("hidden-mask", 1u << WORD_HIDDEN_BIT_OFFSET, "hide mask for CODE ")\
X("compile-bit", COMPILING_BIT_OFFSET, "compile/immediate bit in CODE field")\