diff --git a/Makefile b/Makefile index cc92c5619..c20141f40 100644 --- a/Makefile +++ b/Makefile @@ -7,9 +7,9 @@ NETMASK=255,255,255,248 SRC = system syseng sysen1 sysen2 sysen3 sysnet kshack dragon channa \ midas _teco_ emacs emacs1 rms klh syshst sra mrc ksc eak gren \ bawden _mail_ l lisp libdoc comlap lspsrc nilcom rwk \ - inquir acount gz sys decsys ecc alan sail kcc -DOC = info _info_ sysdoc sysnet kshack _teco_ emacs emacs1 -BIN = sys2 device emacs _teco_ lisp liblsp alan inquir sail comlap + inquir acount gz sys decsys ecc alan sail kcc kcc_sy c +DOC = info _info_ sysdoc sysnet kshack _teco_ emacs emacs1 c kcc +BIN = sys2 device emacs _teco_ lisp liblsp alan inquir sail comlap c # These directories are put on the minsys tape. MINSYS = _ sys diff --git a/README.md b/README.md index ad19fa42c..05bfe5857 100644 --- a/README.md +++ b/README.md @@ -163,6 +163,7 @@ A list of [known ITS machines](doc/machines.md). - INSTAL, install executables on other ITS machines. - ITSDEV, ITS device server. - JOBS, list jobs by category. + - KCC, C compiler (binary only). - LISP, lisp interpreter and runtime library (autoloads only). - LOADP, displays system load. - LOCK, shut down system. 
diff --git a/bin/c/[clib].11 b/bin/c/[clib].11 new file mode 100755 index 000000000..748826692 Binary files /dev/null and b/bin/c/[clib].11 differ diff --git a/bin/c/[clib].14 b/bin/c/[clib].14 new file mode 100755 index 000000000..796705586 Binary files /dev/null and b/bin/c/[clib].14 differ diff --git a/bin/c/[clib].16 b/bin/c/[clib].16 new file mode 100755 index 000000000..2f860a24d Binary files /dev/null and b/bin/c/[clib].16 differ diff --git a/bin/c/[clib].9 b/bin/c/[clib].9 new file mode 100644 index 000000000..40d45796c Binary files /dev/null and b/bin/c/[clib].9 differ diff --git a/bin/c/[crel].16 b/bin/c/[crel].16 new file mode 100755 index 000000000..f64a7261d Binary files /dev/null and b/bin/c/[crel].16 differ diff --git a/bin/c/ts.cc b/bin/c/ts.cc new file mode 100755 index 000000000..74c24252e Binary files /dev/null and b/bin/c/ts.cc differ diff --git a/bin/c/}c.bin b/bin/c/}c.bin new file mode 100755 index 000000000..08a21dd16 Binary files /dev/null and b/bin/c/}c.bin differ diff --git a/bin/c/}lp.bin b/bin/c/}lp.bin new file mode 100755 index 000000000..100de5ffa Binary files /dev/null and b/bin/c/}lp.bin differ diff --git a/bin/c/}m.bin b/bin/c/}m.bin new file mode 100755 index 000000000..1e7981d6f Binary files /dev/null and b/bin/c/}m.bin differ diff --git a/doc/_info_/cc.recent b/doc/_info_/cc.recent new file mode 100755 index 000000000..224d64dfb --- /dev/null +++ b/doc/_info_/cc.recent @@ -0,0 +1,53 @@ +18 April 1977 + +--- C --- + +C is an implementation language, similar to BCPL except with data +types. Further information is available from Alan Snyder (AS@DM). +C is currently available only on DM and MC. + +--- Compiling --- + +CC is the C compiler command. Usage is + + :CC file1.name file2.name ... + +where the arguments are the path names of C source files which are to +be compiled. Each file will be compiled in turn, and if the +compilation is successful, the resulting relocatable file will be +placed in the file file?.rel.
Arguments beginning with the '-' +character are taken to be compiler options. Available options include: + + -c compile only, do not assemble + -g do not delete MIDAS file + -x syntax check only + -s produce a symbol table listing + -b compile big function (FUNCTION TOO LARGE) + +For example, the command + + :cc foo.c + +would compile the C program in the file FOO C in the current +directory, and place the resulting relocatable program in the file +FOO REL. + +--- Loading --- + +Relocatable programs produced by the C compiler are loaded together +with the C support routines by using the STINKR loader. In order +to load files FOO REL, BAR REL, and BLETCH REL and produce a +runnable file TS FOO, type the following to STINKR: + + x c/clib + l foo + l bar + l bletch + o ts.foo + ^@ + +The ^@ (ASCII NUL) terminates the teletype input file. These +commands (minus the ^@) could also be written in a file, say +FOO STINKR, in which case one could invoke STINKR with FOO +as a JCL argument and STINKR would read the commands from the +command file. diff --git a/doc/c/c.info b/doc/c/c.info new file mode 100755 index 000000000..470d97c6d --- /dev/null +++ b/doc/c/c.info @@ -0,0 +1,249 @@ +C Info (26 August 1980) + +--- C --- + +C is an implementation language, similar to BCPL except with +data types. It is the primary language used on Unix systems. +This implementation runs under the ITS and TOPS-20 operating +systems. This implementation is moderately compatible with +the Unix C implementation. However, the Unix system calls +are NOT implemented. + +Some portions of the UNIX Standard I/O library are +implemented by C10STD. See C10STD.C (in on XX, +and in ITS machines in the C; directory) for details. + +Further information is available from Eliot Moss (EBM @ XX). + +--- Compiling --- + +CC is the C compiler command. Usage is + + :cc file1.c file2.c ... 
+ (more on pathnames below in I/O discussion) + +where the arguments are the path names of C source files which +are to be compiled. Each file will be compiled in turn, and if +the compilation is successful, the resulting relocatable file +will be placed in the file "file*.stk". Arguments beginning +with the '-' character are taken to be compiler options. +Available options include: + + -c compile only, do not assemble + -g do not delete MIDAS file + -x syntax check only + -s produce symbol table (not very useful) + -b compile big function (FUNCTION TOO LARGE) + +For example, the command + + :cc foo.c + +would compile the C program in the file "foo.c" in the current +directory, and place the resulting relocatable program in the +file "foo.stk". + +--- Loading --- + +Relocatable programs produced by the C compiler are loaded +together with the C support routines using the STINKR loader. +To load program files "foo", "bar", and "bletch" and produce a +runnable file "foo", type the following to STINKR: + + x c/clib + l foo + l bar + l bletch + o ts.foo + ^@ + +The ^@ (ASCII NUL) terminates the terminal input file. These +commands (minus the ^@) could also be written in a file, say +"foo.stinkr", in which case one could invoke STINKR by saying + + :stinkr foo + +and STINKR would read the commands from the command file. + +--- Library --- + +The above STINKR commands will load in a set of library routines +for performing I/O, etc. Here is an introduction to that +library (the source files, which are in on XX, are the +definitive documentation). 
+ +Here are some handy declarations for use below: + +char c; /* an ASCII character */ +int i, n, cc; /* an integer */ +int *p; /* an integer pointer */ +int b; /* a boolean */ +char *s, *s1, *s2; /* strings */ +char *fn; /* a file name or a path name (defined below) */ +FILE *fd; /* a "file descriptor" (can be declared INT) */ + +/* I/O routines -- see also C10IO, C10MIO */ + +/* COPEN - opens an I/O channel */ + +fd = copen (fn, mode, options); /* open file */ + char *fn; /* file name */ + char mode; /* 'r', 'w', or 'a' (append) */ + char *options; /* 0 (char I/O), "s" (string file), "b" (binary) */ + /* for string file, pass string as fn */ + + /* returns - if open fails */ + +/* Other routines: */ + +cprint (fd, s, a1, a2, ...); /* print s on fd */ + /* s is printed on the output file fd. If fd is not supplied, + it defaults to cout, the standard output, i.e., + cprint ("foo", ...) defaults fd to cout. s may contain + format descriptors, of the following forms: + + %c - print a character + %d - print an integer in decimal + %o - print an integer in octal + %s - print a string + %z - print N copies of a character (N + supplied via field width - read on) + + These use the other arguments, a1, a2, etc., in order. + For example, + cprint (fd, "%c plus %d equals %s.\n", '2', 1+1, "4") + results in + 2 plus 2 equals 4. + + The format letter may be preceded by a number, giving + the minimum field width. The output is normally right + justified in the field, but is left justified in a field + of size at least N if -N is given as the width. Spaces + are the normal padding character, used to fill out if + the argument does not fill the specified number of + columns. If a '0' character immediately follows the + '%', then '0' will be used as the padding character. + If the character following the '0' is not a digit, then + it will be used as the padding character instead of '0'.
+ Here are some examples of cprint (s, 312): + + s = "%d" ==> 312 + s = "%9d" ==> 312 + s = "%09d" ==> 000000312 + s = "%0*9d" ==> ******312 + s = "%-0*9d" ==> 312****** + s = "%o" ==> 470 + + For a more complete explanation, read CPRINT.C. It is + even possible to define your own formats, etc. */ + +c = cgetc (fd); /* get character (text input) */ +c = cputc (c, fd); /* put character (text output) */ + +/* Note: these are redundant because cgetc and cputc do the +right thing for binary files */ + +i = cgeti (fd); /* get integer (binary input) */ +i = cputi (i, fd); /* put integer (binary output) */ + + +ungetc (c, fd); /* push unit back on input stream */ + /* Several (but not very many) units may be pushed back + on any channel. */ + +b = ceof (fd); /* indicates whether the channel is at + end of file; always FALSE for output. */ + +cflush (fd); /* flush the buffer -- writes any not + yet written output */ + +cclose (fd); /* close channel, after doing a cflush + for any buffered output */ + +c = getchar (); /* cgetc (cin); */ +s = gets (s); /* reads from cin until a newline, + storing into s. Returns the original + argument; does not store the newline. */ + +putchar (c); /* cputc (c, cout); */ +puts (s); /* writes all of s to cout, and a + newline afterwards */ + +/* FILENAMES -- see also C10FNM */ + +/* The standard ITS format filenames are supported, with the +following terminology: + + dev:dir;name type + + dev - device + dir - directory + name - first file name + type - second file name + +All these components are stored WITHOUT punctuation, as sixbit +words, once parsed. UNIX format ("pathnames") is also supported, +for ease in designing programs to run on more than one of TOPS20, +ITS, UNIX, and VAX/VMS. The interpretation is as follows: + + /dev/dir1/name.type converts to + dev:dir;name type + +If there is no leading /, there is no dev part. The available +routines are described below. 
*/ + +fnparse (old, dev, dir, nm, typ, gen, attr) + char *old, *dev, *dir, *nm, *typ, *gen, *attr; + /* parses a name into its components; the gen and attr + components will always be null, but are there for + generality of interface to other operating systems. */ + +char *fngdv (old, buf) char *old, *buf; +char *fngdr (old, buf) char *old, *buf; +char *fngnm (old, buf) char *old, *buf; +char *fngtp (old, buf) char *old, *buf; +char *fnggn (old, buf) char *old, *buf; +char *fngat (old, buf) char *old, *buf; + /* extract the relevant component only */ + +fncons (buf, dev, dir, nm, typ, gen, attr) + char *buf, *dev, *dir, *nm, *typ, *gen, *attr; + /* construct a filename (ITS format) from its components */ + +char *fnsdf (buf, old, dv, dir, nm, typ, gen, attr) + char *old, *buf, *dv, *dir, *nm, *typ, *gen, *attr; + /* buf is set to contain a new filename with the + supplied components substituted for null components of + the filename in old */ + +char *fnsfd (buf, old, dv, dir, nm, typ, gen, attr) + char *old, *buf, *dv, *dir, *nm, *typ, *gen, *attr; + + /* buf is set to contain a new filename with the + supplied components substituted for components of the + filename in old. This is different from fnsdf in that + non-zero component arguments (dv, dir, etc.) are + substituted even if old is not null in the corresponding + field. */ + + /* a FILESPEC is a structure containing the 4 components + as sixbit integers ... 
*/ + +fparse (old, fs) char *old; filespec *fs; + /* parse a filename, handles both ITS and UNIX formats */ + +prfile (fs, buf) filespec *fs; char *buf; + /* converts the filespec to a string, inserting standard + punctuation */ + + +/* STORAGE ALLOCATION */ + +p = salloc (n); /* allocate n words, return pointer to it */ +sfree (p); /* free storage allocated by salloc */ +s = calloc (n); /* allocate n characters, return ptr to it */ +cfree (s); /* free storage allocated by calloc */ + +/* In this implementation words and chars are the same; but it +is bad style to depend on it, for some day packed strings might +be implemented ... */ diff --git a/doc/c/c.mail b/doc/c/c.mail new file mode 100755 index 000000000..6cf5403b8 --- /dev/null +++ b/doc/c/c.mail @@ -0,0 +1,229 @@ +Received: from grape.ARPA by MC.LCS.MIT.EDU 6 Jun 86 02:07:00 EDT +Date: Thu, 5 Jun 86 23:06:03 pdt +From: jeff@aids-unix (Jeff Dean) +To: -s@ads.ARPA, jeff@ads.ARPA, c@mc + +#ifdef HEADER +From dan@rna Sat Jan 26 10:38:50 1985 +Received: from usenet by BRL-TGR.ARPA id a002453; 26 Jan 85 12:09 EST +From: Dan Ts'o +Newsgroups: net.sources +Subject: Multiple column filter +Message-ID: <355@rna.UUCP> +Date: 26 Jan 85 01:59:08 GMT +Xref: seismo net.sources:2469 +To: unix-sources@BRL-TGR.ARPA + + Here is a filter "mc" which rearranges input lines to multicolumned +output. That is, + + ls | mc + +is like + + ls -C (On Berkeley systems) + + There are other ways of producing multicolumned output, of course. +Two string to mind: + + 1) + ls | pr -t -4 -l1 + + 2) + ls | paste - - - - + + However, neither of these filters duplicates the "ls -C" output, +where the columns proceed downward first, then on to the next column, which +is what "mc" does. + The original code came from the old Harvard UNIX V6 systems, but I +had to modify it a bit to move out of the V6 PDP-11 world. + To compile it, + cc -O mc.c -ltermlib + although, if you don't have libtermlib.a, commenting out three lines +would do fine. 
In addition, + mc -132 + specifies a 132-column output. Normally, columns is taken from TERMCAP. + I find mc useful for all sorts of general purpose re-formatting, like +directory listing of other OS's and long columns of numbers. + + Cheers, + Dan Ts'o + Dept. Neurobiology + Rockefeller Univ. + 1230 York Ave. + NY, NY 10021 + 212-570-7671 + ...cmcl2!rna!dan + +#endif +/* + * mc - Multiple column filter + * Transform lines of input into listing of multiple columns + * Original code from Harvard V6 Unix + * Updated and reworked by Dan Ts'o, Rockefeller Univ. + * Now has cat-like syntax: + * mc [-] [file ...] + */ +#include <stdio.h> + +#define MEMINCR 1024L /* Memory buffer increments */ +#define ZSTACK1 (-1) /* An impossible (char *) (Sorry) */ + +char *nodename; +int width; +char *malloc(), **stack1(); +char *getenv(); + +main(c,v) +char **v; +{ + register int i,f; + register char *cp; + FILE *fd; + char tbuf[1024]; +/* Uncomment if your stdio doesn't buffer stdout + char buf[BUFSIZ]; + + setbuf(stdout, buf); + */ + f = 0; + nodename = *v; + cp = getenv("TERM"); + if (cp == NULL || tgetent(tbuf, cp) <= 0 + || (width = tgetnum("co")-1) < 8) + width = 79; + while (c > 1 && v[1][0] == '-') { + c--; + v++; + switch (v[0][1]) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + width = atoi(&v[0][1]); + break; + default: + error("%s: Bad option\n", *v); + } + } + + if (--c == 0) + exit(xfer(stdin)); + else { + while (c--) { + v++; + if (**v == '-' && v[0][1] == 0) { + xfer(stdin); + clearerr(stdin); + } + else { + if ((fd = fopen(*v, "r")) == NULL) { + fprintf(stderr, "%s: %s: Can't open\n", + nodename, *v); + f++; + } + else { + xfer(fd); + fclose(fd); + } + } + } + } + exit(f ?
-1 : 0); + +} + +xfer(fin) +FILE *fin; +{ + register int i; + register char *cp; + char line[1024]; + int max, items, columns; + int row_p, index, rows, col_p; + char **bot; + + items = max = 0; + while(fgets(line, sizeof line, fin) != NULL) { + i = strlen(line); + if (line[--i] == '\n') + line[i] = 0; + else + i++; + if(i >= width) { + error("Line of length=%d is too long for width=%d\n", + i, width); + } + if((cp = (char *)malloc(i+1)) == NULL) + error("Out of memory\n"); + strcpy(cp, line); + bot = stack1(cp); + if(i > max) max = i; + items++; + } + columns = width / (max+1); + rows = (items + columns - 1) / columns; + for(row_p = 0; row_p < rows; row_p++) { + for(col_p = 0; col_p < columns; col_p++) { + index = (col_p * rows) + row_p; + if(index >= items) + continue; + if((col_p + 1) * rows + row_p >= items) + printf("%s", bot[items - index - 1]); + else + printf("%-*s ", max, bot[items - index - 1]); + } + printf("\n"); + } + fflush(stdout); + stack1(ZSTACK1); + return ferror(fin); +} + +error(a, b, c, d, e) +{ + fprintf(stderr, "%s: ", nodename); + fprintf(stderr, a, b, c, d, e); + exit (1); +} + +char **stack1(s) +char *s; +{ + static char **s_beg, **s_end; + static long nbuf = 0; + register char **v, **u; + register long n; + + if (s == (char *) ZSTACK1) { + if (nbuf > 0) + free(s_beg); + nbuf = 0; + return 0; + } + if (nbuf == 0 || s_end <= s_beg) { + n = MEMINCR * (nbuf+1); + v = (char **)malloc(n*(sizeof (char *))); + if (v == NULL) + error("mc: Out of memory\n"); + s_beg = v; + v += n; + if (nbuf > 0) { + u = s_end+(MEMINCR*nbuf); + while (u > s_end) + *--v = *--u; + free(s_end); + } + nbuf++; + s_end = v; + } + *--s_end = s; + return s_end; +} + + +Date: Wed, 4 Jun 86 07:59:50 EDT +From: "R. P. 
Miller" +To: C%MX.LCS.MIT.EDU@MC.LCS.MIT.EDU +Message-ID: <[MX.LCS.MIT.EDU].924252.860604.ARPEE> + +mail + diff --git a/doc/c/c.refman b/doc/c/c.refman new file mode 100755 index 000000000..d1c38fbf3 --- /dev/null +++ b/doc/c/c.refman @@ -0,0 +1,2292 @@ + + + + C Reference Manual + + + 2 March 1977 + + + Dennis M. Ritchie + Bell Telephone Laboratories + Murray Hill, New Jersey 07974 + + + Alan Snyder + Laboratory for Computer Science + Massachusetts Institute of Technology + + + + +1. Introduction + C is a computer language based on the earlier language B [1], +itself a descendant of BCPL [3]. C differs from B and BCPL +primarily by the introduction of types, along with the +appropriate extra syntax and semantics. + Most of the software for the UNIX time-sharing system [4] is +written in C, as is the operating system itself. In addition to +the UNIX C compiler, there exist C compilers for the HIS 6000 and +the IBM System/370 [2]. This manual describes the C programming +language as implemented by the portable C compiler [6]. It is a +revision by the second author of the original C Reference Manual +(contained in [5]), which describes the UNIX C compiler. +Differences with respect to the UNIX C compiler and undesirable +limitations of the current portable C compiler are described in +footnotes to this document. + The report ``The C Programming Language'' [5] contains a +tutorial introduction to C and a description of a set of portable +I/O routines, concerned primarily with I/O. + +2. Lexical conventions + There are six kinds of tokens: identifiers, keywords, +constants, strings, expression operators, and other separators. +In general blanks, tabs, newlines, and comments as described +below are ignored except as they serve to separate tokens. At +least one of these characters is required to separate otherwise +adjacent identifiers, constants, and certain operator-pairs. 
+ If the input stream has been parsed into tokens up to a given +character, the next token is taken to include the longest string +of characters which could possibly constitute a token. + +2.1 Comments + The characters /* introduce a comment, which terminates with +the characters */. Comments thus may not be nested. + C Reference Manual - 2 + + +2.2 Identifiers (Names) + An identifier is a sequence of letters and digits; the first +character must be alphabetic. The underscore ``_'' counts as +alphabetic. Upper and lower case letters are not distinguished. +There is no limit placed on the length of identifiers; all +characters of internal identifiers are significant. However, the +number of significant characters in external identifiers (i.e., +function names and names of external variables) may be limited by + 1 +the operating system to as few as the first five characters. +This limitation on external identifiers can be circumvented, to +some extent, by using the token replacement facility (described +in section 12.1). + +2.3 Keywords + The following identifiers are reserved for use as keywords, and +may not be used otherwise: + + int break + char continue + float if + double else + long goto + short return + unsigned entry + struct for + auto do + extern while + register switch + static case + sizeof default + typedef + +The entry keyword is not currently implemented by any compiler +but is reserved for future use. + +2.3 Constants + There are several kinds of constants, as follows: + +2.3.1 Integer constants + An integer constant is a sequence of digits. An integer is +taken to be octal if it begins with 0, hexadecimal if it begins +with 0x (or 0X), and decimal otherwise. The digits 8 and 9 have +octal value 10 and 11 respectively. An integer constant + + + +_________________________ + 1 + The UNIX C compiler distinguishes upper and lower case in all +identifiers and accepts keywords only in lower case. 
In +addition, the UNIX C compiler treats only the first eight +characters of internal identifiers and the first seven characters +of external identifiers as significant. + C Reference Manual - 3 + + + 2 +immediately followed by l or L is a long integer constant. + +2.3.2 Character constants + 3 + A character constant consists of a single ASCII character +enclosed in single quotes `` ' ''. Within a character constant a +single quote must be preceded by a back-slash ``\''. Certain +non-graphic characters, and ``\'' itself, may be escaped + 4 +according to the following table: + + BS \b + NL \n + CR \r + HT \t + VT \v + FF \p + ddd \ddd + \ \\ + +The escape ``\ddd'' consists of the backslash followed by 1, 2, +or 3 octal digits which are taken to specify the value of the +desired character. A special case of this construction is ``\0'' +(not followed by a digit) which indicates a null character. + Character constants behave exactly like integers whose value is + 5 +the corresponding ASCII code. They do not behave like objects +of character type. + +2.3.3 Floating constants + A floating constant consists of an integer part, a decimal +point, a fraction part, an e (or E), and an optionally signed +integer exponent. The integer and fraction parts both consist of +a sequence of digits. Either the integer part or the fraction +part (not both) may be missing; either the decimal point or the e +and the exponent (not both) may be missing. Every floating +constant is taken to be double-precision. + + + +_________________________ + 2 + A long integer constant is equivalent to an integer constant +in the portable C compiler. + 3 + The UNIX C compiler allows 2 characters in character +constants. Other compilers may allow as many characters as can +be packed into a machine word. The order of packed characters in +a machine word is machine-dependent. + 4 + The UNIX C compiler does not recognize \v or \p. + 5 + On UNIX, character constants range in value from -128 to 127. 
+ C Reference Manual - 4 + + +2.4 Strings + A string is a sequence of characters surrounded by double +quotes `` " ''. A string has the type array-of-characters (see +below) and refers to an area of storage initialized with the +given characters. The compiler places a null byte ( \0 ) at the +end of each string so that programs which scan the string can +find its end. In a string, the character `` " '' must be +preceded by a ``\'' ; in addition, the same escapes as described +for character constants may be used. + +String constants are constant, i.e., they may not be modified. + +3. Syntax notation + In the syntax notation used in this manual, syntactic +categories are indicated by italic type, and literal words and +characters in gothic. Alternatives are listed on separate lines. +An optional terminal or non-terminal symbol is indicated by the +subscript ``opt,'' so that + + { expression } + opt +would indicate an optional expression in braces. + +4. What's in a Name? + C bases the interpretation of an identifier upon two attributes +of the identifier: its storage class and its type. The storage +class determines the location and lifetime of the storage +associated with an identifier; the type determines the meaning of +the values found in the identifier's storage. + There are four declarable storage classes: automatic, static, +external, and register. Automatic variables are created upon +each invocation of the function in which they are defined, and +are discarded on return. Static variables are local to a +function or to a group of functions defined in one source file, +but retain their values independently of function invocations. +External variables are independent of any function and accessible +by separately-compiled functions. Register variables are stored +(if possible) in the fast registers of the machine; like +automatic variables they are local to each function and disappear +on return. 
+ C supports four fundamental types of objects: characters, +integers, single-, and double-precision floating-point numbers. + + Characters (declared, and hereinafter called, char) are + chosen from the ASCII set; they occupy the right-most seven + bits in a machine-dependent unit of storage called a byte. + Integers (int) are represented in 2's complement notation in + a machine-dependent unit of storage called a word. Integers + C Reference Manual - 5 + + + 1 + should be at least 16 bits long. + The precision and range of single precision floating point + (float) quantities and double-precision floating-point + (double, or long float) quantities are machine-dependent. + + Besides the four fundamental types there is a conceptually +infinite class of derived types constructed from the fundamental +types in the following ways: + + arrays of objects of most types; + functions which return objects of a given type; + pointers to objects of a given type; + structures containing objects of various types. + +In general these methods of constructing objects can be applied +recursively. + +5. Objects and lvalues + An object is a manipulatable region of storage; an lvalue is an +expression referring to an object. An obvious example of an +lvalue expression is an identifier. There are operators which +yield lvalues: for example, if E is an expression of pointer +type, then *E is an lvalue expression referring to the object to +which E points. The name ``lvalue'' comes from the assignment +expression ``E1 = E2'' in which the left operand E1 must be an +lvalue expression. The discussion of each operator below +indicates whether it expects lvalue operands and whether it +yields an lvalue. + +6. Conversions + A number of operators may, depending on their operands, cause +conversion of the value of an operand from one type to another. +This section explains the result to be expected from such +conversions. 
+ +6.1 Characters and integers + A char object may be used anywhere an int may be. In all cases +the char is converted to an int by extending the character value + 2 +with high-order zero bits. + +_________________________ + 1 + The UNIX C compiler implements a longer variety of integer +(declared as long or long int) and unsigned integers (declared as +unsigned or unsigned int), for which most int operations are +applicable. The portable C compiler treats long int, short int, +and unsigned int as synonymous with int. + 2 + On the PDP-11, a character is converted to an integer by +propagating its sign through the upper 8 bits of the resultant +integer. Thus, it is possible to have (non-ASCII) characters +with negative values. + C Reference Manual - 6 + + +6.2 Float and double + All floating arithmetic in C is carried out in +double-precision. Whenever a float appears in an expression, it +is lengthened to double by zero-padding its fraction. When a +double must be converted to float, for example by an assignment, +the double is rounded before truncation to float length. + +6.3 Float and double; integer and character + Ints and chars may be converted to float or double; truncation +may occur for some values. Conversion of float or double to int + 1 +or char takes place with rounding. Again, erroneous results are +possible for some values. + +6.4 Pointers and integers + Integers may be added to pointers; in such cases the int is +converted as specified in the discussion of the addition +operator. + Two pointers to objects of the same type may be subtracted; in +this case the result is converted to an integer as specified in +the discussion of the subtraction operator. + +7. Expressions + The precedence of expression operators is the same as the order +of the major subsections of this section (highest precedence +first). Thus the expressions referred to as the operands of + +(section 7.4) are those expressions defined in sections 7.1-7.3. 
+Within each subsection, the operators have the same precedence. +Left- or right-associativity is specified in each subsection for +the operators discussed therein. The precedence and +associativity of all the expression operators is summarized in an +appendix. + Unless otherwise noted, the order of evaluation of expressions +is undefined. In particular the compiler considers itself free +to compute subexpressions in the order it believes most +efficient, even if the subexpressions involve side effects. + +7.1 Primary expressions + Primary expressions involving . , ->, subscripting, and +function calls group left to right. + +7.1.1 identifier + An identifier is a primary expression, provided it has been +suitably declared as discussed below. Its type is specified by +its declaration. However, if the type of the identifier is +``array of . . .'', then the value of the identifier-expression +is a pointer to the first object in the array, and the type of +the expression is ``pointer to . . .''. Moreover, an array +identifier is not an lvalue expression. + Likewise, an identifier which is declared ``function returning + +_________________________ + 1 + On UNIX, this conversion involves truncation towards 0. + C Reference Manual - 7 + + +. . .'', when used except in the function-name position of a +call, is converted to ``pointer to function returning . . .''. + +7.1.2 constant + A decimal, octal, character, or floating constant is a primary +expression. Its type is int in the first three cases, double in +the last. + +7.1.3 string + A string is a primary expression. Its type is originally +``array of char''; but following the same rule as in section +7.1.1 for identifiers, this is modified to ``pointer to char'' +and the result is a pointer to the first character in the string. + +7.1.4 ( expression ) + A parenthesized expression is a primary expression whose type +and value are identical to those of the unadorned expression. 
+The presence of parentheses does not affect whether the +expression is an lvalue. + +7.1.5 primary-expression [ expression ] + A primary expression followed by an expression in square +brackets is a primary expression. The intuitive meaning is that +of a subscript. Usually, the primary expression has type +``pointer to . . .'', the subscript expression is int, and the +type of the result is `` . . . ''. The expression ``E1[E2]'' is +identical (by definition) to ``* (( E1 ) + ( E2 )) ''. All the +clues needed to understand this notation are contained in this +section together with the discussions in sections 7.1.1, 7.2.1, +and 7.4.1 on identifiers, *, and + respectively; section 14.3 +below summarizes the implications. + +7.1.6 primary-expression ( expression-list ) + opt + A function call is a primary expression followed by parentheses +containing a possibly empty, comma-separated list of expressions +which constitute the actual arguments to the function. The +primary expression must be of type ``function returning . . .'', +and the result of the function call is of type `` . . . ''. As +indicated below, a hitherto unseen identifier followed +immediately by a left parenthesis is contextually declared to +represent a function returning an integer; thus in the most +common case, integer-valued functions need not be declared. + Any actual arguments of type float are converted to double +before the call; any of type char are converted to int. + In preparing for the call to a function, a copy is made of each +actual parameter; thus, all argument-passing in C is strictly by +value. A function may change the values of its formal +parameters, but these changes cannot possibly affect the values +of the actual parameters. On the other hand, it is perfectly +possible to pass a pointer on the understanding that the function +may change the value of the object to which the pointer points. +Note that the order of evaluation of function arguments is not +defined. 
+ Recursive calls to any function are permissible. + C Reference Manual - 8 + + +7.1.7 primary-lvalue . member-of-structure + An lvalue expression followed by a dot followed by the name of +a member of a structure is a primary expression. The object +referred to by the lvalue must be of a structure type, and the + 1 +member-of-structure must be a member of that structure. The +result of the expression is an lvalue appropriately offset from +the origin of the given lvalue whose type is that of the named +structure member. + Structures are discussed in section 8.5. + +7.1.8 primary-expression -> member-of-structure + The primary-expression must be a pointer to a structure and the + 2 +member-of-structure must be a member of that structure type. +The result is an lvalue appropriately offset from the origin of +the pointed-to structure whose type is that of the named +structure member. + The expression ``E1->MOS'' is exactly equivalent to +``(*E1).MOS''. + +7.2 Unary operators + Expressions with unary operators group right-to-left. + +7.2.1 * expression + The unary * operator means indirection: the expression must be +a pointer, and the result is an lvalue referring to the object to +which the expression points. If the type of the expression is +``pointer to . . .'', the type of the result is `` . . . ''. + +7.2.2 & lvalue-expression + The result of the unary & operator is a pointer to the object +referred to by the lvalue-expression. If the type of the +lvalue-expression is `` . . . '', the type of the result is +``pointer to . . .''. + +7.2.3 - expression + The result is the negative of the expression. The type of the +expression must be char, int, float, or double. The type of the + 3 +result is int or double. + +_________________________ + 1 + The UNIX C compiler allows any primary-lvalue and assumes it +to have the same form as the structure containing the named +structure member. 
+ 2 + The UNIX C compiler allows any primary-lvalue and assumes it +to be a pointer which points to an object of the same form as the +structure of which the member-of-structure is a part. + 3 + The UNIX C compiler defines the type of the result to be the +same as the type of the operand. + C Reference Manual - 9 + + +7.2.4 ! expression + The result of the logical negation operator ! is 1 if the value +of the expression is zero, 0 if the value of the expression is +non-zero. The type of the result is int. The allowable + 1 +expressions are those allowed by the if statement (section 9.3). + +7.2.5 ~ expression + The ~ operator yields the one's complement of its operand. The +type of the expression must be int or char, and the result is +int. + +7.2.6 ++ lvalue-expression + The object referred to by the lvalue expression is incremented. +The value is the new value of the lvalue expression and the type +is the type of the lvalue. If the expression is of a fundamental + 2 +type, it is incremented by 1; if it is a pointer to an object, +it is incremented by the length of the object. + +7.2.7 -- lvalue-expression + The object referred to by the lvalue expression is decremented +analogously to the ++ operator. + +7.2.8 lvalue-expression ++ + The result is the value of the object referred to by the lvalue +expression. After the result is noted, the object referred to by +the lvalue is incremented in the same manner as for the prefix ++ + 3 +operator: by 1 for an object of fundamental type, by the length +of the pointed-to object for a pointer. The type of the result +is the same as the type of the lvalue-expression. + +7.2.9 lvalue-expression -- + The result of the expression is the value of the object +referred to by the lvalue expression. After the result is +noted, the object referred to by the lvalue expression is +decremented in a way analogous to the postfix ++ operator. + +7.2.10 sizeof expression + The sizeof operator yields the size, in bytes, of its operand.
+When applied to an array, the result is the total number of bytes +in the array. The size is determined from the declarations of +the objects in the expression. The major use of sizeof is in + +_________________________ + 1 + The UNIX C compiler does not allow float or double operands. + 2 + The portable C compiler does not allow float or double +operands. + 3 + The portable C compiler does not allow float or double +operands. + C Reference Manual - 10 + + +communication with routines like storage allocators and I/O + 1 +systems. + +7.3 Multiplicative operators + The multiplicative operators *, /, and % group left-to-right. + +7.3.1 expression * expression + The binary * operator indicates multiplication. If both +operands are int or char, the result is int; if one is int or +char and one float or double, the former is converted to double, +and the result is double; if both are float or double, the result +is double. No other combinations are allowed. + +7.3.2 expression / expression + The binary / operator indicates division. The same type +considerations as for multiplication apply. + +7.3.3 expression % expression + The binary % operator yields the remainder from the division of +the first expression by the second. Both operands must be int or +char, and the result is int. The use of this operation is not +recommended for negative operands. + +7.4 Additive operators + The additive operators + and - group left-to-right. + +7.4.1 expression + expression + The result is the sum of the expressions. If both operands are +int or char, the result is int. If both are float or double, the +result is double. If one is char or int and one is float or +double, the former is converted to double and the result is +double. If an int or char is added to a pointer, the former is +converted by multiplying it by the length of the object to which +the pointer points and the result is a pointer of the same type +as the original pointer. 
Thus if P is a pointer to an object, +the expression ``P+1'' is a pointer to another object of the same +type as the first and immediately following it in storage. + No other type combinations are allowed. + +7.4.2 expression - expression + The result is the difference of the operands. If both operands +are int, char, float, or double, the same type considerations as +for + apply. If an int or char is subtracted from a pointer, the +former is converted in the same way as explained under + above. + If two pointers to objects of the same type are subtracted, the +result is converted (by division by the length of the object) to +an int representing the number of objects separating the +pointed-to objects. This conversion will in general give + +_________________________ + 1 + The UNIX C compiler allows this expression anywhere that a +constant is required. + C Reference Manual - 11 + + +unexpected results unless the pointers point to objects in the +same array, since pointers, even to objects of the same type, do +not necessarily differ by a multiple of the object-length. + +7.5 Shift operators + The shift operators << and >> group left-to-right. + +7.5.1 expression << expression +7.5.2 expression >> expression + Both operands must be int or char, and the result is int. The +second operand should be non-negative. The value of ``E1<<E2'' is E1 +(interpreted as a bit pattern) left-shifted E2 bit positions. +The value of ``E1>>E2'' is E1 +(interpreted as a bit pattern) logically right-shifted E2 bit + 1 +positions. Vacated bits are filled by 0 bits. + +7.6 Relational operators + The relational operators group left-to-right, but this fact is +not very useful; ``a<b<c'' does not mean what it seems to. + +7.6.1 expression < expression +7.6.2 expression > expression +7.6.3 expression <= expression +7.6.4 expression >= expression + The operators < (less than), > (greater than), <= (less than or +equal to) and >= (greater than or equal to) all yield 0 if the +specified relation is false and 1 if it is true. For non-pointer +operands, operand conversion is exactly the same as for the + +operator. In addition, pointers of any kind can be compared.
+The result in this case depends on the relative locations in + 2 +storage of the pointed-to objects. + +7.7 Equality operators +7.7.1 expression == expression +7.7.2 expression != expression + The == (equal to) and the != (not equal to) operators are +exactly analogous to the relational operators except for their +lower precedence. (Thus ``a<b == c<d'' is 1 whenever a<b and c<d +have the same truth-value). + +7.13.7 lvalue >>= expression + lvalue =>> expression +7.13.8 lvalue <<= expression + lvalue =<< expression +7.13.9 lvalue &= expression + lvalue =& expression +7.13.10 lvalue ^= expression + lvalue =^ expression +7.13.11 lvalue |= expression + lvalue =| expression + The behavior of an expression of the form ``E1 op= E2'' or +``E1 =op E2'' may be inferred by taking it as equivalent to +``E1 = E1 op E2''; however, E1 is evaluated only once. +Moreover, expressions like ``i += p'' in which a pointer is +added to an integer, are forbidden. The "op=" form is preferred +over the "=op" form, because it eliminates ambiguities possible +in expressions such as ``x=-1''. + +7.14 expression , expression + A pair of expressions separated by a comma is evaluated +left-to-right and the value of the left expression is discarded. +The type and value of the result are the type and value of the +right operand. This operator groups left-to-right. It should be +avoided in situations where comma is given a special meaning, for +example in actual arguments to function calls (section 7.1.6) and +lists of initializers (section 10.3). + +8. Declarations + Declarations are used within function definitions to specify +the interpretation which C gives to each identifier; they do not +necessarily reserve storage associated with the identifier. +Declarations have the forms + + +_________________________ + 1 + On UNIX, no conversion is necessary among different pointer +types and integers. Thus, the value of i in this example would +be preserved.
+ C Reference Manual - 14 + + + declaration: + decl-specifiers init-declarator-list ; + type-specifier ; + +The declarators in the init-declarator-list contain the +identifiers being declared. The decl-specifiers consist of at +most one type-specifier and at most one storage class specifier. + + decl-specifiers: + type-specifier + sc-specifier + type-specifier sc-specifier + sc-specifier type-specifier + +The second form of declaration is used to define structures +(section 8.5). + +8.1 Storage class specifiers + The sc-specifiers are: + + sc-specifier: + auto + static + extern + register + +The auto, static, and register declarations also serve as +definitions in that they cause an appropriate amount of storage +to be reserved. In the extern case there must be an external +definition (see below) for the given identifiers somewhere +outside the function in which they are declared. + Identifiers declared to be of class register may not be used as +the operand of the address-of operator &. In addition, each +implementation will have its own restrictions on the number and +types of register identifiers which can be supported in any +function. When these restrictions are violated, the offending + 1 +identifiers are treated as auto. + If the sc-specifier is missing from a declaration, it is +generally taken to be auto. + +8.2 Type specifiers + The type-specifiers are + + + + + + + +_________________________ + 1 + The portable C compiler treats register as synonymous with +auto. + C Reference Manual - 15 + + + type-specifier: + int + char + float + double + long + long int + short + short int + unsigned + unsigned int + long float + struct { type-decl-list } + struct identifier { type-decl-list } + struct identifier + +The struct specifier is discussed in section 8.5. If the +type-specifier is missing from a declaration, it is generally + 1 +taken to be int. 
+ +8.3 Declarators + The init-declarator-list appearing in a declaration is a +comma-separated sequence of declarators, each of which may be +followed by an initializer for the declarator (initialization is +discussed in section 10.3). + + init-declarator-list: + init-declarator + init-declarator , init-declarator-list + + init-declarator: + declarator initializer + opt +The specifiers in the declaration indicate the type and storage +class of the objects to which the declarators refer. Declarators +have the syntax: + + declarator: + identifier + * declarator + declarator ( ) + declarator [ constant-expression ] + opt + ( declarator ) + +The grouping in this definition is the same as in expressions. + +_________________________ + 1 + The UNIX C compiler implements a facility whereby identifiers +can be equated to types. Such identifiers can be used as +type-specifiers. The portable C compiler only partially +implements this facility. + C Reference Manual - 16 + + +8.4 Meaning of declarators + Each declarator is taken to be an assertion that when a +construction of the same form as the declarator appears in an +expression, it yields an object of the indicated type and storage +class. Each declarator contains exactly one identifier; it is +this identifier that is declared. + If an unadorned identifier appears as a declarator, then it has +the type indicated by the specifier heading the declaration. + If a declarator has the form + + * D + +for D a declarator, then the contained identifier has the type +``. . . pointer to X'', where `` . . . X'' is the type which the +identifier would have had if the declarator had been simply D. + If a declarator has the form + + D ( ) + +then the contained identifier has the type ``. . . function +returning X'', where `` . . . X'' is the type which the +identifier would have had if the declarator had been simply D. 
+ A declarator may have the form + + D[constant-expression] +or + D[ ] + +In the first case the constant expression is an expression whose +value is determinable at compile time, and whose type is int. In +the second the constant 1 is used. (Constant expressions are +defined precisely in section 15.) Such a declarator makes the +contained identifier have type ``. . . array of X'', where +`` . . . X'' is the type which the identifier would have had if +the declarator had been simply D. The constant specifies the +number of elements in the array. + An array may be constructed from one of the basic types, from a +pointer, from a structure, or from another array (to generate a +multi-dimensional array). + Finally, parentheses in declarators do not alter the type of +the contained identifier except insofar as they alter the binding +of the components of the declarator. + Not all the possibilities allowed by the syntax above are +actually permitted. The restrictions are as follows: functions +may not return arrays, structures or functions, although they may +return pointers to such things; there are no arrays of functions, +although there may be arrays of pointers to functions. Likewise +a structure may not contain a function, but it may contain a +pointer to a function. + As an example, the declaration + + int i, *ip, f(), *fip(), (*pfi)(); + +declares an integer i, a pointer ip to an integer, a function f +returning an integer, a function fip returning a pointer to an + C Reference Manual - 17 + + +integer, and a pointer pfi to a function which returns an +integer. Also + + float fa[17], *afp[17]; + +declares an array of float numbers and an array of pointers to +float numbers. Finally, + + static int x3d[3][5][7]; + +declares a static three-dimensional array of integers, with rank +3x5x7. In complete detail, x3d is an array of three items: each +item is an array of five arrays; each of the latter arrays is an +array of seven integers.
Any of the expressions ``x3d'', +``x3d [ i ]'', ``x3d [ i ] [ j ]'', ``x3d [ i ] [ j ] [ k ]'' may +reasonably appear in an expression. The first three have type +``array'', the last has type int. + +8.5 Structure declarations + Recall that one of the forms for a structure specifier is + + struct { type-decl-list } + +The type-decl-list is a sequence of type declarations for the +members of the structure: + + type-decl-list: + type-declaration + type-declaration type-decl-list + +A type declaration is just a declaration which does not mention a +storage class (the storage class ``member of structure'' here +being understood by context) or include an initializer. + + type-declaration: + type-specifier declarator-list ; + +Within the structure, the objects declared have addresses which +increase as their declarations are read left-to-right. Each +component of a structure begins on an addressing boundary +appropriate to its type. Therefore, there may be unnamed holes + 1 +in a structure. + Another form of structure specifier is + + struct identifier { type-decl-list } + +This form is the same as the one just discussed, except that the +identifier is remembered as the structure tag of the structure + +_________________________ + 1 + The UNIX C compiler forces all structures to have an even +length in bytes and be aligned on word boundaries. + C Reference Manual - 18 + + +specified by the list. A declaration may then be given using the +structure tag but without the list, as in the third form of +structure specifier: + + struct identifier + +Structure tags allow definition of self-referential and +mutually-recursive structures (forward references to structure +type names must be within the same group of definitions and be a +pointed-to or returned type); they also permit the long part of +the declaration to be given once and used several times. 
It is +however absurd to declare a structure which contains an instance +of itself, as distinct from a pointer to an instance of itself. + A simple example of a structure declaration, taken from section +16.2 where its use is illustrated more fully, is + + struct tnode { + char tword[20]; + int count; + struct tnode *left; + struct tnode *right; + }; + +which contains an array of 20 characters, an integer, and two +pointers to similar structures. Once this declaration has been +given, the following declaration makes sense: + + struct tnode s, *sp; + +which declares s to be a structure of the given sort and sp to be +a pointer to a structure of the given sort. + The names of structure members and structure tags may be the +same as ordinary variables, since a distinction can be made by +context. All of the members of a structure must have unique +names. However, a single member name may be used in many + 1 +structure definitions. + +9. Statements + Except as indicated, statements are executed in sequence. + + + + + + +_________________________ + 1 + The UNIX C compiler requires that the names of tags and +members be distinct. In addition, the same member name is +allowed to appear in different structures only if the two members +are of the same type and if their origin with respect to their +structure is the same. Thus, separate structures can share a +common initial segment. + C Reference Manual - 19 + + +9.1 Expression statement + Most statements are expression statements, which have the form + + expression ; + +Usually expression statements are assignments or function calls. 
+ +9.2 Compound statement + So that several statements can be used where one is expected, +or local variables defined, the compound statement is provided: + + compound-statement: + { declaration-list statement-list } + opt + declaration-list: + declaration + declaration declaration-list + + statement-list: + statement + statement statement-list + +9.3 Conditional statement + The two forms of the conditional statement are + + if ( expression ) statement + if ( expression ) statement else statement + +In both cases the expression is evaluated and if it is non-zero, +the first substatement is executed. In the second case the +second substatement is executed if the expression is zero. As +usual the ``else'' ambiguity is resolved by connecting an else +with the last encountered elseless if. + The expression may be of any fundamental type or a pointer. +The comparison with zero is done in a manner appropriate for the +type of the expression. + +9.4 While statement + The while statement has the form + + while ( expression ) statement + +The substatement is executed repeatedly so long as the value of +the expression remains non-zero. The test takes place before +each execution of the statement, and is the same as that +performed by the if statement. + +9.5 Do statement + The do statement has the form + + do statement while ( expression ) ; + +The substatement is executed repeatedly until the value of the +expression becomes zero. The test takes place after each +execution of the statement, and is the same as that performed by + C Reference Manual - 20 + + +the if statement. 
+ +9.6 For statement + The for statement has the form + + for ( expression-1 ; expression-2 ; expression-3 ) statement + opt opt opt +This statement is equivalent to + + expression-1; + while ( expression-2 ) { + statement + expression-3 ; + } + +Thus the first expression specifies initialization for the loop; +the second specifies a test, made before each iteration, such +that the loop is exited when the expression becomes zero; the +third expression typically specifies an incrementation which is +performed after each iteration. + Any or all of the expressions may be dropped. A missing +expression-2 makes the implied while clause equivalent to ``while +( 1 )''; other missing expressions are simply dropped from the +expansion above. + +9.7 Switch statement + The switch statement causes control to be transferred to one of +several statements depending on the value of an expression. It +has the form + + switch ( expression ) statement + +The expression must be int or char. The statement is typically +compound. Each statement within the statement may be labelled +with case prefixes as follows: + + case constant-expression : + +where the constant expression must be int or char. No two of the +case constants in a switch may have the same value. Constant +expressions are precisely defined in section 15. + There may also be at most one statement prefix of the form + + default : + +When the switch statement is executed, its expression is +evaluated and compared with each case constant in an undefined +order. If one of the case constants is equal to the value of the +expression, control is passed to the statement following the +matched case prefix. If no case constant matches the expression, +and if there is a default prefix, control passes to the prefixed +statement. In the absence of a default prefix none of the +statements in the switch is executed. + Case or default prefixes in themselves do not alter the flow of +control. 
+ C Reference Manual - 21 + + +9.8 Break statement + The statement + + break ; + +causes termination of the smallest enclosing while, do, for, or +switch statement; control passes to the statement following the +terminated statement. + +9.9 Continue statement + The statement + + continue ; + +causes control to pass to the loop-continuation portion of the +smallest enclosing while, do, or for statement; that is to the +end of the loop. More precisely, in each of the statements + + while ( ... ) { do { for ( ... ) { + . . . . . . . . . + contin: ; contin: ; contin: ; + } } while ( ... ); } + +a continue is equivalent to ``goto contin''. + +9.10 Return statement + A function returns to its caller by means of the return +statement, which has one of the forms + + return ; + return ( expression ) ; + +In the first case no value is returned. In the second case, the +value of the expression is returned to the caller of the +function. If required, the expression is converted, as if by +assignment, to the type of the function in which it appears. +Flowing off the end of a function is equivalent to a return with +no returned value. + +9.11 Goto statement + Control may be transferred unconditionally by means of the +statement + + goto expression ; + +The expression should be a label (sections 9.12, 14.4) or an +expression of type ``pointer to int'' which evaluates to a label. +It is illegal to transfer to a label not located in the current +function unless some extra-language provision has been made to +adjust the stack correctly. + C Reference Manual - 22 + + +9.12 Labelled statement + Any statement may be preceded by label prefixes of the form + + identifier : + +which serve to declare the identifier as a label. More details +on the semantics of labels are given in section 14.4 below. 
+ +9.13 Null statement + The null statement has the form + + ; + +A null statement is useful to carry a label just before the ``}'' +of a compound statement or to supply a null body to a looping +statement such as while. + +10. Function definitions and global declarations + A C program consists of a sequence of function definitions and +global declarations. Global declarations may be given for simple +variables and for arrays. They are used to declare and/or +reserve storage for objects. + +10.1 Function definitions + Function definitions have the form + + function-definition: + type-specifier function-declarator function-body + opt +A function declarator is similar to a declarator for a ``function +returning ...'' except that it lists the formal parameters of the +function in the parentheses which must follow the function name. +Some examples of function-declarators are: + + f(a, b) returns int + *f(a) returns pointer to int + (*f(a))() returns pointer to function returning int + +The function-body has the form + + function-body: + type-decl-list function-statement + opt +The purpose of the type-decl-list is to give the types of the +formal parameters. No other identifiers should be declared in +this list, and formal parameters should be declared only here. +Formal parameters may be declared as being of class register. + The function-statement is just a compound statement. + + function-statement: + compound-statement + +A simple example of a complete function definition is + C Reference Manual - 23 + + + int max (a, b, c) + int a, b, c; + + {int m; + m = (a > b) ? a : b; + return (m > c ? m : c); + } + +Here ``int'' is the type-specifier; ``max(a, b, c)'' is the +function-declarator; ``int a, b, c;'' is the type-decl-list for +the formal parameters; ``{ . . . }'' is the function-statement. + C converts all float actual parameters to double, so formal +parameters declared float have their declaration adjusted to read +double. 
Correspondingly, char parameters are adjusted to read +int. Also, since a reference to an array in any context (in +particular as an actual parameter) is taken to mean a pointer to +the first element of the array, declarations of formal parameters +declared ``array of ...'' are adjusted to read ``pointer to +...''. Finally, because neither structures nor functions can be +passed to a function, it is useless to declare a formal parameter +to be a structure or function (pointers to structures or +functions are of course permitted). + A free return statement is supplied at the end of each function +definition, so running off the end causes control, but no value, +to be returned to the caller. + +10.2 Global declarations + A global declaration has the same form as a declaration within +a function (section 8), except that the sc-specifiers auto and +register may not be used. + Global declarations with sc-specifiers extern or static are +like similar declarations within functions, except that the +identifiers so declared are accessible throughout the remainder +of the source file. A global static declaration reserves storage +which is retained throughout the execution of a program. A +global extern declaration declares that the associated +identifiers have been externally defined, but is not itself such +a definition. + A global declaration without an sc-specifier is an external +definition. It reserves storage for the identifiers and allows +them to be accessed by separately-compiled functions which +contain appropriate extern declarations for the identifiers. It +is an error to have more than one external definition of an + 1 +identifier in a C program. Functions appearing in an external +data definition are declared as extern. + + +_________________________ + 1 + The UNIX C compiler treats external data definitions and +global extern declarations as equivalent. 
More than one external +definition of an identifier is allowed, so long as at most one +includes initialization. + C Reference Manual - 24 + + +10.3 Initialization + Explicit initialization is permitted in declarations which +reserve storage, namely register, auto, and static declarations, +and external definitions. Automatic structures and arrays may + 1 +not be initialized. The initial value of static and extern +identifiers not explicitly initialized is zero. The initial +value of register and auto identifiers not explicitly initialized +is undefined. + An initializer represents the initial value for the +corresponding object being defined (and declared). + + initializer: + constant + { constant-expression-list } + + + constant-expression-list: + constant-expression + constant-expression , constant-expression-list + +Thus an initializer consists of a constant-valued expression, or +comma-separated list of expressions, inside braces. The braces +may be dropped when the expression is just a plain constant. The +exact meaning of a constant expression is discussed in section +15. The expression list is used to initialize arrays and +structures; see below. + The type of the identifier being defined should be compatible +with the type of the initializer: a double constant may +initialize a float or double identifier; a non-floating-point +expression may initialize an int, char, or pointer. + An initializer for an array may contain a comma-separated list +of compile-time expressions. The length of the array is taken to +be the maximum of the number of expressions in the list and the +square-bracketed constant in the array's declarator. This +constant may be missing, in which case 1 is used. The +expressions initialize successive members of the array starting +at the origin (subscript 0) of the array. The acceptable +expressions for an array of type ``array of ...'' are the same as + 2 +those for type ``...''. 
+ Structures can be initialized, but this operation is +incompletely implemented and machine-dependent. Basically the +structure is regarded as a sequence of words and the initializers + +_________________________ + 1 + The portable C compiler does not support initialization of +register or auto identifiers. + 2 + The UNIX C compiler also allows, as a special case, a single +string to be given as the initializer for an array of chars; in +this case, the characters in the string are taken as the +initializing values. + C Reference Manual - 25 + + +are placed into those words. Structure initialization, using a +comma-separated list in braces, is safe if all the members of the + 1 +structure are integers or pointers but is otherwise ill-advised. + +11. Scope rules + A complete C program need not all be compiled at the same time: +the source text of the program may be kept in several files, and +precompiled routines may be loaded from libraries. Communication +among the functions of a program may be carried out both through +explicit calls and through manipulation of external data. + Therefore, there are two kinds of scope to consider: first, +what may be called the lexical scope of an identifier, which is +essentially the region of a program during which it may be used +without drawing ``undefined identifier'' diagnostics; and second, +the scope associated with external identifiers, which is +characterized by the rule that references to the same external +identifier are references to the same object. + +11.1 Lexical scope + C supports block-structure only within function definitions +(i.e., function definitions may not be nested, but any compound +statement can define variables local to that statement). The +lexical scope of names declared in external definitions extends +from their definition through the end of the file in which they +appear. The same is true for implicit or explicit external +declarations inside of function definitions. 
The lexical scope +of formal parameters is the body of the function. The lexical +scope of non-external names declared at the head of compound +statements extends from their definition through the end of the +compound statement. The only allowed forward reference to a +label is as the expression in a goto statement. + It is an error to redeclare an identifier already declared in +the current context, except for a consistent set consisting of +any number of external declarations plus at most one external +definition for an identifier. + +11.2 Scope of externals + If a function declares an identifier to be extern, then +somewhere among the files or libraries constituting the complete +program there must be an external definition for the identifier. +All functions in a given program which refer to the same external +identifier refer to the same object, so care must be taken that +the type and extent specified in the definition are compatible +with those specified by each function which references the data. + In a multi-file program, an external definition for an external +identifier must appear in exactly one of the files. Any other +files which wish to use the identifier must contain a + +_________________________ + 1 + The UNIX C compiler implements initialization of arbitrary +structures, and allows nested bracketed sequences of initializers +for aggregates. + C Reference Manual - 26 + + +corresponding extern declaration of the identifier. The +identifier can be initialized only in the file where storage is +allocated. + +12. Compiler control lines + When a line of a C program begins with the character #, it is +interpreted as a special directive to the compiler. Such +compiler control lines may appear anywhere in the source file, + 1 +except within comments and constants. The names of compiler +control lines are not reserved; they are recognized by context. 
+ +12.1 Token replacement + A compiler-control line of the form + + # define identifier token-string + +(note: no trailing semicolon) causes the compiler to replace +subsequent instances of the identifier with the given string of +tokens. When processing the # define line, token replacement is +performed on the token string, but not on the identifier. When +token replacement occurs, the inserted token string is not + 2 +subject to further token replacement. The names of compiler +control lines are not subject to token replacement, nor are +compiler control line arguments specified as identifiers. + +This facility is most valuable for definition of ``manifest +constants'', as in + + # define tabsize 100 + . . . + int table[tabsize]; + +Macros may be defined by immediately following the identifier +with a parenthesized list of formal parameters (see also section +12.3). + + + + + + +_________________________ + 1 + In order to use compiler control lines with the UNIX C +compiler, it is required that the first line of the source file +begin with #. + 2 + Unfortunately, the UNIX C compiler uses a different method of +token replacement with different semantics. Token replacement is +not performed on the token-string when processing a # define +line. However, when the token-string is inserted, it is subject +to token replacement. + C Reference Manual - 27 + + +12.2 File inclusion + In multi-file C programs, it is necessary to have extern +declarations for any external identifier used in files other than +the one in which it is defined. Rather than repeat tedious and +error-prone declarations for each external identifier in each +file, one can create a separate file containing these +declarations and cause it to be dynamically inserted into each +source file. + A compiler control line of the form + + # include "filename" + +results in the replacement of that line by the entire contents of + 1 +the file filename. Included files may include other files. 
+ This technique is also useful for manifest constants and +structure definitions. + 2 +12.3 Macros + The C macro facility allows token replacement strings to be +parameterized. A macro is defined by lines of the form + + # macro identifier ( parameter-list ) + opt + token-string + # end + +The parameter list is a comma-separated list of identifiers, +which are the formal parameters of the macro. The token-string, +which may be given on zero or more input lines, may contain +occurrences of the formal parameter names. When substitution is +performed, these occurrences will be replaced by the +corresponding actual parameters, which are strings of tokens. + The format of a macro ``invocation'' is the same as for +function calls. Thus, the macro facility can be used to write +small ``functions'' (without local variables) which will produce +in-line code. However, one must be careful in that macro +parameters are essentially call by name, whereas function +parameters are call by value. In addition, it is a good idea to +enclose within parentheses all occurrences of formal parameters +in macro definitions, in order to avoid precedence problems after +substitution of actual parameters. + + + + + +_________________________ + 1 + The Unix C compiler also allows the filename to be enclosed in +angle brackets instead of quotation marks. Such a filename is +interpreted relative to a system standard include-file directory. + 2 + This facility is not supported by the UNIX C compiler. + C Reference Manual - 28 + + +12.4 Compile-time conditionals + Conditional compilation of source text is provided by the forms + + # ifdef identifier # ifndef identifier + ... ... + # endif # endif + +These forms cause the text enclosed by the compiler control lines +to be included in the compilation only if the given identifier +has ( ifdef ) or does not have ( ifndef ) a lexical definition. +An identifier is given a lexical definition by # define and +# macro. 
Compile-time conditionals may be nested. + 1 +12.5 Undefine + The undefine compiler control line has the form + + # undefine identifier + +It removes any lexical definition of the identifier established +by a previous # define or # macro. The identifier will +henceforth not be subject to any form of token replacement. When +used with a keyword, # undefine causes the reserved identifier to +lose its built-in meaning and become an ordinary identifier. + 2 +12.6 Renamed identifiers + In writing some system support software, it is often desirable +to use names for functions and external data which are not +subject to accidental conflict with user-chosen names. This +ability is provided by the rename compiler control line, which +has the form + + # rename identifier string + +The specified identifier will be replaced by the given character +string when it appears in the output of the compiler. + +13. Implicit declarations + It is not always necessary to specify both the storage class +and the type of identifiers in a declaration. Sometimes the +storage class is supplied by the context: in external +definitions, and in declarations of formal parameters and +structure members. In a declaration inside a function, if a +storage class but no type is given, the identifier is assumed to +be int; if a type but no storage class is indicated, the +identifier is assumed to be auto. An exception to the latter +rule is made for functions, since auto functions are meaningless +(C being incapable of compiling code into the stack). If the +type of an identifier is ``function returning ...'', it is + +_________________________ + 1 + This facility is not supported by the UNIX C compiler. + 2 + This facility is not supported by the UNIX C compiler. + C Reference Manual - 29 + + +implicitly declared to be extern. + In an expression, an identifier followed by ( and not otherwise +declared is contextually declared to be ``function returning +int''.
As an initializer, an otherwise undeclared identifier is + 1 +contextually declared to be ``function returning int''. + For some purposes it is best to consider formal parameters as +belonging to their own storage class. In practice, C treats +parameters as if they were automatic (except that, as mentioned +above, formal parameter arrays, chars, and floats are treated +specially). + +14. Types revisited + This section summarizes the operations which can be performed +on objects of certain types. + +14.1 Structures + There are only two things that can be done with a structure: +pick out one of its members (by means of the `` . '' or `` -> '' +operators); or take its address (by unary `` & ''). Other +operations, such as assigning from or to it or passing it as a +parameter, draw an error message. In the future, it is expected +that these operations, but not necessarily others, will be +allowed. + +14.2 Functions + There are only two things that can be done with a function: +call it, or take its address. If the name of a function appears +in an expression not in the function-name position of a call, a +pointer to the function is generated. Thus, to pass one function +to another, one might say + + int f(); + ... + g (f); + +Then the definition of g might read + + g (funcp) + int (*funcp)(); + + {. . . + (*funcp)(); + . . . + } + +Notice that f was declared explicitly in the calling routine +since its first appearance was not followed by `` ( ''. + + +_________________________ + 1 + The UNIX C compiler contextually declares identifiers in +initializers to be of type int. + C Reference Manual - 30 + + +14.3 Arrays, pointers, and subscripting + Every time an identifier of array type appears in an +expression, it is converted into a pointer to the first member of +the array. Because of this conversion, arrays are not lvalues. +By definition, the subscript operator [ ] is interpreted in such +a way that ``E1[E2]'' is identical to ``*((E1) + (E2))''. 
+Because of the conversion rules which apply to +, if E1 is an +array and E2 an integer, then E1[E2] refers to the E2-th member +of E1. Therefore, despite its asymmetric appearance, +subscripting is a commutative operation. + A consistent rule is followed in the case of multi-dimensional +arrays. If E is an n - dimensional array of rank +i x j x . . . x k, then E appearing in an expression is converted +to a pointer to an (n - 1) - dimensional array with rank +j x . . . x k. If the * operator, either explicitly or +implicitly as a result of subscripting, is applied to this +pointer, the result is the pointed-to (n - 1) - dimensional +array, which itself is immediately converted into a pointer. + For example, consider + + int x[3][5]; + +Here x is a 3x5 array of integers. When x appears in an +expression, it is converted to a pointer to (the first of three) +5-membered arrays of integers. In the expression ``x [ i ]'', +which is equivalent to ``*(x+i)'', x is first converted to a +pointer as described; then i is converted to the type of x, which +involves multiplying i by the length of the object to which the +pointer points, namely 5 integer objects. The results are added +and indirection applied to yield an array (of 5 integers) which +in turn is converted to a pointer to the first of the integers. +If there is another subscript the same argument applies again; +this time the result is an integer. + It follows from all this that arrays in C are stored row-wise +(last subscript varies fastest) and that the first subscript in +the declaration helps determine the amount of storage consumed by +an array but plays no other part in subscript calculations. + +14.4 Labels + Labels do not have a type of their own; they are treated as +having type ``array of int''. Label variables should be declared +``pointer to int''; before execution of a goto referring to the +variable, a label (or an expression deriving from a label) should +be assigned to the variable.
+ Label variables are a bad idea in general; the switch statement +makes them almost always unnecessary. + +15. Constant expressions + In several places C requires expressions which evaluate to a +constant: after case, as array bounds, and in initializers. In +the first two cases, the expression can involve only integer and +character constants, possibly connected by the binary operators + C Reference Manual - 31 + + + + - * / % & | ^ << >> + < > <= >= == != && || ? : + +or by the unary operators + + - ~ ! + +Parentheses can be used for grouping, but not for function + 1 +calls. + A bit more latitude is permitted for initializers. Besides +constant expressions as discussed above, one can have double and +string constants, and one can apply the unary & operator to +external scalars. The unary & can also be applied implicitly by +appearance of functions or unsubscripted external arrays. An +undefined identifier appearing in an initializer is implicitly + 2 +declared to be a function returning int. + +16. Examples. + These examples are intended to illustrate some typical C +constructions as well as a serviceable style of writing C +programs. + +16.1 Inner product + This function returns the inner product of its array arguments. + + double inner (v1, v2, n) + double v1[], v2[]; + + {double sum; + int i; + sum = 0.0; + for (i = 0; i < n; i++) + sum += v1[i] * v2[i]; + return (sum); + } + +The following version is somewhat more efficient, but perhaps a +little less clear. It uses the facts that parameter arrays are +really pointers, and that all parameters are passed by value. + + + + +_________________________ + 1 + The UNIX C compiler allows sizeof, but not the relational +operators, &&, ||, !, or conditional expressions. + 2 + The UNIX C compiler also allows initializers which evaluate to +the address of an external or global static variable plus or +minus a constant, such as ``&a[3]'', where a is an external or +global static array. 
+ C Reference Manual - 32 + + + double inner (v1, v2, n) + double *v1, *v2; + + {double sum; + sum = 0.0; + while (n--) + sum += *v1++ * *v2++; + return (sum); + } + +The declarations for the parameters are really exactly the same +as in the last example. In the first case array declarations +`` [ ] '' were given to emphasize that the parameters would be +referred to as arrays; in the second, pointer declarations were +given because the indirection operator and ++ were used. + +16.2 Tree and character processing + Here is a complete C program ( courtesy of R. Haight ) which +reads a document and produces an alphabetized list of words found +therein together with the number of occurrences of each word. +The method keeps a binary tree of words such that the left +descendant tree for each word has all the words lexicographically +smaller than the given word, and the right descendant has all the +larger words. Both the insertion and the printing routine are +recursive. + The program calls the library routines getchar to pick up +characters and cexit to terminate execution. Cprint is called to +print the results according to a format string. + Because all the external definitions for data are given at the +top, no extern declarations are necessary within the functions. +To stay within the rules, a type declaration is given for each +non-integer function when the function is used before it is +defined. However, since all such functions return pointers which +are simply assigned to other pointers, no actual harm would +result from leaving out the declarations; the supposedly int +function values would be assigned without error or complaint. 
+ + # define nwords 1500 /* number of different words */ + # define wsize 20 /* max chars per word */ + # define tnode struct _tnode /* make tnode look like a type */ + struct _tnode /* the basic structure */ + {char tword[wsize]; + int count; + tnode *left, *right; + }; + + tnode space[nwords]; /* the words themselves */ + int nnodes nwords; /* number of remaining slots */ + tnode *nextp space; /* next available slot */ + tnode *freep; /* free list */ + /* + * The main routine reads words until end-of-file, + * i.e., '\0' returned from "getchar". + * "tree" is called to sort each word into the tree. + */ + C Reference Manual - 33 + + + main (argc, argv) + int argc; + char *argv[]; + + {tnode *top, *tree(); + char c, word[wsize]; + int i; + i = top = 0; + while (c = getchar ()) + if (('a' <= c && c<='z') || ('A' <= c && c <= 'Z')) + {if (i < wsize - 1) + word[i++] = c; + } + else + if (i) + {word[i++] = '\0'; + top = tree (top, word); + i = 0; + } + tprint (top); + } + /* + * The central routine. If the subtree pointer is null, allocate + * a new node for it. If the new word and the node's word are the + * same, increase the node's count. Otherwise, recursively sort + * the word into the left or right subtree depending on whether + * the argument word is less or greater than the node's word. + */ + tnode *tree (p, word) + tnode *p; + char word[]; + + {tnode *alloc (); + int cond; + /* Is pointer null? */ + if (p == 0) + {p = alloc (); + copy (word, p->tword); + p->count = 1; + p->right = p->left = 0; + return (p); + } + /* Is word repeated? */ + if ((cond = compar (word, p->tword)) == 0) + {p->count++; + return (p); + } + /* Sort into left or right */ + if (cond < 0) + p->left = tree (p->left, word); + else + p->right = tree (p->right, word); + return (p); + } + C Reference Manual - 34 + + + /* + * Print the tree by printing the left subtree, the given node, + * and then the right subtree. 
+ */ + tprint (p) + tnode *p; + + {while (p) + {tprint (p->left); + cprint ("%4d: %s\n", p->count, p->tword); + p = p->right; + } + } + /* + * String comparison: return number ( >, =, < ) 0 + * according as s1 ( >, =, < ) s2. + */ + compar (s1, s2) + char *s1, *s2; + + {int c1, c2; + while ((c1 = *s1++) == (c2 = *s2++)) + if (c1 == '\0') return (0); + return (c1 - c2); + } + /* + * String copy: copy s1 into s2 until the null + * character appears. + */ + copy (s1, s2) + char *s1, *s2; + + {while (*s2++ = *s1++); + } + /* + * Node allocation: return pointer to a free node. + * Bomb out when all are gone. Just for fun, there + * is a mechanism for using nodes that have been + * freed, even though no one here calls "free." + */ + tnode *alloc () + + {tnode *t; + if (freep) + {t = freep; + freep = freep->left; + return (t); + } + if (--nnodes < 0) + {cprint ("Out of space\n"); + cexit (); + } + return (nextp++); + } + C Reference Manual - 35 + + + /* + * The uncalled routine which puts a node on the free list. + */ + free (p) + tnode *p; + + {p->left = freep; + freep = p; + } + +To illustrate a slightly different technique of handling the same +problem, we will repeat fragments of this example with the tree +nodes treated explicitly as members of an array. The fundamental +change is to deal with the subscript of the array member under +discussion, instead of a pointer to it. The struct declaration +becomes + + struct _tnode + {char tword[wsize]; + int count; + int left, right; + }; + +and alloc becomes + + alloc () + + {int t; + t = --nnodes; + if (t <= 0) + {cprint ("Out of space\n"); + cexit (); + } + return (t); + } + +The free stuff has disappeared because if we deal +exclusively with subscripts some sort of map has to be kept, +which is too much trouble.
+ Now the tree routine returns a subscript also, and it becomes: + + int tree (p, word) + char word[]; + + {int cond; + if (p == 0) + {p = alloc (); + copy (word, space[p].tword); + space[p].count = 1; + space[p].right = space[p].left = 0; + return (p); + } + if ((cond = compar (space[p].tword, word)) == 0) + {space[p].count++; + return (p); + C Reference Manual - 36 + + + } + if (cond < 0) + space[p].left = tree (space[p].left, word); + else + space[p].right = tree (space[p].right, word); + return (p); + } + +The other routines are changed similarly. It must be pointed out +that this version is noticeably less efficient than the first +because of the multiplications which must be done to compute an +offset in space corresponding to the subscripts. + The observation that subscripts ( like ``a [ i ] '' ) are +less efficient than pointer indirection ( like ``*ap'' ) holds +true independently of whether or not structures are involved. +There are of course many situations where subscripts are +indispensable, and others where the loss in efficiency is worth a +gain in clarity. + C Reference Manual - 37 + + + References + 1. Johnson, S. C., and Kernighan, B. W. The + programming language B. Computing Science Technical + Report No. 8, Bell Laboratories, Murray Hill, N. J., + 1972. + 2. Peterson, T. G., and Lesk, M. E. A user's guide to + the C language on the IBM 370. Internal Memorandum, + Bell Laboratories, 1974. + 3. Richards, M. BCPL: a tool for compiler writing and + system programming. Proc. SJCC 1969, 557-566. + 4. Ritchie, D. M., and Thompson, K. L. The UNIX + time-sharing system. Comm. ACM 7, 17 (July 1974), + 365-375. + 5. Ritchie, D. M., Kernighan, B. W., and Lesk, M. E. + The C programming language. Computing Science + Technical Report No. 31, Bell Laboratories, Murray + Hill, N. J., 1975. + 6. Snyder, A. A portable compiler for the language C. + Rep. TR-149, Project MAC, M.I.T., Cambridge, Ma., 1975. 
+ C Reference Manual - 38 + + + APPENDIX + Syntax Summary + + 1. Expressions. + + + expression: + primary + * expression + & expression + - expression + ! expression + ~ expression + ++ lvalue + -- lvalue + lvalue ++ + lvalue -- + sizeof expression + expression binop expression + expression ? expression : expression + lvalue asgnop expression + expression , expression + + + primary: + identifier + constant + string + ( expression ) + primary ( expression-list ) + opt + primary [ expression ] + lvalue . identifier + primary ->identifier + + + lvalue: + identifier + primary [ expression ] + lvalue . identifier + primary -> identifier + * expression + ( lvalue ) + + + The primary-expression operators + + ( ) [ ] . -> + + have highest priority and group left-to-right. The unary + operators + + * & - ! ~ ++ -- sizeof + + have priority below the primary operators but higher than any + C Reference Manual - 39 + + + binary operator, and group right-to-left. Binary operators + and the conditional operator all group left-to-right, and have + priority decreasing as indicated: + + binop: + * / % + + - + >> << + < > <= >= + == != + & + | ^ + && + || + ? : + Assignment operators all have the same priority, and all group + right-to-left. + + asgnop: + = + += -= *= /= %= >>= <<= &= ^= |= + =+ =- =* =/ =% =>> =<< =& =^ =| + + The comma operator has the lowest priority, and groups + left-to-right. + + 2. Declarations. 
+ + declaration: + decl-specifiers init-declarator-list ; + type-specifier ; + + + decl-specifiers: + type-specifier + sc-specifier + type-specifier sc-specifier + sc-specifier type-specifier + + + sc-specifier: + auto + static + extern + register + C Reference Manual - 40 + + + type-specifier: + int + char + float + double + long + long int + short + short int + unsigned + unsigned int + long float + struct { type-decl-list } + struct identifier { type-decl-list } + struct identifier + + + init-declarator-list: + init-declarator + init-declarator , init-declarator-list + + init-declarator: + declarator initializer + opt + + declarator: + identifier + * declarator + declarator ( ) + declarator [ constant-expression ] + opt + ( declarator ) + + + type-decl-list: + type-declaration + type-declaration type-decl-list + + + type-declaration: + type-specifier declarator-list ; + + + declarator-list: + declarator + declarator , declarator-list + + initializer: + constant + { constant-expression-list } + C Reference Manual - 41 + + + constant-expression-list: + constant-expression + constant-expression , constant-expression-list + + + constant-expression: + expression + + + 3. Statements. + + compound-statement: + { declaration-list statement-list } + opt + + statement: + expression ; + compound-statement + if ( expression ) statement + if ( expression ) statement else statement + while ( expression ) statement + for ( expression ; expression ; expression ) statement + opt opt opt + switch ( expression ) statement + case constant-expression : statement + default : statement + break ; + continue ; + return ; + return ( expression ) ; + goto expression ; + identifier : statement + ; + + + statement-list: + statement + statement statement-list + + 4. External definitions. 
+ + program: + external-definition + external-definition program + + external-definition: + function-definition + declaration + + + function-definition: + type-specifier function-declarator function-body + opt + C Reference Manual - 42 + + + function-declarator: + identifier ( parameter-list ) + opt + * function-declarator + function-declarator ( ) + function-declarator [ constant-expression ] + opt + ( function-declarator ) + + + parameter-list: + identifier + identifier , parameter-list + + + function-body: + type-decl-list function-statement + opt + + function-statement: + compound-statement + + 5. Compiler control lines + + # define identifier token-string + # define identifier( parameter-list ) token-string + + + # include string + + + # macro identifier ( parameter-list ) + opt + # end + + + # ifdef identifier + # ifndef identifier + # endif + + + # undefine identifier + + + # rename identifier string + \ No newline at end of file diff --git a/doc/c/cctty.info b/doc/c/cctty.info new file mode 100755 index 000000000..d636cfd4e --- /dev/null +++ b/doc/c/cctty.info @@ -0,0 +1,48 @@ +The following library routines are available ON ITS ONLY: + + Buffered I/O: + Input: there is a default prompt string (settable), which is + printed when ^L is typed; if the default prompt string is not set, + then any partial (buffered) output line is used; delete deletes a + character as usual; CR completes the reading of a buffered line. + Output: nothing happens until the buffer is full, a CR is + sent, or a tyo_flush is done.
+ + char tyi (); reads one buffered character; CR is changed to LF + tyo (c) char c; outputs a buffered char; ^P changed to ^ + followed by P; full buffer or CR causes buffer to be + sent + tyo_flush (); forces buffer to be sent + tyos (s) char *s; calls tyo repeatedly, changing CR to LF + setprompt (s) char *s; sets the default prompt string + + Unbuffered I/O: + char utyi (); flushes the output buffer and reads a char; + no mapping or echoing performed + utyo (c) char c; flushes the output buffer and sends the + char; no mapping done EXCEPT ^P changed to ^ followed + by P + spctty (c) char c; flushes the output buffer then sends + ^P followed by the argument; for ITS display codes. + + Interrupts: + ^G and ^S are set to interrupt; they result in signals + of ctrlg_interrupt and ctrls_interrupt, respectively. To + associate a routine with them (one which does nothing is a good + way to ignore the interrupts) you do: + on (ctrlg_interrupt, f) (likewise for ^S) + where f is a function taking no arguments and returning no + results. The interrupt character will have been read at + interrupt level. If you want to handle TTY interrupts yourself, + you can do + on (ttyi_interrupt, fn) and fn will be called on a + tty input interrupt; ityic (tty_input_channel) will return + an interrupt char (-1 means it went away); tty_input_channel + is an int defined in one of the library routines. To set + which chars will interrupt, you can use: + ttyget (tty_input_channel, block) + and ttyset (tty_input_channel, block) + where block is an array of 4 ints; these 4 words are the + results and the arguments of the corresponding ITS .call's, + in the same order. Initially all chars activate, and ^G and ^S + interrupt. diff --git a/doc/c/cdoc.91 b/doc/c/cdoc.91 new file mode 100755 index 000000000..888f4ea93 --- /dev/null +++ b/doc/c/cdoc.91 @@ -0,0 +1,1607 @@ +This is the file CLIB;CDOC.
+ +File: CLIB;CDOC, Node: Top, Previous: (DIR), Up: (DIR), Next: Basics + + + Information on Programming in C + +C is an implementation language, similar to BCPL except that data is +typed. It is the primary language used in the UNIX operating system. +This implementation runs on the ITS and TOPS-20 operating systems and is +moderately compatible with the UNIX C implementation. The UNIX +system calls are NOT implemented, but interfaces to ITS and TOPS-20 +system calls are provided. + +* Menu: + +* Basics:: How to run C programs on ITS. + +* Details:: Differences between C on ITS and UNIX. + +* Getting Started:: Hints on writing C programs. + +* Support Routines:: An introduction to the routines available in the + C runtime environment. + +* Portable I/O:: The Portable I/O Library. + +* Storage allocation:: A dynamic storage allocator. + +* String Routines:: Basic character string routines. + +* Character Arrays:: A character string package that uses dynamic + storage allocation. + +* Math Functions:: Floating point routines for the usual functions. + +* Dates and Times:: Packages for manipulating and printing dates and + times in various formats. + +* System Routines:: Packages of routines for accessing operating + system features. + +* Timer Package:: Runtime support for timing procedure calls. + +* Debugging:: Methods and routines for debugging C programs. + +* Graphics:: Packages for graphics and for working with + images in the new image file format. + +* Miscellaneous:: Various interesting routines. + +* Internals:: Internal documentation. + + +Node: Basics, Previous: Top, Up: Top, Next: Details + + + Running C Programs on ITS + +The C compiler translates C into MIDAS and automatically invokes MIDAS +to assemble the intermediate MIDAS code into a relocatable program. The +command for invoking the C compiler is + + :cc file1 file2 ... + +where the arguments are the filenames of the C source files which are to +be compiled. 
Each file will be compiled in turn, and if the compilation +is successful, the resulting relocatable file will be placed in the file +"file STK". Arguments that begin with a hyphen are compiler options. + + -c Compile only, do not assemble + -g Do not delete the MIDAS file + -x Syntax check only + -s Produce a symbol table Listing + -b Compile a big function (FUNCTION TOO LARGE) + +For example, the command + + :cc -g foo + +would compile the C program in the file "FOO C" or "FOO >" in the +current directory, place the resulting relocatable program in the file +"FOO STK", and leave the MIDAS output in file "foo MIDAS". + +Relocatable programs produced by the C compiler are loaded together +with the C support routines using STINKR. To load program files "foo", +"bar", and "bletch" and produce an executable file "foo", use the following +STINKR commands (STINKR supplies the equal sign prompts). + + :stinkr + =x clib;clib + =l foo + =l bar + =l bletch + =o ts foo + =^@ + +The ^@ (ASCII NUL) signals end of file on the terminal input file. +These commands could also be written in a file, say "FOO STINKR", +without the NULL. Invoking STINKR with "foo" as a JCL argument would +then execute the STINKR command file and produce an executable program. + +To run the program merely type + + :foo + +The C startup routine will automatically parse the JCL line and pass the +components to the user main program as arguments to the main program. +Refer to node "Getting Started" for a description of these argument +passing conventions. The arguments to the program are delimited by +spaces, so an invocation such as + + :myprog foo bar bletch + +will parse into the four character strings "MYPROG", "foo", "bar", and +"bletch". Command line arguments that contain spaces may be enclosed in +double quotes. In addition, the startup procedure will open the TTY for +input and output on the Portable I/O Library files cin (standard input), +cout (standard output), and cerr (standard error output).
+ +Certain argument forms are interpreted by the C startup routine as +commands for special I/O initialization. The source or destination of +the standard I/O streams can be specified in the JCL by using the +delimiters described below. + + < Redirects the standard input (CIN) to come from the + indicated file. + + > Redirects the standard output (COUT) to go to the + indicated file. + + >> Redirects the standard output (COUT) to be appended to + the indicated file. + + % Redirects the standard error output (CERR) to the + indicated file. + +The default device is DSK, the default directory is the current +directory, and the default second file name is >. So for example, + + :myprog foo >my;junk + +will pass "MYPROG" and "foo" as arguments to the main routine, +initialize cin to read from file "DSK:sname;FOO >", initialize cout to +append output on the file "DSK:MY;JUNK >", and initialize cerr to direct +output to the terminal. By convention, error messages are output to +cerr. Note that if, for instance, you wanted to read the standard input +from "MY JUNK", the invocation + + :myprog )Programming.) + + +In some situations you must be careful about how you use spaces. Does +x=-10 assign minus ten to x or decrement x by 10? (I don't remember. +Try x =- 10 or x = -10.) Does + + #define FOO (X,Y) + +declare a macro of two arguments, or does it declare FOO to have the +value (X,Y)? (The latter case applies, but be careful.) + +Among the other problems that users frequently stumble over is the +restriction on the use of ++ and -- for incrementing and decrementing +variables. The C standard does not allow variables of type float or +double to be the targets of these operators and all C implementations +adhere to this restriction. + +The placement of semicolons in programs is sometimes confusing, but the +rule is actually very simple.
A statement is either an expression that +ends in a semicolon or it is a compound statement, which is a sequence +of statements enclosed in curly brackets. So + + if(x < y) x = y; else y = x; + +is correct, but + + if(x < y) x = y else y = x; + +is not, nor is + + if(x < y) {x = y; y = 0;}; else y = x; + +which has an extra semicolon after the right curly bracket. + + +Node: Support Routines, Previous: Basics, Up: Top, Next: Portable I/O + + + An Introduction to the Shared Library + + +This node briefly describes a useful subset of the routines available in +the Shared Library. For more complete documentation consult the other +nodes in this tree. Begin with the next node to see all documentation +on the available support packages. Note that not all packages are +included in the Shared Library. For example, the job manipulation +routines are not included. (*note Jobs: Job Handling.) + +When a C program is loaded the STINKR command "x clib;clib" executes a +STINKR command file that creates links to the C library CLIB (which is +also called the Shared Library since at runtime it is shared by all +users running C programs.) This library includes a very general package +for I/O (the Portable I/O Library), some basic character string routines, +the standard mathematics functions, interfaces to ITS system calls, and +the C runtime support package which reads and parses the JCL line and +initializes the standard input, output, and error output I/O streams. + +Before describing some of the routines that are available the relevant +terminology must be introduced. A variable of type SIXBIT is a word +containing left justified sixbit characters. A FILESPEC is a structure +of four SIXBIT variables which contain the device, first filename, +second filename, and directory. A filename is a character string +containing an ITS filename in the form "device:directory;first second". +A pathname is a character string that contains a UNIX-style +representation for filenames. 
The format is +"/device/directory/first.second". If the device specification is +omitted the file specification is written as "directory/first.second". +The pathname form "first.second" is useful for passing both first and +second filenames to a program on the command line. In filenames and +pathnames, delimiters may be quoted with control-Q. A file descriptor +is used by the Portable I/O Library to identify files and may be taken +to be of type int. The file descriptor is not an ITS channel number, +although it is sometimes referred to as a channel. + +In general, users should try to avoid circumventing the Portable I/O +Library by using the ITS system calls directly. However, access to the +lower level I/O primitives provides the user with features such as +unbuffered TTY input and random access I/O that are not implemented in +the Portable Library. The Library performs fully buffered I/O, so +random access block I/O will not work properly. For random access I/O, +use the system interfaces open, close, sysread, syswrite, and access. +The Portable I/O Library searches for unused I/O channels so users who +open files by using the system interface routine 'open' will not clash +with the Portable Library. + + + PORTABLE I/O ROUTINES + +The C startup procedure initializes the global variables cin, cout, and +cerr as file descriptors for the standard input, standard output, and +standard error output streams. + +copen (filename, mode, options) + Open the indicated file and return a file descriptor or -1 if + the open failed. The filename argument can be either a filename + or pathname specification. The mode is a single character, 'r' + for read, 'w' for write, and 'a' for append to end of file. Read is + assumed if the mode argument is omitted. The options argument + is usually omitted. The option "b" denotes binary (image mode) + I/O and "s" denotes I/O to an incore buffer. The buffer pointer + is supplied in place of the filename. 
Note that the mode has + type character while the options variable has type pointer to + character. + +cgetc (fd) Read a character from the indicated file descriptor and + return it or return 0 if end of file. + +cputc (c, fd) Write a character to the indicated file descriptor. + +ceof (fd) Test for end of file. +cclose (fd) Close file. +closall() Close all files opened by copen. + +cflush(fd) Force out buffer contents on output stream. +rew(fd) Reset file to beginning. + +ungetc(c, fd) Push character back into stream. + +getchar () Equivalent to cgetc (cin). +putchar (c) Equivalent to cputc (c,cout). + +gets (s1) Read a line from cin. +puts (s1) Write string and newline to cout. + +cprint (fd, format, arg1, arg2, ...) + Formatted print statement like printf in the Standard I/O Library. + The format is a string which may contain format items of the + form %nf, where n is an optional decimal integer taken to be the + minimum field width and f is one of the following characters. + + d - Print next argument (an integer) in decimal + o - Print next argument (an integer) in octal + s - Print next argument (a string) + c - Print next argument (a character) + + The file descriptor can be omitted, in which case cout is used. + +cgeti (fd) Read an integer in image mode. +cputi (i, fd) Write an integer in image mode. + +cexit (cc) Terminate job and close all files. Returning from the + main routine will have the same effect. + +fparse (fn,f) Convert file name or path name to FILESPEC. +prfile (f,fn) Convert FILESPEC to file name. + +istty (fd) Return Boolean value indicating whether file is a TTY. +itschan (fd) Return the actual ITS channel number corresponding to + the file descriptor. + +fprint(x,fd) Print the floating point number x on the indicated file. +atoi(s) Convert string to integer. The first character must be + a digit or minus sign and the conversion is always + performed in base ten. 
+ + + TTY I/O ROUTINES + +utyi () Read character from TTY in unbuffered and unechoed mode. + The TTY is opened if necessary and the output buffer is + flushed before the read. +utyo (c) Output the character to the TTY in unbuffered mode. + The TTY is opened if necessary and the output buffer is + flushed before the write. +tyo_flush() Flush the TTY output buffer. +setprompt(s) Set the default TTY input prompt character string. +spctty(c) Outputs a ^P code to the TTY. + +It is alright to use these TTY routines while also using the Portable +I/O Library for terminal I/O. + + + STORAGE ALLOCATION + +calloc (n) Return pointer to block of n characters. +cfree (p) Free storage pointed to by p. The storage must have + been allocated by calloc. + +salloc (n) Allocate a block of n words and return a pointer to it. +sfree (p) Free storage allocated by salloc. + + + BASIC STRING ROUTINES + +slen (s) Return string length. +stcpy (s1, s2) Copy string from S1 to S2. +stcmp (s1, s2) Return TRUE if character strings are equal. +upper(c) Return upper case version of character. +lower(c) Return lower case version of character. + +The Portable I/O Library contains routines for manipulating SIXBIT +characters and strings. (*note SIXBIT: Portable I/O Library.) +The character string package contains routines for manipulating byte +pointers and bit arrays. (*note Bytes and Bits: String Routines.) + + + INTERFACES TO SYSTEM CALLS + +This is a partial summary of the system calls currently available to +users of the Shared Library. Refer to (*note Details: System Routines.) +for more complete documentation. + +open (fs, mode) + Open channel specified by FILESPEC and return ITS channel number + or negative ITS failure code. The mode refers to the ITS file + access mode bits, not the character codes used by the Portable + I/O Library. The routine searches for an unused channel. + + +sysread (ch, buffer, size) Block input IOT. +syswrite (ch, buffer, size) Block output IOT. 
+ +access (ch, i) Set file access pointer. +fillen (ch) Return file length. + +close (ch) Close the channel. + +fdate = rfdate (ch) Read file creation date. +fdate = sfdate (ch, fdate) Set file creation date. +fdate = srdate (ch, fdate) Set file reference date. +rauth (ch) Read file author in SIXBIT. +sauth (ch, w) Set file author in SIXBIT. + +rsname () Return sname in SIXBIT. +runame () Return uname in SIXBIT. +ssname (w) Set sname to value supplied in SIXBIT. + +sleep (n) Sleep for n 30th seconds. + +valret (s) .VALUE a character string or zero. + +etime() Return system elapsed time in 1/60 sec units. +cputm() Return job CPU time in 1/60 sec units. +getcpu() Return job CPU time in 4.096 micro sec units. + + +The Portable I/O Library contains routines for handling SIXBIT data +(*note SIXBIT: Portable I/O Library.) and there is a package of routines +for manipulating and printing dates and times. (*note: Dates and Times.) + +Some useful definitions are available in the file CLIB;CLIB H. + + +Node: Portable I/O, Previous: Basics, Up: Top, Next: Storage Allocation + + + The Portable I/O Library + +Most of the routines in the Portable I/O Library are written in C, but +the most frequently used I/O routines have been hand coded in MIDAS. +The C source routines will be described first, followed by descriptions +of the MIDAS source routines and internal data structures. + +The routine for opening files is COPEN. The arguments to COPEN are a +character string which is a file name or path specification, an optional +character which indicates the mode of access, and an optional character +string containing option codes. The modes are + + 'r' Open file for read access. + 'w' Open for write access. + 'a' Open the file for write access and position the file + access pointer at the end of the file. + +The default mode is 'r' for read. Normally, I/O is character oriented +and produces text files. 
In particular, the lines of a text +file are assumed by the user to be separated by newline +characters with any conversion to the system format performed +by the I/O routines. + +If an options string is given and contains the character "b", +then I/O is integer oriented and the file is processed in image mode. + +If an options string is given containing the character "s" then I/O is +performed to or from a buffer in memory. A pointer to the buffer is +passed in place of the filename argument. Closing a string I/O file +that is open for write will append a NULL character to the string and +return a character pointer to that character. + +COPEN returns a CHANNEL, which is a pointer to a control block, if the +open is successful. It returns -1 in case of error and leaves the +system error code in the external variable CERRNO. The macro variable +OPENLOSS is frequently set to -1 in macro packages. + +The default filename components are DSK for the device, SNAME for the +directory, and > for the second filename. No default is supplied for +the first filename. COPEN is careful to treat the TTY as a special +case. If a disk file is locked, then COPEN will wait until the file can +be opened. + +The routine GETCHAR takes no arguments, reads a character from the +standard input, and returns it. Zero is returned on end of file. + +The routine GETS takes a character pointer as an argument and reads one +line from the standard input, placing the null terminated line into the +buffer. The newline character is not included. No value is returned +and the buffer is assumed to be large enough. + +The routine PUTCHAR takes a single character as an argument and writes +it on the standard output. The character is returned. + +The routine PUTS takes a character pointer as an argument and writes the +character string and a NEWLINE on the standard output. No value is +returned. 
+ +The routine FPRINT takes a single or double precision floating point +number and a file descriptor as arguments, and prints the floating point +number on the specified file. Both arguments are required. Eight +significant digits are printed in either fixed or floating point +notation depending on the magnitude of the number. + +The routine ATOI takes a character string as an argument and returns an +integer. The first character must be either a digit or a minus sign and +radix 10 is always used. + +The routine MOPEN is called by COPEN to open a file. The TTY is treated +as a special case and MOPEN will wait on a locked file until the file +becomes available. MOPEN requires a FILESPEC and a code word of ITS +access mode bits as arguments. MOPEN calls OPEN to open non-TTY files. +If the file is successfully opened, the ITS channel number is returned, +otherwise the negative ITS failure code is returned. + +The routine MCLOSE can be used to close a channel opened by MOPEN. An +attempt to close the TTY is ignored. A file opened by a call to COPEN +must be closed by a call to CCLOSE, not MCLOSE. + +The routine FPARSE will convert an ASCIZ string representation of an ITS +filename or pathname to a FILESPEC. The routine requires a character +string pointer to the filename and a pointer to a FILESPEC block. Zero +is returned if the filename could be parsed and -1 is returned if the +format was incorrect. + +The routine PRFILE will convert a FILESPEC into a filename in ITS +format. The arguments are a pointer to the FILESPEC and a character +string pointer to a buffer that is assumed to be large enough for the +resulting filename. Control-Q is placed in front of delimiters that +occur in the components of the filename. + +The routine FOPEN calls FPARSE to parse a filename and then calls OPEN +to open the file. The default filename components are DSK for device +and RSNAME for directory. 
There are no defaults for the first or second +filenames which is why this routine is almost never used. But if you +really wish to use it, the arguments are a character string pointer to +the filename and an integer containing the ITS mode bits. The ITS +channel number is returned if the open is successful, otherwise the +negative ITS failure code is returned. + +The routine OPEN takes a FILESPEC and an integer containing the ITS mode +bits, opens the indicated file, and returns the ITS channel number if +the open was successful and the negative ITS failure code if the open +was unsuccessful. The routine calls CHNLOC to find an available channel. +(*note: System Calls.) + +The routines FXARG, C0OPEN, C0INIT, and PRSARG are called by the C +startup procedure to open the TTY as the standard input, output, and +error output I/O streams, parse the JCL, and redirect the standard I/O +streams if the JCL line contains specifications for standard I/O +redirection. + +The routine VALRET takes an ASCIZ character string as an argument and +valrets the command string. In some cases the command string will be +overwritten with garbage. + +The following two routines take a single character as an argument and +return a character of the indicated type as a result. + + CCTO6 Convert ASCII character to sixbit + C6TOC Convert sixbit character to ASCII + +The routine CSTO6 takes a character string pointer as an argument and +returns an integer containing the left justified sixbit representation +of the ASCII character string. The routine C6TOS takes an integer and a +character string pointer as arguments and expands the sixbit characters +in the integer into the character buffer. + +The following Portable I/O routines were hand coded in MIDAS. The +calling sequences of most of these routines are described in the node on +Support Routines (*note MIDAS I/O Routines: Support Routines.) +All of the routines take Portable I/O file descriptor pointers as +arguments, not ITS channel numbers. 
+ + CGETC Read character + CPUTC Write character + CGETI Read integer in image mode + CPUTI Write integer in image mode + UNGETC Push character back into I/O stream + CEOF Test for end of file + CFLUSH Flush buffer + REW Reset channel to beginning + CCLOSE Close file + CLOSALL Close all files + ISTTY Return true if file is a TTY + CISFD Return true if pointer is a file descriptor + ITSCHAN Return the ITS channel number corresponding + to the file descriptor + + +In the Portable I/O Library, the file descriptor points to a file +control block that contains the information on the state of the file. +The structure of a Portable I/O Library file control block is described +below. The fields begin at the left edge of the word. + + struct FCB { + int *fbuffp : 18, /* Pointer to buffer */ + fchan : 4, /* ITS channel number */ + fdevice : 6, /* Device code */ + fflag : 8; /* Flag bits (described below) */ + int *fbprt, /* Pointer to next + character or word in buffer */ + fbnt, /* Number of characters + or words in buffer */ + int fucnt : 18, /* Number of characters + in UNGETC buffer */ + *fuptr : 18; /* Pointer to UNGETC buffer */ + int fclsr() : 18; /* Address of close routine */ + int fngetr(): 18; /* Address of normal close routine */ + int fgetcr(); /* Address of CGETC routine */ + int fputcr(); /* Address of CPUTC routine */ + }; + + typedef FCB *FD; /* Type definition for the file descriptor + returned to the user */ + +/* Flag bits for the fflag field in the FCB */ + +#define PHYEOF 01 /* Physical EOF */ +#define OPEN 02 /* File is open */ +#define WRITE 04 /* Write access */ +#define TTY 010 /* File is TTY */ +#define UNSET 020 /* Device and channel not yet set */ + + +The size of the buffers used by the Portable I/O Library is 200 octal +words. A maximum of ten Portable I/O Library files may be open at any +time. The number of characters that may be pushed back into the I/O +stream is limited to 20. 
+ +The source for the Portable I/O Library exists in the several files in CLIB; + + C10IO C Most of the routines that are written in C + FPRINT C The floating point print routine + ATOI C The ASCII string to integer converter + C10MIO CMID The core routines written in MIDAS + + +Node: Storage Allocation, Previous: Basics, Up: Top, Next: String Routines + + + Storage Allocation + +The Shared Library provides a storage allocator for use by C users. The +storage allocator maintains a linked list of free blocks and merges +adjacent free blocks to minimize fragmentation. The operating system is +called to obtain additional memory pages only when necessary. The +allocator will return a pointer to a zeroed block of the requested size. + +calloc (size) Returns a pointer to a zeroed block of + characters of the requested size. + +cfree (p) Frees a block allocated by calloc. The blocks + may be freed in any order, but it is a gross + error to free a block that has not been + allocated by calloc. + +salloc (size) Allocate a zeroed block of words of the + requested size. + +sfree (p) Free a block allocated by salloc. + +alocstat (nwalloc, nbfree) + Compute allocation statistics. The number of free words is + returned. The number of words that have been allocated by the + operating system and the number of free blocks are returned via + pointers supplied as arguments. + +getcore (size) Calls the page allocation routine in the page + handling package to obtain the requested amount + of space and updates the allocation statistics. + This routine is for internal use. + + +The source code is in the files CLIB;ALLOC CMID and CLIB;C10COR CMID. + + +Node: String Routines, Previous: Basics, Up: Top, Next: Character Arrays + + + Character String Routines and Related Functions + +The Shared library includes several basic routines for manipulating +character strings, byte strings, and bit strings. 
+ +slen (s) Returns the length of a character string excluding the + terminating null character. + +stcpy (s1, s2) Copies string s1 to s2 and returns a pointer to the null + byte at the end of the new copy. The space occupied by + s2 is assumed to be large enough for s1. + +stcmp (s1, s2) Returns TRUE if character strings s1 and s2 are the same + length and have equal contents. + +lower (c) Convert character to lower case. + +upper (c) convert character to upper case. + +bget (s, i) Extract the i th bit from the bit string stored at s. + The bit string begins on a word boundary and the index + origin starts at zero. + +bset (s, i) Set the i th bit. + +ildb (pbp) Do an increment and load byte on the byte pointer stored + at the address supplied as an argument. + +idpb (ch, pbp) Do an increment and deposit byte with the byte and byte + pointer address supplied as arguments. + +These routines are in the file CLIB;STRING CMID and all of these routines +are in the Shared Library. + + +A couple of basic string pattern matching routines are contained in the +file CLIB;MATCH C. The routines defined in this file are described below. + + smatch (p, s) The pattern p is a character string which is to + be matched against the data string s. Certain + characters in p have special meanings. + + '*' Match any substring + '?' Match any character + '\\' Quote following character + + + sindex (p, s) Return the index of the first occurrence of the + string p in the string s. Return -1 if p does + not occur in s. + + +Both of these routines are included in the Shared Library. + + +An interface to the PDP-10 block transfer (BLT) instruction has been +provided as a routine in the source file CLIB;BLT CMID. + + blt(source, dest, number) Will transfer the indicated + number of words from the source + address to the destination + address. + +This routine is available in the Shared Library. 
+ + +The Portable I/O Library contains routines for converting between +characters and strings in sixbit and ASCII formats. +(*note SIXBIT: Portable I/O Library.) + + +Node: Character Arrays, Previous: Basics, Up: Top, Next: Math Functions + + + Array of Characters Package + +The array of characters package supports operations on character strings +stored in a representation that allows the strings to grow and shrink +dynamically. The routines in the array of characters package use the +dynamic storage allocator that is included in the Shared Library. +(*note Dynamic Storage: Storage Allocation.) + +The data structures used in the package can be defined by + +struct rep { /* The representation for an + array of characters */ + int count; /* Reference count */ + char *s; /* Character buffer pointer */ + int csize; /* Logical size of the array */ + int msize; /* Actual size (at least csize + 1) */ +}; + +typedef rep *AC; /* What the user works with */ + + +The functions supported by the package are described below. + + ac_new () => ac create empty array + ac_alloc (size) => ac create empty array with preferred size + ac_create (string) => ac create with initial value + ac_xh (ac, c) => c extend array with character + ac_trim (ac) => ac trim excess storage + ac_fetch (ac, i) => c fetch character from array + ac_link (ac) => ac make new link to array + ac_unlink (ac) remove link to array + ac_puts (ac, f) print array + ac_cat (ac, ac) => ac concatenate arrays + ac_copy (ac) => ac copy array + ac_string (ac) => *char return string version + ac_size (ac) => size return current size of array + ac_flush (ac) make array empty + ac_n () => int return # of active arrays + + +The default initial storage allocation for creating character arrays is +8 words. One character is stored per word. + +The source for the array of characters package is in CLIB;AC C. 
+ + +Node: Math Functions, Previous: Basics, Up: Top, Next: Dates and Times + + + Floating Point Routines + +All routines accept a single argument that is of type float or double. +Note that C converts all floating point arguments to double precision. + +Most of the floating point routines are in CLIB;CFLOAT CMID. The functions +contained in this package are listed below. + + LOG EXP COS SIN + ATAN SQRT DTRUNCATE DROUND DABS + + +In addition, a random number generator was borrowed from MUDDLE and +exists in the file CLIB;RANDOM CMID. Two routines are available. + + SRAND Set the random number generator seed + RAND Get a random number. + The number is a large integer. + + +Both of these packages are in the Shared Library. + + + +Node: Dates and Times, Previous: Basics, Up: Top, Next: System Routines + + + Date and Time Routines + +This node documents the routines that are available for manipulating +dates and times in various formats. To acquire a file date or the +running time of a program consult the documentation on system calls. +(*note Calls: System Calls.) + + +The date manipulating routines handle three representations for dates. + +(1) CAL Calendar date, a system-independent representation + consisting of a record containing six integers + for the year, month, day, hour, minute, and second. + +(2) FDATE The ITS date representation used in file directories. + +(3) UDATE The UNIX date representation, seconds since + Jan 1, 1970, GMT. + +The routines for manipulating these date representations are + + u2cal (udate, cal) Convert UDATE to CAL format + udate = cal2u (cal) Convert CAL format to UDATE format + f2cal (fdate, cal) Convert FDATE to CAL format + fdate = cal2f (cal) Convert CAL format to FDATE + prcal (cal, fd) Print the date and time in CAL format + using the Portable I/O Library file + descriptor supplied in the call + +The structure of a CAL format date can be described by the following +structure. 
+ + struct CAL {int year, month, day, hour, minute, second;}; + +Note that the routines that manipulate CAL format dates require a +pointer to such a structure. All of these date handling routines are +available in the Shared Library. The source is in the file CLIB;DATE C. + + +For printing times, the following routine is useful + + pr60th (time, fd) Will print the time (supplied in units + of 1/60 seconds) using the Portable I/O + Library file descriptor supplied in the + call. The display format is HH:MM:SS.XX + +The source for this routine is in CLIB;PR60TH C. This routine is included +in the Shared Library. + + +Node: System Routines, Previous: Basics, Up: Top, Next: Timer Package + + + System Routines + +This node documents packages of routines that manipulate system +resources such as jobs and memory pages, implement a facility for +handling exceptional program conditions, and provide interfaces to +operating system calls. + +* Menu: + +* TTY I/O Routines:: A package for terminal I/O. + +* Interrupts:: A package for handling user program interrupts. + +* Job Handling:: A package for manipulating ITS jobs. + +* Page Handling:: A package for hacking memory pages. + +* System Calls:: A package of interfaces to ITS system calls. + + +Node: TTY I/O Routines, Up: System Routines, Next: Interrupts + + + The TTY I/O Package + +This node documents the facilities provided by the TTY I/O Package for +performing terminal I/O. Note that this package is used by the Portable +I/O Library for performing TTY I/O and since the Portable I/O Library +provides a more general I/O interface, users are advised to avoid +circumventing the Portable I/O Library by calling the TTY I/O routines +directly. However, the TTY I/O Package provides certain facilities that +are not available in the Portable Library such as unbuffered terminal +input. The TTY I/O routines may be used in conjunction with the +Portable I/O Library for performing terminal I/O without conflict. 
+ + +tyiopn() Open the TTY input channel. The channel number of the TTY + input channel is returned. Even if this routine is + called several times the TTY input channel is only + opened once and only one input buffer is maintained. + +tyi() Read a character from the TTY. The transfer is fully + buffered. + +utyi() Read a character from the TTY. The transfer is + unbuffered and unechoed. The TTY output buffer will be + forced out before the read is performed. + +get_buf(buffer, size, break, prompt) + Read a string from the TTY into the indicated buffer + until the buffer is filled, a NUL character is received, + or the indicated break character is received. The + indicated prompt string (or the default prompt string, + as set by setprompt) is output to the terminal, + unless characters are already available in the TTY input + buffer. The number of characters read is returned. + +setprompt(s) Set the default prompt for terminal reads via tyi(). + +tyoopn() Open the TTY output channel. The channel number is + returned. Even if this routine is called multiple + times, only one TTY output channel is created and only + one TTY output buffer is maintained. + +tyo(c) Output the character to the TTY. The transfer is fully + buffered. + +utyo(c) Output the character to the TTY without buffering. The + TTY output buffer is forced out before the transfer. + +spctty(c) Output the display code without buffering. The TTY + output buffer is forced out before the code is output. + +tyos(s) Output the character string to the TTY with buffering. + +tyo_flush() Force out the TTY output buffer. The characters are + written out using an SIOT operating system call. + +The global variable ttynp contains the address of the ^L handler. The +default ^L handler is ttxnp. When a ^L is encountered in the input +stream, the prompt and the input buffer are redisplayed and the output +buffer is forced out. 
Before the prompt is redisplayed a carriage +return or the special display code 'C' is output, depending on whether +the terminal is a display. + + +There are two routines that are internal to the TTY package. + +ttyih() TTY interrupt handler +ctrlch(c) Return display width of character + + +The source code for the TTY I/O Package is in CLIB;C10TTY C. + + +Node: Interrupts, Up: System Routines, Next: Job Handling + + + The C Interrupt Package + +The file CLIB;C10INT CMID contains the code for the C interrupt handling +system. Two basic routines are provided by this package for setting an +interrupt handler and signalling an interrupt. + +on (number, handler) Specifies the routine to be invoked when the + interrupt occurs. A pointer to the new handler + is provided in the call. The address of the old + interrupt handler is returned. The number + supplied in the call is one of the code numbers + used in the C interrupt system. + +signal (number) Signal the specified interrupt. + + +Default handlers have been provided for ^S and ^G interrupts. The +default ^S handler flushes the TTY output buffer and inhibits output. +The default ^G handler flushes the TTY output buffer, inhibits output, +and produces a dump of the runtime stack. + +The code numbers used by the C interrupt system are defined in CLIB;C DEFS. +All of the routines described above are in the Shared Library. + + +Node: Job Handling, Up: System Routines, Next: Page Handling + + + Routines for Manipulating Jobs + + +Two simple routines for executing inferior jobs are available, but these +routines are not included in the Shared Library. + + +execs (pname, args) Execute a program with the supplied command string. + +execv (pname, argc, argv) Execute a program with the supplied + argument vector which is in the same + format as the parameters of a main routine. + +Both routines return the following return codes. 
+ + -5 Job valretted something and was not continued + -4 Internal fatal error + -3 Unable to load program file.d + -2 Unable to create job + -1 Unable to open program file + 0 Job terminated normally + other Job terminated abnormally with said PIRQ + +Both routines set the following global variables. + + exctime Job CPU time in 1/60 sec. units + exccode Contents of job's loc 1 at termination + +The source for these routines is in CLIB;C10EXC C. + + +For more sophisticated job handling, use the package in CLIB;C10JOB C. +These routines are not included in the Shared Library. + +The representation of a job is an integer with a value from 0 to 7, +indicating the inferior job number. + + j_create (jname) => # or error code + + j_load (filespec) => # or error code + j_fload (file_name) => # or error code + j_cload (channel, jname) => # or error code + j_own (uname, jname) => # or error code + + error codes: + + -1 unable to open program file + -2 unable to create job + -3 unable to load job + -4 fatal error + -5 (OWN) no such job + -6 (OWN) job not yours + + j_start (#) => rc (return code: non-zero => error) + j_stop (#) => rc + j_disown (#) => rc + j_forget (#) => rc + j_kill (#) => rc + j_snarf (#, inferior_name) => rc + (disown named inferior from stopped job) + j_give_tty (#) => rc + j_take_tty (#) => rc + + j_grab_tty () (grab tty if given to some inferior + and stop job) + j_retn_tty () (return tty to inferior and restart) + + j_wait (#) => status (waits for non-zero status) + j_sts (#) => status + + j_onchange (f) (set handler for status changes) + + j_sjcl (#, s) => rc (set jcl for job) + j_jcl (#) => s (get jcl) + j_ch (#) => ch (return block image output channel to job) + j_name (#, filespec) (set filespec to job name) + + j_val (#) => s (return string valretted by job) + j_fval (#) (flush valret string; or call cfree) + + Job Status: + + -5 => stopped, ^Z typed + -4 => stopped (by superior) + -3 => stopped, valret + -2 => stopped, requested suicide + -1 => 
no job + 0 => running + >0 => stopped, value is job's first interrupt word + + +Node: Page Handling, Up: System Routines, Next: System Calls + + + Routines for Manipulating Memory Pages + + +The routines documented in this node provide capabilities for managing +memory pages. In order to avoid conflicts with other routines in the C +runtime environment, these routines should be used for allocating memory +pages. Note that the storage management routines documented elsewhere +in this file use these page handling routines, so the manipulation of +memory pages via these routines in conjunction with the use of the +storage allocation package is permissible. + + +pg_get (n) + Allocates n contiguous, unused pages in the address space. + The number of the lowest page allocated is returned. If the + request cannot be met, -1 is returned. + +pg_ret (page, n) + Deallocates n pages in the address space, starting + with the page number supplied in the call. The + routine returns a nonzero value if an error occurs. + +pg_exist (page) + Returns TRUE if the page exists in the address space. + +pg_nshare (page) + Return number of times that a page is shared. + +The source for these routines is in CLIB;C10PAG C and all of these routines +are available in the Shared Library. + + +There are two routines available in the Shared Library for mapping disk +files into memory. + + +filmap (chan, offset, size) + Map part of the disk file open on the specified channel into + memory. The size and offset, in words, of the section to be + moved into memory is specified in the call. The routine returns + a pointer to the beginning of the section in memory. If the + mapping fails because of insufficient memory space, the routine + prints an error message on the standard output and returns + zero. If an error occurs while reading a page, then an error + message is printed on the standard output and a pointer to the + section successfully read is returned.
+ +filunmap (ptr, size) + Frees pages mapped into memory by filmap. The size of the + section in words and a pointer to the section are supplied in + the call. The contents of the pages are not copied back to + the file. + + +The source for these two routines is in CLIB;C10MAP C and the routines are +included in the Shared Library. + + + +Node: System Calls, Up: System Routines, Next: System Routines + + + ITS System Calls Supported by the Shared Library + +This node documents the system calls that are available through +interfaces in the Shared Library. Before using the routines listed +here, you should check the nodes pertinent to your application to +determine whether there are other routines available that provide more +amicable interfaces to the operating system. + +In general, the system call interfaces are written so that argument +values for the calls are provided as arguments of the procedure +invocation, in the order in which the arguments are expected in the +call, and the routines return the negative value of the ITS failure code +if an error occurs. Note that all channel numbers are ITS channel +numbers, not Portable I/O Library file descriptors, all names are in +sixbit, and all dates are in ITS date format. A package has been +written for manipulating dates and is described in this file. +(*note Dates: Dates and Times.) 
+ + +sysopen (chan, filspc, mode) Open the specified channel +close (chan) Close the specified channel +chnloc() Find and return the number of an + unused channel +uiiot (chan) Perform a unit input IOT +uoiot (chan, data) Perform a unit output IOT +sysread (chan, buffp, nwords) Perform a block input IOT +syswrite (chan, buffp, nwords) Perform a block output IOT +siot (chan, bytp, nbytes) String IOT + +sysfinish (chan) Force output to finish on the + specified channel and wait for + completion +sysforce (chan) Force output to finish, but do + not wait + +reset (chan) Reset channel +status (chan) Get channel status +rfpntr (chan) Read file access pointer +access (chan, pos) Perform random access on channel +fillen (chan) Return the file length in units + appropriate to the mode in which + the file was opened +filnam (chan, filspec) Get file name by which channel + was opened +rauth (chan) Read the name of the author of + the file +sauth (chan, author) Set the name of the file author +rdmpbt (chan) Read the file dump bit +sdmpbt (chan, bit) Set the file dump bit +sreapb (chan, bit) Set the file reap bit +rfdate (chan) Read the file creation date +sfdate (chan, fdate) Set the file creation date +srdate (chan, fdate) Set file reference date +dskupd (chan) Update file information +resrdt (chan) Restore file information + +ttyget (chan, block) Get TTY status + (writes 3 values into the block) +ttyset (chan, block) Set TTY status + (reads 3 values from the block) +cnsget (chan, block) Get console status + (writes 5 values into the block) +cnsset (chan, block) Set console status + (reads 5 values from the block) + +whyint (chan, block) Return block of information on + interrupt status +ityic (chan) Read tty interrupt character +syslisten (chan) Listen for input on the channel + (the number of characters is returned) +rcpos (chan) Read TTY cursor position as half + words (v,,h) +scml (chan, number) Set the number of command lines + at the bottom of the screen + +getcpu ()
Return cpu time in 4.069 micro seconds +cputm () Return cpu time in 1/60 seconds +sleep (time) Sleep for time specified in + 1/30 second units +etime () Return elapsed time in 1/60 seconds +now (pcal) Get current date and time + +corblk (a1, a2, a3, a4, a5) Perform page hacking +cortyp (pagno, output) Get information about page +pageid (vpn, idn) Get named public page +pgwrit (job, vpn) Write page to disk + +rsname Read sname +ssname (name) Set sname +runame Read user name + +rsuset (where) USET hacking +wsuset (where, what) +ruset (who, where) +wuset (who, where, what) +wusrvar (job, spec, value) + +delete (fname) Delete a file given an ASCIZ + string containing the + filename. (The Portable I/O + routine fparse is used to + parse the filename.) +sysdelete (filspc) Delete file +renmwo (chan, filspc) Rename file open for output +sysrnm (fspec1, fspec2) Rename file given by fspec1 +syslnk (filsp1, filsp2) Create link from filsp1 to filsp2 +dirsiz (chan, block) Get info on directory size + (writes 2 values into the block) + +tranad (job, from, to, flags) File translation hacking +trancl (job, flags) +trandl (job, filspc, flags) + +sysload (job, chan) Load a program +pdump (jobch, dskch) PDUMP a program +uclose (jchan) Destroy an inferior job +sysdisown (jchan) Disown job +reown (jchan) Reown job +sysdtach (jchan) Detach job +sysatach (jchan, tty) Attach job (TTY < 0 implies default) +atty (job) Give tty to inferior +dtty (job) Take tty from inferior +wfnz (ptr) Wait for word to become non-zero +wfz (ptr) Wait for word to become zero +val7ret (str) Valret an asciz string +demsig (demon) Signal a demon process + +sstatus (valblk) Get system status + (returns 7 values) + +maktag (tagp) Make a tag table entry +gotag (tagp) + +For information on the details of the system calls consult the system +documentation. Try + + :DOC CALL + +at DDT command level.
+ + +Node: Timer Package, Previous: Basics, Up: Top, Next: Debugging + + + Timing Procedure Calls + +A package of routines is available to gather statistics on the time +spent in the various procedures of a program. The procedure calls are +monitored and a report which includes the percentage of CPU time spent +in each procedure is generated. + +[This package is still being debugged and documented.] + + +Node: Debugging, Previous: Basics, Up: Top, Next: Graphics + + + Debugging Packages + +The code produced by the C compiler contains the hooks for implementing +the C versions of Lisp BAKTRACE and BREAK. A first attempt at BAKTRACE +exists in the file CLIB;STKDMP C. This routine is called by the ^G handler +and so a basic back trace can be obtained by typing control-G. The +stack dump routine displays the names of the called procedures and the +procedure arguments in octal. + +These facilities will be expanded in time. + + +DDT provides several useful facilities for debugging C programs at the +assembly language level. + + ^N Single steps through a program. + p,^N When invoked at the beginning of a subroutine + will continue execution until the subroutine returns. + +There are facilities for setting breakpoints and listing memory +locations. (*note DDT: (INFO;DDT >)Top.) + + + +Node: Graphics, Previous: Basics, Up: Top, Next: Miscellaneous + + + Computer Graphics and Image Processing + +This node documents the packages available in C for displaying graphics +and performing image processing tasks. + +* Menu: + +* TV Graphics:: A basic package for graphics on the Knight TV's. + +* Image files:: A package for reading and writing images in the + new image file format. + + +Node: TV Graphics, Previous: Graphics, Up: Graphics, Next: Image Files + + + TV Graphics Package for Knight TV's + + +The TV graphics package provides the basic primitives for drawing +points, lines, and circles on the Knight TV displays. 
All of the +drawing functions test to see if the TV has been opened. If it has not +been opened, the TV is opened by calling tvsetup(), which allocates +space for the TV buffer, initializes the TV buffer memory map, clears +the screen, and sets the buffer update mode to inclusive-OR mode so that +overlapping lines will not erase the common point of intersection. The +routine tvwrite uses a default mode of set, since that is advantageous +for dumping images on the screen. All of the line drawing functions +take arguments in units of raster points. The raster is 454 lines high +with 576 bits per line and the origin is at the lower left corner of the +TV screen. All routines return one if the requested operation could be +performed, and zero otherwise. Note that in all cases, the specified +operation will not be performed if it would result in movement off of +the screen. The routine testp is the only plotting function that does +not return 0 if the point is off of the screen or the TV could not be +opened. + + point(x, y) Draw dot at the raster point (x, y). Returns + 0 if the point was not on the screen or the TV + could not be opened, otherwise returns 1. + + testp(x, y) Returns 1, if the indicated point is set, + 0, if the indicated point is clear, and + -1, if the indicated point is off the screen or + if the TV could not be opened. + + line(x0, y0, x1, y1) + Draw a line from (x0, y0) to (x1, y1). Returns + 0 if either point is off the screen or if the TV + mapping could not be established, otherwise + returns 1. + + connect(x, y) Draws a line from the last point drawn to the + indicated point. Returns 1, if the new point lies + on the screen and the line could be drawn, 0 otherwise. + + move(x, y) Move to (x, y) without drawing anything. It + does not open the TV if it is closed. Move + returns 0 if the point is off of the screen, and + returns 1 otherwise. + + circle(x, y, r) Draw a circle of radius r, centered at (x, y).
+ Returns 1, if the circle is inside the screen and + was successfully drawn, otherwise returns 0. + + tvwrite(line, word, data) + Write the 32 rightmost bits of the supplied data + to the indicated line and word of the TV buffer. + If necessary, the TV is opened and the mode is + changed to SET mode. The routine checks for out + of bounds line and word arguments. Returns 1, + unless the write could not be performed. + + +The following routines are not usually used by casual users since the +graphics primitives described above initialize the state of the TV. + + + tvopen() Map PDP-10 memory onto PDP-11 memory. + A pointer to the beginning of the TV array is + returned, but this pointer may be ignored by + the caller, since the package uses its own copy + of the TV array pointer. A return code of 0 + indicates failure. + + tvclose() Undo the mapping and release the memory used + by the TV array. No value is returned and no + action is taken if the TV is not open. + + tvmode(mode) Change the mode by which updates to the TV + buffer are affected. Useful modes are defined + as macro variables in CLIB;TV H. The previous + mode is returned. + + tvclear() Clear the screen. The cursor is moved to the + lower left corner of the screen. The request is + ignored if the TV is not open and no value is + returned. + + tvbase() Return the value of the TV pointer. The TV pointer + is always 0 if the TV memory is not mapped into + the program address space. + + tvsetup() Open TV, clear the screen, and set the TV mode + to inclusive-OR mode. A pointer to the TV buffer + is returned or 0 if the TV could not be opened. + + +The source for all of these routines is in CLIB;TV C. These routines +are not part of the Shared Library. + +For information on the algorithms used in this graphics package, +consult + + B. K. P. Horn, "Circle Generators for Display Devices", + Computer Graphics and Image Processing 5, 280-288 (1976). 
+ + +Node: Image Files, Previous: TV Graphics, Up: Graphics, Next: Graphics + + + +Node: Miscellaneous, Previous: Basics, Up: Top, Next: Internals + + +This node documents some assorted routines that perform various +interesting and useful functions that do not fit into the taxonomy for +the rest of the C documentation. + +CLIB;C10EXP C Contains a routine for expanding an argument vector that + contains filenames with wild characters. + +CLIB;C10FD C Contains FDMAP(P,F) which calls the procedure F(S) for + all filenames S that match a pattern P. This source + file also contains some routines for working with + directories. + +CLIB;C10FIL C Contains RENAME(S1,S2) which renames file S1 to S2 and + is claimed to work even if S2 exists. + +CLIB;C10FNM C Contains routines for manipulating filenames. + +CLIB;C10STD C Contains interface routines for implementing the + functions of the Standard I/O Library using functions in + the Portable I/O Library. + +CLIB;APFNAM C Contains some routines for appending suffixes to + filenames. + +CLIB;GETSRV C Contains a routine for looking up ARPA net servers. + +CLIB;C10TAP CMID + Contains a magtape interface. + + +Node: Internals, Previous: Basics, Up: Top, Next: Top + + + +---LOCAL Modes:--- +---Mode: Text--- +---Fill Column:72--- +---Auto Fill Mode: 1--- +---End:-- diff --git a/doc/kcc/-read-.-this- b/doc/kcc/-read-.-this- new file mode 100755 index 000000000..8f66156a7 --- /dev/null +++ b/doc/kcc/-read-.-this- @@ -0,0 +1,3 @@ +This directory contains files for the KCC PDP-10 C compiler. Currently +there is just this one directory, but as the port progresses I'm sure there +will be others. -- Alan diff --git a/doc/kcc/agree.txt b/doc/kcc/agree.txt new file mode 100644 index 000000000..3aa89c5cd --- /dev/null +++ b/doc/kcc/agree.txt @@ -0,0 +1,96 @@ + KCC DISTRIBUTION POLICY + + This file describes the general KCC distribution policy -- +licensing, restrictions, and that sort of thing. 
If you're not sure +how it applies to your particular situation, just get in touch. + + First, note that the files are copyrighted. However, we +consider them "quasi-public" and distribute them freely; the problem +is that sometimes true public-domain stuff is acquired by private +parties and modified slightly to produce a licensed, costly product. +We wish to prevent this by keeping the sources available to everyone +who wants to use KCC, but unavailable to those who have ideas of +selling it; hence the copyright. This applies to all modifications as +well. + + Second, since the software is provided free of charge, there +is absolutely NO WARRANTY on anything in this distribution, nor any +obligation to provide maintenance, and all of the usual software +disclaimers apply to everything. If we were to be held responsible +for any problems, we could not distribute KCC at all. + + The situation with respect to including KCC as a component of +commercial software packages is fuzzy. Our current inclination is to +allow this as a convenience to the ultimate end users, provided they +are given ALL of the distribution, including sources and this notice. +However, certain circumstances may force re-assessment of this +position; e.g. extensive modifications, huge numbers of users, +time-consuming maintenance requirements... people with such +applications in mind should contact us. + + People may be tempted to modify and "improve" the software. +This is natural and to some extent desirable, but can quickly lead to +chaos without some rules governing these additions and modifications. +So we simply state that the use of KCC automatically implies agreement +with the following policies: + +General: + 1. KCC is maintained as a primary software tool for SRI-NIC, +and ensuring that it remains reliable and useful for this purpose must +necessarily have our highest priority. + 2.
Next most important is conformance to the C language +defined by Harbison and Steele, plus the forthcoming ANSI C standard +(currently a X3J11 committee draft). This includes library functions. + 3. Software portability, particularly to 4.3BSD-type UN*X, +is slightly more important than TOPS-20 efficiency. + 4. Licensed UN*X software sources will never be used or +distributed, and such contributions cannot be accepted. + +People making changes to KCC or the library should: + + 1. Retain the copyright notice for each module, and add a +history notice comment describing the change. + 2. Relay your improvements to the maintainers of the canonical +version, so that they can be incorporated into new releases! +Otherwise both you and the rest of the world will lose. + 3. Realize that your changes may not be adopted exactly as +provided, if they conflict with one of the general policies. New +library functions are particularly prone to this problem. As a +solution we will probably collect such things into a separate library +or two (e.g. for TOPS-20 specific functions). + +Finally: + If at all possible, ask people who want copies of KCC to + get it from the canonical source. If you must give + them a copy, keep all of the original distribution + intact in some form so that the baseline is constant. + + + KCC is still under active development, and new releases (with +all accumulated bug fixes, improvements, or new features) can be +expected frequently. At all times there will exist only one canonical +version of the software, from which all distributions are made. If +you can make an Internet FTP connection to SRI-NIC.ARPA, you can +retrieve it whenever you wish. + + Canonical version: KCCDIST: directory on SRI-NIC.ARPA + Maintainer mailbox: + Information list: + (to get on:) + +BUG-KCC is for bug and problem reports and is sometimes used for +discussion of esoteric internal issues. 
INFO-KCC members basically +receive announcements of new releases and developments, and every site +which has installed KCC should have at least one representative on +that list. If enough users express interest, a user discussion group +could also be started (probably this would deal with C on TOPS-20 in +general rather than just KCC). + + Good luck! Feel free to contact me about any problems or questions +you have. + +Ken Harrenstien Internet: Phone: (415) 859-6552 +Room EJ200 +SRI International +333 Ravenswood Ave. +Menlo Park, CA 94025 diff --git a/doc/kcc/calls.intro b/doc/kcc/calls.intro new file mode 100755 index 000000000..16b60b314 --- /dev/null +++ b/doc/kcc/calls.intro @@ -0,0 +1,963 @@ +From Sun Release 3.2 Last change: 16 July 1986 +Liberated 4/15/88 by ALAN + + +INTRO(2) SYSTEM CALLS INTRO(2) + + + +NAME + intro - introduction to system calls and error numbers + +SYNOPSIS + #include <errno.h> + +DESCRIPTION + This section describes all of the system calls. A "(2V)" + heading indicates that the system call performs differently + when called from programs that use the System V libraries + (programs compiled using /usr/5bin/cc). On these pages, + both the regular behavior and the System V behavior is + described. + + Most of these calls have one or more error returns. An + error condition is indicated by an otherwise impossible + return value. This is almost always -1; the individual + descriptions specify the details. Note that a number of + system calls overload the meanings of these error numbers, + and that the meanings must be interpreted according to the + type and circumstances of the call. + + As with normal arguments, all return codes and values from + functions are of type integer unless otherwise noted. An + error number is also made available in the external variable + errno, which is not cleared on successful calls. Thus errno + should be tested only after an error has occurred.
+ + Each system call description attempts to list all possible + error numbers. The following is a complete list of the + errors and their names as given in <errno.h>. + + 0 Error 0 + Unused. + + 1 EPERM Not owner + Typically this error indicates an attempt to modify a + file in some way forbidden except to its owner or + super-user. It is also returned for attempts by ordi- + nary users to do things allowed only to the super-user. + + 2 ENOENT No such file or directory + This error occurs when a filename is specified and the + file should exist but doesn't, or when one of the + directories in a pathname does not exist. + + 3 ESRCH No such process + The process or process group whose number was given + does not exist, or any such process is already dead. + + 4 EINTR Interrupted system call + An asynchronous signal (such as interrupt or quit), + which the user has elected to catch, occurred during a + system call. If execution is resumed after processing + the signal, and the system call is not restarted, it + will appear as if the interrupted system call returned + this error condition. + + 5 EIO I/O error + Some physical I/O error occurred. This error may in + some cases occur on a call following the one to which + it actually applies. + + 6 ENXIO No such device or address + I/O on a special file refers to a subdevice which does + not exist, or beyond the limits of the device. It may + also occur when, for example, a tape drive is not on- + line or a disk pack is not loaded on a drive. + + 7 E2BIG Arg list too long + An argument list longer than 10240 bytes is presented + to execve. + + 8 ENOEXEC Exec format error + A request is made to execute a file which, although it + has the appropriate permissions, does not start with a + valid magic number (see a.out(5)). + + 9 EBADF Bad file number + Either a file descriptor refers to no open file, or a + read (respectively, write) request is made to a file + which is open only for writing (respectively, reading).
+ + 10 ECHILD No children + A wait was executed by a process that had no existing + or unwaited-for child processes. + + 11 EAGAIN No more processes + A fork failed because the system's process table is + full or the user is not allowed to create any more + processes. + + 12 ENOMEM Not enough memory + During an execve, brk, or sbrk, a program asks for more + address space or swap space than the system is able to + supply, or a process size limit would be exceeded. A + lack of swap space is normally a temporary condition; + however, a lack of address space is not a temporary + condition. The maximum size of the text, data, and + stack segments is a system parameter. Soft limits may + be increased to their corresponding hard limits. + + 13 EACCES Permission denied + An attempt was made to access a file in a way forbidden + by the protection system. + + 14 EFAULT Bad address + The system encountered a hardware fault in attempting + to access the arguments of a system call. + + 15 ENOTBLK Block device required + A file which is not a block device was mentioned where + a block device was required, for example, in mount. + + 16 EBUSY Device busy + An attempt to mount a file system that was already + mounted or an attempt was made to dismount a file sys- + tem on which there is an active file (open file, + current directory, mounted-on file, or active text seg- + ment). + + 17 EEXIST File exists + An existing file was mentioned in an inappropriate con- + text, for example, link. + + 18 EXDEV Cross-device link + A hard link to a file on another file system was + attempted. + + 19 ENODEV No such device + An attempt was made to apply an inappropriate system + call to a device (for example, an attempt to read a + write-only device) or an attempt was made to use a dev- + ice not configured by the system. + + 20 ENOTDIR Not a directory + A non-directory was specified where a directory is + required, for example, in a pathname or as an argument + to chdir. 
+ + 21 EISDIR Is a directory + An attempt was made to write on a directory. + + 22 EINVAL Invalid argument + A system call was made with an invalid argument; for + example, dismounting a non-mounted file system, men- + tioning an unknown signal in sigvec or kill, reading or + writing a file for which lseek has generated a negative + pointer, or some other argument inappropriate for the + call. Also set by math functions, see intro(3). + + 23 ENFILE File table overflow + The system's table of open files is full, and tem- + porarily no more opens can be accepted. + + 24 EMFILE Too many open files + A process tried to have more open files than the system + allows a process to have. The customary configuration + limit is 30 per process. + + 25 ENOTTY Inappropriate ioctl for device + The code used in an ioctl call is not supported by the + object that the file descriptor in the call refers to. + + 26 ETXTBSY Text file busy + An attempt to execute a pure-procedure program which is + currently open for writing. Also an attempt to open + for writing a pure-procedure program that is being exe- + cuted. + + 27 EFBIG File too large + The size of a file exceeded the maximum file size + (1,082,201,088 bytes). + + 28 ENOSPC No space left on device + A write to an ordinary file, the creation of a direc- + tory or symbolic link, or the creation of a directory + entry failed because no more disk blocks are available + on the file system, or the allocation of an inode for a + newly created file failed because no more inodes are + available on the file system. + + 29 ESPIPE Illegal seek + An lseek was issued to a socket or pipe. This error + may also be issued for other non-seekable devices. + + 30 EROFS Read-only file system + An attempt to modify a file or directory was made on a + file system mounted read-only. + + 31 EMLINK Too many links + An attempt to make more than 32767 hard links to a + file. 
+ + 32 EPIPE Broken pipe + An attempt was made to write on a pipe or socket for + which there is no process to read the data. This con- + dition normally generates a signal; the error is + returned if the signal is caught or ignored. + + 33 EDOM Math argument + The argument of a function in the math library (as + described in section 3M) is out of the domain of the + function. + + 34 ERANGE Result too large + The value of a function in the math library (as + described in section 3M) is unrepresentable within + machine precision. + + 35 EWOULDBLOCK Operation would block + An operation which would cause a process to block was + attempted on an object in non-blocking mode (see + ioctl(2)). + + 36 EINPROGRESS Operation now in progress + An operation which takes a long time to complete (such + as a connect(2)) was attempted on a non-blocking object + (see ioctl(2)). + + 37 EALREADY Operation already in progress + An operation was attempted on a non-blocking object + which already had an operation in progress. + + 38 ENOTSOCK Socket operation on non-socket + Self-explanatory. + + 39 EDESTADDRREQ Destination address required + A required address was omitted from an operation on a + socket. + + 40 EMSGSIZE Message too long + A message sent on a socket was larger than the internal + message buffer. + + 41 EPROTOTYPE Protocol wrong type for socket + A protocol was specified which does not support the + semantics of the socket type requested. For example, + you cannot use the ARPA Internet UDP protocol with type + SOCK_STREAM. + + 42 ENOPROTOOPT Option not supported by protocol + A bad option was specified in a getsockopt(2) or set- + sockopt(2) call. + + 43 EPROTONOSUPPORT Protocol not supported + The protocol has not been configured into the system or + no implementation for it exists. + + 44 ESOCKTNOSUPPORT Socket type not supported + The support for the socket type has not been configured + into the system or no implementation for it exists. 
+ + 45 EOPNOTSUPP Operation not supported on socket + For example, trying to accept a connection on a + datagram socket. + + 46 EPFNOSUPPORT Protocol family not supported + The protocol family has not been configured into the + system or no implementation for it exists. + + 47 EAFNOSUPPORT Address family not supported by protocol + family + An address incompatible with the requested protocol was + used. For example, you shouldn't necessarily expect to + be able to use PUP Internet addresses with ARPA Inter- + net protocols. + + 48 EADDRINUSE Address already in use + Only one usage of each address is normally permitted. + + 49 EADDRNOTAVAIL Can't assign requested address + Normally results from an attempt to create a socket + with an address not on this machine. + + 50 ENETDOWN Network is down + A socket operation encountered a dead network. + + 51 ENETUNREACH Network is unreachable + A socket operation was attempted to an unreachable net- + work. + + 52 ENETRESET Network dropped connection on reset + The host you were connected to crashed and rebooted. + + 53 ECONNABORTED Software caused connection abort + A connection abort was caused internal to your host + machine. + + 54 ECONNRESET Connection reset by peer + A connection was forcibly closed by a peer. This nor- + mally results from the peer executing a shutdown(2) + call. + + 55 ENOBUFS No buffer space available + An operation on a socket or pipe was not performed + because the system lacked sufficient buffer space. + + 56 EISCONN Socket is already connected + A connect request was made on an already connected + socket; or, a sendto or sendmsg request on a connected + socket specified a destination other than the connected + party. + + 57 ENOTCONN Socket is not connected + A request to send or receive data was disallowed + because the socket is not connected.
+ + 58 ESHUTDOWN Can't send after socket shutdown + A request to send data was disallowed because the + socket had already been shut down with a previous + shutdown(2) call. + + 59 unused + + 60 ETIMEDOUT Connection timed out + A connect request failed because the connected party + did not properly respond after a period of time. (The + timeout period is dependent on the communication proto- + col.) + + 61 ECONNREFUSED Connection refused + No connection could be made because the target machine + actively refused it. This usually results from trying + to connect to a service which is inactive on the + foreign host. + + 62 ELOOP Too many levels of symbolic links + A pathname lookup involved more than 8 symbolic links. + + 63 ENAMETOOLONG File name too long + A component of a pathname exceeded 255 characters, or + an entire pathname exceeded 1023 characters. + + 64 EHOSTDOWN Host is down + A socket operation failed because the destination host + was down. + + 65 EHOSTUNREACH Host is unreachable + A socket operation was attempted to an unreachable + host. + + 66 ENOTEMPTY Directory not empty + An attempt was made to remove a directory with entries + other than . and .. by performing a rmdir system call + or a rename system call with that directory specified + as the target directory. + + 67 unused + + 68 unused + + 69 EDQUOT Disc quota exceeded + A write to an ordinary file, the creation of a direc- + tory or symbolic link, or the creation of a directory + entry failed because the user's quota of disk blocks + was exhausted, or the allocation of an inode for a + newly created file failed because the user's quota of + inodes was exhausted. + + 70 ESTALE Stale NFS file handle + A client referenced an open file, when the file has + been deleted. + + 71 EREMOTE Too many levels of remote in path + An attempt was made to remotely mount a file system + into a path which already has a remotely mounted com- + ponent.
+ + 72 unused + + 73 unused + + 74 unused + + 75 ENOMSG No message of desired type + An attempt was made to receive a message of a type that + does not exist on the specified message queue; see + msgop(2). + + 76 unused + + 77 EIDRM Identifier removed + This error is returned to processes that resume execu- + tion due to the removal of an identifier from the IPC + system's name space (see msgctl(2), semctl(2), and + shmctl(2)). + +DEFINITIONS + Descriptor + An integer assigned by the system when a file is referenced + by open(2V), dup(2), or pipe(2) or a socket is referenced by + socket(2) or socketpair(2) which uniquely identifies an + access path to that file or socket from a given process or + any of its children. + + Directory + A directory is a special type of file which contains entries + which are references to other files. Directory entries are + called links. By convention, a directory contains at least + two links, . and .., referred to as dot and dot-dot respec- + tively. Dot refers to the directory itself and dot-dot + refers to its parent directory. + + Effective User ID, Effective Group ID, and Access Groups + Access to system resources is governed by three values: the + effective user ID, the effective group ID, and the group + access list. + + The effective user ID and effective group ID are initially + the process's real user ID and real group ID respectively. + Either may be modified through execution of a set-user-ID or + set-group-ID file (possibly by one of its ancestors) (see + execve(2)). + + The group access list is an additional set of group ID's + used only in determining resource accessibility. Access + checks are performed as described below in ``File Access + Permissions''. + + File Access Permissions + Every file in the file system has a set of access permis- + sions. These permissions are used in determining whether a + process may perform a requested operation on the file (such + as opening a file for writing). 
Access permissions are + established at the time a file is created. They may be + changed at some later time through the chmod(2) call. + + File access is broken down according to whether a file may + be: read, written, or executed. Directory files use the + execute permission to control if the directory may be + searched. + + File access permissions are interpreted by the system as + they apply to three different classes of users: the owner of + the file, those users in the file's group, and anyone else. + Every file has an independent set of access permissions for + each of these classes. When an access check is made, the + system decides if permission should be granted by checking + the access information applicable to the caller. + + Read, write, and execute/search permissions on a file are + granted to a process if: + + The process's effective user ID is that of the super- + user. + + The process's effective user ID matches the user ID of + the owner of the file and the owner permissions allow + the access. + + The process's effective user ID does not match the user + ID of the owner of the file, and either the process's + effective group ID matches the group ID of the file, or + the group ID of the file is in the process's group + access list, and the group permissions allow the + access. + + Neither the effective user ID nor effective group ID + and group access list of the process match the + corresponding user ID and group ID of the file, but the + permissions for ``other users'' allow access. + + Otherwise, permission is denied. + + File Name + Names consisting of up to 255 characters may be used to name + an ordinary file, special file, or directory. + + These characters may be selected from the set of all ASCII + characters excluding \0 (null) and the ASCII code for / + (slash). (The parity bit, bit 8, must be 0.)
+ + Note that it is generally unwise to use *, ?, [, or ] as + part of filenames because of the special meaning attached to + these characters by the shell. See sh(1). Although permit- + ted, it is advisable to avoid the use of unprintable charac- + ters in filenames. + + Message Queue Identifier + A message queue identifier (msqid) is a unique positive + integer created by a msgget(2) system call. Each msqid has + a message queue and a data structure associated with it. + The data structure is referred to as msqid_ds and contains + the following members: + + struct ipc_perm msg_perm; /* operation permission struct */ + ushort msg_qnum; /* number of msgs on q */ + ushort msg_qbytes; /* max number of bytes on q */ + ushort msg_lspid; /* pid of last msgsnd operation */ + ushort msg_lrpid; /* pid of last msgrcv operation */ + time_t msg_stime; /* last msgsnd time */ + time_t msg_rtime; /* last msgrcv time */ + time_t msg_ctime; /* last change time */ + /* Times measured in secs since */ + /* 00:00:00 GMT, Jan. 1, 1970 */ + + msg_perm is an ipc_perm structure that specifies the message + operation permission (see below). This structure includes + the following members: + + ushort cuid; /* creator user id */ + ushort cgid; /* creator group id */ + ushort uid; /* user id */ + ushort gid; /* group id */ + ushort mode; /* r/w permission */ + + msg_qnum is the number of messages currently on the queue. + msg_qbytes is the maximum number of bytes allowed on the + queue. msg_lspid is the process id of the last process that + performed a msgsnd operation. msg_lrpid is the process id + of the last process that performed a msgrcv operation. + msg_stime is the time of the last msgsnd operation, + msg_rtime is the time of the last msgrcv operation, and + msg_ctime is the time of the last msgctl(2) operation that + changed a member of the above structure. 
+ + Message Operation Permissions + In the msgop(2) and msgctl(2) system call descriptions, the + permission required for an operation is given as "{token}", + where "token" is the type of permission needed interpreted + as follows: + + 00400 Read by user + 00200 Write by user + 00060 Read, Write by group + 00006 Read, Write by others + + Read and Write permissions on a msqid are granted to a pro- + cess if one or more of the following are true: + + The effective user ID of the process is super-user. + + The effective user ID of the process matches + msg_perm.[c]uid in the data structure associated with + msqid and the appropriate bit of the ``user'' portion + (0600) of msg_perm.mode is set. + + The effective user ID of the process does not match + msg_perm.[c]uid and the effective group ID of the pro- + cess matches msg_perm.[c]gid and the appropriate bit of + the ``group'' portion (060) of msg_perm.mode is set. + + The effective user ID of the process does not match + msg_perm.[c]uid and the effective group ID of the pro- + cess does not match msg_perm.[c]gid and the appropriate + bit of the ``other'' portion (06) of msg_perm.mode is + set. + + Otherwise, the corresponding permissions are denied. + + Parent Process ID + A new process is created by a currently active process (see + fork(2)). The parent process ID of a process is the process + ID of its creator. + + Path Name and Path Prefix + A pathname is a null-terminated character string starting + with an optional slash (/), followed by zero or more direc- + tory names separated by slashes, optionally followed by a + filename. The total length of a pathname must be less than + {MAXPATHLEN} (1024) characters. 
+ + More precisely, a pathname is a null-terminated character + string constructed as follows: + + <path-name>::=<file-name>|<path-prefix><file-name>|/ + <path-prefix>::=<rtprefix>|/<rtprefix> + <rtprefix>::=<dirname>/|<rtprefix><dirname>/ + + where <file-name> is a string of 1 to 255 characters other + than the ASCII slash and null, and <dirname> is a string of + 1 to 255 characters (other than the ASCII slash and null) + that names a directory. + + If a pathname begins with a slash, the search begins at the + root directory. Otherwise, the search begins at the current + working directory. + + A slash, by itself, names the root directory. A dot (.) + names the current working directory. + + A null pathname also refers to the current directory. How- + ever, this is not true of all UNIX systems. (On such sys- + tems, accidental use of a null pathname in routines that + don't check for it may corrupt the current working direc- + tory.) For portable code, specify the current directory + explicitly using ".", rather than "". + + Process Group ID + Each active process is a member of a process group that is + identified by a positive integer called the process group + ID. This is the process ID of the group leader. This + grouping permits the signaling of related processes (see + killpg(2)) and the job control mechanisms of csh(1). + + Process ID + Each active process in the system is uniquely identified by + a positive integer called a process ID. The range of this + ID is from 0 to 30000. + + Real User ID and Real Group ID + Each user on the system is identified by a positive integer + termed the real user ID. + + Each user is also a member of one or more groups. One of + these groups is distinguished from others and used in imple- + menting accounting facilities. The positive integer + corresponding to this distinguished group is termed the real + group ID. + + All processes have a real user ID and real group ID. These + are initialized from the equivalent attributes of the pro- + cess which created it.
+ + Root Directory and Current Working Directory + Each process has associated with it a concept of a root + directory and a current working directory for the purpose of + resolving path name searches. A process's root directory + need not be the root directory of the root file system. + + Semaphore Identifier + A semaphore identifier (semid) is a unique positive integer + created by a semget(2) system call. Each semid has a set of + semaphores and a data structure associated with it. The + data structure is referred to as semid_ds and contains the + following members: + + struct ipc_perm sem_perm; /* operation permission struct */ + ushort sem_nsems; /* number of sems in set */ + time_t sem_otime; /* last operation time */ + time_t sem_ctime; /* last change time */ + /* Times measured in secs since */ + /* 00:00:00 GMT, Jan. 1, 1970 */ + + sem_perm is an ipc_perm structure that specifies the sema- + phore operation permission (see below). This structure + includes the following members: + + ushort cuid; /* creator user id */ + ushort cgid; /* creator group id */ + ushort uid; /* user id */ + ushort gid; /* group id */ + ushort mode; /* r/a permission */ + + The value of sem_nsems is equal to the number of semaphores + in the set. Each semaphore in the set is referenced by a + positive integer referred to as a sem_num. sem_num values + run sequentially from 0 to the value of sem_nsems minus 1. + sem_otime is the time of the last semop(2) operation, and + sem_ctime is the time of the last semctl(2) operation that + changed a member of the above structure. + + A semaphore is a data structure that contains the following + members: + + ushort semval; /* semaphore value */ + short sempid; /* pid of last operation */ + ushort semncnt; /* # awaiting semval > cval */ + ushort semzcnt; /* # awaiting semval = 0 */ + + semval is a non-negative integer. sempid is equal to the + process ID of the last process that performed a semaphore + operation on this semaphore. 
semncnt is a count of the + number of processes that are currently suspended awaiting + this semaphore's semval to become greater than its current + value. semzcnt is a count of the number of processes that + are currently suspended awaiting this semaphore's semval to + become zero. + + Semaphore Operation Permissions + In the semop(2) and semctl(2) system call descriptions, the + permission required for an operation is given as "{token}", + where "token" is the type of permission needed interpreted + as follows: + + 00400 Read by user + 00200 Alter by user + 00060 Read, Alter by group + 00006 Read, Alter by others + + Read and Alter permissions on a semid are granted to a pro- + cess if one or more of the following are true: + + The effective user ID of the process is super-user. + + The effective user ID of the process matches + sem_perm.[c]uid in the data structure associated with + semid and the appropriate bit of the ``user'' portion + (0600) of sem_perm.mode is set. + + The effective user ID of the process does not match + sem_perm.[c]uid and the effective group ID of the pro- + cess matches sem_perm.[c]gid and the appropriate bit of + the ``group'' portion (060) of sem_perm.mode is set. + + The effective user ID of the process does not match + sem_perm.[c]uid and the effective group ID of the pro- + cess does not match sem_perm.[c]gid and the appropriate + bit of the ``other'' portion (06) of sem_perm.mode is + set. + + Otherwise, the corresponding permissions are denied. + + Shared Memory Identifier + A shared memory identifier (shmid) is a unique positive + integer created by a shmget(2) system call. Each shmid has + a segment of memory (referred to as a shared memory segment) + and a data structure associated with it. 
The data structure + is referred to as shmid_ds and contains the following + members: + + struct ipc_perm shm_perm; /* operation permission struct */ + int shm_segsz; /* size of segment */ + ushort shm_cpid; /* creator pid */ + ushort shm_lpid; /* pid of last operation */ + short shm_nattch; /* number of current attaches */ + time_t shm_atime; /* last attach time */ + time_t shm_dtime; /* last detach time */ + time_t shm_ctime; /* last change time */ + /* Times measured in secs since */ + /* 00:00:00 GMT, Jan. 1, 1970 */ + + shm_perm is an ipc_perm structure that specifies the shared + memory operation permission (see below). This structure + includes the following members: + + ushort cuid; /* creator user id */ + ushort cgid; /* creator group id */ + ushort uid; /* user id */ + ushort gid; /* group id */ + ushort mode; /* r/w permission */ + + shm_segsz specifies the size of the shared memory segment. + shm_cpid is the process id of the process that created the + shared memory identifier. shm_lpid is the process id of the + last process that performed a shmop(2) operation. + shm_nattch is the number of processes that currently have + this segment attached. shm_atime is the time of the last + shmat operation, shm_dtime is the time of the last shmdt + operation, and shm_ctime is the time of the last shmctl(2) + operation that changed one of the members of the above + structure. + + Shared Memory Operation Permissions + In the shmop(2) and shmctl(2) system call descriptions, the + permission required for an operation is given as "{token}", + where "token" is the type of permission needed interpreted + as follows: + + 00400 Read by user + 00200 Write by user + 00060 Read, Write by group + 00006 Read, Write by others + + Read and Write permissions on a shmid are granted to a pro- + cess if one or more of the following are true: + + The effective user ID of the process is super-user. 
+ + The effective user ID of the process matches + shm_perm.[c]uid in the data structure associated with + shmid and the appropriate bit of the ``user'' portion + (0600) of shm_perm.mode is set. + + The effective user ID of the process does not match + shm_perm.[c]uid and the effective group ID of the pro- + cess matches shm_perm.[c]gid and the appropriate bit of + the ``group'' portion (060) of shm_perm.mode is set. + + The effective user ID of the process does not match + shm_perm.[c]uid and the effective group ID of the pro- + cess does not match shm_perm.[c]gid and the appropriate + bit of the ``other'' portion (06) of shm_perm.mode is + set. + + Otherwise, the corresponding permissions are denied. + + Sockets and Address Families + A socket is an endpoint for communication between processes. + Each socket has queues for sending and receiving data. + + Sockets are typed according to their communications proper- + ties. These properties include whether messages sent and + received at a socket require the name of the partner, + whether communication is reliable, the format used in naming + message recipients, etc. + + Each instance of the system supports some collection of + socket types; consult socket(2) for more information about + the types available and their properties. + + Each instance of the system supports some number of sets of + communications protocols. Each protocol set supports + addresses of a certain format. An Address Family is the set + of addresses for a specific group of protocols. Each socket + has an address chosen from the address family in which the + socket was created. + + Special Processes + The processes with a process ID's of 0, 1, and 2 are spe- + cial. Process 0 is the scheduler. Process 1 is the ini- + tialization process init, and is the ancestor of every other + process in the system. It is used to control the process + structure. Process 2 is the paging daemon. 
+ + Super-user + A process is recognized as a super-user process and is + granted special privileges if its effective user ID is 0. + + Tty Group ID + Each active process can be a member of a terminal group that + is identified by a positive integer called the tty group ID. + This grouping is used to arbitrate between multiple jobs + contending for the same terminal (see csh(1), and tty(4)). + +SEE ALSO + intro(3), perror(3) + + +LIST OF SYSTEM CALLS + Name Appears on Page Description + + _exit exit(2) terminate a process + accept accept(2) accept a connection on a socket + access access(2) determine accessibility of file + acct acct(2) turn accounting on or off + adjtime adjtime(2) correct the time to allow synchronization of the system clock + async_daemon nfssvc(2) NFS daemons + bind bind(2) bind a name to a socket + brk brk(2) change data segment size + chdir chdir(2) change current working directory + chmod chmod(2) change mode of file + chown chown(2) change owner and group of a file + chroot chroot(2) change root directory + close close(2) delete a descriptor + connect connect(2) initiate a connection on a socket + creat creat(2) create a new file + dup dup(2) duplicate a descriptor + dup2 dup(2) duplicate a descriptor + execve execve(2) execute a file + fchmod chmod(2) change mode of file + fchown chown(2) change owner and group of a file + fcntl fcntl(2) file control + flock flock(2) apply or remove an advisory lock on an open file + fork fork(2) create a new process + fstat stat(2) get file status + fsync fsync(2) synchronize a file's in-core state with that on disk + ftruncate truncate(2) truncate a file to a specified length + getdirentries getdirentries(2) gets directory entries in a filesystem independent format + getdomainname getdomainname(2) get name of current domain + getdtablesize getdtablesize(2) get descriptor table size + getegid getgid(2) get group identity + geteuid getuid(2) get effective user identity + getgid getgid(2) get group identity
+ getgroups getgroups(2) get group access list + gethostid gethostid(2) get unique identifier of current host + gethostname gethostname(2) get name of current host + getitimer getitimer(2) get value of interval timer + getpagesize getpagesize(2) get system page size + getpeername getpeername(2) get name of connected peer + getpgrp setpgrp(2V) set and/or return the process group of a process + getpid getpid(2) get process identification + getppid getpid(2) get parent process identification + getpriority getpriority(2) get program scheduling priority + getrlimit getrlimit(2) control maximum system resource consumption + getrusage getrusage(2) get information about resource utilization + getsockname getsockname(2) get socket name + getsockopt getsockopt(2) get options on sockets + gettimeofday gettimeofday(2) get date and time + getuid getuid(2) get user identity + ioctl ioctl(2) control device + kill kill(2) send signal to a process + killpg killpg(2) send signal to a process group + link link(2) make a hard link to a file + listen listen(2) listen for connections on a socket + lseek lseek(2) move read/write pointer + lstat stat(2) get file status + mkdir mkdir(2) make a directory file + mknod mknod(2) make a special file + mmap mmap(2) map or unmap pages of memory + mount mount(2) mount file system + msgctl msgctl(2) message control operations + msgget msgget(2) get message queue + msgop msgop(2) message operations + msgrcv msgop(2) message operations + msgsnd msgop(2) message operations + munmap munmap(2) map or unmap pages of memory + nfssvc nfssvc(2) NFS daemons + open open(2V) open or create a file for reading or writing + pipe pipe(2) create an interprocess communication channel + profil profil(2) execution time profile + ptrace ptrace(2) process trace + quotactl quotactl(2) manipulate disk quotas + read read(2V) read input + readlink readlink(2) read value of a symbolic link + readv read(2V) read input + reboot reboot(2) reboot system or halt processor +
recv recv(2) receive a message from a socket + recvfrom recv(2) receive a message from a socket + recvmsg recv(2) receive a message from a socket + rename rename(2) change the name of a file + rmdir rmdir(2) remove a directory file + sbrk brk(2) change data segment size + select select(2) synchronous I/O multiplexing + semctl semctl(2) semaphore control operations + semget semget(2) get set of semaphores + semop semop(2) semaphore operations + send send(2) send a message from a socket + sendmsg send(2) send a message from a socket + sendto send(2) send a message from a socket + setdomainname getdomainname(2) set name of current domain + setgroups getgroups(2) set group access list + sethostname gethostname(2) set name of current host + setitimer getitimer(2) set value of interval timer + setpgrp setpgrp(2V) set and/or return the process group of a process + setpriority getpriority(2) set program scheduling priority + setregid setregid(2) set real and effective group IDs + setreuid setreuid(2) set real and effective user IDs + setrlimit getrlimit(2) control maximum system resource consumption + setsockopt getsockopt(2) set options on sockets + settimeofday gettimeofday(2) set date and time + shmat shmop(2) shared memory operations + shmctl shmctl(2) shared memory control operations + shmdt shmop(2) shared memory operations + shmget shmget(2) get shared memory segment + shmop shmop(2) shared memory operations + shutdown shutdown(2) shut down part of a full-duplex connection + sigblock sigblock(2) block signals + sigpause sigpause(2) atomically release blocked signals and wait for interrupt + sigsetmask sigsetmask(2) set current signal mask + sigstack sigstack(2) set and/or get signal stack context + sigvec sigvec(2) software signal facilities + socket socket(2) create an endpoint for communication + socketpair socketpair(2) create a pair of connected sockets + stat stat(2) get file status + statfs statfs(2) get file system statistics + swapon swapon(2) add a swap 
device for interleaved paging/swapping + symlink symlink(2) make symbolic link to a file + sync sync(2) update super-block + syscall syscall(2) indirect system call + tell lseek(2) locate read/write pointer + truncate truncate(2) truncate a file to a specified length + umask umask(2) set file creation mode mask + uname uname(2V) get name of current UNIX system + unlink unlink(2) remove directory entry + unmount umount(2) remove a file system + utimes utimes(2) set file times + vadvise vadvise(2) give advice to paging system + vfork vfork(2) spawn new process in a virtual memory efficient way + vhangup vhangup(2) virtually ``hangup'' the current control terminal + wait wait(2) wait for process to terminate or stop + wait3 wait(2) wait for process to terminate or stop + write write(2V) write output + writev write(2V) write output diff --git a/doc/kcc/calls.open b/doc/kcc/calls.open new file mode 100755 index 000000000..2b68f3e63 --- /dev/null +++ b/doc/kcc/calls.open @@ -0,0 +1,330 @@ + + + +OPEN(2V) SYSTEM CALLS OPEN(2V) + + + +NAME + open - open or create a file for reading or writing + +SYNOPSIS + #include <sys/file.h> + + int open(path, flags [ , mode ] ) + char *path; + int flags, mode; + +DESCRIPTION + path points to the pathname of a file. open opens the named + file for reading and/or writing, as specified by the flags + argument, and returns a descriptor for that file. The flags + argument may indicate the file is to be created if it does + not already exist (by specifying the O_CREAT flag), in which + case the file is created with mode mode as described in + chmod(2) and modified by the process' umask value (see + umask(2)). If the path is a null string, the kernel maps + this null pathname to ., the current directory. flags + values are constructed by ORing flags from the following + list (only one of the first three flags below may be used): + + O_RDONLY Open for reading only. + + O_WRONLY Open for writing only. + + O_RDWR Open for reading and writing.
+ + O_NDELAY When opening a FIFO with O_RDONLY or O_WRONLY set: + + If O_NDELAY is set: + + An open for reading-only will return without + delay. An open for writing-only will return + an error if no process currently has the file + open for reading. + + If O_NDELAY is clear: + + An open for reading-only will block until a + process opens the file for writing. An open + for writing-only will block until a process + opens the file for reading. + + When opening a file associated with a communication + line: + + If O_NDELAY is set: + + The open will return without waiting for car- + rier. The first time the process attempts to + + + +Sun Release 3.2 Last change: 16 July 1986 1 + + + + + + +OPEN(2V) SYSTEM CALLS OPEN(2V) + + + + perform I/O on the open file it will block + (not currently implemented). + + If O_NDELAY is clear: + + The open will block until carrier is present. + + O_APPEND If set, the file pointer will be set to the end of + the file prior to each write. + + O_CREAT If the file exists, this flag has no effect. Oth- + erwise, the owner ID of the file is set to the + effective user ID of the process, the group ID of + the file is set to the group ID of the directory in + which the file is created, and the low-order 12 + bits of the file mode are set to the value of mode + modified as follows (see creat(2)): + + All bits set in the file mode creation mask of + the process are cleared. See umask(2). + + The ``save text image after execution'' bit of + the mode is cleared. See chmod(2). + + O_TRUNC If the file exists, its length is truncated to 0 + and the mode and owner are unchanged. + + O_EXCL If O_EXCL and O_CREAT are set, open will fail if + the file exists. This can be used to implement a + simple exclusive access locking mechanism. If + O_EXCL is set and the last component of the path- + name is a symbolic link, the open will fail even if + the symbolic link points to a non-existent name. 
+ + The file pointer used to mark the current position within + the file is set to the beginning of the file. + + The new descriptor is set to remain open across execve sys- + tem calls; see close(2) and fcntl(2). + + There is a system enforced limit on the number of open file + descriptors per process, whose value is returned by the + getdtablesize(2) call. + +SYSTEM V DESCRIPTION + If the O_NDELAY flag is set on an open, that flag is set for + that file descriptor (see fcntl) and may affect subsequent + reads and writes. See read(2V) and write(2V). + +RETURN VALUE + The value -1 is returned if an error occurs, and external + variable errno is set to indicate the cause of the error. + + + +Sun Release 3.2 Last change: 16 July 1986 2 + + + + + + +OPEN(2V) SYSTEM CALLS OPEN(2V) + + + + Otherwise a non-negative numbered file descriptor for the + new open file is returned. + +ERRORS + Open fails if: + + ENOTDIR A component of the path prefix of path is not + a directory. + + EINVAL path contains a character with the high-order + bit set. + + ENAMETOOLONG The length of a component of path exceeds 255 + characters, or the length of path exceeds + 1023 characters. + + ENOENT O_CREAT is not set and the named file does + not exist. + + ENOENT A component of the path prefix of path does + not exist. + + ELOOP Too many symbolic links were encountered in + translating path. + + EACCES Search permission is denied for a component + of the path prefix of path. + + EACCES The required permissions (for reading and/or + writing) are denied for the file named by + path. + + EACCES The file referred to by path does not exist, + O_CREAT is specified, and the directory in + which it is to be created does not permit + writing. + + EISDIR The named file is a directory, and the argu- + ments specify it is to be opened for writing. + + ENXIO O_NDELAY is set, the named file is a FIFO, + O_WRONLY is set, and no process has the file + open for reading. 
+ + EMFILE The system limit for open file descriptors + per process has already been reached. + + ENFILE The system file table is full. + + ENOSPC The file does not exist, O_CREAT is speci- + fied, and the directory in which the entry + for the new file is being placed cannot be + + + +Sun Release 3.2 Last change: 16 July 1986 3 + + + + + + +OPEN(2V) SYSTEM CALLS OPEN(2V) + + + + extended because there is no space left on + the file system containing the directory. + + ENOSPC The file does not exist, O_CREAT is speci- + fied, and there are no free inodes on the + file system on which the file is being + created. + + EDQUOT The file does not exist, O_CREAT is speci- + fied, and the directory in which the entry + for the new file is being placed cannot be + extended because the user's quota of disk + blocks on the file system containing the + directory has been exhausted. + + EDQUOT The file does not exist, O_CREAT is speci- + fied, and the user's quota of inodes on the + file system on which the file is being + created has been exhausted. + + EROFS The named file does not exist, O_CREAT is + specified, and the file system on which it is + to be created is a read-only file system. + + EROFS The named file resides on a read-only file + system, and the file is to be opened for + writing. + + ENXIO The file is a character special or block spe- + cial file, and the associated device does not + exist. + + EINTR A signal was caught during the open system + call. + + ETXTBSY The file is a pure procedure (shared text) + file that is being executed and the open call + requests write access. + + EIO An I/O error occurred while reading from or + writing to the file system. + + EFAULT path points outside the process's allocated + address space. + + EEXIST O_EXCL and O_CREAT were both specified and + the file exists. + + EOPNOTSUPP An attempt was made to open a socket (not + currently implemented). 
+ + + + + +Sun Release 3.2 Last change: 16 July 1986 4 + + + + + + +OPEN(2V) SYSTEM CALLS OPEN(2V) + + + +SEE ALSO + chmod(2), close(2), dup(2), fcntl(2), lseek(2), read(2V), + write(2V), umask(2) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Sun Release 3.2 Last change: 16 July 1986 5 + + + diff --git a/doc/kcc/calls.read b/doc/kcc/calls.read new file mode 100755 index 000000000..96eac981a --- /dev/null +++ b/doc/kcc/calls.read @@ -0,0 +1,198 @@ + + + +READ(2V) SYSTEM CALLS READ(2V) + + + +NAME + read, readv - read input + +SYNOPSIS + cc = read(d, buf, nbytes) + int cc, d; + char *buf; + int nbytes; + + #include <sys/types.h> + #include <sys/uio.h> + + cc = readv(d, iov, iovcnt) + int cc, d; + struct iovec *iov; + int iovcnt; + +DESCRIPTION + read attempts to read nbytes of data from the object refer- + enced by the descriptor d into the buffer pointed to by buf. + readv performs the same action, but scatters the input data + into the iovcnt buffers specified by the members of the iov + array: iov[0], iov[1], ..., iov[iovcnt-1]. + + For readv, the iovec structure is defined as + + struct iovec { + caddr_t iov_base; + int iov_len; + }; + + Each iovec entry specifies the base address and length of an + area in memory where data should be placed. readv will + always fill an area completely before proceeding to the + next. + + On objects capable of seeking, the read starts at a position + given by the pointer associated with d (see lseek(2)). Upon + return from read, the pointer is incremented by the number + of bytes actually read. + + Objects that are not capable of seeking always read from the + current position. The value of the pointer associated with + such an object is undefined. + + Upon successful completion, read and readv return the number + of bytes actually read and placed in the buffer.
The system + guarantees to read the number of bytes requested if the + descriptor references a normal file which has that many + bytes left before the end-of-file, but in no other case. + + + + + +Sun Release 3.4 Last change: 25 July 1986 1 + + + + + + +READ(2V) SYSTEM CALLS READ(2V) + + + + If the returned value is 0, then end-of-file has been + reached. + + When attempting to read from a descriptor associated with an + empty pipe, socket, or FIFO: + + If O_NDELAY is set, the read will return a -1 and errno + will be set to EWOULDBLOCK. + + If O_NDELAY is clear, the read will block until data is + written to the pipe or the file is no longer open for + writing. + + When attempting to read from a descriptor associated with a + tty that has no data currently available: + + If O_NDELAY is set, the read will return a -1 and errno + will be set to EWOULDBLOCK. + + If O_NDELAY is clear, the read will block until data + becomes available. + + If O_NDELAY is set, and less data are available than are + requested by the read or readv, only the data that are + available are returned, and the count indicates how many + bytes of data were actually read. + +SYSTEM V DESCRIPTION + When an attempt is made to read a descriptor which is in + no-delay mode, and there is no data currently available, + read will return a 0 instead of returning a -1 and setting + errno to EWOULDBLOCK. Note that this is indistinguishable + from end-of-file. + +RETURN VALUE + If successful, the number of bytes actually read is + returned. Otherwise, a -1 is returned and the global vari- + able errno is set to indicate the error. + +ERRORS + read and readv will fail if one or more of the following are + true: + + EBADF d is not a valid file descriptor open for + reading. + + EISDIR d refers to a directory which is on a file + system mounted using the NFS. + + EFAULT buf points outside the allocated address + space. 
+ + + + +Sun Release 3.4 Last change: 25 July 1986 2 + + + + + + +READ(2V) SYSTEM CALLS READ(2V) + + + + EIO An I/O error occurred while reading from or + writing to the file system. + + EINTR A read from a slow device was interrupted + before any data arrived by the delivery of a + signal. + + EINVAL The pointer associated with d was negative. + + EWOULDBLOCK The file was marked for non-blocking I/O, and + no data were ready to be read. In addition, + readv may return one of the following errors: + + EINVAL Iovcnt was less than or equal to 0, or + greater than 16. + + EINVAL One of the iov_len values in the iov array + was negative. + + EINVAL The sum of the iov_len values in the iov + array overflowed a 32-bit integer. + + EFAULT Part of iov points outside the process's + allocated address space. + +SEE ALSO + dup(2), fcntl(2), open(2), pipe(2), select(2), socket(2), + socketpair(2) + + + + + + + + + + + + + + + + + + + + + + + + + + + +Sun Release 3.4 Last change: 25 July 1986 3 + + + diff --git a/doc/kcc/calls.stat b/doc/kcc/calls.stat new file mode 100755 index 000000000..f2a98f9b9 --- /dev/null +++ b/doc/kcc/calls.stat @@ -0,0 +1,198 @@ + + + +STAT(2) SYSTEM CALLS STAT(2) + + + +NAME + stat, lstat, fstat - get file status + +SYNOPSIS + #include + #include + + stat(path, buf) + char *path; + struct stat *buf; + + lstat(path, buf) + char *path; + struct stat *buf; + + fstat(fd, buf) + int fd; + struct stat *buf; + +DESCRIPTION + _s_t_a_t obtains information about the file named by _p_a_t_h. + Read, write or execute permission of the named file is not + required, but all directories listed in the path name lead- + ing to the file must be searchable. + + _l_s_t_a_t is like _s_t_a_t except in the case where the named file + is a symbolic link, in which case _l_s_t_a_t returns information + about the link, while _s_t_a_t returns information about the + file the link references. 
+ + _f_s_t_a_t obtains the same information about an open file refer- + enced by the argument descriptor, such as would be obtained + by an _o_p_e_n call. + + _b_u_f is a pointer to a _s_t_a_t structure into which information + is placed concerning the file. The contents of the struc- + ture pointed to by _b_u_f include the following members: + + dev_t st_dev; /* device inode resides on */ + ino_t st_ino; /* this inode's number */ + u_short st_mode;/* protection */ + short st_nlink;/* number of hard links to the file */ + short st_uid; /* user ID of owner */ + short st_gid; /* group ID of owner */ + dev_t st_rdev;/* the device type, for inode that is device */ + off_t st_size;/* total size of file, in bytes */ + time_t st_atime;/* file last access time */ + time_t st_mtime;/* file last modify time */ + time_t st_ctime;/* file last status change time */ + long st_blksize;/* optimal blocksize for file system i/o ops */ + long st_blocks;/* actual number of blocks allocated */ + + + + +Sun Release 3.2 Last change: 16 July 1986 1 + + + + + + +STAT(2) SYSTEM CALLS STAT(2) + + + + st_atime Time when file data was last read or modified. + Changed by the following system calls: _m_k_n_o_d(2), + _u_t_i_m_e_s(2), _r_e_a_d(2V), _w_r_i_t_e(2V), and _t_r_u_n_c_a_t_e(2). + For reasons of efficiency, st_atime is not set + when a directory is searched, although this + would be more logical. + + st_mtime Time when data was last modified. It is not set + by changes of owner, group, link count, or mode. + Changed by the following system calls: _m_k_n_o_d(2), + _u_t_i_m_e_s(2), _w_r_i_t_e(2V). + + st_ctime Time when file status was last changed. It is + set both by writing and changing the i- + node. Changed by the following system calls: + _c_h_m_o_d(2) _c_h_o_w_n(2), _l_i_n_k(2), _m_k_n_o_d(2), _r_e_n_a_m_e(2), + _u_n_l_i_n_k(2), _u_t_i_m_e_s(2), _w_r_i_t_e(2V), _t_r_u_n_c_a_t_e(2).
+ + The status information word _s_t__m_o_d_e has bits: + #define S_IFMT 0170000/* type of file */ + #define S_IFIFO 0010000/* fifo special */ + #define S_IFCHR 0020000/* character special */ + #define S_IFDIR 0040000/* directory */ + #define S_IFBLK 0060000/* block special */ + #define S_IFREG 0100000/* regular file */ + #define S_IFLNK 0120000/* symbolic link */ + #define S_IFSOCK 0140000/* socket */ + #define S_ISUID 0004000/* set user id on execution */ + #define S_ISGID 0002000/* set group id on execution */ + #define S_ISVTX 0001000/* save swapped text even after use */ + #define S_IREAD 0000400/* read permission, owner */ + #define S_IWRITE 0000200/* write permission, owner */ + #define S_IEXEC 0000100/* execute/search permission, owner */ + + The mode bits 0000070 and 0000007 encode group and others + permissions (see _c_h_m_o_d(2)). + +RETURN VALUE + Upon successful completion a value of 0 is returned. Other- + wise, a value of -1 is returned and _e_r_r_n_o is set to indicate + the error. + +ERRORS + _s_t_a_t and _l_s_t_a_t will fail if one or more of the following are + true: + + ENOTDIR A component of the path prefix of _p_a_t_h is not + a directory. + + EINVAL _p_a_t_h contains a character with the high-order + bit set. + + + + +Sun Release 3.2 Last change: 16 July 1986 2 + + + + + + +STAT(2) SYSTEM CALLS STAT(2) + + + + ENAMETOOLONG The length of a component of _p_a_t_h exceeds 255 + characters, or the length of _p_a_t_h exceeds + 1023 characters. + + ENOENT The file referred to by _p_a_t_h does not exist. + + EACCES Search permission is denied for a component + of the path prefix of _p_a_t_h. + + ELOOP Too many symbolic links were encountered in + translating _p_a_t_h. + + EFAULT _b_u_f or _p_a_t_h points to an invalid address. + + EIO An I/O error occurred while reading from or + writing to the file system. + + _f_s_t_a_t will fail if one or both of the following are true: + + EBADF _f_d is not a valid open file descriptor. 
+ + EFAULT _b_u_f points to an invalid address. + + EIO An I/O error occurred while reading from or + writing to the file system. + +CAVEAT + The fields in the stat structure currently marked _s_t__s_p_a_r_e_1, + _s_t__s_p_a_r_e_2, and _s_t__s_p_a_r_e_3 are present in preparation for + inode time stamps expanding to 64 bits. This, however, can + break certain programs which depend on the time stamps being + contiguous (in calls to _u_t_i_m_e_s(2)). + +SEE ALSO + chmod(2), chown(2), readlink(2), utimes(2) + + + + + + + + + + + + + + + + + + + + +Sun Release 3.2 Last change: 16 July 1986 3 + + + diff --git a/doc/kcc/calls.write b/doc/kcc/calls.write new file mode 100755 index 000000000..e9823f306 --- /dev/null +++ b/doc/kcc/calls.write @@ -0,0 +1,198 @@ + + + +WRITE(2V) SYSTEM CALLS WRITE(2V) + + + +NAME + write, writev - write output + +SYNOPSIS + cc = write(d, buf, nbytes) + int cc, d; + char *buf; + int nbytes; + + #include + #include + + cc = writev(d, iov, iovcnt) + int cc, d; + struct iovec *iov; + int iovcnt; + +DESCRIPTION + _w_r_i_t_e attempts to write _n_b_y_t_e_s of data to the object refer- + enced by the descriptor _d from the buffer pointed to by _b_u_f. + _w_r_i_t_e_v performs the same action, but gathers the output data + from the _i_o_v_c_n_t buffers specified by the members of the _i_o_v + array: iov[0], iov[1], ..., iov[iovcnt-1]. + + For _w_r_i_t_e_v, the _i_o_v_e_c structure is defined as + + struct iovec { + caddr_t iov_base; + int iov_len; + }; + + Each _i_o_v_e_c entry specifies the base address and length of an + area in memory from which data should be written. _w_r_i_t_e_v + will always write a complete area before proceeding to the + next. + + On objects capable of seeking, the _w_r_i_t_e starts at a posi- + tion given by the pointer associated with _d, see _l_s_e_e_k(2). + Upon return from _w_r_i_t_e, the pointer is incremented by the + number of bytes actually written. 
+ + Objects that are not capable of seeking always write from + the current position. The value of the pointer associated + with such an object is undefined. + + If the O_APPEND flag of the file status flags is set, the + file pointer will be set to the end of the file prior to + each write. + + If the real user is not the super-user, then _w_r_i_t_e clears + the set-user-id bit on a file. This prevents penetration of + system security by a user who "captures" a writable set- + + + +Sun Release 3.2 Last change: 16 July 1986 1 + + + + + + +WRITE(2V) SYSTEM CALLS WRITE(2V) + + + + user-id file owned by the super-user. + + When using non-blocking I/O on objects that are subject to + flow control, such as sockets, pipes (or FIFOs), or termi- + nals, _w_r_i_t_e and _w_r_i_t_e_v may write fewer bytes than requested; + the return value must be noted, and the remainder of the + operation should be retried when possible. If such an + object's buffers are full, so that it cannot accept any + data, then _w_r_i_t_e and _w_r_i_t_e_v will return -1 and set _e_r_r_n_o to + EWOULDBLOCK. Otherwise, they will block until space becomes + available. + +SYSTEM V DESCRIPTION + A _w_r_i_t_e (but not a _w_r_i_t_e_v) on an object that cannot accept + any data will return a count of 0, rather than returning -1 + and setting _e_r_r_n_o to EWOULDBLOCK. + +RETURN VALUE + Upon successful completion the number of bytes actually writ- + ten is returned. Otherwise a -1 is returned and the global + variable _e_r_r_n_o is set to indicate the error. + +ERRORS + _w_r_i_t_e and _w_r_i_t_e_v will fail and the file pointer will remain + unchanged if one or more of the following are true: + + EBADF _d is not a valid descriptor open for writing. + + EPIPE An attempt is made to write to a pipe that is + not open for reading by any process (or to a + socket of type SOCK_STREAM that is connected + to a peer socket.)
Note: an attempted write + of this kind will also cause you to receive a + SIGPIPE signal from the kernel. If you've + not made a special provision to catch or + ignore this signal, your process will die. + + EFBIG An attempt was made to write a file that + exceeds the process's file size limit or the + maximum file size. + + EFAULT Part of _i_o_v or data to be written to the file + points outside the process's allocated + address space. + + EINTR The call is forced to terminate prematurely + due to the arrival of a signal whose + SV_INTERRUPT bit in sv_flags is set (see + _s_i_g_v_e_c(2)). _s_i_g_n_a_l(3V), in the System V + compatibility library, sets this bit for any + signal it catches. + + + + +Sun Release 3.2 Last change: 16 July 1986 2 + + + + + + +WRITE(2V) SYSTEM CALLS WRITE(2V) + + + + EINVAL The pointer associated with _d was negative. + + ENOSPC There is no free space remaining on the file + system containing the file. + + EDQUOT The user's quota of disk blocks on the file + system containing the file has been + exhausted. + + EIO An I/O error occurred while reading from or + writing to the file system. + + EWOULDBLOCK The file was marked for non-blocking I/O, and + no data could be written immediately. + + In addition, _w_r_i_t_e_v may return one of the following errors: + + EINVAL _I_o_v_c_n_t was less than or equal to 0, or + greater than 16. + + EINVAL One of the _i_o_v__l_e_n values in the _i_o_v array + was negative. + + EINVAL The sum of the _i_o_v__l_e_n values in the _i_o_v + array overflowed a 32-bit integer. + +SEE ALSO + fcntl(2), lseek(2), open(2V), pipe(2), select(2) + + + + + + + + + + + + + + + + + + + + + + + + + + + +Sun Release 3.2 Last change: 16 July 1986 3 + + + diff --git a/doc/kcc/cc.doc b/doc/kcc/cc.doc new file mode 100755 index 000000000..d6e6cc085 --- /dev/null +++ b/doc/kcc/cc.doc @@ -0,0 +1,1526 @@ + KCC USER DOCUMENTATION +<1 About KCC> + + KCC is a compiler for the C language on the PDP-10.
It was +originally begun by Kok Chen of Stanford University around 1981 (hence +the name "KCC"), and has had many improvements made to it since then +by a number of people at Stanford, Columbia, and SRI. It implements C +as described by the following references: + H&S: Harbison and Steele, "C: A Reference Manual", + HS1: (1st edition) Prentice-Hall, 1984, ISBN 0-13-110008-4 + HS2: (2nd edition) Prentice-Hall, 1987, ISBN 0-13-109802-0 + K&R: Kernighan and Ritchie, "The C Programming Language", + Prentice-Hall, 1978, ISBN 0-13-110163-3 + + Currently KCC is only supported for TOPS-20, although there is +no reason it cannot be used for other PDP-10 systems or processors, if +the need arises. The remaining discussion assumes you are on a +TOPS-20 system. + +<1 Using KCC> + + C source files should have the extension ".C", such as PROG.C +and SUBS.C. To build a C program, whether from one or more source +files ("modules"), there are three things that must be done. First, +all modules have to be compiled with KCC to produce .REL files (e.g. +PROG.REL and SUBS.REL); second, the LINK loader must be invoked to +load all of the necessary modules into an executable core image; and +third, this image must be saved on disk as an .EXE file. + + Every complete C program must contain one and only one module +that defines the function "main". This function is where control begins +when the program is executed, and unless otherwise specified the .EXE +file will be named after the module that "main" appears in. + + You can make a C program either by using the EXEC commands +COMPILE, LOAD, and SAVE, or by invoking KCC directly. For example, +suppose "main" is defined in PROG.C, and the file SUBS.C contains +auxiliary subroutines. 
Then, + +To make: EXEC command Direct KCC invocation +------- ------------ --------------------- +PROG.EXE from .C files: @LOAD PROG,SUBS @CC -q PROG SUBS + @SAVE PROG + +Just the .REL files: @COMPILE PROG,SUBS @CC -q -c PROG SUBS + +PROG.EXE from .RELs: Same as 1st @CC PROG.REL SUBS.REL + + One advantage of using the EXEC commands is that they will +only compile those files which appear to require it, i.e. modules for +which the .C file is more recent than the .REL file. The EXEC can also +translate TOPS-20 directory names into a format that the DEC loader will +understand, so that commands like @COMPILE PROG are possible. + However, KCC will do a similar form of conditional compilation +if the -q switch is set, for those modules specified without a .C +extension. (This may become the default someday.) More commonly, the +EXEC at your site may not have been modified to know about KCC, or you +may wish to specify certain options to the compilation, or you may +just come from a UNIX background and feel more used to the direct +invocation method. + +<1 Direct Invocation - Compiler switches> + + The KCC compiler switches are intended to resemble those of the +UN*X "cc" command as closely as possible. If you are familiar with these, +you can probably use KCC instinctively. The command line is broken up into +argument strings each separated by a space (NOT by a comma). If an argument +string starts with a "-", it is a switch, otherwise it is a filename. +Case is significant in switches! + Normally, if the filename as given exists, it is used +regardless of its form. The exception is files with a ".REL" +extension, which are never compiled but are passed on to the linking +loader. If a filename does not exist and appears to have no +extension, ".C" is added. This feature is primarily useful with the +-q switch as it requests conditional compilation. Case is not +significant in filenames. 
+ + If none of -c, -E, or -S are given as switches, KCC will invoke +LINK after compilation and an executable file (*.EXE) will be produced. + + The ordering of switches and filenames, in general, does not +matter; all switches are processed before compiling starts. However, +note that filenames and libraries will be compiled and/or loaded in +the order given, and -I paths will also be scanned in the order given. + + It is possible to specify KCC switches while giving a +COMPILE-class command to the EXEC, if your EXEC recognizes the switch +/LANGUAGE-SWITCHES. The argument to this EXEC switch should be a +double-quoted string which starts with a space. For example: + @compile foo /laNGUAGE-SWITCHES:" -m -d=sym" + +------------------------------------------------------------------------ +The following are the available compiler switches, in alphabetical order. +They are the same as those used by UN*X "cc", except where marked with +a "*" -- these are mainly of interest to KCC implementors. + +* -A Specify a file name for the assembler header file (included + at the start of all assembler output). + -c Compile and assemble, but don't link (produce *.REL). + -C Retain comments in preprocessor (only useful with -E). +* -d Debugging output. Same as -d=all. Generates many debug files. +* -d= Debugging fine-tuning. + are flag names of particular kinds of debug output files. + The names can be abbreviated. Prefixing the name with a + '+' turns it on; '-' turns it off. All flags are initially + assumed off. Current flags are: + parse Parse tree output (*.DEB) + pho Peep-Hole Optimizer output (*.PHO) - HUGE!!! + sym Symbol table output (*.CYM) + all All of the above + E.g. "-d=parse+sym" == "-d=all-pho" + -D Define following ident to "1" or string after '='. + E.g. "-DMAXSIZE=25". Several of these may be specified. + -E Run source only through preprocessor, to standard output. +* -H Specify a non-standard location for <>-enclosed #include files. 
+ -i Loader: load code for multi-section (extended addressing) operation. + -I Supply a search path for doublequoted #include files. + Several of these may be specified, and will be searched in + that order. +* -L Loader: Specify a non-standard location for library files. +* -L= Loader: Specify an arbitrary string argument to the loader. + Note that the syntax does not permit spaces to be included. + Several of these may be given. + -lnam Loader: Specify library filename for loader. The "nam" + argument is used to construct the filename LIBnam.REL in the + library directory path and this is searched when encountered + in the specifications. +* -m Use MACRO rather than FAIL. Semi-obsolete, same as -x=macro. + -O Optimize (no-op, defaults on). Same as -O=all. +* -O= Optimization fine-tuning. Mainly for debugging. + are flag names of particular kinds of optimizations. + The names can be abbreviated. Prefixing the name with a '+' turns + it on; '-' turns it off. All flags are initially assumed off, + so to ask for no optimization use -O= (same as -O=-all). + Current flags are: + parse Parse tree optimization + gen Code generator optimizations + object Object code (peephole) optimizations + all All of the above + E.g. "-O=parse+gen" == "-O=all-object" + -o= Specify output filename for the executable image. + For UN*X-compatibility kicks, "-o " also works. +* -P= Portability level specifications. Several switches may be given in + a format similar to that for -d and -O. The flags + specify the C implementation level that the compiler should use: + base Base level C -- most portable and restricted + carm H&S CARM level -- full implementation + ansi ANSI C draft level (only partly effective) + Only one of the previous 3 is allowed, plus an optional: + kcc Permit KCC-specific extensions to the selected level. + The default is "ansi+kcc" if -P is not given. -P alone is + interpreted as "base". +* -q Conditional compilation. 
All file specs without an extension will + only be compiled if the .C file is more recent than the .REL file. + For example, "cc -q foo bar.c arf.rel" + compiles FOO.C if it is more recent than FOO.REL, + always compiles BAR.C, and never compiles ARF. + -S Don't assemble (produce *.FAI or *.MAC, plus *.PRE) + -U Undefine following identifier. All -U switches are processed + before any -D switches. Only __FILE__ and __LINE__ are predefined. +* -v Verbose - same as "-v=all". +* -v= Verbosity switches, similar to -d and -O. + fundef - print function names as they are defined (not yet). + stats - show statistics for run + load - show command string given to loader (if any) + -w Don't type out warnings. +* -x= Cross-compile switches. Several switches may be given in + a format similar to that for -d and -O. The flags + specify an aspect of the "target machine" that the + code should be compiled for (case is significant!): + Target System: tops20, tops10, waits, tenex, its + Target CPU: ka, ki, ks, kl0, klx + Target Assembler: fail, macro, midas + Target char size: ch7 (to compile with 7-bit chars) + e.g. "-x=ka+tenex". See "Cross-compiling". +------------------------------------------------------------------------ + +NOTE: <path> syntax + The -I, -H, and -L switches all take a "path" as argument. +This is interpreted as specifying both a prefix and a postfix string +which are used to sandwich a partial filename from some other source +(#include "xxx", #include <xxx>, and -lxxx respectively). The two +strings are separated by the character '+' (this is site dependent +however). Thus, for example: + Specification Prefix Postfix Sample with "xxx" + -I+[SYS,NEW] "" "[SYS,NEW]" xxx[SYS,NEW] + -HNEWC: "NEWC:" "" NEWC:xxx + -LPS:LIB+.REL "PS:LIB" ".REL" PS:LIBxxx.REL + +NOTE: Obsolete features + + The following switches and interpretations are obsolete.
They will +likely be flushed altogether, but are documented here for historical reasons: + + * -n same as -O= (no optimization) + * -s same as -d=sym (output *.CYM symbol table dump) + + It used to be a feature that "simple" switches, which did not +take any arguments, could be lumped together into a single switch +string. For example, "cc -mS test" is the same as the more standard +"cc -m -S test". However, use of this feature is discouraged; the +potential confusion and inconsistency don't seem to be worth it. + +NOTE: Switch Portability + + The following lists the switches implemented by other systems +but not by KCC. This information seems useful and this is a convenient +place to put it. Other-system switches that KCC implements are not included. +Switches which mean one thing to KCC but another thing to other systems +are included. Currently only 4.2BSD switches are listed. + -g Output additional symtab info for dbx(1), pass -lg to ld(1) + -go Ditto for sdb(1). + -p Output profiling code for prof(1). + -pg Ditto but for gprof(1). + -R Passed on to as(1) to make initialized vars shared and read-only. + -Bpath Use substitute compiler pass programs specified by . + -t[p012] Use only the pass programs from -B designated by -t. +ld(1) switches: + A, D, d, e, l, M, N, n, o, r, S, s, T, t, u, X, x, y, z + +<1 User Program - Command line interpretation> + + The C runtime startup interprets the command line to a C program +in a consistent fashion, and supports (1) argument string passing, +(2) I/O redirection, (3) pipes, and (4) background processing. There +is also provision for (5) suppressing this default command line +interpretation. + +(1) Command line arguments: + Command line arguments can be passed to the main() function +from the EXEC or monitor in the UN*X fashion. That is, main() is +given two arguments, the first of which is an argument count and +the second a pointer to an array of char pointers, each of which +constitutes an argument. 
Thus it is conventional to declare the +parameters to main() in this way: + main(argc, argv) + int argc; + char **argv; +For example, if you have a C program saved as PROG.EXE, then invoking +PROG with the command: + @PROG one two +will set argc to 3, and the strings that argv points to will +be "PROG", "one", and "two". Note that arguments are separated by +blanks and not by commas! + +(2) I/O redirection: + I/O redirection of stdin and stdout is also supported. +Thus: + 1. @PROG <foo ; will take all stdin input from the file "foo". + 2. @PROG >bar ; will send all stdout output to a new file "bar". + 3. @PROG >>log ; will append all stdout output to the old file "log". +These can be combined: + @PROG <foo >bar ; does both 1 and 2. (from "foo", to "bar") +However, + @PROG <foo>bar ; interprets "<foo>bar" as a single argument string, + ; because it looks like a filename. + +(3) Pipes: + On TOPS-20 systems which implement the PIP: device (developed at +Stanford), pipes can also be supported, so that a command such as: + @PROG | BAZ +causes the stdout of PROG to be redirected to the stdin of BAZ. + +(4) Background processing: + Again, provided the EXEC has been suitably modified, a +command line ending in an ampersand ('&') will cause the program +to be run in the background, while the user goes on to do other +things: + @PROG one two& + +(5) Suppressing the command line interpretation: + In certain unusual circumstances it may be necessary to suppress +the default command line interpretation, so that the user program itself +can handle it in a different way. For information on how to do this, +see the include file . + +<1 C as implemented by KCC> + + KCC is intended to conform to the description of C as +specified by Harbison & Steele's "C: A Reference Manual". It is +strongly recommended that all C programmers use this book in preference +to Kernighan & Ritchie. As the ANSI C standard becomes more concrete, +KCC will likewise evolve to conform to this standard; some of the +proposed ANSI features are already implemented.
+ + The -P (portability) switch controls the exact level at which +KCC attempts to compile a C program. There are three possible levels, +and only one of these may be in effect: + ANSI - permits all currently implemented ANSI constructs to be + recognized and compiled. This is basically CARM level + plus some new things; KCC does not yet fully + implement the ANSI draft standard, as it keeps changing. + Users should be cautious about using ANSI features. + CARM - Disables all ANSI-added features which are not in Harbison + and Steele's CARM book. KCC fully implements this level. + BASE - The most restrictive level. This is basically the same as + CARM, but will make KCC complain about some constructs + or usages that are likely to be unimplemented by some + other compilers. + In addition, there is a "KCC extensions" flag which is independent +of the level; when enabled, this permits a number of KCC-specific extensions +to be recognized regardless of whatever level is in effect. + Normally KCC uses the ANSI level with KCC extensions enabled; +this corresponds to "-P=ansi+kcc". + + The next several pages document KCC's implementation of C by +following the general ordering of H&S and pointing out aspects where +KCC differs or describing which of several optional behaviors KCC +implements. Any ANSI features which are implemented are also described. + +<2 KCC Lexical Elements> [H&S 2, "Lexical Elements"] + + KCC uses the US ASCII character set. There is provision for +using a separate target character set, different from the source set, +but currently the only such is a target set for WAITS ASCII. + + KCC has no maximum line length. Error messages will quote +only the most recent part of an offending line if it is longer than 80 +characters. + + KCC is standard in that nested comments are not supported. If +the sequence "/*" is seen within a comment, a warning message will be +printed just in case the user neglected to terminate the previous +comment. 
+ +<2 Identifier names> + + KCC adheres to the standard definition of C identifier syntax, +allowing the character "_", the letters A-Z and a-z, and the digits +0-9 as valid identifier characters. Identifiers may have any length, +but only the first 31 characters (case sensitive) are unique during +compilation, which conforms to the ANSI minimum. This applies to all +of the following name spaces (as per H&S 4.2.4): + Macro names + Statement labels + Structure, union, and enum tags + Component (member) names + Ordinary names: + Enum constants and typedef names. + Variables (see discussion of storage classes). + + However, the situation is different for symbols which must be +exported to the PDP-10 linker. Such names are truncated to 6 +characters and case is no longer significant. The character '_' +(underscore) is transformed into '.' (period); the PDP-10 software +allows the additional symbol characters '$' and '%', but there is no +way to generate these with C unless special provision is made; see +#asm and '`' under "KCC Extensions". See also the discussion of +exported symbols. + +<2 Reserved Words> + KCC has a number of additional reserved words depending on +the portability level setting. When KCC extensions are allowed, as +is normally the case, the following keywords exist: + "asm" - used for assembly code inclusion. + "entry" - only in certain special circumstances. + See the discussion of libraries and entry points. + When ANSI level is in effect (again, the normal case), there +are three additional reserved words. All can be considered type +modifiers: + "signed" Indicates integer type is signed. Implemented. + "const" Constant object (recognized but unimplemented) + "volatile" Volatile object (recognized but unimplemented) + +<2 Constants> + + The types "int" and "long" are the same -- one PDP-10 word of +36 bits, with the high bit a sign bit. Thus, the largest positive integer +constant is 0377777777777, or 34,359,738,367.
+ The type "double" is represented by a PDP-10 hardware format +standard range double precision number (two words). On KA processors +the format is slightly different. The decimal range is from 1.5e-39 +to 1.7e38, with eighteen digits of precision. + Character constants have type "int". Multicharacter constants +are non-standard and not supported. Because characters are 9-bit bytes, +numeric escape code values can range from '\0' to '\777'. Hexadecimal +character constants are not permitted. + String constants are stored as 9-bit byte strings, and do not +share storage. That is, two instances of the constant string "foo" +will be stored in two distinct places. On TOPS-20, string constants +are put in the "pure" segment of a program, but this does not actually +enforce any read-only restrictions. + If the portability level is ANSI then adjacent string constants +are concatenated into a single string. Thus, "foo" "bar" is the same +as "foobar". + +<2 Preprocessor directives> [H&S 3, "The C Preprocessor"] + +All standard C preprocessor directives are supported as described in +Harbison and Steele, including #elif and the "defined" operator. This +page specifies how KCC behaves for situations which are implementation +dependent. + +Lexical Conventions: [H&S 3.2] + Preprocessor commands must have '#' as the first character on +the line; whitespace cannot precede it. KCC allows whitespace between +the '#' and the command name (this is non-portable). Formal parameter +names ARE recognized within character and string constants in macro +body definitions. Comments are treated as whitespace and not passed +on to anything else; however, KCC will print a "Nested comment" +warning if it encounters a comment which contains "/*". This serves both +to catch slightly non-portable usage (see H&S 2.2) and to detect +places where the user may have accidentally omitted a "*/". 
+ +Defining Macros: [H&S 3.3] + When defining a macro, formal parameter names are recognized +within string and character constants, and therefore no check is made +for lexical correctness of such constants; this will change when the +ANSI standard firms up. Any comments and whitespace in the macro body +are replaced by a single space. KCC permits an argument token list +(arguments to a macro call) to extend over multiple lines. Arguments +to a call are converted in a fashion similar to that for macro bodies +-- comments and whitespace are replaced by a single space. Newlines +within an argument list are also considered whitespace. However, +string and character constants in arguments are treated as tokens, and +their contents are not scanned for macro names. + +Predefined Macros: [H&S 3.3.4] + __LINE__ expands into the current decimal line number. (BSD, ANSI) + __FILE__ expands into the current source filename. (BSD, ANSI) + __DATE__ expands into the date of compilation. + __TIME__ expands into the time of compilation. + The date/time of compilation is cleared at the start of + compilation for each source file, and is set by the first + occurrence of __DATE__ or __TIME__ within that source file. + __STDC__ expands into the ANSI standard level # (not implemented yet). + +The first two macros are furnished for compatibility with 4.2BSD; the +next two were added from ANSI. __STDC__ will only be added when -P=ansi +is a full implementation. There are no other predefined macros; use the +<c-env.h> file for standard KCC environment definitions. + +Undefining and Redefining Macros: [H&S 3.3.5] + It is not an error to redefine an already defined macro, but a +warning message will be output unless the new macro definition is the +same as the old definition; i.e. redundant definitions are allowed. +There is no macro definition stack, i.e. definitions are not +pushed/popped by #define/#undef.
Attempting to define a macro named +"defined" will cause an error, since otherwise it would conflict with +the "defined" operator. + +Converting Tokens to Strings: [HS2 3.3.8] + KCC does recognize formal parameter names within string and +character constants. This will change as the ANSI standard shapes up. + +File Inclusion: [H&S 3.4] + Included files may be nested to 10 levels. Macro expansion +is done on the line if the filename does not start with '<' or '"'. +Filenames may contain '>' or '"' characters. + #include <filename> looks only in the standard directory. + #include "filename" looks first in DSK:, + then in the -I paths in order of specification (left to right), + then in the standard directory. +The standard directory for include files is C: on TOPS-20, on +TENEX, and [SYS,KCC] on WAITS, but this is site dependent in any case. + +Conditional Compilation: [H&S 3.5] #if,#else,#endif,#elif,#ifdef,#ifndef + The "defined" operator is recognized only within #if and #elif +expressions. Note that neither #elif nor "defined" are in K&R, and +H&S is used as the reference here; neither will be recognized unless +the portability level is at least "carm". Within the body of a failing +conditional, only other conditional commands are recognized; all others, +even illegal commands, are ignored. + +Explicit Line Numbering: [H&S 3.6] #line + The information from #line will be used in KCC error messages. +Macro expansion is performed on the line. Like all other +preprocessor commands, #line is eliminated and not passed on when +using the -E switch. With regard to "#" alone at the start of a line, +remember that whitespace is allowed between the "#" and the command +name, thus KCC will not recognize a "#" alone as a synonym for "#line". +If there is no command name, the line is simply ignored without error.
+ +KCC-specific Commands: + #asm, #endasm + These two commands cause the text delimited by them to be +macro-expanded (as for -E) and converted into an "asm()" expression +for direct inclusion in the output assembly language file. This +currently only works inside functions. This feature is very likely to +change, and should only be used where absolutely necessary. Keep the +code simple, as someday KCC may want to parse it. +See "KCC Extensions" for additional details. + +<2 Storage classes> [H&S 4.3 "Storage Class Specifiers"] + +KCC implements the standard storage classes of auto, extern, register, +static, and typedef (H&S sec 4.3), with the following notes: + +REGISTER declarations are currently equivalent to AUTO. KCC does not +assign variables to registers, and optimizations are performed without +using the "hint" given by REGISTER. AUTO variables are almost always +more efficient, and in any case they are easier to implement. + +KCC uses the "omitted-EXTERN" solution to deal with the question of +top-level definitions versus references (H&S sec 4.8). That is, +omitting "extern" from a top-level declaration has the effect of +indicating that this is a defining declaration rather than a referencing +declaration. + +Duplicate Declarations: + As per H&S 4.2.5, KCC permits any number of external +referencing declarations, if the types are the same. However, because +KCC treats omitted-extern declarations as defining declarations, these +references must all have an explicit "extern". Likewise, an external +reference may be later followed by a defining declaration. + KCC has additional special handling for declarations of +functions, because it can always be determined whether a function +declaration is a reference or a definition. Any number of "static" +referencing declarations are allowed. Conflicts are resolved as +follows: If an implicit external reference is followed by a static +reference or definition, KCC will assume the function is static. 
It +is an error if the first reference has an explicit "extern". It is +also an error if a static reference is followed by an external +reference or definition. In either case compilation proceeds as if +the function was static. + +<2 Initializers> [H&S 4.6 "Initializers"] + + KCC adheres to H&S in all required respects. The following +notes cover points which H&S describes as implementation dependent: + +Optional braces are allowed for all non-aggregate initializers. It is +permitted to drop braces from initializer lists under the rules +described in H&S 4.6.8 (HS1 4.6.9), but KCC attempts to perform +extremely stringent checking on the "shape" of initializers, and will +complain about too many or too few braces. + +FLOATING-POINT initializers may be of any arithmetic type. KCC performs +compile-time floating-point arithmetic, so initializers for static and +external variables may use any constant arithmetic expression. + +POINTER initializers, as described in H&S, must evaluate to an integer or +to an address plus (or minus) an integer constant. + +ARRAY initializers are currently not allowed for automatic arrays. +This will change as ANSI permits it. + +ENUMERATION initializers may use any integer (as well as enum) expression. + +STRUCTURE initializers can initialize bit fields with any integer expression. +As for arrays, automatic and register structures cannot be initialized. +This will change as ANSI permits it. + +UNIONS currently cannot be initialized. This will change as ANSI +permits it. + +<2 Exported symbols> [H&S 4.8 "External Names"] + +Symbols which are exported to the assembler file have special restrictions +imposed by current PDP-10 software, which only recognizes 6-character +symbols from the set A-Z, 0-9, '.', '$', and '%'. In particular, case +is not significant. + +Also, there is a distinction between symbols exported only to the assembler +and those exported both to the assembler and the linker. 
While there is +technically no reason that any symbol has to be given to the assembler if +it is not also meant for the linker, in practice it is convenient for +debugging to have some "local" symbol definitions available so that DDT +can access them. + +Here is a breakdown of export status by storage class: + +typedef = Exports nothing. (Not a real storage class) +auto = Exports nothing. (Local stack variables use an internal offset) +register = Exports nothing. +static = If not global scope (i.e. is within a block) then nothing exported; + an internally-generated label is used. + If global (top-level, within no block) then exported to assembler only. + A label is made, but no INTERN or ENTRY statement. +extern = Always exported to both assembler and linker. + Omitted-extern: a DEFINITION. A label, INTERN, and ENTRY are output. + Explicit-extern: a REFERENCE. An EXTERN statement is output, but only + if the symbol is actually referenced by the code. + +Omitted-Extern: + External declarations with no "extern" storage class +explicitly given are assumed to be external DEFINITIONS. A defined +extern symbol will have its own label, plus an INTERN statement +telling the assembler that this is an externally visible symbol, plus +an ENTRY statement which allows library routine search to find this +symbol. ENTRY statements will be put into the .PRE output file rather +than the main output file, since the assembler will need to scan them +prior to anything else. + +Explicit-Extern: + If an "extern" is explicitly given, the compiler assumes that +it is simply a REFERENCE. Nothing will be done unless the symbol is +actually referenced by the code, in which case an EXTERN line will be +generated in the assembler output for that file. The reason for the +reference count check is that each assembler EXTERN constitutes a +library search request which must be satisfied by a module with the +corresponding symbol declared as an ENTRY. 
Unless this is only done +for actual references, the many superfluous declarations found in *.h +files will tend to cause many unneeded library modules to be loaded. + +Static symbols: + Note that global static symbols are passed on to the assembler +even though this is not necessary; an internally-generated label could +be used just as well. The main reason this is done is to facilitate +debugging with DDT, otherwise it could be difficult to identify static +functions when looking at the machine instructions. This may cause +problems if identifiers which are otherwise distinct become identical +as a result of the conversion to a 6-char PDP-10 symbol. + +However, a symbol declared static within a given source file will +never be visible from another file that you may link later with it. For +example, a function declared as + + static char *function() + { + ... + } + +will only be visible from other functions within the same source file. +This allows several modules to have functions with the same name +modulo the six character limit, as long as no two of the functions are +both extern. It is STRONGLY recommended for multi-module programs +that you declare as many functions as possible to be "static". + +<2 Libraries and Entries> + +REL files to be converted by MAKLIB into object libraries must have +any external symbols declared with ENTRY rather than merely INTERNing +them, and this declaration must be at the start of the REL file. In +order to do this, KCC generates a *.PRE "prefix" output file in +addition to the *.FAI or *.MAC output file, and invokes the assembler +in such a way that the PRE file is assembled before the main file. +This file contains ENTRY statements and any other predeclarations that +are needed before the assembler sees the actual code. Normally the +user will never see this file, but if the -S switch is used then it +will be left around as well as the FAI/MAC file. 
Note that if running +the assembler manually on the FAI/MAC file, you must invoke it with +a command line like this: + [@]FAIL [@]MACRO + [*]FOO=FOO.PRE,FOO.FAI [*]FOO=FOO.PRE,FOO.MAC + + +COMPATIBILITY INFO: + For compatibility, KCC will continue to recognize an "entry" +keyword for some time to come. The following describes the obsolete +syntax: + +To declare an entry, use the "entry" keyword at the start of the source, +before any other declarations: + + "entry" ident ["," ident ...] ";" + +i.e., the keyword "entry", followed by a list of identifiers separated +by commas, followed by a semicolon. This is passed on essentially +verbatim to the assembler, and has no other effect on compilation. It +should be used at the start of any runtimes or other file intended for +a library, on all variables and functions that should be visible as +entries in the library. + +Note that it should still be safe to use "entry" as a non-keyword; if +used other than at the start of the file it will be treated like any +other normal identifier. + +To repeat: the "entry" statement is no longer necessary. It should not +be used in new code, and should be removed from old code. + +<2 Types> [H&S 5 "Types"] + +STORAGE UNITS: + A KCC storage unit (what "sizeof" returns) is a 9-bit byte, and +there are 4 of these in each 36-bit PDP-10 word, ordered left to right +from most significant to least significant. + +INTEGERS: + KCC's integer types have the following sizes: + Type Bits "sizeof" value + char 9 1 + short 18 2 (PDP-10 halfword) + int 36 4 (PDP-10 word) + long 36 4 (PDP-10 word) + +All of these types may be explicitly declared as "signed" if ANSI +level is in effect. Single variables declared as "char" or "short" +are stored right-justified into a full word; only when packed into an +array or structure are they stored as 9-bit (or 18-bit) bytes, left to +right within each word.
+ +UNSIGNED INTEGERS: + Unsigned integers are fully implemented; any integer object +may be either "signed" or "unsigned", and both forms use exactly the +same amount of storage, with the high order bit considered the sign +bit (if the object is signed). However, because the PDP-10 has +no instructions specifically for unsigned data, some operations are +slower for unsigned ints. + Addition (+) and subtraction (-) are the same. + == and != are the same. + Left shift (<<) always uses the LSH instruction (logical shift). + Right shift (>>) uses LSH for unsigned, ASH for signed operands. + ASH is an arithmetic shift which propagates the sign bit. + <,<=,>,>= are slightly slower for unsigned operands. + Casts to floating-point are slower. + Multiply (*) is also slightly slower. + Divide (/) and remainder (%) are much slower. + +CHARACTER: + The plain "char" type is "unsigned char". Sign extension is +done only if chars are explicitly declared as "signed char". Normally +a char is 9 bits, although it is possible to compile code using a +7-bit assumption (see the section on char pointer hints). + Old versions of KCC used to store the chars of a string +constant in 7-bit form, packed 5 to a word (ASCIZ format); this is no +longer the case and string constants are normally now full 9-bit char +strings. + An extension to KCC provides five additional types of "char" +objects, specified as "_KCCtype_charN", where N is the number of bits +in the char and may be one of 6, 7, 8, 9, or 18. All may be signed +or unsigned; their "plain" form is unsigned. See the "KCC Extensions" +section for additional details. + + +FLOATING-POINT: + The "float" type is represented by one word in the PDP-10 +single precision floating point format; there is one bit of sign, 8 +bits of exponent, and 27 bits of mantissa. + The "double" type uses two words in the PDP-10 double +precision format. (Note that for the KA-10 this is a software format +rather than the more usual hardware format.) 
The exponent range is +approximately 1.5e-39 to 1.7e38 in both formats; single precision has +about 8 significant digits and double precision has 18. See a PDP-10 +hardware reference manual for details. + KCC also supports the new ANSI "long double" type when ANSI +level is in effect. Currently this is the same as "double" but this +will probably change on KL-10s to use "G" format floating point, which +has an exponent range of 2.8e-309 to 9.0e307 but only 17 significant +digits. + The (double) type can represent all values of (long). That +is, conversion of a (long) to a (double) and back to (long) results in +exactly the original value. + +POINTERS: + Pointers are always a single word, but can have two different +internal formats. Pointers to chars, shorts, or bit fields, are PDP-10 byte +pointers (local or one-word global); pointers to all other objects are +PDP-10 global word addresses. Byte pointers point to the byte itself +rather than to the preceding byte, thus LDB instead of ILDB is done +to fetch the byte. + It is very important to ensure that functions which return +values of (char *) be properly declared; likewise, any function +arguments which are expected to be (char *) must be cast to this if +necessary. Operations which expect a char pointer will not work +properly when given a word pointer, and vice versa. See the section +on "pointer hints" near the end of this file for additional information. + The "NULL" pointer is represented internally as a zero word, +i.e. the same representation as the integer value 0, regardless of +the type of the pointer. The PDP-10 address 0 (AC 0) is zeroed and +never used by KCC, in order to help catch any use of NULL pointers. + +ARRAYS: + The only special thing about arrays is that arrays of chars +consist of 9-bit bytes packed 4 to a word, and arrays of shorts have +18-bit halfwords packed 2 to a word; all other objects occupy at least +one word. + +ENUMERATIONS: + KCC treats enumeration types simply as integers. 
In the words +of H&S 5.5 (HS1 5.6.1), KCC uses the "integer model" of enumerations, +which is what ANSI has adopted. + +STRUCTURES and UNIONS: + Structures and unions are always word-aligned and occupy a +whole number of words. Unlike the case for other declarations of type +"char" or "short", adjacent "char" and "short" members in a structure +are packed together as for arrays. Structures and unions may be +assigned, passed as function parameters, and returned as function +values. + Bit fields are implemented; the maximum size of a bit field is +36 bits. They may be declared as "int", "signed int", or "unsigned +int"; plain "int" bitfields are unsigned. Fields are packed left to +right, conforming to the PDP-10 byte ordering convention. It's too +bad that C does not allow pointers to bit fields, because the PDP-10 +byte pointer instructions are perfectly suited to this application! + +FUNCTIONS: + As per H&S. A pointer to a function is simply a word address. +For the gory details of function calls and stack usage, see the +"Internals" section. + +TYPEDEFS: + As per H&S. With regard to 5.10.2 (HS1 5.11.1), KCC has no +problems with redefining typedef names in inner blocks. + +<2 Type Conversions> [H&S 6 "Conversions and Representations"] + +Integer conversions: + + There are no representation changes when converting any +integer type to any other integer type of the same size. Sign +extension and truncation are performed when necessary to convert from +one size to another. Conversions from pointers are done as per H&S +6.2.3 (V1 6.3.4); a pointer is treated as an unsigned int and then +converted to the destination type using the integral conversion rules. + +Floating-point conversions: + + Casting (float) to (double) or (long double) retains the +same value. However, (double) to (long double) may lose one digit +of precision, depending on the implementation chosen for (long double). 
+ A cast to (float) of an int may lose some precision, +although a char or short can always be fully transformed. (double) +can retain the exact value of an int or long int, which can be +restored to its original value by converting back to int. + Casting an unsigned integer to a floating-point value always +results in a positive number. + +Pointer conversions: + + There are a great variety of pointer conversions possible; however, +you can make sense of them if you simply note the following Three Laws of +Pointers: + (1) Nihil ex nihilis -- a NULL pointer always remains NULL. + (2) Smaller is finer -- a pointer to any object can always + be converted into a pointer to a SMALLER (or equal-sized) + object, without losing any information. Converting it back + to the original type restores the original value. + (3) Bigger is blunter -- converting a pointer to any object to + a pointer to a LARGER object will force the pointer to + have an alignment suitable for that of the larger type; + any fine details of positioning within the new type are lost, + and the original pointer cannot be recovered (unless it + was already properly aligned to begin with). The new + object pointed to will completely enclose the smaller + object. + +Specifically: + Chars are aligned on 9-bit byte boundaries, shorts on halfword +boundaries, and all other data types on word boundaries (with the +exception of bitfields and the _KCCtype_charN types). Converting any +pointer to a (char *) and back is always possible, as a char is the +smallest possible object. If the original object was larger than a +char, the char pointer will point to the first byte of the object; this +is the leftmost 9-bit byte in a word (if word-aligned) or in the halfword +(if a short). + + A cast to (int *) of a char pointer produces an address that +points to the word that the char pointer indicates, regardless of +which byte in the word was being pointed at. 
+ + Pointer casts are not always trivial, but they are reasonably +fast (from 1 to 4 instructions depending on the alignment requirements). + + The only exception to the 3 rules is the case of pointers to +objects of _KCCtype_charN types (see the KCC extensions section). +Casting any pointer to or from those types is performed by first +converting the original pointer into a word pointer (thus forcing +alignment to a word boundary) and then applying the desired +conversion. + +Assignment conversions: + + KCC permits any casting conversion during an assignment, but +will complain about an implied cast if the conversion is not one of +the legal assignment conversions. + +Unary conversions: + + The "Usual Unary Conversions" are different for CARM and ANSI: + Original operand type Converted type + CARM ANSI (default) + float double float + signed char/short/bitfield int int + unsigned char/short unsigned int int + unsigned bitfield unsigned int *int or @unsigned int + * = if bitfield has fewer bits than an int. + @ = if bitfield has more (or same #) bits than an int. + + The first difference is (float) to (double). What H&S +describes as an "optional compilation mode" to suppress the unary +conversion of (float) to (double) is always in effect for ANSI level, +as ANSI is allowing this feature as part of the standard conversions, +and the resulting PDP-10 code is much more efficient. If ANSI level +is not selected, then all (float) values will be implicitly converted +into (double) as per the old C standard. Note that all portability +levels require that (float) values always be promoted to (double) in +function arguments, so this particular implicit conversion is always +in effect. + The second difference is in the integer promotions. 
CARM uses +what ANSI calls "unsigned preserving" rules; ANSI uses "value preserving" +rules, meaning that a conversion to a wider type should always result in +a signed integer type regardless of whether the shorter type was unsigned +or not, as long as the new type can represent all values of the old type. + +Binary conversions: + + As already noted, (float) values are not always implicitly +converted to (double) before being operated on, if ANSI level is in +effect. There is one other difference between ANSI and CARM +with respect to the usual binary conversions: + If one operand is "long" and the other is "unsigned int", + CARM: makes both "unsigned long". + ANSI: makes both "long". + +<2 Expressions> [H&S 7 "Expressions"] + +As per H&S, with the following notes: + +[7.2.2] (V1 7.2.3) Overflow and underflow are neither noticed nor +handled. The result is whatever the PDP-10 hardware gives in those +cases. + +[7.3.3] KCC correctly does not use parentheses to force the usual +unary conversions. + +[7.4.2] (V1 7.3.5) KCC permits component selection for structures +returned from functions, except when the component is an array. That +is, "f().a" will work and will select component "a" of the returned +structure, but it is not legal to do "f().array[i]". This point may +be clarified in the future by the ANSI draft standard. + +[7.4.3] (V1 7.3.6) KCC correctly does not allow formal parameters of +type "function", so the issue of converting this type does not arise. + KCC does not currently do any checking to see if the types of +the arguments match the types of the parameters for the called +function. When ANSI function prototypes are implemented, this will +change. KCC does not issue any warnings about discarded function +return values. + +[7.5.1] (V1 7.4.1) Casts - KCC correctly implements "narrowing" casts +for floating point and for integers. + +[7.5.2] (V1 7.4.2) "sizeof" - the result of "sizeof" currently has type (int). 
+This is far more than adequate for any possible size value. The +result of sizeof is ALWAYS in terms of 9-bit bytes, regardless of the +setting of -x=ch7, with two exceptions: the size of a char is always +1, and the size of a char array is the # of elements (chars) in the +array. This is true no matter how many bits are in a char. + +[7.5.6] (V1 7.4.6) '&' - Attempting to apply '&' to a "register" variable +simply causes KCC to issue a warning message and force the variable to +class "auto". KCC does not permit '&' to be applied to array or +function names; this will change as ANSI permits it. + +[7.5.7] (V1 7.4.7) '*' - Applying the indirection operator to a null +pointer (0) simply retrieves (or sets) the contents of AC 0, which +should always be zero if nothing accidentally sets it. Treating the +null pointer as a char pointer will always retrieve zeroes and set +nothing. + +[7.6.1] (V1 7.5.1) '*','/','%' - + Division by zero is a no-op; the value will be that of the dividend. +Truncation is always toward zero whether the operands are negative or +not: + 5/2 == (-5)/(-2) == 2 + (-5)/2 == 5/(-2) == -2 + For the remainder operator, (x)%0 gives unpredictable garbage. +The sign of the remainder will be the same as that of the dividend: + 5%2 == 5%(-2) == 1 + (-5)%2 == (-5)%(-2) == -1 + These operations are slower for unsigned than for signed operands. +Division in particular is slow. + +[7.6.2] (V1 7.5.2) '-' - The type of the difference between two +pointers is (int). + +[7.6.3] (V1 7.5.3) '<<','>>' - Left shift (<<) always uses logical +shifting; bits can be shifted into the sign bit. Right shift uses +logical shifting for unsigned integer types (the sign bit is shifted +out, and 0-bits shifted in), but uses ARITHMETIC shifting for signed +integer types (the sign bit is propagated). + Using a negative value for the right operand reverses the +direction of the shift. Using a large number (36 or greater) simply +shifts everything to oblivion as expected. 
Note that it is possible +to use left-shift arithmetic shifting (the ASH instruction) by giving +a negative shift distance to >>; of course this is very non-portable. + +[7.8] (V1 7.7) '?' - KCC correctly permits the result of a conditional +expression to have structure, union, enumeration, or void types. + +[7.9.1] (V1 7.8.1) Structure and union assignment is (of course) permitted. + +[7.9.2] (V1 7.8.2) 'op=' Compound assignment - + KCC does not support the obsolete "=+" compound assignment forms. + +[7.11] (V1 7.10) Constant expressions - + KCC can and does evaluate constant floating-point expressions at +compile time. Almost all casts are also allowed, except certain +pointer-pointer conversions where the result would depend on whether +the program was running multi-section. + KCC is currently somewhat too liberal about the constant +expressions in preprocessor #if statements; it allows the use of any +integral constant expression, including enum constants and sizeof +operators. This is possible because the preprocessor is integrated +with the compiler. The eventual fix for this will probably issue a +warning but permit the usage. + +[7.12] (V1 7.11) KCC correctly does not interleave expression +computations. + +[7.13] (V1 7.12) KCC tries to issue warnings about discarded values. +This may change with time. + +[7.14] (V1 7.13) KCC does some optimization of memory accesses, but +not much. This may change with the coming of ANSI's "volatile" type +modifier. + +<2 Statements> [H&S 8 "Statements"] + +As per H&S, with the following notes: + +[8.7] switch statement - KCC permits the control expression of a switch +statement to be of any integral or enumeration type. 
+ +<2 Functions> [H&S 9 "Functions"] + +[9.4] Adjustments to Parameter Types + Parameters which are declared as "char" or "short" are really +handled as type "int", and "float" is really "double"; however, KCC +does not implement narrowing as per 9.4, because the description of +this is too unclear -- what happens if such a parameter is used as +an lvalue? + The situation will improve with ANSI function prototypes. + + KCC follows the language strictly and does not permit formal +parameters of type "function returning...". + +<1 The C Libraries> [H&S Part II (V1 11: "The Run-time Library")] + + ALL of the facilities described in H&S part II are +implemented as described. In addition, various UN*X system call +emulations and standard library routines are also supported. + The file LIBC.DOC furnishes a complete summary of the +implemented library routines; there is also USYS.DOC, which +summarizes the system-call simulations. In general, users are advised +to read H&S or a UPM (Unix Programmer's Manual) for complete +descriptions of library functions, as these files are primarily +intended to document KCC-specific differences rather than to provide a +user guide. + +<2 [H&S 13] Standard Language Additions> +<2 [H&S 14] (V1 11.1) Character Processing> +<2 [H&S 15] (V1 11.2) String Processing> +<2 [H&S 16] Memory Functions> +<2 [H&S 17] (V1 11.5) Input/Output Facilities> (V1: "Standard I/O") +<2 [H&S 18] (V1 11.4) Storage Allocation> +<2 [H&S 19] (V1 11.3) Mathematical Functions> +<2 [H&S 20] Time and Date Functions> +<2 [H&S 21] Control Functions> +<2 [H&S 22] Miscellaneous Functions> +<2 C Library - Other Library Functions> + A few other miscellaneous facilities exist which are not + listed in CARM, such as jsys() and the TERMCAP library. They + are described in LIBC.DOC. + +<1 C Library - UN*X System Calls> + + The KCC runtime environment is intended to resemble that of UN*X +to a limited extent.
For example, main() is invoked with "argc, argv" +arguments parsed from the command line, and many system calls are +emulated. This emulation is not intended to be complete, and the calls +exist primarily to help transport software to and from UN*X systems. +Whenever possible, the standard portable routines as described in H&S +should be used instead of these "system calls". + The file USYS.DOC summarizes the calls which KCC supports, and +describes how they differ from the UN*X versions. A UPM (Unix +Programmer's Manual) should be consulted for descriptions of how these +calls should behave on UN*X itself. + +<1 KCC Language Extensions> + + KCC implements a number of extensions to the C language which +are intended to allow for better integration with other PDP-10 software. +It is possible to disable these extensions by means of the -P switch. +These extensions are: + [1] The "entry" keyword (obsolete). + [2] The '`' identifier quoting mechanism. + [3] The #asm and asm() assembly language mechanism. + [4] The "_KCCtype_charN" data types. + + +<2 Extension [1] - The "entry" keyword> + + The use of this statement has been described earlier in the +discussion of library entry points. However, it is an obsolete feature +and should no longer be needed for any purpose. Future versions of KCC +will flush it if no one objects. + + +<2 Extension [2] - Identifier Quoting> + + The current PDP-10 software allows symbols to have 6 characters +from the set A-Z, 0-9, ., %, $. KCC maps 0-9 to 0-9, a-z and A-Z to A-Z, +and '_' to '.'. + KCC supports a non-standard extension to C whereby any characters +enclosed within accent-grave ('`') marks are treated as a valid C identifier. +This allows the user to specify identifiers containing the characters '$' +and '%', as well as any arbitrary character, although KCC will print a +warning if a character not in the PDP-10 set is seen. + Examples: `$FOO`, `OPENF%`, `$$BP`, `switch` + + This mechanism should be used ONLY where necessary. 
It is not +portable and should be conditionalized if used in portable code. +Identifiers defined in this way should be CONSISTENTLY quoted in this +way, because they are stored internally with '`' as their first +character to distinguish them from normal unquoted identifiers and +keywords. This avoids potential confusion and allows one to specify +an identifier which is otherwise a reserved keyword, such as `if`. + +<2 Extension [3] - #asm and asm()> + + Many C compilers have an escape mechanism which allows the +programmer to specify a series of assembly language instructions within +a C program. KCC's means of doing this is with the "asm()" expression, +which looks exactly like a function call. + Currently only one argument is allowed to asm() and this must +be a string literal. The text of the string is simply passed directly to +the assembler output file at that point in the compilation. + There is also a preprocessor command called #asm, which converts +everything up to an #endasm into an asm() expression. This is convenient +for very long stretches of assembler code, or where the enclosed text +must be macro-expanded. + + Invoke %%CODE or %%DATA to switch between assembling pure and +impure (variable) code/data. #asm inclusions will always begin in the +code segment, and must always end in the code segment. Never use +%%CODE when already in the code segment, or %%DATA when already in the +data segment. + + Because asm() is syntactically an expression, it can only +appear where an expression is legal. However, any attempt to use it +anywhere but as the sole contents of a function body is highly fraught +with peril. If it is necessary to specify some assembler directives +separate from any function, an acceptable way of doing this is by +means of a static dummy function, such as: + static void + dummyfunct(){ + asm("%%DATA\n STUFF: ASCIZ/foo/ \n %%CODE\n"); + } + + It cannot be repeated too often that use of asm() is strongly +discouraged. 
It is possible that someday its functionality will be +extended to the point that KCC can parse and understand the contents +(thus, for example, references to C auto variables would be allowed); +however, this would primarily be for the purpose of allowing KCC to +generate .REL files directly rather than to encourage wider use of asm(). + + At the start of the assembler file, a PURGE is done of all the +assembler IF pseudos. Thus, assembler code cannot use any IF pseudo +tests, nor macros which use them. Incidentally, attempting to use a +SEARCH MONSYM will cause FAIL to barf several times with a "FAIL BUG +IN SEARCH" message, due to the lack of the IF pseudos; this is +annoying but harmless. MACRO does not have this problem. + +<2 Extension [4] - "_KCCtype_charN" data types> + + Normally the "char" data type is 9 bits. In the PDP-10 world +much existing software depends on 7-bit characters, and to make it +easier to write the necessary system-dependent code a 7-bit char data +type was introduced and generalized. The 5 possible char sizes (6, 7, +8, 9, and 18) were chosen because it is only for those sizes that +OWGBPs exist (one-word global byte pointers), and thus only those sizes +can be guaranteed to work when using extended addressing. + + Any of the char types can be signed or unsigned; if the plain +form is used, unsigned is assumed. Narrowing and widening is done +properly whatever the size. Note that the 18-bit size corresponds +to "short"; it is included mainly for completeness rather than in the +expectation that someone would actually use it. The 9-bit size +is the same as regular "char", unless the -x=ch7 option is in effect, +in which case "char" is the same as the 7-bit size. + + These types can normally be used just as for "char". However, +there are some special effects associated with certain operations: + (1) "sizeof" of a N-bit char array returns the number of N-bit + chars (elements) in the array. Usually this is what you + want. 
Giving this number to malloc will cause problems + only for chars of 18 bits. + (2) A cast (explicit or implicit) of a string literal to a + N-bit char pointer will cause the string literal to be + stored as N-bit bytes. This is NOT strict C, which would + merely convert the char pointer; however, this is the + most useful interpretation. This permits the somewhat + bizarre construct of using a string literal to make + an array of 18-bit bytes (this is the only aspect where + "_KCCtype_char18" differs from "short"). + (3) 6-bit string literals are stored as SIXBIT rather than using + the low 6 bits of the ASCII char values. Note that while + such strings are null-terminated, null is a valid + SIXBIT character (meaning space). The value of invalid + SIXBIT characters is undefined. + (4) Function parameters cannot be declared to have a type of + char size 7 or 8. The reason is complicated; see + the last part of this section. + +Some examples: + _KCCtype_char6 tmp[] = "tmp"; /* A 4-element array of SIXBIT chars */ + _KCCtype_char7 wd[5] = "word"; /* A 5-element array of 7-bit chars */ + _KCCtype_char8 packet[40]; /* A 40-element array of 8-bit chars */ + _KCCtype_char18 useless; /* Same as "unsigned short useless;" */ + _KCCtype_char7 *arg = "text"; /* A pointer to an ASCIZ string */ + _KCCtype_char6 *pt6; /* A pointer to a 6-bit char string */ + + arg = "othertext"; /* Implicit conversion to ASCIZ */ + pt6 = "dskdmp"; /* Implicit conversion to SIXBIT */ + pkg_call((_KCCtype_char7 *)"argtext"); /* Explicit cast to ASCIZ */ + +Portability issues: + + The long names for these types were deliberately chosen so as to +minimize the chances of possible conflict with identifiers in software +imported from elsewhere, and to discourage the indiscriminate (non-portable) +use of the types. Note that users who must make heavy use of them (for +good reasons, we hope) can simply use typedefs or #defines at the start +of their code in order to equate them with simpler names; e.g. 
+ + #define char7 _KCCtype_char7 /* Use shorter typename */ + + This method also has the advantage of localizing non-portable +constructs in a way that gives others a fighting chance to port the +software elsewhere by changing the initial definitions. + +Storage: + + There are a few aspects of the way N-bit char objects are stored +which may be surprising at first. Char arrays are always packed starting +with the leftmost byte in a word; however, single-char objects (such as +"char c;") have their value stored in the rightmost ALIGNED BYTE. + This is a necessary consequence of the fact that the '&' +operator applied to a char object must result in a valid char pointer, +and the very strong desire that all C code work with extended addressing. +There are only a few possible kinds of OWGBPs and they all require this +alignment. For 6, 9, and 18 bits this causes no difficulty since bytes +of those sizes completely fill a word, and there are no unused low-order +bits; thus char values may be stored completely right-justified, and in +some cases full-word operations can be performed on them. + However, for 7 and 8 bit bytes the rightmost byte will leave 1 +and 4 unused low-order bits, respectively, and this is where KCC +stores the values for such objects. Debuggers examining a program with +IDDT may be surprised that "_KCCtype_char8 foo = 1;" results in a +word labelled FOO with its value 020 instead of 1. + This alignment restriction causes no real problems except for +the obscure case of function parameter declarations. In the absence +of ANSI function prototypes, the default "function argument +promotions" are performed when a call is made; all integers shorter +than (int) are converted to (int) and passed as such.
But this means +that the integer value is right-justified; if the function parameter +was declared to match the promoted type (int) then all is well, but +attempts to declare it as a 7 or 8 bit char will just result in a +confused function (attempts to read the parameter value or take its +address will fail since the value is not properly aligned). This +could be fixed by having KCC do an implicit conversion upon function +entry, but it is far simpler and much, much more efficient to simply declare +such parameters as (int) in the first place. + If the code will never be run on a KL then, of course, this and +many other things could be simplified. + +<1 KCC Internals> +<2 KCC Internals - Memory organization> + + A C program compiled by KCC has four distinct memory regions: +data, text (code), stack, and free. + DATA - This contains all user-declared data variables, both + initialized (set to user's specification) and + un-initialized (set to zero). + The first address following this region is stored in "_edata". + TEXT - This is the UNIX terminology for program code. + The first address following this region is stored in "_etext". + STACK - The program stack. This grows upwards in memory. + FREE - The region of memory that malloc() can dynamically allocate. + This starts at the address stored in "_end" and can allocate + memory up to (but not including) the address stored in + "_ealloc". + +In addition, there may be small unused areas of memory. + +The normal layout on TOPS-20 for a single-section program: + + Start addr End addr Region Name + LOW _edata-1 DATA + _edata STACK + HIGH-1 - (unused) + HIGH _etext-1 TEXT + _etext _ealloc-1 FREE + _ealloc 777777 - (unused, reserved) + +Normally LOW == 0 and HIGH == 400000. These correspond to the normal +addresses for low and high segments. Also, normally _ealloc is set to +770000, so that pages 770-777 can be reserved for mapping DDT (some people +seem to prefer that to IDDT). 
+ +The normal layout on TOPS-20 for a MULTI-section program: + Start End Region Name + Section 0 - (unused) + Section 1 + 1,,LOW _edata-1 DATA + _edata 1,,HIGH-1 - (unused) + 1,,HIGH _etext-1 TEXT + _etext 1,,777777 - (unused) + Section 2 + 2,,0 STACK + 2,,777777 - (unused) + Sections 3-37 + 3,,0 _ealloc-1 FREE (all sections up to 37) + _ealloc 37,777777 - (unused, reserved) + +Normally _ealloc is set to 37,,700000 so that pages 700-777 of section 37 +are reserved for mapping XDDT (again, for those people who don't know about +IDDT). + +<2 KCC Internals - Stack structure> + +The organization of the portion of the stack seen by a C routine is +shown in the following diagram (with the top of the stack being the +earlier lines in this file, and the stack pointer at the very top): + +SP-->________________________________________________________________ + | Spilled registers | + | generated when we need more intermediate values than | + | there are available PDP-10 registers | + |________________________________________________________________| + | | | + | (as many | Arguments being stacked for the next call | + | repetitions | These are generated in the reverse of | + | of these | lexical order; thus the first argument | + | two areas | appears at the top of the stack. This is | + | as levels | so that functions like printf which take a | + | of nesting | variable number of arguments can work. | + | in function |__________________________________________________| + | calls) | | + | | Values to be saved over the call | + | | e.g. if we do foo()+bar() then one function | + | | has to be called first, and we save its | + | | value here so we can add it to the other | + | | result once the second call returns | + |_____________|__________________________________________________| + | | + | Local variables | + | stored in lexical order, i.e. 
the first declared | + | variable is lowest on the stack | + |________________________________________________________________| + | | + | Return address for calling function | + |________________________________________________________________| + | Pointer for return value | + | this only exists if the function returns a struct | + | that takes more than two words; otherwise the result | + | is returned in registers 1 and (if two words) 2 | + |________________________________________________________________| + | | + | Arguments to this call | + | in reverse lexical order as described above | + |________________________________________________________________| + +Of course, not all of these areas are likely to appear at once. +There is no frame pointer, only a stack pointer; generated code always +knows the location of the stack pointer in relation to changes in the +above structure (as arguments get pushed and popped, registers get +spilled and despilled, etc). Thus code to access an argument or local variable +will use a different offset from the stack pointer depending on where +it is generated. + +<2 KCC Internals - Calling conventions and register use> + + Arguments to KCC C functions are passed on the stack and +returned in the registers. Functions are not expected to save +any registers upon entry, and in fact are assumed to clobber all +of ACs 1-16 inclusive. + +Caller conventions - argument passing: + + Since all function calls are assumed to clobber the registers, +it is up to the caller to save on the stack any register values which +it wishes to preserve over the function call. + As described in the section on stack structure, function +arguments are then pushed in reverse order onto the stack; the last +argument is pushed first, and the first argument is pushed last. +Passing a structure as argument consists of copying it whole onto the +stack. 
If the function is expected to return a structure or union +longer than two words, a "zeroth arg" must also be pushed, which is +the address of a location that the function should copy the returned +structure into. The function is then called with a PUSHJ 17, +instruction which adds the return address onto the stack. + +Caller conventions - result returning: + All accumulators (except AC17) are at the callee's disposal. +However, AC0 is never used by generated code, as some old programs +assume NULL always points to zero, and as the hardware imposes several +restrictions on its use. AC15 and AC16 are also reserved for minor +KCC runtime functions. + Single word function return values are left in AC1; double +word returns go in AC1 and AC2. Return values larger than that are +copied into the location specified by the struct-return pointer, which +is provided by the caller as the "zeroth" argument. + +<2 KCC Internals - Extended addressing> + + A C program can be run in an extended section by specifying +this in either of two ways at load time, depending on whether you are +using KCC or the EXEC to do the loading. + + (a) KCC: Use the "-i" switch. + e.g. @cc -i prog.c + (b) LOAD (or LINK): The first module should be C:LIBCKX. + e.g. @load c:libckx,prog + +No special switches need be given to KCC for the generated code to be +suitable for extended addressing - the same code will always run +either extended or non-extended. + + In extended sections, code and permanently allocated data +(i.e. global variables) live in section N, the stack lives in section +N+1, and allocated memory begins in section N+2, expanding to fill all +higher sections. Normally N==1; this can be changed if really +necessary. All byte pointers not intended for immediate use (e.g. +literal arguments to a LDB or DPB instruction) are constructed as +OWGBPs (One-Word Global Byte Pointer). + +<1 Cross-compiling> + +The -x, -L, -H, and -A switches allow some degree of cross-compilation. 
+The effects of the various -x specifications are listed below: + +CPU: ka, ki, ks, kl0, klx + KCC can compile code to run on any CPU type; this is done both +by means of different code generation sequences and by assembler +macros which KCC also generates as needed. "ka" specifies a KA-10 +using software format floating point doubles (all other types use +hardware format). "ki" specifies a KI-10, and "ks" both a KS-10 and a +KL-10A without extended addressing. "kl0" specifies a KL-10B capable +of extended addressing, but restricts the code to section 0; "klx" +specifies a KL-10B non-zero section environment. + + It is possible to specify more than one CPU type; the intent +is to allow for producing code that will run on all specified +machines. As distributed, KCC code is compiled for "ks+kl0+klx". +However, the results of other combinations are somewhat unpredictable +and should be avoided at the moment. + +SYSTEM: tops20, tenex, tops10, waits, its + + Currently there are only two things affected by this setting: +character and string constant values, and ERJMP. + [1] If compiling for WAITS (or for anything else if on WAITS), + character values are mapped to and from WAITS ASCII and standard US + ASCII. + [2] If compiling for TOPS20 or TENEX, the proper value of + ERJMP and an auxiliary definition called ERJMPA are generated. +There may be more distinctions in the future. + + +ASSEMBLER: fail, macro, midas + + The assembler selection is independent of the system or CPU. +Currently either FAIL or MACRO can be selected and both will work. +Selecting MIDAS does not yet work completely. + + +CHARSIZE: ch7 + + It is possible to request that KCC generate code which assumes +that chars are 7 bits, and char pointers are 7-bit byte pointers. +Thus, arrays of chars will have 5 chars per word, instead of 4. This +feature, invoked by the "-x=ch7" switch, is mainly of use to people +who must integrate C code with old software that cannot deal with +anything but 7-bit bytes.
It is not really guaranteed to work in all +conceivable cases. In particular, you should be aware that many of +the normally-compiled library routines (such as malloc) will continue +to return 9-bit char pointers, although the str- and mem- functions +should work with either 9-bit or 7-bit strings. + The values returned by "sizeof" will not change. As explained +in the discussion of the sizeof operator, sizes are always in terms of +9-bit bytes, except that the size of a char array is always the number of +elements (chars) in the array. sizeof(char) is always 1. + +General comments: + Ideally KCC (on any system) should be able to generate code +for any other PDP-10 system. To actually do this requires some +understanding of how the various parts of a program come together. It +is not enough just to specify some -x switches; you must take care of +the following: + + 1. #include files. You may need to use an alternate standard + include-file directory to satisfy <>-type includes. -H can be + used to specify an alternate location. + + 2. Switches. You should use -D to predefine any parameters + from <c-env.h> which are not properly defaulted. + Alternatively you can put a different version of c-env.h in + a non-standard location pointed to by -H (as above). + + 3. Library. The C runtime library loaded with the program must + be the correct one (already cross-compiled for the target). KCC + always generates a default "-lc" request for the C runtime library; + the location searched for this can be specified by the -L switch. + +For details on porting the C library and KCC itself, see the file PORT.DOC +in the KCC source directory. + +<1 Char Pointer Hints> + + The code generated for handling char pointers always uses +byte-pointer instructions, and so will work for any byte size (at +least on machines implementing the ADJBP instruction). This can +sometimes be useful when dealing with PDP-10 based data structures.
+However, such pointers have to be constructed "by hand" since all char +pointers that KCC generates are either 9-bit or 7-bit. See also the +-x=ch7 option in "Cross-compiling". + + In general, when char pointers are involved, constructs like +*++ptr are faster than *ptr++. This is because *++ptr can usually be +folded by the optimizer into an ILDB (or IDPB) instruction. There is +no equivalent on the PDP-10 to a *ptr++ construct; this must always +be done as at least two instructions. + + Whenever possible, try to avoid using two char pointers in +subtraction, as in (ptr1-ptr2). Many instructions have to be executed +to find the difference between two char pointers, due to the strange +internal format. For the same reason, try to avoid less-than (<, <=) +or greater-than (>, >=) comparison of char pointers. Tests for +equality (== and !=) are fine, however. Finally, on machines which do +not implement the ADJBP instruction (KA, KI), it is also helpful to +avoid addition or subtraction of integers to char pointers. + + None of this applies to other types of pointers, such as (int *), +which are simple addresses and can be manipulated very efficiently. + +<1 Portable Math Library> +* Menu: +* PML: (KCC-PML) Portable Math Library +<1 Local library additions> +* Menu: +* LIBLCL: (KCC-LIBLCL) Local library additions +* LIBT20: (KCC-LIBT20) Frank Wancho's TOPS-20 library diff --git a/doc/kcc/coding.doc b/doc/kcc/coding.doc new file mode 100755 index 000000000..209f527f7 --- /dev/null +++ b/doc/kcc/coding.doc @@ -0,0 +1,123 @@ + KCC Runtime Library Coding + + This file is oriented towards KCC implementors and describes +some general rules for writing C library functions, along with a more +detailed explanation of certain crucial files in the library. + + The source for all library routines is kept in <xx.LIB*> where +"xx" depends on your system; on the distribution it is usually something +like "KCC-4".
Since that doesn't matter here, it will be left out of +any filenames mentioned. + + <.LIB*> Runtime library sources: Documented in: + <.LIB> General-purpose library routines. (LIBC.DOC) + <.LIB.STDIO> Standard I/O package routines. (LIBC.DOC) + <.LIB.MATH> Math library routines. (LIBC.DOC) + <.LIB.USYS> Unix simulation routines. (USYS.DOC) + <.LIB.NETWORK> A stab at a couple BSD net routines. (LIBC.DOC) + <.LIB.PML> Unused "portable math library" routines. + <.LIB.TEST> Unused testing routines. + +Guidelines for writing C library modules: + + ALWAYS #include "c-env.h" before anything else, unless the +code is truly portable and completely in C. Note this is "c-env.h", +not <c-env.h>, so that it is easy to test variations. + + All source files for KCC and the LIBC routines should refer to +their header files with "" instead of <>, so that different versions +of the .H files can be tested easily in the source directory before +installation in the system-wide standard location. Normal compilation +can simply point to the standard include-file location with the -I +switch. However, note that user programs, unlike the library sources, +should always use <>. + + Keep all versions of a function together in the same file, +rather than having a different file for each version depending on the +system. Just conditionalize any system-dependent code appropriately. + + If trying to assemble for an unsupported system or CPU, invoke +the #error preprocessor command to cause a compilation error with an +appropriate message. Alternatively, if it is important that a +function symbol exist even though it is unsupported, you can code for +a function that always fails with an error printout to stderr. + + In general each specific library function should have its own +individual file, although several functions can be collected in the +same file if they are really tightly bound together.
Any routine that +needs to use a system call should consider using #asm for efficiency, +instead of the jsys() or syscal() functions. (This is not normally +recommended, but since library routines are heavily used and rarely +modified, it's somewhat more acceptable.) + + +ASSEMBLER HACKERS: + .FAI or .MAC source modules should never exist. Functions which +require assembly code should use the #asm or asm() feature of KCC. +See the KCC user documentation for details on this. + Use $ or % for runtime-only externals, switches, and macros, +to avoid possible conflict with C symbols. Remember "_" in a C symbol is +equivalent to "." in an assembler symbol. + The C runtime support has the following symbol conventions: + $$$xxx Major module entry name. + $$xxxx Global symbol value, internal to runtimes. + $xxxxx Global label, internal to runtimes. + %xxxxx Macro-type instruction. + %%xxxx Miscellaneous macro function. + +USYS routines: + If writing a UNIX system-call simulation routine (which should +go into <.USYS>), there are additional things to be careful of, and the +file USYS.DOC should be consulted. + +IMPORTANT FILES: + +C-ENV.H - C Environment defs. Contains all system/CPU configuration defs. + Copy kept in standard include dir. + + Should be included by every LIBC routine which has any system +or environment dependencies. Different versions of this file exist +for different systems/machines/configurations. Can also be included +by user programs. IFNDEFs allow testing temporary changes by using +the -D switch, although this should be done in conjunction with the +appropriate -x= switches. + + +CPU.C - Load-time CPU definitions for target machine, determined by C-ENV.H. + Entry point: $$$CPU (a dummy) + + This module is part of the C library and defines the values for +several symbols which depend on the specific processor that the program +is being loaded for. 
The primary usefulness of this module is that it +allows the user to defer until load time the decision of whether to build +a program for extended or non-extended operation. It also sets the right +symbols to ensure that all loaded modules are compatible with the CPU type +being loaded for. + +CRT.C - Standard C Run Time support for KCC. + Entry point: $$$CRT (a dummy) + Globals: $START Startup + __EXIT Exit + _END,_ETEXT,_EDATA,_EALLOC Hack: Unix simul (shd move) + $RET,$RETF,$RETZ,$RETT,$RETP,$RETN Return points + $ZERO Handy zero constant + $ADJBP ADJBP simulation + $Bxxxx Various byte-pointer tables + All C programs start here. CRT is responsible for setting up +the C environment so that C code can execute properly. It specifically +does not do anything about setting up a UNIX environment; that is up to +whatever it is loaded with. When the C environment is ready, it calls +the routine _runtm() which should perform any remaining setup and then +call main(). Normally this routine is found in URT.C, the Unix-simulation +Run Time module. + + +URT.C - UNIX-simulation Run Time support (in <.LIB.USYS>) + Entry points: _RUNTM EXIT .EXIT ERRNO + + This module is written in C, and is called by CRT once the C +environment is set up. URT then sets up the UNIX simulation +environment, parsing the command line and setting up stdin, stdout, +and stderr. When ready, it calls the user's main() function with the +parsed command line as a normal "argc,argv" array of char pointers. + diff --git a/doc/kcc/codsig.doc b/doc/kcc/codsig.doc new file mode 100755 index 000000000..e8e3a30f2 --- /dev/null +++ b/doc/kcc/codsig.doc @@ -0,0 +1,469 @@ + UN*X SIGNAL SIMULATION - IMPLEMENTATION NOTES + + This file documents some of the unholy and intimate details of +how the KCC USYS library routines simulate the Un*x signal mechanism. +It does not explain everything. 
To seriously understand what is going +on, you need to read the exhaustive comments in the source files +(SIGVEC.C and <signal.h>) as well as the user-oriented file +SIGNAL.DOC. + + +KCC Implementation: + + We try to implement 4.3BSD, since with that the older, cruder +mechanisms can be emulated also. + + ITS has a very powerful software interrupt system and can +readily handle the BSD scheme. T20 has more trouble. T10 as far as +known is simply out of it altogether. This document is primarily about +the T20 implementation (the only one currently existing). + +First, a note on what "a/synchronous" means: + + A SYNCHRONOUS interrupt is one that happens at a specific PC +due to the instruction at that location. Typical examples: illegal +instruction interrupts (which can include JSYS calls), floating-point +exceptions. For these types of interrupts the PC is significant and +it or the contents it points to may need to be checked to determine +what to do, because simply continuing the process at that PC will +very likely just generate another such interrupt. + An ASYNCHRONOUS interrupt is one that may happen at ANY time, +regardless of place; these are generated by events external to the +program. Typical examples: TTY input interrupts, timer interrupts. +For these, the PC is unimportant except that it should be preserved +and restored if the interrupt handler wishes to continue whatever was +interrupted. + + No UN*X C signal handler has the capability of returning from +handling a synchronous interrupt. In fact there is no mechanism +provided for a signal handler to find out what its return PC is. +(it's possible, with trickery, but I've never seen an example). +4.3BSD (as opposed to 4.2 or any other Un*x) now makes this possible +by providing the handler with a pointer to a saved-context structure! + + Note that some signal handlers return to normal code by +means of longjmp(); this is particularly true for alarm() handlers.
+ANSI specifies that longjmp should restore the environment properly +even from within a signal handler, but is not required to do anything +meaningful if called from a nested signal handler. KCC supports this +use of longjmp(). + +T20 Problems: + + (1) T20 PSIs have 3 priority levels. While interrupting at +one level, no other interrupts at that level can be serviced (although +they will be deferred). This is unlike UN*X where signals and their +handlers are all completely independent of each other. + + (2) The T20 return address may be a "monitor address" which +indicates that a JSYS was interrupted before it completed. This can +only be continued by the first DEBRK at that interrupt level; anything +else aborts that JSYS, with unknown consequences. + To clarify: if the return PC does not have the user-mode bit set, +then it was interrupted out of a JSYS. The PC address however refers to +the user address space, and points to the JSYS return location (that is, +one greater than the location of the JSYS call.) As far as I can tell +from examination of the monitor code, a JSYS which is interrupted just +as it is about to return will have already changed the PC to user mode, +so the fact of a monitor-mode PC should always imply that there is still +something left which the JSYS hasn't yet finished doing. Unfortunately, +the fact that IIC% itself interrupts with a monitor-mode PC implies that +this analysis is bogus and that the PSI system is even more losing than +could possibly be expected. + + Doing a DEBRK back to this place will complete the JSYS. +Doing a DEBRK anywhere else (or turning on the user-mode bit) will +abort it and the monitor forgets the saved context. It isn't clear +whether re-starting the JSYS, by backing up the PC by one, will do the +right things; the ACs may not have been properly adjusted. Given the +usual history of T20/10X, they probably haven't. This is the biggest +problem, but can be coped with if user code is careful. I think. 
+ IIC% should NOT be restarted! SIN% and probably SOUT% appear +to update their ACs and can be restarted. + + It is possible to execute a signal handler entirely within an +interrupt level even if the handler uses longjmp(), if longjmp conspires +to do a DEBRK when jumping back to the previous context. So the main +problem with handling interrupts in this way is the lock-out of other +interrupts. + +FINAL PLAN: + + Key aspect: all T20 PSI interrupts are handled IMMEDIATELY + and DEBRK'd as soon as possible. The time spent at interrupt + level is absolutely minimal, and no signal handlers will + be called at interrupt level unless specifically (and non-portably) + requested. + + The UN*X signal mechanism is primarily implemented by + the PSI handling code, including the BSD signal block mask, rather + than by trying to use the monitor's PSI Jsys calls. + + No USYS (Un*x System call simulation) routine should be interrupted. + If an internal JSYS is interrupted then that system call should + return -1 with errno set to EINTR. Otherwise things run through + to completion and the signal takes effect just before returning + a normal return value. + +************************************************************************ + NOTE!!!!! THE DETAILS OUTLINED IN THE REST OF THIS FILE ARE NOT + NECESSARILY COMPLETE!!! Read the sources also, namely: + SIGVEC.C - main source, PSI handlers + SIGDAT.C - additional important source + JSYS.C - jsys() function +************************************************************************ + +Global variables: + +int _sigusys; /* Positive if within critical USYS code */ +unsigned int _sigpendmask; /* 36-bit mask of pending signals */ + /* PSI can add to this during _sigusys. */ +unsigned int _sigblockmask; /* 36-bit mask saying which signals to block */ + /* PSI cannot change this during _sigusys. 
*/ + + +BSD signal block mask: + This is implemented softwarily, by having the +PSI code check the mask itself and so forth, thus implementing +whatever notion of blocking is needed, rather than trying to defer the +actual PSIs themselves. This doesn't actually turn off the PSI +channels, and it is in theory possible there might be runaway +interrupts, but this seems unlikely. It cannot happen with +synchronous interrupts, and the asynchronous ones all seem fairly +sporadic. Installing the default action or no handler will still +result in turning off the relevant PSI channel. + +If a PSI is triggered for a signal which is currently handled by +SIG_DFL (default) or SIG_IGN (ignored) then the actions are +straightforward. Actually no PSI should ever arrive for a SIG_DFL +handler. + + If a PSI comes in for which a user signal handler is defined, +there are four cases that must be checked for: + (1) USYS code, user-mode PC - Simple deferred case. + (2) USYS code, JSYS-mode PC - Complex but all planned for. + (3) User code, user-mode PC - Simple immediate case. + (4) User code, JSYS-mode PC - Complex and unpredictable; worst!! + +In general, user-mode PCs are simple. For JSYS-mode PCs things are +more complex. In fact, much of the hair in jsys() is due to the need +to coordinate its activities with those of the PSI handling system. +There is one special case: an illegal instruction interrupt (.ICILI) +is ALWAYS delivered with the PC claiming JSYS mode, regardless of +whether the offending instruction was a failing JSYS or a bad word +value. To handle this screw-up, the PSI handler checks +.ICILI interrupts specially and turns back on the user-mode PC bit if +the bad instruction was not a JSYS. This simplifies things for the +rest of the code. + +(1) USYS code, user-mode PC - Simple deferred case. + If we are in USYS code we cannot call a handler. 
What we do +is set the signal's bit in _sigpendmask (for pending ints) and do a +simple DEBRK right back to the place interrupted from, so the USYS +code can run on to completion. Special checking is required for +synchronous interrupts, since those usually cannot be continued. +If the interrupt is .ICAOV or .ICFOV we can simply continue. Otherwise +we must fail with an error message. + +(2) USYS code, JSYS-mode PC - Complex but all planned for. + Again, synchronous interrupts are a problem. For a JSYS-mode +synchronous interrupt we can only continue if the PC was within the +jsys() routine, in which case we can cause the jsys() call to abort +(whether or not it was marked as interruptible). Otherwise, our only +recourse is to halt the process with an error message. + The important thing about asynchronous interrupts is that +because the code interrupted from was USYS code, we can always be +assured that whatever form the JSYS invocation took, both the USYS +code and the PSI code agree on how the interrupt will be handled. + The pending signal bit is always set; no handler is ever invoked +from within USYS code. Since we always do an immediate DEBRK%, we +always have the option of either fully continuing a JSYS or of aborting it. + The USYS code has four different ways of invoking a JSYS: + Asynch Synch + (a) in-line with ERJMP Continue + (b) in-line without ERJMP Continue fail with err msg + (c) jsys() with int flag do intret + (d) jsys() without flag Continue + +The interesting case here is (c) where an asynchronous interrupt +causes the PSI code to turn on the user-mode bit and then DEBRK% to a +specific interrupt return location within jsys(). Otherwise, execution +merely continues. + A synchronous interrupt within jsys() is very unlikely, +because jsys() always has an ERJMP following a jsys invocation; also, +any reasonable USYS assembler code should have ERJMP to handle +expected errors. But if one happens, then: + b: halt process with error message.
+ a: should not happen, but halt with error msg anyway. + c,d: fail with an "interrupted" return value. + This is also not supposed to happen, but a reading of the + T20 page fault code seems to indicate it is possible for a + PSI to happen regardless of whether an ERJMP exists. + +(3) User code, user-mode PC - Simple immediate case. + This is the simple, straightforward case. +We simply DEBRK to the signal handler, after saving all stuff on stack +such that if the handler returns, control returns to interrupted point. +Note that for synchronous interrupts that have the CF_OPC flag, the saved +PC is that of the guilty instruction, so that continuation will simply +generate another interrupt if the handler failed to remedy the problem! + +(4) User code, JSYS-mode PC + This is the biggest problem. + Would be nice to only DEBRK when signal handler returns, cuz then +the JSYS could be continued. But having the priority level stuck will +prevent other signals from being handled, and longjmp() would have to +test for needing to hack DEBRK% itself. If at all possible we want +to avoid running signal handlers at interrupt level. + So we evidently have to do the same thing as for a user-mode +PC, and just call the signal handler with the DEBRK%. + Users can use the interruptable flag bit for jsys() calls, and this +will work. Otherwise, if there was any way of knowing whether it +was safe to re-start a JSYS then we could back up the PC to do this, +but as far as is known there isn't any guaranteed method. So "random" +JSYS calls that don't go through the jsys() facility are subject to +being aborted without any error indication. + +Continuation actions for user code, JSYS-mode PC: + Asynch Synch +(a) in-line with ERJMP Restart +(b) in-line without ERJMP Restart Restart +(c) jsys() with int flag do intret +(d) jsys() without flag Restart + +There is one exception to the "Restart" action: if the JSYS was IIC% then +we simply "continue" (at the next instruction). 
+ +USYS interrupt handling: + + We need some macros or functions, one to suspend interrupts at +the start of critical USYS code, and another to reactivate them once +the critical code is done. May also need one which checks for pending +interrupts. Hard part is how to handle an interruptible invocation of +jsys() such that no signal gets missed between the test for pending +signals and actual execution of the JSYS. The best way seems to be +for the PSI code to know about the address range of critical code in +jsys(). + +USYS_BEG(); - Suspends all signals; used by USYS code to tell + PSI stuff that a usys call is being executed. + #define USYS_BEG() (++_sigusys) + +USYS_END(); - Reactivates all signals; used by USYS code to tell + PSI stuff that it's OK to handle signals now, and checks to + see if there are any pending signals that should be handled. + NOTE: the signal suspension is lifted BEFORE we test for pending + signals; if a signal happens after the lift but before the + _sigpendmask test, the worst that happens is we call + _sigtrigpend() unnecessarily. + If the ordering was reversed then we might + get a signal after the _sigpendmask test but before lifting + the suspension; that signal would never be serviced. + #define USYS_END() { if (--_sigusys <= 0 && _sigpendmask) \ + _sigtrigpend(); } + +USYS_RET(val); - Reactivates all signals; used by USYS code to tell + PSI stuff that a usys call is finished and about to return. + Also must check for pending signals and dispatch to handler if any. + NOTE: the return value is computed into a temporary location + so it cannot change regardless of what a signal handler does. + #define USYS_RET(val) { int tmpret = (val); \ + USYS_END(); \ + return tmpret; } + +USYS_INTRET(-1); - same as above, when USYS code knows that a jsys() has + been interrupted and is returning for this reason. No test + of _sigpendmask is needed. 
We set errno last in an attempt to avoid + having its value changed by the signal handler; of course another + signal could happen anytime after errno is set and before it is + checked by the user program, but that is a problem with this + stupid mechanism on UN*X too. + #define USYS_INTRET(val) { --_sigusys; _sigtrigpend(); \ + errno = EINTR; \ + return val; } /* Should always be -1 */ + +USYS_SIGTST() - Tests for pending signals. Needed? + #define USYS_SIGTST() (_sigpendmask) + +USYS code cautions: + The USYS routines need to be careful to always invoke the +signal macros appropriately. But a more important (and more subtle) +problem is that they also have to be careful of calling on other +library routines! + UN*X code assumes, of course, that a system call is executed +entirely within the kernel, and has no effect on anything in the process +address space other than things explicitly requested by the call. This +means that our USYS code must have the same non-effect; the routines +can only call those library functions which are fully re-entrant and +change no static data. + The problem is worse for those USYS calls invoked by a signal +handler. In particular, if any USYS routine calls malloc() then it +runs the risk of enormous unexpected screwage if the main user code +happened to be in the middle of a malloc call! Of course the user's +signal handler itself could call malloc() with similar bad consequences, but +in this case the user has control over what is going on, and presumably +knows what is going on. + +PSI code: + + All signal PSIs happen at level 2. If we are so unfortunate as to +get a panic PSI within the PSI code, this will cause process +termination, but the PSI code will be extremely minimal and should not +cause any problems. + Levels 1 and 3 can be reserved for the user, thus giving +flexibility of using a level either higher or lower than the Un*x +signal facility; some mechanism would have to be figured out, though +(see next page). 
+ +When a PSI interrupt happens: + Monitor saves PC in predefined table, jumps to PSI handler. + Save an AC or two for checking. + If SIG_DFL (default) was in effect, we shouldn't be getting this + PSI at all. + If SIG_IGN was in effect and PSI was enabled for this signal + then the PSI handler will be a do-nothing that just + DEBRK%s immediately. + Otherwise, we must have a handler of some type. "psisig" is the + location of the PSI handler for such signals. It distinguishes + between interrupts from USYS code and user code, as well as + whether the PC is a monitor (JSYS) or user-mode PC. + If the handler is invoked, this is done with a DEBRK%. + +More on extensions to sigvec(): + + For additional flexibility, the "sigvec" structure can be extended +to include additional parameters. Existence of a new bit in sv_flags +would indicate that the additional structure members are significant. + +The things we'd like to be able to specify, independently of each other: + SV_XINTLEV: ON If handler should run at interrupt level. + SV_XASMHAN: ON If handler is special assembler routine (ACs not saved, + no args given). Otherwise, normal C handler. + SV_XOS: ON If the OS structure should be checked for: + (1) Exact PSI channel # to use for this signal (0 = existing). + (2) What PSI level to use (0 = existing). + (3) .TICxx code (plus 1) to ATI% to this channel (0 = none). + +Some of those are interdependent: + Specification of a positive .TIC code always replaces any existing +code by the new one, and use of -1 always clears any existing code. If +the value is 0, however, then the meaning depends on whether a channel # +was specified. If the channel # was given, 0 is the same as -1. Otherwise, +if no channel # was given, then 0 means leave any existing code alone. + If the handler is an assembly routine, then it MUST run at interrupt +level. Thus, it is an error to specify SV_XASMHAN without SV_XINTLEV. + + Currently only SV_XINTLEV is implemented. 
It should work to use +longjmp() within handlers called with this flag! + +Fixing up jsys(): + + The jsys() function has been fixed up so that it understands +which calls take what kinds of returns, and can guarantee the +following return values: + -1: interrupted + 0: error (return error code in acs[0]) + 1,2,3: success, returned at that location. + +The calls SIBE, SOBE, SOBF are weird. Not clear how to distinguish +an error return from a "normal" +1 or +2 return. Same applies to SKPIR. +GFRKH and GFRKS have both +1, +2, and Illeg-Instr PSIs. + +Last remaining ambiguities are for the +3 returns, of which only 3 +exist: ERSTR, GACTF, and STDIR (10X only). STDIR and GACTF are +simple as only the successes need to be distinguished. ERSTR is hard as +there are two different error returns. + STDIR (10X only) - all 3 returns counted as success. Error happens + if illegal instruction interrupt. Can distinguish success + returns by value 1,2,3. + GACTF - Returns 2 or 3 for success, 0 for failure (+1 return). + ERSTR - We'll have to fudge this one: + Return 0 for interrupt or +1 failure return. + Return 2 for the +2 failure return. + Return 3 for the +3 full winnage return. + +So, for those three calls, the caller has to check the exact return value +and cannot just assume a positive return value means winnage. + + The jsys() code needs to distinguish skipping from +non-skipping calls in order to return the right stuff. + + Normally skips Non-skip + JSYS JSYS + ERJMP ret0 ERJMP ret0 + JRST ret2 JRST ret1 + JRST ret3 + +For 10X the normally-skipped ERJMP would have to be replaced by a +ERJMPA ret0, and the PSI handler made to know about this. Different +code sequences may be needed for the weirdos like SIBE/SOBE/SOBF. + +Rather than keeping a global table indexed by JSYS value, it would be +possible to have the jsys-number argument to jsys() carry some flags +with it in the high-order bits which indicated what sort of jsys it +was, or what actions jsys() should carry out. 
The jsys-name macro +definitions in can include those flags. This also makes it +easy for user programs to examine the flags as well, and they can be +used in #if preprocessor tests. This would involve putting the table +into UNV.C or whatever program is used to generate the type +files from MONSYM.UNV. + +JSYS-related PSI handling: + + Some special handling is needed for interrupts from JSYS calls +and from within jsys(). This is discussed in more detail in a previous +part of this documentation. + +_SIGTRIGPEND discussion: + Once _sigusys is turned off, any signals which arrived during the +call execution (but were suspended because _sigusys was on) need to be +triggered, and this is the function of _sigtrigpend(). + If user handlers never expect to run at interrupt level then we +could "trigger" these signals entirely within our own software, without +fiddling with the T20 PSI system. However, to be most general it is best +if actual PSIs are triggered -- this can be done with IIC%. + While we are checking the pending signal mask, we have to be +careful, because if an asynchronous signal happens just after the code +has decided to trigger that signal, but before doing so, then +the handler could be called twice. We need to ensure that incoming +PSIs/signals are not serviced for the signals in _sigpendmask, even though +_sigusys must be off. + One straightforward method of solving this is to use DIR% to +temporarily disable the PSI system: + DIR% ; Suspend PSIs + + IIC% ; Initiate PSIs on chans + EIR% ; Allow PSIs to take effect + + However, the overhead of additional monitor calls is a little +annoying. We could also solve this in our software by having the PSI +code NOT service an interrupt if _sigpendmask already indicates a +signal instance is outstanding. 
Thus, _sigtrigpend() can proceed to +trigger signals on just those bits in _sigpendmask without fear that +an asynchronous interrupt will wrest away control in the middle; if a PSI +does happen for one of those bits, it will see that it is already set in +the mask and will simply DEBRK% right back. + Note that the only place that _sigpendmask bits are turned off +is within the PSI code just before jumping to a handler. + There's a BUG in this scheme, though: how does the signal handler +know when an interrupt on an already outstanding signal should be ignored, +and when it should be handled? _sigtrigpend() is doing an IIC% which +causes an interrupt indistinguishable from the real thing! So, to patch +this up, the PSI code needs to check the PC and see whether it's at the +IIC% in _sigtrigpend(); if it is, then it's OK to handle the signal. + + A third solution might be possible such that IIC% is +unnecessary too. If the handler for a signal happens to want +interrupt-level processing (as this would be a special feature, most +handlers won't) then IIC% could be used anyway. This seems unlikely +however since at SOME point PSIs need to be turned off so that +bits can be tested and set without fear of having them changed out from +under, and normally it's the PSI code itself that has this security because +it is running at interrupt level. After all, we need to turn the bit in +_sigpendmask off just before jumping to the handler. + Another idea: don't worry about calling it twice, after all +the interrupt did happen at least twice? + Bad thought: not good to permit interrupts during sigtrigpend +because some of them might want to change things about the signal system, +by calling sigsetmask(), sigvec(), etc! Much safer to just prohibit +any handler calls whatsoever. 
diff --git a/doc/kcc/codsys.doc b/doc/kcc/codsys.doc new file mode 100755 index 000000000..5eab55d76 --- /dev/null +++ b/doc/kcc/codsys.doc @@ -0,0 +1,59 @@ + CODSYS.DOC - Information on writing USYS routines. + +Writing USYS routines: + + USYS Header Files + + T20 Defs for USYS I/O routines. + T20 Defs for USYS Signal stuff. + T20 Defs for USYS TTY stuff. + + In order to implement signal handling correctly, we have to ensure +that no USYS routine has control wrested away from it, leaving internal +data in an inconsistent state. For this reason certain rules +must be followed: + +[1] At the START of a USYS routine, invoke the macro USYS_BEG(). + This and other USYS_ macros are defined in . + +[2] When RETURNING from a USYS routine, invoke one of the following macros: + USYS_RET(val); Return a local variable or constant value. + USYS_RETVOLATILE(val); Return a volatile (global or function) value. + USYS_RETERR(err); Return -1, setting errno to "err". + USYS_RETINT(); Return -1, setting errno to EINTR. + +Each of the USYS_RETxxx macros serves as a "return" statement, and +each takes care to process any pending signals. If a signal occurred +during the USYS routine, it is deferred until the USYS_RETxxx happens, +and is then handled just before returning to the user's program. The +reason for distinguishing RETVOLATILE from simple RET is because the +former case must save the return value in a temporary local-scope +location while any pending interrupts are processed. Otherwise, by the +time the actual return took place, the return value may have changed from +what it should have been! + +Non-interference: + USYS routines are assumed not to interfere in any way with +non-USYS library functions. Thus, it is very, VERY bad for a USYS +routine to call a C library function which changes static data; the +user program will not be expecting this. Note that signal handlers +must be able to execute system calls, and any normal C library +function could be interrupted. 
malloc(), in particular, cannot be +used by a USYS routine for this reason! + +JSYS calls: + When making a JSYS call, "fast" and "slow" calls need to be +distinguished. "fast" calls are those which never block or hang, and +for which no asynchronous PSI interrupts are expected to happen; +"slow" calls are those which have the potential to hang for indefinite +amounts of time, and should be considered interruptible. If using a +"slow" call, the JSYS_INTERRUPTABLE flag should be added to the +JSYS-number argument in the call to jsys(). The latter will return -1 +if the call was interrupted, and in this case the USYS routine should +return with USYS_RETINT(). + Things are actually a little more complicated than this, as +certain calls such as read() and write() must check to see whether they +should really be interrupted or if the signal handler permits them to +continue what they were doing. This is done by invoking USYS_END() and +examining its return value to see what happened. Look at the existing +code for read(), write(), and ioctl() for examples. diff --git a/doc/kcc/humble.doc b/doc/kcc/humble.doc new file mode 100755 index 000000000..036a5f5cd --- /dev/null +++ b/doc/kcc/humble.doc @@ -0,0 +1,128 @@ + -*-Text-*- + +HUMBLE - Functions for hacking inferiors. + + + #include <humble.h> + +The header file for using the humble package. + + + int j_create() + +Creates an inferior, returns an FD or -1 if failure. + +This FD can be used just like any other file FD. It can even be used to do +I/O, although that is of limited utility. Conversely, a file FD obtained +from open() (by opening "USR:COMSAT IV" for example) will work with the +rest of the functions in the humble package; you don't have to use +j_create(). Although in that case it is kind of hard to control the +details of opening the USR: device. + +Possible errors: + - Already have 8 inferiors. + - This job has no free channels. + - The system has no free job slots.
+ + + int j_kill(fd) + int fd; + +Kills the specified inferior. Just like close() except it actually +destroys the job. Can only be applied to the last open FD. (Although I +don't see much advantage to dup'ing a job FD, it will work...) + +Possible errors: + - Not a job FD. + - Not the last open FD. + + + int j_read(fd, addr, buf, count) + int fd, addr, *buf, count; + +Copies COUNT words into BUF from the inferior starting at ADDR. Normally +returns 0. If error, it returns the number of words left to be read. + +Possible errors: + - Not a job FD, negative count, negative or huge address. + - Tried to read nonexistent memory. + + + int j_write(fd, addr, buf, count) + int fd, addr, *buf, count; + +Copies COUNT words from BUF into the inferior starting at ADDR, creating +memory if it doesn't exist. Normally returns 0. If error, it returns the +number of words left to be written. + +Possible errors: + - Not a job FD, negative count, negative or huge address. + - System doesn't currently have enough virtual address space. + - Can't write into a foreign job. + + + int j_dump(job_fd, file_fd) + int job_fd, file_fd; + +Writes a PDUMP file for the job on the freshly opened file. +Returns 0 if success and -1 if failure. + +Possible errors: + - Not a job FD. + - File FD isn't open to DSK. + - DSK or directory full, etc. + + + int j_load(job_fd, file_fd) + int job_fd, file_fd; + +Loads a PDUMP or SBLK file into the job from the freshly opened file. +Returns 0 if success and -1 if failure. + +Possible errors: + - Not a job FD. + - File FD isn't open to DSK. + - File is in the wrong format. + + + int j_vread(fd, var, loc) + int fd, var, *loc; + +Read a user variable. Returns 0 if it wins, -1 if failure. +VAR is a user variable specifier. LOC is where to store the result. + +The header file humble.h defines the macro SIXBIT so that user variable +specifiers can be easily constructed. SIXBIT("USTP") returns the integer +that is the sixbit of "USTP".
Other specifiers acceptable to .CALL USRVAR +will work as well. + +Possible errors: + - Not a job FD. + - No such user variable. + + + int j_vwrite(fd, var, val) + int fd, var, val; + +Write a user variable. Returns 0 if it wins, -1 if failure. +VAR is a user variable specifier. VAL is the new value. + +Possible errors: + - Not a job FD. + - No such user variable. + - Can't write into this job. + + + int j_atty(fd) + int fd; + +Allow an inferior to make use of the TTY. Returns 0 if it wins, -1 +if failure. +Possible errors: + - Not a job FD. + - Job isn't an inferior. + + + int j_dtty() + +Devour the TTY. Always returns 0. Can't possibly fail! diff --git a/doc/kcc/instal.doc b/doc/kcc/instal.doc new file mode 100755 index 000000000..9a3464416 --- /dev/null +++ b/doc/kcc/instal.doc @@ -0,0 +1,143 @@ + INSTALLING KCC ON A TOPS-20 SYSTEM + + KCC is normally distributed over the Internet via FTP from +SRI-NIC.ARPA, but can also be obtained on tape. This file describes +the basics of installing KCC for each, but you should read everything +regardless of which medium you are using. + +TAPE INSTALLATION: + + The TOPS-20 tape consists of a single DUMPER saveset with +several directories. They are organized into a single tree with the +top-level directory named "KCCDIST" or "KCC-n" or whatever; for +purposes of discussion we'll assume the fictitious name "KCC-n". You +can either retrieve all files at once, or extract only those necessary +to install a working copy of KCC. + +Retrieving all files: + Create a directory, such as KCCDIST, to hold the distribution. +This will contain the following: + + General information files, in particular: + CC.EXE The KCC compiler. + AGREE.TXT Ethical issues. + INSTAL.DOC Installation info; what you are reading now. + NEWS.TXT News about this version. + + C: => Runtime library and include files. + CC.DOC KCC user documentation. + More include files, Un*x oriented. + + FAIL assembler source, binary, and manual. 
+ KCC compiler sources and auxiliary files. + Runtime library sources: + General-purpose library routines. + Standard I/O package routines. + Math library routines. + Unix simulation routines. + A stab at a couple BSD net routines. + Unused "portable math library" routines. + Unused testing routines. + + +Installing a working KCC: + + To install a working copy of KCC, you should create a +directory to hold the runtime library and include files, and define +the logical name C: to point to it. This may be the same directory as +the one you restored the tape to (i.e. ), or it may be +a different place. This directory should also have a subdirectory +called SYS to hold certain other include files. + Read the NEWS.TXT file for any special news about this version +of the distribution. Then define C:, copy CC.EXE to your site's SYS: +directory, and you're all set. + To summarize, these are the files you need: + + CC.EXE + C: => *.*.0 Runtime library and include files. + *.*.0 More include files, Un*x oriented. + + Be sure to read the General Notes farther on in this file. + +FTP transfer notes: + + If you are fortunate enough to have Internet access, I +recommend that you use FTP (File Transfer Protocol) to get copies of +any KCC files that you need. + + Connect to SRI-NIC.ARPA, and use the FTP anonymous login +convention (username "anonymous", password your real name) to log in. +The complete distribution is available in the directory tree defined +by the logical name KCCDIST:, which is organized exactly as described +in the previous (tape installation) section. You can retrieve any of +the files or directories mentioned as needed. + NOTE: the exact directory name which KCCDIST: points to will +vary! It may be , or , or something else, so be +prepared to modify the given filenames appropriately. 
+ +BETA-TEST Versions: + + For those interested in serving as "beta test" sites and +living dangerously, it is possible to acquire the very latest binaries +by transferring the following files: + SYS:CC.EXE Latest KCC. + C:*.*.0 Latest runtime library and include files. + C:*.*.0 Latest Un*x-emulation include files. + +Since KCC and the library are still evolving steadily, you will almost +certainly get something newer and better. Just be aware that you also +have the following risks: + (1) The binaries may contain new bugs. + (2) The new versions may be incompatible (this will cause linker + error messages to alert you, however). + (3) The new sources will not be available until things stabilize + enough for a new distribution to be made. + +General Notes: + + It is wise to retain the file version numbers during +installation, since that is how KCC versions are numbered; a higher +file version number means a more recent version of KCC. + + C:CC.DOC contains user documentation. More internal details +may be available at various places in the directory tree, in files +named *.DOC. + + KCC as distributed will invoke the FAIL assembler by +default, rather than MACRO, because FAIL is much faster. If you do +not have FAIL, you may wish to install it also. The .EXE binary, +.FAI source, and line-printer style manual (FAIL.MANUAL) are included. +If you don't want to try this, or for other reasons prefer to use +MACRO, then you will want to either recompile KCC with the CCSITE.H +file changed to reflect your preferences, or you can patch the "tgasm" +variable in CC.EXE to have the value 1. + + +MONITOR/EXEC Modifications: + + There are certain modifications to the TOPS-20 Monitor and Exec +which KCC can make use of, if they are available. They are: + (1) Monitor: the PIP: device, to support pipes. + (2) Exec: PRARG% protocol to support "&" background processing. + (3) Exec: COMPIL-class command support. 
+ +Many sites already have these modifications, but if you don't, you +should be able to obtain them from their origin at Stanford; contact +Stu Grossman . Note that they are not +necessary, either to use KCC or run the C programs it compiles. + +The COMPIL-class command change is simple enough to be described here: + + The EXEC modifications to make its COMPILE/LOAD/etc commands + work with KCC can be found in the Stanford version of the EXEC + in the EXECCS module: + + In the LANGUAGE macro definition, add: + L (DDT,C,CC,KCC) ;KCC + Immediately after the instruction at BSRC1, add: + CAIN P4,LT.C + RET + Immediately after the instruction at PUTDF, add: + CAIN P4,LT.C + SKIPA ; If KCC, always use native mode. + diff --git a/doc/kcc/itskcc.mail b/doc/kcc/itskcc.mail new file mode 100755 index 000000000..8232b4d20 --- /dev/null +++ b/doc/kcc/itskcc.mail @@ -0,0 +1,1230 @@ +Received: from REAGAN.AI.MIT.EDU (CHAOS 13065) by AI.AI.MIT.EDU 12 Aug 88 14:45:34 EDT +Received: from MARLEY.AI.MIT.EDU by REAGAN.AI.MIT.EDU via CHAOS with CHAOS-MAIL id 129231; Fri 12-Aug-88 14:40:28 EDT +Date: Fri, 12 Aug 88 14:40 EDT +From: Alan Bawden +Subject: ITS changes merged +To: KLH@SRI-NIC.ARPA +cc: its-kcc@AI.AI.MIT.EDU +In-Reply-To: <12421695068.58.KLH@SRI-NIC.ARPA> +Message-ID: <19880812184027.3.ALAN@MARLEY.AI.MIT.EDU> + +Just some quick answers to a subset of your comments: + + Date: Thu, 11 Aug 88 17:08:57 PDT + From: Ken Harrenstien + OK, I have merged in your stuff. I'll set up another distrib directory + or something soon so you can get back the new things. Here is a list of + the files, followed by some comments on your notes.txt: + + ? C-HITS.FAI.2;P775202 1 343(7) 25-Mar-88 12:29:03 ALAN + ? .MAC.2;P775202 1 343(7) 25-Mar-88 12:29:03 ALAN + We should be able to make KCC generate the right headers + instead of using these. I don't think using XC: in the + .REQUEST is necessary, and everything else should be OK. + You know about the -L switch? 
Basically C: is used for + 3 things, all of which can be set with the -I,-H, and -L switches. + +I forget now why I found it easier to generate my own header files +manually, I recall fighting with what KCC was generating for an afternoon +and just giving up. Certainly we should fix KCC to generate proper +headers and flush these. The XC: in the .REQUEST is just insurance that I +don't ever accidentally link in 20X rel files from C:. I'd be happy to do +this some other way if some 20X wizzard wants to straightem me out on the +proper combination of logical name and linking loader hackery. I just +found something that worked. + + ? ITSSYM.FUN.1;P775202 11 5361(36) 1-Apr-88 06:38:32 ALAN + ? .UNV.1;P775202 4 1793(36) 1-Apr-88 06:38:05 ALAN + This is an odd place to have these files. Probably they should + be copied to UNV:. + +Probably, but I don't have the access to put them there on XX. + + For distribution purposes, they could be kept in + the <.FAIL> directory. #asm code that needs the syms can explicitly + get them with a SEARCH ITSSYM, the same way T20/10X code does + SEARCH MONSYM -- this shouldn't be in the header. + +I suppose. The thing is that if this ever runs on ITS there won't need to +be a SEARCH anything because FAIL just gets the symbols from ITS. Thats +why I put the SEARCH ITSSYM in the common header, to simulate how it would +work on ITS. It wouldn't be hard to arrange for SEARCH ITSSYM to be a +no-op on ITS, and insert them explicitly where 20X FAIL needs them. + + - XCC.PCL.11;P775202 1 456(7) 18-Jul-88 18:57:18 ALAN + Do you want to keep this? I could move it into the compiler + directory (already have a cross-compiler example there for 10X). + +It's just for my convenience. You can ignore it. + + - UIO.H.1007;P775202 2 3199(7) 29-Apr-88 20:34:35 ALAN + This file (uio.h) should be flushed completely. Nothing uses it. 
+ +Well, the man page for read() on a Unix I just checked told me to include +, so aren't there compatible programs that are likely to look +for this? + + M5 SYSCAL.C.1004;P775202 1 2343(7) 23-Apr-88 12:01:07 ALAN + Some questions here: + Should ato6() and afrom6() be here? I'm not sure where + to put them; if we want to make them universally available + to all PDP-10 configs then they need a header file. + Should sysits.h be syscal.h and itssym.h? + I edited afrom6() to avoid using an ADJBP. + +I don't know of anyplace that uses ato6 and afrom6, so I would suggest that +we just flush them. I don't see any compelling reason to split this file +since I can't imagine wanting either half alone. + + PS: + ? HUMBLE.C.36;P775202 5 10719(7) 11-May-88 23:17:38 ALAN + - .REL.2;P775202 4 1601(36) 22-Jul-88 12:56:41 ALAN + This should be a separate package, not part of the C library. + It can be put into a <.ITS> subdir of the distrib. + Invent a library name for it, such as LIBINF or LIBKID. + +Yes. I just put it in the library for convenience while I was debugging +it. + + Some comments about NOTES.TXT: + + LSEEK: On block mode output (_UIO_HANDPACK is set) you can only position + from one word boundary to another. Doesn't know how to do L_XTND. + (Usual EOF screw.) ITS won't let you do any operations if you + position yourself beyond the end of the file anyway. + [KLH: hmm, should be able to position at byte boundaries. What you really + need to be able to do is based on STDIO -- binary streams should be able + to fseek to arbitrary places within a file (ignore beyond EOF), text + streams should be able to fseek to anyplace that they ftell'd from, plus + 0 and EOF. This can be fixed up, right?] + +I'm only talking about -binary- -output- here. Input streams work just +fine, as do 7-bit output streams. Positioning to an arbitrary byte in an +binary output stream requires you to read in the part of the 36-bit word +to the left of where you are going to write. 
ITS does not provide a +reliable way to do this. + + Should support append mode. (How? No reasonable way to find the + end of a file...) Also overwrite? (Can't truncate!) + [KLH: don't worry about truncation, that's rarely used. append is + a lot more common, tho. Time to play with ITS? I thought the hack + I put into COMSAT for "FAST-APPEND" was reasonable.] + +Yeah, I agree that append is important. I could just believe what ITS +tells me is the length of the file, and we could live with the fact that +spurious ^C's would appear when you append to a file that was written by +Emacs, or restored from tape. + +I'll look at the COMSAT hack. The only thing I can imagine it doing is +page mapping in the last page of the file (which only works because you +know that the file has actually been written to disk because you just +opened it) so that it can get a look at the last word to count the ^C's. I +thought of doing that initially, but decided it was complicated enough that +I had more important things to do first. + +Appending to a binary file can never work without a lot more work. + +[ Before everybody starts jumping on me about this issue again. (You all + always do when this comes up.) Let me point out for the 100th time that + this is not a problem with ITS itself. It cannot just be -fixed- by + modifying ITS. The problem is caused by the zillion user programs that + "know" that they can write an ASCII file by writing a bunch of 36-bit + words with a bunch of ^C's for padding. I am not going to spend the rest + of my life tracking those programs all down and fixing them. I apologize + to everybody for these facts. I know that it sucks. ] + + EXIT: The reasons for waiting for a subfork are different on ITS? + [KLH: Huh?] + +Just a note to myself that the 20X code for EXIT was going to be very +different from the ITS code. + + CRT: Initialization of P is wrong everywhere but ITS. + [KLH: Huh?] 
+ +P is initialized to contain -,, +everywhere except on ITS where I fixed it to be +-,,-1 so that you don't try to +write in the first word beyond the stack before getting the stack overflow +interrupt. (On ITS I wanted to get at %PIPDL instead of a %PIMPV.) + +Received: from SRI-NIC.ARPA (TCP 1200000063) by AI.AI.MIT.EDU 11 Aug 88 20:14:14 EDT +Date: Thu, 11 Aug 88 17:08:57 PDT +From: Ken Harrenstien +Subject: ITS changes merged +To: alan@ai.ai.mit.edu +cc: its-kcc@ai.ai.mit.edu +Message-ID: <12421695068.58.KLH@SRI-NIC.ARPA> + +OK, I have merged in your stuff. I'll set up another distrib directory +or something soon so you can get back the new things. Here is a list of +the files, followed by some comments on your notes.txt: + + +Merge List of ITS C files: + + - - ignored, not made part of distrib. + = - Same, no merging needed. + Mnn - Merged, new version is nn. Must get it. + ! - New KCC version exists. Must get it. + + PS: +- 5.TAGS.2;P777700 5 11807(7) 23-Apr-88 17:12:32 ALAN +- INCLUDE.DIRECTORY.1;P20200 0 0(0) 5-Aug-88 18:38:26 KLH +- LIB.DIRECTORY.1;P20200 0 0(0) 5-Aug-88 18:39:18 KLH +? NOTES.TXT.42;P777700 2 4348(7) 22-Jul-88 13:24:04 ALAN + I'd like to retain this in the distrib but I'm not sure where to + put it. How about <.LIB>ITSNOT.DOC? Another name is fine, but + I do think <.LIB> is the right place, and the .DOC extension will + ensure that it is automatically snarfed into the distrib. +- SRCCOM.TECO.1;P777700 1 239(7) 18-Jul-88 19:11:20 ALAN +- TAGS.TECO.1;P777700 1 470(7) 2-Apr-88 23:03:28 ALAN + + Total of 11 pages in 8 files + + PS: +M20 C-ENV.H.1002;P775202 3 5451(7) 23-Apr-88 12:21:57 ALAN +? C-HITS.FAI.2;P775202 1 343(7) 25-Mar-88 12:29:03 ALAN +? .MAC.2;P775202 1 343(7) 25-Mar-88 12:29:03 ALAN + We should be able to make KCC generate the right headers + instead of using these. I don't think using XC: in the + .REQUEST is necessary, and everything else should be OK. + You know about the -L switch? 
Basically C: is used for + 3 things, all of which can be set with the -I,-H, and -L switches. +M6 FCNTL.H.1005;P775202 1 1678(7) 18-Jul-88 18:20:55 ALAN +? ITSSYM.FUN.1;P775202 11 5361(36) 1-Apr-88 06:38:32 ALAN +? .UNV.1;P775202 4 1793(36) 1-Apr-88 06:38:05 ALAN + This is an odd place to have these files. Probably they should + be copied to UNV:. For distribution purposes, they could be kept in + the <.FAIL> directory. #asm code that needs the syms can explicitly + get them with a SEARCH ITSSYM, the same way T20/10X code does + SEARCH MONSYM -- this shouldn't be in the header. +- LIBC.REL.1048;P775202 47 24060(36) 22-Jul-88 13:04:29 ALAN +M31 STDIO.H.1005;P775202 3 6470(7) 23-Apr-88 12:01:23 ALAN +- SYS.DIRECTORY.1;P20200 0 0(0) 5-Aug-88 18:39:01 KLH +M6 SYSITS.H.1018;P775202 4 8124(7) 11-May-88 15:30:54 ALAN +- XCC.PCL.11;P775202 1 456(7) 18-Jul-88 18:57:18 ALAN + Do you want to keep this? I could move it into the compiler + directory (already have a cross-compiler example there for 10X). + + Total of 76 pages in 11 files + + PS: +M15 FILE.H.1004;P775202 1 2026(7) 23-Apr-88 12:28:57 ALAN +? HUMBLE.H.1004;P775202 1 279(7) 11-May-88 22:13:17 ALAN + See comments about humble.c +- UIO.H.1007;P775202 2 3199(7) 29-Apr-88 20:34:35 ALAN + This file (uio.h) should be flushed completely. Nothing uses it. +M44 USYSIO.H.1007;P775202 2 3199(7) 29-Apr-88 20:34:35 ALAN + + Total of 6 pages in 4 files + + PS: += ABORT.C.9;P775202 1 250(7) 6-Sep-86 13:01:53 ALAN +- .REL.1;P775202 1 66(36) 22-Jul-88 12:32:21 ALAN += CPU.C.10;P775202 2 4393(7) 27-Feb-87 11:12:46 ALAN +- .REL.1;P775202 1 179(36) 22-Jul-88 12:30:50 ALAN +M65 CRT.C.1020;P775202 13 31956(7) 21-Jul-88 19:54:33 ALAN +- .REL.1;P775202 2 1010(36) 22-Jul-88 12:31:08 ALAN += CTYPE.C.3;P775202 4 8462(7) 25-Sep-86 18:27:34 ALAN +- .REL.1;P775202 1 333(36) 22-Jul-88 12:31:19 ALAN +- ITS.DIRECTORY.1;P20200 0 0(0) 5-Aug-88 18:39:27 KLH +? ITSSYM.FAI.1604;P775202 6 13768(7) 1-Apr-88 06:36:40 ALAN + Move to <.FAIL> directory? 
I don't want any .FAI or .MAC files + in the C source dirs, because I often do DELETE *.FAI to flush + compilation leftovers. +- LIBC.COM.17;P775202 1 405(7) 22-Jul-88 13:03:11 ALAN +- .MAK.1026;P775202 1 561(7) 22-Jul-88 13:03:31 ALAN +- .TAGS.8;P775202 2 3247(7) 14-Apr-88 17:26:09 ALAN + Eventually these idiosyncratic files should be flushed. That is, + all files get compiled even if some of them are unsuitable for + the target system -- they just assemble into null modules. + Much easier to update a list within just one file rather than in + N different files (N = # systems supported). +M192 MALLOC.C.1001;P775202 8 18032(7) 23-Apr-88 12:12:04 ALAN +- .REL.1;P775202 3 1415(36) 22-Jul-88 12:32:15 ALAN +- MATH.DIRECTORY.1;P20200 0 0(0) 5-Aug-88 18:40:46 KLH +! MEMSTR.C.9;P775202 10 23884(7) 11-Sep-87 00:33:46 ALAN +- .REL.1;P775202 2 991(36) 22-Jul-88 12:31:34 ALAN += ONEXIT.C.2;P775202 1 760(7) 7-Apr-87 15:30:56 ALAN +- .REL.1;P775202 1 128(36) 22-Jul-88 12:31:43 ALAN +- STDIO.DIRECTORY.1;P20200 0 0(0) 5-Aug-88 18:40:55 KLH += STRERR.C.4;P775202 3 5570(7) 16-Mar-88 14:48:11 ALAN +- .REL.1;P775202 3 1458(36) 22-Jul-88 12:32:31 ALAN += STRING.C.39;P775202 4 8365(7) 21-Mar-88 13:05:28 ALAN +- .REL.1;P775202 2 740(36) 22-Jul-88 12:31:53 ALAN +M5 SYSCAL.C.1004;P775202 1 2343(7) 23-Apr-88 12:01:07 ALAN + Some questions here: + Should ato6() and afrom6() be here? I'm not sure where + to put them; if we want to make them universally available + to all PDP-10 configs then they need a header file. + Should sysits.h be syscal.h and itssym.h? + I edited afrom6() to avoid using an ADJBP. +- .REL.1;P775202 1 146(36) 22-Jul-88 12:32:02 ALAN +- USYS.DIRECTORY.1;P20200 0 0(0) 5-Aug-88 18:41:02 KLH + + Total of 74 pages in 28 files + + PS: +? HUMBLE.C.36;P775202 5 10719(7) 11-May-88 23:17:38 ALAN +- .REL.2;P775202 4 1601(36) 22-Jul-88 12:56:41 ALAN + This should be a separate package, not part of the C library. + It can be put into a <.ITS> subdir of the distrib. 
+ Invent a library name for it, such as LIBINF or LIBKID. + + Total of 9 pages in 2 files + + PS: += FLOOR.C.8;P775202 1 421(7) 12-Apr-88 18:03:59 ALAN +- .REL.1;P775202 1 95(36) 22-Jul-88 13:03:01 ALAN += MODF.C.87;P775202 2 4041(7) 12-Apr-88 18:27:27 ALAN +- .REL.2;P775202 1 104(36) 22-Jul-88 13:02:56 ALAN + + Total of 5 pages in 4 files + + PS: += CLEANU.C.4;P775202 1 477(7) 16-Sep-86 13:56:56 ALAN +- .REL.1;P775202 1 127(36) 22-Jul-88 12:52:33 ALAN += FCLOSE.C.22;P775202 1 475(7) 17-Sep-86 18:04:03 ALAN +- .REL.1;P775202 1 127(36) 22-Jul-88 12:52:21 ALAN +! FFLUSH.C.35;P775202 1 2125(7) 12-Apr-88 14:56:46 ALAN +- .REL.1;P775202 1 307(36) 22-Jul-88 12:51:06 ALAN += FGETC.C.18;P775202 2 3582(7) 29-Feb-88 21:55:11 ALAN +- .REL.1;P775202 1 385(36) 22-Jul-88 12:51:15 ALAN += FOPEN.C.27;P775202 2 3218(7) 11-Sep-87 17:19:28 ALAN +- .REL.1;P775202 1 302(36) 22-Jul-88 12:50:34 ALAN += FPUTC.C.38;P775202 1 1764(7) 12-Apr-88 18:30:23 ALAN +- .REL.1;P775202 1 304(36) 22-Jul-88 12:51:25 ALAN += FPUTS.C.14;P775202 1 573(7) 12-Apr-88 18:31:18 ALAN +- .REL.1;P775202 1 174(36) 22-Jul-88 12:51:33 ALAN +M52 FREOPE.C.1001;P775202 3 5145(7) 23-Apr-88 12:11:18 ALAN +- .REL.1;P775202 2 609(36) 22-Jul-88 12:50:41 ALAN += FSEEK.C.25;P775202 2 2720(7) 27-Oct-87 15:20:07 ALAN +- .REL.1;P775202 1 230(36) 22-Jul-88 12:52:15 ALAN +! FTELL.C.22;P775202 1 2418(7) 27-Oct-87 12:08:20 ALAN +- .REL.1;P775202 1 186(36) 22-Jul-88 12:52:09 ALAN += PRINTF.C.27;P775202 10 23373(7) 12-Apr-88 18:32:48 ALAN +- .REL.1;P775202 6 2914(36) 22-Jul-88 12:52:00 ALAN +! SCANF.C.187;P775202 6 13304(7) 30-Mar-87 14:55:04 ALAN +- .REL.1;P775202 4 1882(36) 22-Jul-88 12:51:45 ALAN += SETBUF.C.30;P775202 2 2965(7) 27-Oct-87 13:46:37 ALAN +- .REL.1;P775202 1 305(36) 22-Jul-88 12:50:48 ALAN +! 
SOPEN.C.16;P775202 1 1202(7) 30-Sep-86 15:26:35 ALAN +- .REL.1;P775202 1 183(36) 22-Jul-88 12:52:27 ALAN += UNGETC.C.22;P775202 1 946(7) 26-Mar-88 13:55:09 ALAN +- .REL.1;P775202 1 130(36) 22-Jul-88 12:50:56 ALAN + + Total of 59 pages in 30 files + + PS: +M60 CLOSE.C.1010;P775202 1 2176(7) 8-May-88 00:12:29 ALAN +- .REL.1;P775202 1 350(36) 22-Jul-88 12:35:24 ALAN += DUP.C.19;P775202 1 957(7) 5-Sep-87 18:56:01 ALAN +- .REL.1;P775202 1 186(36) 22-Jul-88 12:34:22 ALAN += EXIT.C.2;P775202 1 1550(7) 24-Aug-87 15:39:31 ALAN +- .REL.1;P775202 1 172(36) 22-Jul-88 12:33:15 ALAN += FCNTL.C.13;P775202 1 754(7) 5-Sep-87 18:56:06 ALAN +- .REL.1;P775202 1 175(36) 22-Jul-88 12:34:32 ALAN +M33 LSEEK.C.1007;P775202 2 3125(7) 29-Apr-88 21:19:25 ALAN +- .REL.1;P775202 1 370(36) 22-Jul-88 12:34:44 ALAN +M225 OPEN.C.1029;P775202 12 30714(7) 21-Jul-88 21:41:46 ALAN +- .REL.1;P775202 2 822(36) 22-Jul-88 12:34:12 ALAN +M101 READ.C.1041;P775202 10 25544(7) 2-May-88 13:42:44 ALAN +- .REL.1;P775202 3 1366(36) 22-Jul-88 12:34:58 ALAN +M30 SBRK.C.1002;P775202 3 6715(7) 10-May-88 13:18:18 ALAN +- .REL.1;P775202 1 309(36) 22-Jul-88 12:33:28 ALAN +M20 SIGDAT.C.1005;P775202 2 4100(7) 23-Apr-88 12:14:23 ALAN +- .REL.1;P775202 1 151(36) 22-Jul-88 12:33:37 ALAN +M145 STAT.C.1004;P775202 4 9518(7) 22-Jul-88 08:46:19 ALAN +- .REL.1;P775202 1 474(36) 22-Jul-88 12:33:57 ALAN +M18 UIODAT.C.1008;P775202 2 4834(7) 29-Apr-88 20:34:38 ALAN +- .REL.1;P775202 1 277(36) 22-Jul-88 12:33:45 ALAN +M120 URT.C.1019;P775202 15 36552(7) 22-Jul-88 12:10:28 ALAN +- .REL.1;P775202 5 2152(36) 22-Jul-88 12:33:00 ALAN += URTSUD.C.6;P775202 1 223(7) 18-Aug-87 02:36:47 ALAN +- .REL.1;P775202 1 64(36) 22-Jul-88 12:33:06 ALAN +M33 WAIT.C.1004;P775202 2 4797(7) 23-Apr-88 12:21:56 ALAN +- .REL.1;P775202 1 118(36) 22-Jul-88 12:35:31 ALAN +M65 WRITE.C.1020;P775202 6 13157(7) 22-Jul-88 09:00:20 ALAN +- .REL.1;P775202 2 887(36) 22-Jul-88 12:35:12 ALAN + + Total of 86 pages in 30 files + + Grand total of 326 pages in 117 files + +Some 
comments about NOTES.TXT: + + Is uio.h old hat? But isn't this the file that you include to + declare open()? +[KLH: Yes, flush uio.h. Nothing should still use it. Did you find something +that does?] + There are a lot of other little details to be done, and no obvious + organized way to go about it. + +[KLH: Well, one method is to pick a useful program or two you want to +port to ITS; it must be useful enough to stimulate you into converting +the USYS calls needed to support that program. More rewarding than just +trying to go down through them alphabetically and not really knowing whether +something NEEDS to be simulated. KCC itself is a good candidate, except +that you'll need an ITS linker.] + + ERJMP definitions are output even for ITS. +[KLH: OK, fixed.] + ADJBP macro output for KA is incorrect. +[KLH: if you meant AC and MEM dummy args, this was already fixed.] + +LSEEK: On block mode output (_UIO_HANDPACK is set) you can only position + from one word boundary to another. Doesn't know how to do L_XTND. + (Usual EOF screw.) ITS won't let you do any operations if you + position yourself beyond the end of the file anyway. + +[KLH: hmm, should be able to position at byte boundaries. What you really +need to be able to do is based on STDIO -- binary streams should be able +to fseek to arbitrary places within a file (ignore beyond EOF), text +streams should be able to fseek to anyplace that they ftell'd from, plus +0 and EOF. This can be fixed up, right?] + + Should support append mode. (How? No reasonable way to find the + end of a file...) Also overwrite? (Can't truncate!) +[KLH: don't worry about truncation, that's rarely used. append is +a lot more common, tho. Time to play with ITS? I thought the hack +I put into COMSAT for "FAST-APPEND" was reasonable.] + + +MALLOC: Removed test for extended addressing and call to EXTEND [XBLT] on + ITS (mostly) because FAIL didn't have XBLT predefined. +[KLH: KCC includes a definition of XBLT in the header it generates. 
You +may not have noticed this if you were already using your own header.] + + There is a bug in _setup() that touches nonexistent memory. I did + not fix it, because the temptation to re-write the entire MALLOC + module would be too great. Instead I broke brk() to always keep at + least 4 extra words existent so that I wouldn't have to think about + problems like this unless they were really bad ones. +[KLH: Guess we'll look at _setup().] + + Shouldn't _ERRNO_T20_LAST be ECLOSE? +[KLH: yeah, fixed.] + + What do we do about ITS error codes? Add 400000? +[KLH: that would be OK. There isn't any good way to combine Unix error +numbers with OS errors. Having strerror recognize OS numbers is an +extension, but the actual strerror call in this case has to be conditionalized. +An argument of -1 (another extension) gets the last OS error; this lets me +define LASTERROR to be either "errno" (on a real Unix) or "-1" (for KCC)] + +STDIO.H: On ITS SYS_OPEN is 16. Should perhaps set L_cuserid to 6? + +[KLH: Seems reasonable.] + +EXIT: The reasons for waiting for a subfork are different on ITS? + +[KLH: Huh?] + +CRT: Initialization of P is wrong everywhere but ITS. +[KLH: Huh?] + +CPU: FAIL can't assemble this because $BADL6, $BADL7, $BADL8, $BADL9, + $BADLH are EXTERNals after ==. + +[KLH: Yeah. Use MACRO for that. We use MACRO for most of the library, +just to avoid FAIL's meaningless barfing when SEARCH is used.] + +------- + +Received: from SRI-NIC.ARPA (TCP 1200000063) by AI.AI.MIT.EDU 22 Jul 88 20:22:11 EDT +Date: Fri, 22 Jul 88 17:19:59 PDT +From: Ken Harrenstien +Subject: Archaelogical discovery - STINK format +To: its-kcc@AI.AI.MIT.EDU +cc: klh@SRI-NIC.ARPA +Message-ID: <12416454197.17.KLH@SRI-NIC.ARPA> + +Buried in some ancient paper piles of mine I just found an old DM/CG +memo, SYS.03.02, dated 27-Mar-72 and titled: "STINKING-DYNAL +Relocatable Format" by Pitts Jarvis and Chris Reeve. + +It seems to document all of the funny bits, block types, and stuff. 
+Anyone who still cares about it could probably find a copy in the +DM archives, I guess. +------- + +Received: from SRI-NIC.ARPA (TCP 1200000063) by AI.AI.MIT.EDU 21 Apr 88 04:58:49 EDT +Date: Thu, 21 Apr 88 01:51:09 PDT +From: Ken Harrenstien +Subject: Re: New sources +To: ALAN@AI.AI.MIT.EDU +cc: ITS-KCC@AI.AI.MIT.EDU, KLH@SRI-NIC.ARPA +In-Reply-To: <362928.880420.ALAN@AI.AI.MIT.EDU> +Message-ID: <12392167861.28.KLH@SRI-NIC.ARPA> + +Sounds like you have a good handle on the pathname stuff. Let me see +if I can shed any light on read() and friends... + +I guess I thought SIOT was able to handle n-bit bytes. Evidently, you +only have a choice of 7- or 36-bit bytes from the system, although SIOT +will accept any byte pointer whether or not it is big enough. (Slowly +it all comes back to me...) + +Let's see. The most common case is converted 7-bit I/O (that is, text +streams), so you want to make that efficient. You can just use SIOT +for that and use existing CRLF->LF conversion code. For devices that +take 8-bit bytes you can do the same thing. For disk bytesizes of 8 and 9 +the simplest method seems to be a simulated SIOT level which does its +actual I/O using word buffers. read() and write() were originally intended +to always contain buffers of some size, but it turned out to be simpler +and safer to do away with buffers whenever possible. After all, on Unix +read() and write() ARE system calls, so it's reasonable for each invocation +to cause an actual system call. If you aren't going to allow simultaneous +read/write (even simulated) then the buffering isn't quite so hairy. + +I don't think you need a special case for ^C. If I/O is being converted +then flush the ^Cs since you are dealing with a text stream. If not converted, +then don't. Doesn't ITS have a FILLEN return value that gives the file length +in 7-bit bytes? If it exists, use that; if not, whether trailing ^Cs are +flushed depends on the conversion flag. + +Too bad about the lseek() problem. 
I thought there was a mode that +could be used for simultaneous read/write, though; COMSAT used that +for FAST-APPEND. Or perhaps I'm wrong and I just dodged the issue too by +writing a 5-char word of blanks. You too can dodge the issue by initially +only allowing lseek() to win for seeks to the right boundaries, and failing +otherwise. + +What about page mapping? You could map in a file page and fiddle with +it as you please, and when done update the length info manually. +Might be overkill, though; not many programs do this, most just read or +just write. + +That comment of Ian's about UIO buffer size is obsolete. I changed +the way ftell() worked so it no longer depended on that sort of crock. +Guess I missed that one. + +------- + +Date: Wed, 20 Apr 88 20:50:18 EDT +From: Alan Bawden +Subject: New sources +To: KLH@SRI-NIC.ARPA +cc: ITS-KCC@AI.AI.MIT.EDU +In-reply-to: Msg of Wed 20 Apr 88 08:00:27 PDT from Ken Harrenstien +Message-ID: <362928.880420.ALAN@AI.AI.MIT.EDU> + + Date: Wed, 20 Apr 88 08:00:27 PDT + From: Ken Harrenstien + Well, you should have seen the message now about KCC-5. The bad news + is that there a number of changes in the source, particularly in CRT.C + and URT.C. The good news is that I have no plans to fiddle with + anything for at least the next couple of months, except for fixing + bugs. So now is an excellent time to integrate any ITS additions. + +OK, I'll coordinate with SRA who will presumably pull a set of sources on +to XX soon. The changes I made to CRT and URT are still fresh enough in my +mind that I don't anticipate any pain doing the merge. + + I'm wondering what, if anything, you have decided to do about some of + the thornier issues. For example, pathnames -- so many routines expect + that a filename is a string of nonblank characters.... + +There are two kludges for pathnames so far. First, there is a hack in +open() that treats unquoted "/" as if it were ";", and that treats unquoted +"." 
the same as space when it appears -embedded- in a name component. Thus +".mail.;names.259" is converted to ".mail.;names 259" before it is passed +to SOPEN, but nothing changes in ".mail.;.file. (dir)". (Although you do +have to write ".mail.;.file..(dir)".) + +Second, there is a kludge in the ITS long filename parser that concatenates +multiple directory names together with "."s when parsing them into sixbit. +Thus ITS will treat "dsk:kcc;sys;foo h" the same as "DSK:KCC.SY;FOO H" (as +will all devices that only support sixbit filenames), while long filename +devices will still see a hierarchical directory name. + +Together, these kludges should make most of the common assumptions about +pathnames work. For example, KCC itself can deal with #include +"sys/file.h" by passing "DSK:KCC;sys/file.h" to open(), which will convert +this to "DSK:KCC;sys;file h" and pass it to SOPEN, which will parse it into +sixbit as "DSK:KCC.SY;FILE H". + +What's giving me trouble now is read(). The existing code relies on the +operating system to deliver a stream of bytes of the correct size (7, 8 or +9 bits), but on ITS 8 and 9 bit bytes can only be obtained by manually +unpacking 36 bit words (ignoring for the moment those devices, like network +connections, that only support byte streams). This introduces an +additional dimension into the space of different cases that read() already +deals with. I haven't figured out yet just how to control this complexity +while still keeping the common cases reasonably efficient. + +There is also the ^C problem. Somebody has to protect higher level +routines from the ugly little fact that ITS pads out ascii files with ^C's +in the last word. I am undecided about just where this padding should be +stripped off. It -might- be that the right thing is to make it part of the +CRLF => LF conversion hack, but I wonder if there might be programs that +ask for unconverted input that would be surprised by those ^Cs?
Perhaps it +should just be an orthogonal conversion that defaults on for 7 bit bytes? +(Yet another case!) + +This manual byte unpacking also introduces additional complexity into +lseek(). For output, there is no reasonable way to position at other than +a multiple of four 8 or 9 bit bytes, since you can really only position at +word boundaries (and you can't have the file open for both reading and +writing so you can't compensate in the obvious way). For input, you can do +it by cooperating with read() a little. + +Incidentally, can someone enlighten me about what is going on here? From +USYSIO.H: + +/* + * The UIO buffer must be at LEAST twice the size of the higher-level + * STDIO buffer, for the ftell() crockery to work. See bufsiz() and + * ftell() for the gory details. + */ + +#define UIO_BUFSIZ (BUFSIZ*2) /* lowest-level buffer size */ + +I can't find a routine named bufsiz() anywhere, and the positional hackery +in ftell() (using either the "old" method or the "new" method) doesn't seem +to depend on the size of the low-level uio buffer although I didn't read it +carefully enough to be 100% sure of that. So what goes? At the very least +I claim this needs to have a better comment explaining whatever the gory +details are so that people like me will be able to figure out if they are +in any danger of violating important constraints. + +Date: Mon, 21 Mar 88 21:19:45 EST +From: Alan Bawden +Subject: Until we have a linker... +To: ITS-KCC@AI.AI.MIT.EDU +Message-ID: <345449.880321.ALAN@AI.AI.MIT.EDU> + +In the minor hack department: + + :KCC;GETSYM + +Sucks the ITS official symbol definitions out of the running system (just +as MIDAS and FAIL do) and writes a file of definitions suitable for +insertion (by FAIL or MACRO). It writes a file named ITSSYM on your +directory. Probably the results of doing this should be available in some +standard place on 20X.
(If anyone thinks it is worth it, I can modify it +to produce a file suitable for generating .UNV and .FUN files instead. +Is it worth it?) + +In the major kludge department: + + :KCC;EXECVT , + +Converts a 20X SSAVE file into an ITS PDUMP file. (Including converting the +symbol table. Yuck!) The defaults to DSK:;FOO EXE, and +the defaults to DSK:; BIN, so doing: + + :KCC;EXECVT XX:ZAP + +converts ZAP.EXE from your directory on XX and writes the results into +ZAP BIN on your ITS directory. + +Using these two, I have successfully assembled a program on 20X, linked it +using LINK, and then run the results on ITS. + +Received: from SRI-NIC.ARPA (TCP 1200000063) by AI.AI.MIT.EDU 27 Feb 88 17:40:12 EST +Date: Sat, 27 Feb 88 14:36:49 PST +From: Ken Harrenstien +Subject: Re: KCC +To: its-kcc@AI.AI.MIT.EDU +cc: jtw@AI.AI.MIT.EDU, Alan@AI.AI.MIT.EDU, Maeda@AI.AI.MIT.EDU, + SRA@XX.LCS.MIT.EDU, KLH@SRI-NIC.ARPA +In-Reply-To: <880226174937.2.ALAN@PIGPEN.AI.MIT.EDU> +Message-ID: <12378162394.17.KLH@SRI-NIC.ARPA> + +Well, I have the interest, but I dunno about the time. If you have +some people to work on it, the best thing to tackle first is an ITS +DECREL linker. That's all you need to begin with. + +I'll go over the arguments again. Here's how it would work in action: + User invokes KCC. + KCC generates FAIL code, invokes FAIL and ITS linker. + FAIL generates DECREL. + ITS linker gobbles DECREL, produces BIN. + (Of course, the link step is omitted for -c compiles.) + +My reasons for staying with an assembler step: + I don't have much time at all for KCC hacking. Producing REL + files directly will be moderately painful. + Lots of library code has asm() statements in it. KCC would have + to parse those strings like an assembler would. More work. + It is much easier to debug things with the intermediate assembler + output available. + +KCC can readily generate code for any of MACRO, MIDAS, or FAIL. 
But the +reasons for using FAIL on ITS are: + MIDAS has a problem with symbol conflicts between user labels and + opcodes, pseudos, etc. + MACRO on ITS might be a problem. If you can get permission from DEC + to use MACRO sources, fine. But I doubt it. + FAIL is one-pass; MACRO and MIDAS are two-pass. + Note that FAIL, like MIDAS, will assemble for ITS and can produce + either STINK or DECREL. + + +I recommend DECREL for reasons that you've probably also figured out: + It's well documented (unlike STINK). + It now has many more block types (unlike STINK). + It can be generated by any of the 3 assemblers. + It makes it much easier to cross-compile things in the initial stages. + + +Now, about PSECTs. They are not necessary initially. To make them +work it will suffice to fix up FAIL and the ITS linker. KCC can then +be trivially adjusted to make use of this feature. Let's not bust a +gut trying to do everything at once. + +Similarly, you don't NEED long symbols to begin with. The ANSI C +draft standard mandates 6 char monocase uniqueness for externals, if +they are to be portable. It is easy to use long names for C code just +by putting #defines in the appropriate header files. We can worry +about this later. + +Incidentally, if we are agreed on the need for a new linker (in other words, +forget about patching up STINK, and forget about stealing LINK), then we +will need a working moniker (although of course the lucky author will have +prerogative). Some candidates: + ILL (Incompatible Linking Loader) - my current favorite. + SINS (SINS Is Not Stink) Obviously, names are + SAINTS (SAINTS AIN'T Stink) a religious matter. + ... + +p.s. If someone is going to start completely from scratch I'd suggest writing +it in C, of course. Cross-compile on a 20. Most importantly, make it +use and create libraries, with an index. 
+------- + +Received: from SRI-NIC.ARPA (TCP 1200000063) by AI.AI.MIT.EDU 27 Feb 88 16:55:19 EST +Date: Sat, 27 Feb 88 13:52:01 PST +From: Ken Harrenstien +Subject: [jtw@AI.AI.MIT.EDU: KCC] +To: its-kcc@AI.AI.MIT.EDU +Message-ID: <12378154237.17.KLH@SRI-NIC.ARPA> + +Forwarded for posterity, reply follows. + --------------- + +Return-Path: +Received: from XX.LCS.MIT.EDU by SRI-NIC.ARPA with TCP; Fri 26 Feb 88 15:53:37-PST +Received: from OZ.AI.MIT.EDU by XX.LCS.MIT.EDU via Chaosnet; 26 Feb 88 17:50-EST +Received: from PIGPEN.AI.MIT.EDU by OZ.AI.MIT.EDU via Chaosnet; 26 Feb 88 17:48-EST +Date: Fri, 26 Feb 88 17:49 EST +From: jtw@AI.AI.MIT.EDU +Sender: Alan@AI.AI.MIT.EDU +Subject: KCC +To: KLH@SRI-NIC.ARPA +cc: Alan@AI.AI.MIT.EDU, Maeda@AI.AI.MIT.EDU, JTW@AI.AI.MIT.EDU, + SRA@XX.LCS.MIT.EDU +Message-ID: <880226174937.2.ALAN@PIGPEN.AI.MIT.EDU> + +So here we are thinking about porting KCC to ITS. For a change we even have +some people to work on it...(!) + +A while back there was some discussion of teaching KCC to output some +relocatable format directly, and writing a linker for it. Now we're +thinking about this again, and wondering if you have the interest/time +to help. + +It seems that the best thing would be to get KCC to output DEC REL format, +and do an ITS linker for it. The question is what, exactly, to support. +Ideally we could teach KCC about PSECTS and new long-symbol-format REL +blocks, and implement an ITS linker that can deal with them. A somewhat +easier thing would be to generate PSECTed code but use old REL blocks. +Presumably this has the advantage that we could get things started by +having KCC generate MACRO code, and linking that. At worst, we could +avoid PSECTED code altogether and just generate TWOSEG code, except +that I really do want the linker to do automatic relocation of the +code and data segments to contiguous memory, then PDUMP with pure code, +and PSECTS just seem a lot cleaner. 
+ +Alan points out that we will probably want to be able to link in assembly +code. The two choices seem to be to try and run MACRO under the simulator, +or to have midas generate DECTWO and teach the linker to do TWOSEG -> PSECT +mapping when required. + +So: + +a) Are you at all interested in teaching KCC to generate DEC REL directly? +b) If so, would you be interested in generating 'new' blocks so we could + use long names? (And what would you do about DDT? We're just going to + dump out the symbol table, maybe filtering out conflicts...) +c) How do you feel about generating PSECTed code instead of TWOSEG? +d) Anything else? (We-all 'r just sittin' here drinkin beers and such. We + may have missed something...) + + JTW, Alan, and Chris Maeda +------- + +Received: from XX.LCS.MIT.EDU (CHAOS 2420) by AI.AI.MIT.EDU 10 Sep 87 11:37:25 EDT +Date: Thu, 10 Sep 1987 11:27 EDT +Message-ID: +From: Rob Austein +To: ITS-KCC@AI.AI.MIT.EDU +Subject: Completely random KCC implementation note + +I just came to the conclusion that argv[0] should be the XJNAME (not +the JNAME). This seems to be the way it works on unix, and is +certainly the way it works on Twenex. + +If you're wondering why I care, I was just working on some code that +does crash dumps to different files depending on argv[0], which got me +wondering just what argv[0] is, anyway. + +Received: from XX.LCS.MIT.EDU (CHAOS 2420) by AI.AI.MIT.EDU 20 Aug 87 02:12:37 EDT +Date: Thu, 20 Aug 1987 02:12 EDT +Message-ID: +From: Rob Austein +To: ITS-KCC@AI.AI.MIT.EDU +Subject: KCC meets ITS +In-reply-to: Msg of 19 Aug 1987 23:13-EDT from David A. Moon + + Date: Wednesday, 19 August 1987 23:13-EDT + From: David A. Moon + + Date: Wed 19 Aug 87 19:10:38-PDT + From: Ken Harrenstien + + Two handy programs that could be created would be: + CNV (or whatever) to convert DEC format SAVE files into ITS SBLK files. + .... + + I believe this is AI: ARC: MOON; 20XCSV >. Someone who knows 20X + should see if it still works. 
+ +CSAVE format hasn't changed any. The Twenex EXEC can convert SSAVE +format to CSAVE format if needed. + +20XCSV successfully converted a trivial program, I haven't tried +anything fancy. It throws away the symbol table. + +Received: from STONY-BROOK.SCRC.Symbolics.COM (TCP 20024224620) by AI.AI.MIT.EDU 19 Aug 87 23:13:37 EDT +Received: from EUPHRATES.SCRC.Symbolics.COM by STONY-BROOK.SCRC.Symbolics.COM via CHAOS with CHAOS-MAIL id 216969; Wed 19-Aug-87 23:13:36 EDT +Date: Wed, 19 Aug 87 23:13 EDT +From: David A. Moon +Subject: Re: KCC meets ITS +To: ITS-KCC@AI.AI.MIT.EDU +In-Reply-To: <12327869669.18.KLH@SRI-NIC.ARPA> +Message-ID: <870819231313.5.MOON@EUPHRATES.SCRC.Symbolics.COM> + + Date: Wed 19 Aug 87 19:10:38-PDT + From: Ken Harrenstien + + Two handy programs that could be created would be: + CNV (or whatever) to convert DEC format SAVE files into ITS SBLK files. + .... + +I believe this is AI: ARC: MOON; 20XCSV >. Someone who knows 20X +should see if it still works. + + +Date: Wed, 19 Aug 87 22:18:07 EDT +From: Alan Bawden +Subject: KCC meets ITS +To: ITS-KCC@AI.AI.MIT.EDU +In-reply-to: Msg of Wed 19 Aug 87 14:22:36-PDT from Ken Harrenstien +Message-ID: <244164.870819.ALAN@AI.AI.MIT.EDU> + + Date: Wed 19 Aug 87 14:22:36-PDT + From: Ken Harrenstien + Fine, could you tweak the ITS-KCC list so that ITS-KCC-NIC@SRI-NIC.ARPA is + on it, and add a comment to the effect that this is where I see the mail. + (I check my ITS mail maybe only once a month nowadays). I also put Ian + and a log file on that local distribution list. Will send some comments + when that's done... + +Done. + + Date: Wed, 19 Aug 87 17:23 EDT + From: David A. Moon + ... + Does it work to put the prefix on the built-in symbols instead of on + the user symbols, maybe something like + + .BEGIN USER + GO: MOVE 1,END + END: 700. + .U"END + +No, this kind of thing doesn't work. 
When Midas tries to assemble the +MOVE 1,END it complains that END returns no value, then it ends the +assembly right there. If instead of END you substitute an initial symbol +such as SETZ, it works, but you get a warning message. + + I've been poking around inside STINK to see what we can do about + the library issue. I don't suppose any of you have experience with + using .LIBRQ, .LIBRA, and .LIFS in Midas and STINK, do you?... + That's what I thought. + + That stuff probably worked back in the 1960s, so there may be some + chance that it still works. I've never used it, as far as I know. I + think PUB was put together with STINK, and I did make a new PUB once or + twice, but I don't remember whether that involved libraries. + +Some experiments JTW performed this afternoon confirm that this does seem +to work. The problem is that it isn't -really- what you want. It will let +you construct a file that when processed by STINK will selectively load +bits of code based on undefined symbols. It won't let you do the +transitive closure of that operation. + + Date: Wed, 19 Aug 1987 19:11 EDT + From: Rob Austein + ... + Under certain conditions, MIDAS will rename .MAIN to GLOBAL before + punching (sic) the binary, because it thinks DDT wants this. + +I always get a block named .MAIN, or if I use a TITLE statement I get a +block named after my title. (I am looking directly at the binary STINK files +output by Midas when I say this.) I -do- see that after STINK has linked +up a bunch of things, and passed the symbols on to DDT, somebody has +created a GLOBAL block, but I would expect that that is something that +STINK did. + +There is code in Midas that outputs a block named GLOBAL in the case of an +absolute assembly, but I don't think it ever does it when it is making a +REL file. 
+ +I've been fooling around with what happens when you say things like + + .GLOBAL C"MOVE,C"STDIO + +and it looks like more-or-less the "right" things happens: Midas +manipulates the symbols in the block named C, and never confuses them with +any other symbols or pseudos. Then it writes a file for STINK that +contains no block information anywhere that STINK will be looking when it +does linking. In the final symbol table, the global symbols will all be +defined in a toplevel block named GLOBAL, and all the others will remain in +per-program sub-blocks named C! + +Received: from SRI-NIC.ARPA (TCP 1200000063) by AI.AI.MIT.EDU 19 Aug 87 22:13:00 EDT +Date: Wed 19 Aug 87 19:10:38-PDT +From: Ken Harrenstien +Subject: Re: KCC meets ITS +To: ITS-KCC@AI.AI.MIT.EDU +cc: KLH@SRI-NIC.ARPA +In-Reply-To: +Message-ID: <12327869669.18.KLH@SRI-NIC.ARPA> + +Hmm, guess I can't wait... here are some comments. + +First, an extract from the KCC PORT.DOC file which pertains to ITS; I'm +including it for reference. + ------------------------- +Thoughts on future port to ITS: + + The main problem is that the only linking loader available on +ITS is STINK, which only understands STINK format REL files. There +may be an old version of the DEC linker available, but this is +non-supported and painful to use. Getting KCC to run on ITS is not +difficult; establishing a scheme for the C compilation environment (so +users can compile their C programs on ITS) is what needs to be figured +out. + + Here is a map of the players. "..." marks links that don't currently +exist but could. 
+ + /.......................>| + | | + |-> MAC --[MACRO]------->|--> DECREL --[T20LINK]------> DECSAV + | /---->| | /..................| +C --[KCC]-->| | /->| | | + |-> FAI --[FAIL]-->| | | \...[CNV]...>| + | \---->| \.......[ITSLNK]...>| + | | | | + |-> MID --[MIDAS]-----|->|--> STKREL --[STINK]---->|--> SBLK + | | + \...........................> newREL ..[newLNK]...>| + +As the diagram shows, both FAIL and MIDAS can produce either DECREL or +STKREL format. (The ITS version of FAIL uses a separate module called +STKTRN to achieve the latter). + +If KCC is ever improved to bypass the assembler phase by outputting +relocatable files directly, it will almost certainly NOT know about +STKREL format. Using this on ITS will require either further +modifications to generate STINK format, or the ITS loader will have to +know about DECREL format. The latter is most general. + +Two handy programs that could be created would be: + CNV (or whatever) to convert DEC format SAVE files into ITS SBLK files. + ITSLNK (or whatever) to gobble relocatable files in DECREL (or some + other, better, format) and produce ITS SBLK files. + +Actual bootstrap of KCC itself: + Several different approaches can be used: +[1] Build KCC elsewhere (T20), dump in old DECSAV format. + FTP over and convert to SBLK executable format. Needs CNV program. +[2] Generate complete set of .FAI files and FTP them. + Run ITS FAIL to generate STINK format rels, then load with STINK. +[3] Generate complete set of .MID files and FTP them. + Run ITS MIDAS to generate STINK format rels, then load with STINK. +[4] Generate complete set of .REL files and FTP them. + Modify STINK (NEWLNK) to understand DECREL format, use that. + ------------------------- + +Quick and dirty approach: + (1) Fix up the C library code so it will run on ITS. + (see USYS.DOC for a list of routines that need fixing). + (2) Set up KCC on T20 for cross-compiling (see PORT.DOC for + cross-compilation instructions). 
+ (3) Write the CNV program to convert DECSAV to SBLK. + + Then, to bring up any C program on ITS, just cross-compile it +on a T20 system, dump the binary out with CSAVE, and use CNV to +convert DECSAV format to SBLK format. Copy to ITS and run. + The KCC program itself will need additional fixes (to do with +how the assembler and loader are invoked) before it can run on ITS. +But before you know what those fixes are, you have to decide what overall +C compilation scheme will be used (what assembler, what loader, etc). And +for various reasons, this implies a new loader. + +About symbols: + From the assembler's viewpoint there are three kinds of +symbols. First there are assembler-specific symbols such as opcodes +and pseudo-ops, and these have to never, ever, conflict with +user-defined symbols. Second, there are external (global) +user-defined symbols; fortunately, ANSI is saying that unless such +symbols are monocasedly unique in the first 6 characters the code is +not conforming (ie not very portable), so we don't have to bust a gut +trying to achieve long external symbol names, although someday this +would be nice. These are the symbols the loader has to worry about. +Finally, there are "static" (internal, local) symbol names which are +local to the C file being compiled and should not be visible outside +of that module. Let's call these symbol types A, G, and L for the +moment. + KCC does not currently use block structure for anything. Symbols +of type A are known to the assembler via FAIL/MACRO opdefs and thus do not +conflict with labels. Unfortunately, pseudo-ops are not done this way, +which is why those pseudo-ops must be expunged from FAIL/MACRO at the start +of assembly (and thus cannot be used during assembly!) + Because $ and % cannot exist in C-defined symbols, those +characters can be used for special purposes (eg KCC prefixes compiler- +generated labels with $ as in $123). 
Static data labels (type L) are +also done this way; static function labels are not, which means it is +possible for some conflict to happen, but KCC could be fixed to remedy +this (at the price of making DDT debugging more difficult). + The suggestion of using a prefix like USER" is a good one, but only +if it works for global symbols. + It would be possible to generate a new variety of MIDAS such that +all type A symbols either had a % prefix, or were block structured (.INIT"), +or something like that. + Or we could just use FAIL. ITS FAIL includes a routine that +translates the output format into STINK format. + +About STINK .LIBRQ etc: + The last time I looked, those pseudos weren't even implemented. +You can carefully specify each and every module you need loaded, and STINK +will load it. You can also say "load this module only if needed". But +you CANNOT say "search this library file for all modules needed". Gack! + +Absolute assembly vs better linking loader: + Absolute assembly is one of those charming wild ideas that +for a moment makes you wonder "what if?" But I think the effort of trying +to set up such a scheme (and it would take some) would be much better +spent in getting a loader to search a library properly. The size of the +C library alone is considerable, and many C programs (KCC is a good example) +are so much easier to work with when you only need to recompile one or +two modules. + The reason for using relocatable files is not necessarily +because the binary only includes what is needed; it is mainly to save +time. Running a linking loader is much faster than re-compiling or +re-assembling lots and lots of code. The C library is about 80 T20 +pages worth of relocatable code and that is an absolute minimum -- +large programs are also split into many modules. For example, the KCC +source code is altogether about 370 T20 pages, not including standard +header files or library code. The .FAI files are somewhat bigger. 
+ + As far as I know, STINK knows nothing about libraries (ie +files which contain more than one module). An interesting strategem +for a new loader would be to parallel the UNIX use of archive files; +that is, make each module be a file in an ITS archive file, such as +AR13:C;FOPEN REL. The advantage of this is that you can use existing +tools to update such "libraries". There would be one special file in +the archive (on UNIX this is called __SYMDEFS, generated by a program +called "ranlib") which contains all of the external symbol table information +for the modules in the archive, so that the loader only needs to look at +that one file to know which of the archive files to load up. You can +invent your own format for this file. + +MIDAS vs FAIL: + + The choice of assembler is somewhat irrelevant. KCC can +generate either (although it is not yet known whether its MIDAS output +is completely winning -- assuredly a declaration like "int move;" will +fail). It would be safest to use FAIL at first. Either FAIL or MIDAS +can produce DECREL or STKREL files. FAIL is one-pass; it's not clear +if MIDAS can handle a one-pass relocatable assembly, but this is a +somewhat minor efficiency consideration. ITS FAIL is currently set to +only generate STKREL, but it would be trivial to add a switch or +pseudo to skip the translation and output just DECREL. + I see no real need to use MIDAS unless we intend to use its +block-structure features or something else I haven't thought of. It is +somewhat easier to maintain than FAIL, to be sure. + + +Finally... + + One way or another we have got to have a new loader before KCC +itself can run on ITS. This can be done by: + (1) stealing and porting T20 LINK (use DECREL fmt) + (2) hacking up STINK to do what we want (use STKREL fmt) + (3) writing a new loader. 
+ (3a) STKREL format + (3b) DECREL format + (3c) New portable format + +The person who actually wants to do the work is probably the person who +will decide which of these gets done. I would just add that if a new loader +gets written, it should be written in C. Also, you may be able to start +with the GNU linking loader. If a new format is simple enough I'm willing +to consider making KCC output it, but not if it's ITS-specific. +------- + +Date: Wed, 19 Aug 1987 19:11 EDT +From: Rob Austein +To: ITS-KCC at AI.AI.MIT.EDU +Re: KCC meets ITS + +I agree with Dave that it would be better to prefix the built-in +symbols, but for different reasons. I think this is the only way to +get things to work right with globals and relocatable assembly in +MIDAS. + +MIDAS actually has five and a half magic block names. + +.INIT & .MAIN are predefined blocks. .INIT is where the predefined +symbols go, .MAIN is the highest available user block. + +.M, .C, & .U are special cased. .M is an alias for .MAIN, unless it's +something else. .C is "current block". .U is "superior block". + +Under certain conditions, MIDAS will rename .MAIN to GLOBAL before +punching (sic) the binary, because it thinks DDT wants this. + +It would be a good idea to prefix all opcodes and such with a block +name. The length of the MIDAS code won't matter if we are serious +about using relocatable assembly. + +Remember that we're trying to have a definition like + extern int move(); +work as one would expect. + +I still don't see why people are so gungho on using MIDAS for +everything. There's maybe five or ten modules that need to .INSRT +stuff, ok, those are done by hand in MIDAS, the rest can use FAIL. + +For those who don't already know about it, there's some code in +XX:SRC: which generates TWOSEG format files that work +correctly with Twenex KCC. 
Obviously some of this has to change for +STINK, but it gives an idea of what one should have in hand assembled +MIDAS modules that need to be linked with KCC-generated code. +Date: Wed, 19 Aug 87 17:23 EDT +From: David A. Moon +To: Alan Bawden +cc: ITS-KCC at AI.AI.MIT.EDU +Re: KCC meets ITS + + Date: Wed, 19 Aug 87 15:51:07 EDT + From: Alan Bawden + + I'm currently leaning towards having Midas do the assembly, especially + since I did an experiment last night and determined that Midas is perfectly + happy with code like: + + GO: MOVE 1,FOO"END + + FOO"END: 700. + + So if KCC always prefixes the users symbols with: USER" (or something) + there shouldn't be any problem. I assume that KCC isn't -already- making + some use of the assembler's block structure? + +Does it work to put the prefix on the built-in symbols instead of on the user +symbols, maybe something like + + .BEGIN USER + GO: MOVE 1,END + END: 700. + .U"END + +(I think .U is the right name, I could have misremembered). If this works the +assembler file will be shorter. + + I've been poking around inside STINK to see what we can do about the + library issue. I don't suppose any of you have experience with using + .LIBRQ, .LIBRA, and .LIFS in Midas and STINK, do you?... That's what I + thought. + +That stuff probably worked back in the 1960s, so there may be some chance +that it still works. I've never used it, as far as I know. I think PUB was +put together with STINK, and I did make a new PUB once or twice, but I don't +remember whether that involved libraries. + + I haven't at all ruled out the possibilities of (A) using an absolute + assembly and doing the linking and library stuff with a Midas macrology + that figures out what to .INSRT, or (B) writing a better STINK in C or + MacLisp. Option (A) has the advantage that its very close to the way + everything else on ITS works. Option (B) gets us a better utility in the + long run. 
+ +I suspect for the initial pass you should use option A without the macrology, +just insert everything. This will get you on the air the quickest, I would +think. The library stuff is really just an optimization to make the object +code smaller by not including things you don't call, right? +Date: Wed 19 Aug 87 14:22:36-PDT +From: Ken Harrenstien +To: ALAN at AI.AI.MIT.EDU +cc: ITS-KCC at AI.AI.MIT.EDU, KLH at SRI-NIC.ARPA +Re: KCC meets ITS + +Fine, could you tweak the ITS-KCC list so that ITS-KCC-NIC@SRI-NIC.ARPA is +on it, and add a comment to the effect that this is where I see the mail. +(I check my ITS mail maybe only once a month nowadays). I also put Ian +and a log file on that local distribution list. Will send some comments +when that's done... +Date: Wed, 19 Aug 87 15:51:07 EDT +From: Alan Bawden +To: KLH at SRI-NIC.ARPA +cc: ITS-KCC at AI.AI.MIT.EDU +Re: KCC meets ITS + + Date: Mon 17 Aug 87 12:02:10-PDT + From: Ken Harrenstien + OK, you're on both lists now. Have you reached any conclusion as to the + best overall scheme for assembling and linking the compilation results? + Perhaps you, I, SRA, and anyone else that you know to be interested should + start a small offshoot mailing list to hammer out ITS-specific issues. + +OK, I created an ITS-KCC mailing list here on AI. I put everybody on it +who has ever shown any interest in the project. (I also made Bug-KCC at +any ITS forward to the main Bug-KCC at NIC.) + +I'm currently leaning towards having Midas do the assembly, especially +since I did an experiment last night and determined that Midas is perfectly +happy with code like: + + GO: MOVE 1,FOO"END + + FOO"END: 700. + +So if KCC always prefixes the users symbols with: USER" (or something) +there shouldn't be any problem. I assume that KCC isn't -already- making +some use of the assembler's block structure? + +(I haven't explored the possible bad interactions between symbol-table block +structure and .GLOBAL'ed symbols. 
It looks like such symbols are +automatically put in the toplevel symbol-table block, which might thwart +this prefixing scheme...) + +I've been poking around inside STINK to see what we can do about the +library issue. I don't suppose any of you have experience with using +.LIBRQ, .LIBRA, and .LIFS in Midas and STINK, do you?... That's what I +thought. + +I haven't at all ruled out the possibilities of (A) using an absolute +assembly and doing the linking and library stuff with a Midas macrology +that figures out what to .INSRT, or (B) writing a better STINK in C or +MacLisp. Option (A) has the advantage that its very close to the way +everything else on ITS works. Option (B) gets us a better utility in the +long run. + \ No newline at end of file diff --git a/doc/kcc/jobdat.txt b/doc/kcc/jobdat.txt new file mode 100755 index 000000000..09c53c015 --- /dev/null +++ b/doc/kcc/jobdat.txt @@ -0,0 +1,70 @@ + +JOBDAT as of 9 Jan 78: +.JBUUO 40 User's UUO data location; opcode and EA stored here +.JB41 41 User's UUO trap address; contains instruction to go + to user's trap routine (usually JSR or PUSHJ) +.JBERR 42 LH: unused RH: accumulated error count +.JBREL 44 LH: 0 RH: highest lowseg address +.JBBLT 45 three locations used by the loader and monitor +.JBDDT 74 LH: Last addres of DDT RH: Starting address of DDT + if 0, DDT is not loaded +.JBHRL 115 LH: length of hiseg RH: highest legal address in + hiseg, 0 => no hiseg +.JBSYM 116 LH: Negative length of RH: Base address of symbol table + symbol table +.JBUSY 117 Same format as .JBSYM, pointer to undefined symbol table +.JBSA 120 LH: Initial .JBFF value RH: Start address +.JBFF 121 LH: 0 RH: Address of first free location + in lowseg; set to .JBSA<18,18> + by RESET UUO +.JBREN 124 LH: Unused RH: REENTER start address +.JBAPR 125 LH: 0 RH: Trap address for APR traps +.JBCNI 126 APR status from CONI when APR trap taken +.JBTPC 127 PC of next instruction when APR trap taken +.JBOPC 130 Previous .JOBPC saved by DDT, 
REENTER, START or CSTART + In some cases this value points to the erroneous UUO, + see the reference manual. +.JBCHN 131 LH: 0 or first free loc RH: Address of first location + after FORTRAN IV after first FORTRAN IV + program BLOCK DATA program +.JBCOR 133 LH: Highest location RH: User core argument on + in lowseg loaded last GET or SAVE command + with nonzero data +.JBINT 134 LH: reserved RH: 0 or address of error + intercept block +.JBVER 137 Version number: in octal, it is given as + gVVVmm,,eeeeee + where + g is the group who last modified the program: + 0 - DEC development + 1 - DEC + 2,3,4 - Customers + 5,6,7 - Customer's users + Mostly CMU ignores this field + + VVV major version number, in octal + mm minor version number. + 00 => no minor version + 01 => A + 02 => B etc. + eeeeee edit level + prints as (eeeeee) + + e.g., the .JBVER version number 103402,,4556 + would print to the VERSION command as 34B(4556)-1 +NOTE: if the program has ONLY a high segment, .JBHVR must be set also! 
+ +For jobs with only a high segment, the vestigial job data area +is defined as the first '10 locations of the high segment, and +the fields are (relative to .JBHGH, usually 400000): + +.JBHSA 0 Copy of .JBSA +.JBH41 1 Copy of .JB41 +.JBHCR 2 Copy of .JBCOR +.JBHRN 3 LH: restores LH of .JBHRL + RH: restores RH of .JBREN +.JBHVR 4 Copy of .JBVER +.JBHNM 5 High-segment name set on a SAVE (sixbit) +.JBHSM 6 Pointer to highsegment symbols, if any + 7 Reserved +.JBHDA 10 First free location after vestigial job data area diff --git a/doc/kcc/kcc.xmail b/doc/kcc/kcc.xmail new file mode 100755 index 000000000..fe0dbf502 --- /dev/null +++ b/doc/kcc/kcc.xmail @@ -0,0 +1,2749 @@ +Babyl Options: +Version:5 +Append:1 +Labels:KCC,RemindNow +Reformat-Headers-P: Save Both +Summary-Window-Format: Use Default + +0, answered,, +*** EOOH *** +Rcvd-Date: 29-May-86 23:11:44-EDT +Received: from SPEECH.MIT.EDU by XX.LCS.MIT.EDU via Chaosnet; 29 May 86 23:11-EDT +Date: Thu 29 May 86 23:11:04-EDT +From: "John Wroclawski" +Subject: Re: KCC +To: SRA@XX.LCS.MIT.EDU +Message-ID: <12210702303.33.JTW@SPEECH.MIT.EDU> + +KCC's been around for a long while. I didn't know he was still playing +with it tho... + +Date: Thursday, 29 May 1986 23:56-EDT +From: Rob Austein +To: "John Wroclawski" +cc: SRA@XX.LCS.MIT.EDU +Re: KCC + +Yeah, I saw a version of KCC when I was still at Wesleyan. It was +pretty poor in those days. BillW claims KLH and others at SRI have +been working on it, adding extended addressing, etc. Bill claimed it +was generating native code directly (instead of using FAIL), but he +may have been confused. There are still some weirdnesses (strings are +not the same thing as char[]s, one is 7-bit-5-per-word, the other is +9-bit-4-per-word). + +Thing is, if we can get a reasonably decent C compiler that will run +on both 20x and ITS I will use it for the resolver and will make sure +the resolver runs on both systems. Otherwise I may have to do it in +MIDAS, which would be a pain. 
+ +I've pretty much decided that the right thing to do is punt this +mapped-into-kernel database idea; instead I'd have a VAF-style +resolver using IPCF (Len Bosack claims IPCF is fast enough for any +reasonable purpose). I'd probably want to keep a HOSTS3 table with +Chaos and (maybe) some small amount of IP data, using the current +CHANM% code, so that the machine could make use of the net while +booting. + +Getting the database out of the kernel solves a whole bunch of +problems (aside from the obvious one of ILMNRFs due to bogus +pointers). It pretty much avoids all the locking problems, since +there's just the one resolver. You can put separate zones in separate +files and do the zone transfer operation as an hourly batch job. The +internal format can be a lot simpler too, like maybe just a text file +and an associated binary hash table (which can be trivially rebuilt, +easing maintenance). Paul's approach is needlessly hairy. + +I'd probably be willing to leave Paul's crock in place if I didn't +have to implement something for ITS anyway. Since I do, I might as +well do something useful for both systems and get this brainbubbled +PASCAL code out of here. What the hell, I only wasted about six +months of my life working on that lossage. + +BillW is so disgusted with Paul's thing that he is talking about doing +a simple version of the IPCF resolver that just goes off and asks some +bind to do the dirty work. Groan. Not for me, thanks, I -know- how +crufty the bind code is. + +0, answered,, +*** EOOH *** +Rcvd-Date: 29-May-86 23:11:44-EDT +Received: from SPEECH.MIT.EDU by XX.LCS.MIT.EDU via Chaosnet; 29 May 86 23:11-EDT +Date: Thu 29 May 86 23:11:04-EDT +From: "John Wroclawski" +Subject: Re: KCC +To: SRA@XX.LCS.MIT.EDU +Message-ID: <12210702303.33.JTW@SPEECH.MIT.EDU> + +KCC's been around for a long while. I didn't know he was still playing +with it tho...
+ +Date: Thursday, 29 May 1986 23:56-EDT +From: Rob Austein +To: "John Wroclawski" +cc: SRA@XX.LCS.MIT.EDU +Re: KCC + +Yeah, I saw a version of KCC when I was still at Wesleyan. It was +pretty poor in those days. BillW claims KLH and others at SRI have +been working on it, adding extended addressing, etc. Bill claimed it +was generating native code directly (instead of using FAIL), but he +may have been confused. There are still some weirdnesses (strings are +not the same thing as char[]s, one is 7-bit-5-per-word, the other is +9-bit-4-per-word). + +Thing is, if we can get a reasonably decent C compiler that will run +on both 20x and ITS I will use it for the resolver and will make sure +the resolver runs on both systems. Otherwise I may have to do it in +MIDAS, which would be a pain. + +I've pretty much decided that the right thing to do is punt this +mapped-into-kernel database idea; instead I'd have a VAF-style +resolver using IPCF (Len Bosack claims IPCF is fast enough for any +reasonable purpose). I'd probably want to keep a HOSTS3 table with +Chaos and (maybe) some small amount of IP data, using the current +CHANM% code, so that the machine could make use of the net while +booting. + +Getting the database out of the kernel solves a whole bunch of +problems (aside from the obvious one of ILMNRFs due to bogus +pointers). It pretty much avoids all the locking problems, since +there's just the one resolver. You can put separate zones in separate +files and do the zone transfer operation as an hourly batch job. The +internal format can be a lot simpler too, like maybe just a text file +and an associated binary hash table (which can be trivially rebuilt, +easing maintenance). Paul's approach is needlessly hairy. + +I'd probably be willing to leave Paul's crock in place if I didn't +have to implement something for ITS anyway. Since I do, I might as +well do something useful for both systems and get this brainbubbled +PASCAL code out of here.
What the hell, I only wasted about six +months of my life working on that lossage. + +BillW is so disgusted with Paul's thing that he is talking about doing +a simple version of the IPCF resolver that just goes off and asks some +bind to do the dirty work. Groan. Not for me, thanks, I -know- how +crufty the bind code is. + +0,, RemindNow, +*** EOOH *** +Rcvd-Date: 4-Jun-86 19:19:16-EDT +Return-Path: +Received: from SRI-NIC.ARPA by XX.LCS.MIT.EDU with TCP; Wed 4 Jun 86 19:18:42-EDT +Date: Wed 4 Jun 86 16:14:16-PDT +From: Ken Harrenstien +Subject: Re: winning C compiler? +To: SRA@XX.LCS.MIT.EDU +cc: alan@AI.AI.MIT.EDU, billw@SU-SCORE.ARPA, KLH@SRI-NIC.ARPA +In-Reply-To: Message from "Rob Austein " of Thu 29 May 86 15:35:44-PDT +Message-ID: <12212232057.29.KLH@SRI-NIC.ARPA> + +Yes. I am on the verge of wrapping up an overhauled KCC + library +which conforms to Harbison&Steele's CARM book (such a simple +statement, such a long project!), and we are using it for all of our +production programs nowadays. In a couple of weeks (vacation plans, +etc) a true distribution should be ready. + +This is meant to be the new KCC, rather than another offshoot of KCC +such as Greg Titus' NMIT version is. I'm afraid I can't compare +them myself as we don't have a copy here. You can retrieve the +file "C:CC.DOC" from SRI-NIC if you want to get an overview. + +While the library is currently written for TOPS-20, it should be +possible to adapt it to ITS without too much trouble. All of the +conditionals and hooks are already there, and I always intended that +it should be portable to other PDP-10 systems. There are still a +couple of issues to be resolved, such as deciding which linking loader +format to use on ITS -- DECREL or STINK? If you know other people who +are interested in hacking this sort of thing, gather them up and we'll +decide what should be done by who. 
+ +0,, RemindNow, +*** EOOH *** +Rcvd-Date: 4-Jun-86 19:19:16-EDT +Return-Path: +Received: from SRI-NIC.ARPA by XX.LCS.MIT.EDU with TCP; Wed 4 Jun 86 19:18:42-EDT +Date: Wed 4 Jun 86 16:14:16-PDT +From: Ken Harrenstien +Subject: Re: winning C compiler? +To: SRA@XX.LCS.MIT.EDU +cc: alan@AI.AI.MIT.EDU, billw@SU-SCORE.ARPA, KLH@SRI-NIC.ARPA +In-Reply-To: Message from "Rob Austein " of Thu 29 May 86 15:35:44-PDT +Message-ID: <12212232057.29.KLH@SRI-NIC.ARPA> + +Yes. I am on the verge of wrapping up an overhauled KCC + library +which conforms to Harbison&Steele's CARM book (such a simple +statement, such a long project!), and we are using it for all of our +production programs nowadays. In a couple of weeks (vacation plans, +etc) a true distribution should be ready. + +This is meant to be the new KCC, rather than another offshoot of KCC +such as Greg Titus' NMIT version is. I'm afraid I can't compare +them myself as we don't have a copy here. You can retrieve the +file "C:CC.DOC" from SRI-NIC if you want to get an overview. + +While the library is currently written for TOPS-20, it should be +possible to adapt it to ITS without too much trouble. All of the +conditionals and hooks are already there, and I always intended that +it should be portable to other PDP-10 systems. There are still a +couple of issues to be resolved, such as deciding which linking loader +format to use on ITS -- DECREL or STINK? If you know other people who +are interested in hacking this sort of thing, gather them up and we'll +decide what should be done by who. + +0,, +*** EOOH *** +Rcvd-Date: 20-Oct-86 18:11:14-EDT +Mail-From: SRA created at 20-Oct-86 18:11:11 +Date: Mon, 20 Oct 1986 18:11 EDT +Message-ID: +From: Rob Austein +To: Paul Mockapetris +Cc: BillW@SU-SCORE.ARPA, Stu Grossman , + sra@XX.LCS.MIT.EDU +Subject: GTDOM causes systems crashes in JOBCOF +In-reply-to: Msg of 14 Oct 1986 21:35-EDT from Paul Mockapetris + +Sorry about delay answering this, folks, XX been sick and I been away. 
+ + Date: Tuesday, 14 October 1986 21:35-EDT + From: Paul Mockapetris + + Thanks for the hint about testing for a pending ^C. (I understand that + there are other things that should do the same thing, but I'm not really + a TOPS-20 guy and didn't know the right term.). + +This one's news to me. Somebody fill me in? Or maybe I should be on +SU-TOPS-20@SCORE? + It brings up the + question of whether we can get together on a consistent set of domain + code for TOPS-20. + +It'd be nice. Be warned that I have been working on some C code for +ITS (JEEVES absolutely will -not- port there for fundamental reasons) +and if I ever finish it and if it is a winner I may run it on Twenex +too. I'd want to keep the user interface (user side of GTDOM% jsys) +the same though, so that we can get some user network code written +already. In fact, assuming that nobody else does anything +particularly braindamaged, I hereby -promise- to keep the user jsys +interface compatible. + + To start off I frankly admit that I am not a TOPS-20 system programmer. + The announcement that I recently sent out (to a very limited audience) + points at code that has real problems: + + It doesn't address the ^C issue + +Can't speak to this, not knowing what you're talking about. + + It doesn't even have a good set of mnemonics for the JSYS + +That I've got taken care of. See XX:MONSYM.MAC.46 +(generation number tracks MIT monitor edit number). + + The JSYS interface should perhaps be like the MIT interface + +The interfaces are the same except for functions 11 and 12. Obviously +I'm biased since I'm the person who changed it, but function 11 really +can't be used the way you wrote it because it's allowed to randomly +trash memory (no string length count). My changes to function 12 +probably don't really belong on function 12 (should be separate +function); they're part of the support for the double-dip resolver +invocation code. + + The JSYS code isn't up to coding standards + +No comment.
+ + Its based on 5.1 + +You mean 5.0, yes? 5.1 was 5.3 without the network code. + +The MIT/Stanford code is running pretty much unchanged on 5.4 and 6.1. +I'm pretty sure the 5.4 code would run just fine on ISI's 5.machines +with a few cosmetic changes. In fact I think my code already has a +switch you can enable to change the names of the .UNV files it +searches, which is the only problem I know about. + + A lot of these stem from the 5.1 problem; I just can't debug code here + that is compatible with most of the external world, so frankly I haven't + tried. The virtues of this code is that it has much better + retransmission policies and logging (hence debugging) features, than + those which were in the code on which your current versions are based. + +Without meaning to sound too obnoxious, this one is just your own +damned fault. If you hadn't decided to lock me out of read access to +your sources the MIT version would be a superset of your code. That's +over and done now, but maybe you should keep it in mind in the future. + + The drawbacks to the MIT/Stanford code are that it lacks the recent + resolver improvements (I am just assuming you haven't changed it much, + correct me if I am wrong.), + +You're correct. Just bugfixes at MIT, and I believe this is also the +case at Stanford. Oh, BillW did do some nice work searching the list +of addresses returned during function .GTHSN so that it would return +the most useful address for a multi-homed host (ie, if SCORE is +looking for the address of NIC it should get the ARPANET address, not +the MILNET address). + + plus the double-dip approach chews the + stack. + +I don't understand the problem. Yes, the code uses about 80% of the +UPDL while it's sitting in the resolver wait. So what? It does it +once per resolve and it's not doing anything else while it's waiting +(in fact, it's dismissed into the scheduler and not doing squat). 
+ + The virtues are the ^c and better JSYS interface, plus any other + improvements you have made. + + What I would like to see happen is that a single bunch of code emerges + for TOPS-20. I think we can do this by sitting my resolver code upon a + modified version of the MIT code. + + The JSYS code should collect the query name on the stack (to avoid the + problems with TTY as input, etc), go NOINT, copy the query name from the + stack to the search block, start the resolver, and while waiting for + resolver completion periodically check for ^C et al a la Stu's method. + If it sees a pending interrupt before the resolver completes (assuming + DISMS busy wait), it marks the query as background (so the resolver + releases the resources), allows interrupts, and gets blown away. + +I don't understand the bit about reading TTY input. I'm not sure +that's a reasonable thing to have to do anyway; many jsi restrict +their i/o designators to string pointers in the interest of +simplicity. + +The whole point of the double dip method was that allowed you to +behave like a normal piece of code and not have to busy wait or check +for pending interrupts or whatever. If there is some new piece of +data that says this method is bankrupt, please somebody tell me, but +otherwise my opinions on the matter haven't changed and I want nothing +to do with busy waits or dismissing with locks held down. + + The check for interrupt stuff naturally combines with some check for + resolver restart code which I added to allow the ISI version to restart + JEEVES on the fly. We could also allow a remap of the database to allow + dynamic replaces of FLIP as a fairly straightforward exercise, given the + system expertise to do the SMAP equivalent stuff. + +Now that would be real nice. I would just love to be able to restart +the nameserver without having to crash XX! It hasn't been a high +enough priority item for me to try to fix it, but I'm definitely interested. 
+ + In any case, I would like to see a reasonable version available to + the world at large. I don't think that any of the current + versions are good enough. + +I agree. I like mine better than yours but would be the first to +admit that both have problems. Some of them are shared problems. Eg, +it really should not be necessary for a program to grok domain +internal string format (length byte followed by data) to be able to do +things like get the cannonical name of a host. There ought to be some +function that takes asciz arguments and returns asciz results. + + Two choices: + + 1) Combine what we have. + + I'm not really competent to do the requisite system hacking + even if I had a 6.X., but I'd be willing to reconcile the user level + stuff and help with the GTDOM level stuff. + +Clearly the right way to go if people are willing to do the work. + + 2) Distribute one or both versions. + + If, for whatever reasons, we can't converge, will MIT/Stanford + whatever agree to publicly distribute a 6.X version of whatever + lineage? + +My code is free to all comers, with the understanding that the MIT +patent office will sue the hind legs off of anybody who tries to +copywrite or sell it for profit without prior agreement by MIT. The +6.1 specific stuff is also partially Stanford property; I don't know +what their current policy is but I expect it isn't too different, +especially considering the obvious mixed parentage of the code in +question. + +All source code on XX is public read except where it is either a +security risk to us or a violation of contract. Neither applies here. +We might even be willing to handle tape distribution (probably +unnecessary given that this is network specific code) at cost + beer +money. + + I'd appreciate you comments. I'll be up at SRI for the rest of the week + and will probably try to bang on the Stanford domain guru's doors. I + really hope somebody can come up with a reasonable TOPS-20 version. + +Me too. 
Above acrimonious comments notwithstanding, I'd like to get +something out in the field already. I'm willing to put some work into +it, although not a whole lot. My C code is about 2/3 finished, +assuming that the new SRI version of the KCC compiler and runtimes is +more reliable than the old one from Stanford. If it looks like less +work to finish the GTDOM% interface for that code I'll do that +instead. I intend to keep using your nameserver in any case; I have +no need or desire to write one of my own (well, maybe a stripped down +TCP / Chaos-stream version for zone transfers, but that's a simple and +special case). + +--Rob + +0,, +*** EOOH *** +Rcvd-Date: 29-Oct-86 20:14:52-EST +Mail-From: SRA created at 29-Oct-86 20:14:51 +Date: Wed, 29 Oct 1986 20:14 EST +Message-ID: +From: Rob Austein +To: klh@SRI-NIC.ARPA, ian@SRI-NIC.ARPA +cc: sra@XX.LCS.MIT.EDU +Subject: new KCC + +Could I get a tape copy of KCC? I've been trying to get it via FTP +from SRI-NIC and from Sierra, but some of the files aren't +world-readable and some of the sources are out of sync and the whole +thing is a depressing mess. + +Should I send you a tape or send you money or what? I'd like to get +this ASAP, I've got some domain code that I want to get debugged (the +old Stanford KCC I have is so riddled with bugs that it's essentially +useless for this). + +Thanks.... + +--Rob + +0, answered,, +*** EOOH *** +Rcvd-Date: 29-Oct-86 20:47:23-EST +Return-Path: +Received: from SRI-NIC.ARPA by XX.LCS.MIT.EDU with TCP; Wed 29 Oct 86 20:47:20-EST +Date: Wed 29 Oct 86 17:42:30-PST +From: Ken Harrenstien +Subject: Re: new KCC +To: SRA@XX.LCS.MIT.EDU, ian@SRI-NIC.ARPA +cc: KLH@SRI-NIC.ARPA +In-Reply-To: +Message-ID: <12250794213.13.KLH@SRI-NIC.ARPA> + +When did you last try? As far as I know, everything is set up properly +and I'd like specifics if it isn't. Retrieve the file KCCDIST:INSTAL.DOC +for information. + +We are probably going to charge something like $100 for tapes as it is +kind of a pain. 
+ +There have been a couple of bug fixes to this release, so let me know +after you have the stuff. + +Hmm, you aren't on INFO-KCC, are you? Otherwise you would have heard +about the latest release... + +0,, +*** EOOH *** +Rcvd-Date: 29-Oct-86 22:04:01-EST +Mail-From: SRA created at 29-Oct-86 22:03:59 +Date: Wed, 29 Oct 1986 22:03 EST +Message-ID: +From: Rob Austein +To: Ken Harrenstien +Cc: Ian@SRI-NIC.ARPA, SRA@XX.LCS.MIT.EDU +Subject: New KCC +In-reply-to: Msg of 29 Oct 1986 20:42-EST from Ken Harrenstien + + Date: Wednesday, 29 October 1986 20:42-EST + From: Ken Harrenstien + + When did you last try? As far as I know, everything is set up properly + and I'd like specifics if it isn't. Retrieve the file KCCDIST:INSTAL.DOC + for information. + +I think all the code errors and sync errors are from the version from +Sierra. The problems I've had getting the stuff from NIC are things +like FTP trashing files (I've been retrieving with SET KEEP VERSION to +try and keep generation numbers in sync, but when FTP loses on a +transfer I end up with garbage even if I had a decent file before), at +one point what looked like either a read protection error for every +file I tried or else SS: was offline. Plus of course the usual +hassles getting a connection to the machine at all. So except for the +possible protection lossage it's not anything you did. The request +for a tape was prompted by my housemate, who was getting tired of +listening to me cuss as I experienced all these network hassles. + + We are probably going to charge something like $100 for tapes as it is + kind of a pain. + +Well, the money isn't that much of an issue, but I'll try it again +tonight at some dark hour and let you know if that wins. But some of +the code isn't public read, is it? (Like KCCDIST:<.LIB.USYS>.) How +do I get that stuff, or don't I? + + There have been a couple of bug fixes to this release, so let me know + after you have the stuff. + + Hmm, you aren't on INFO-KCC, are you? 
Otherwise you would have heard + about the latest release... + +Right, I'm not. Add me, please? + +I'm having a little more luck with the code I've already got since I +got copies of LIBC and LIBCKX from NIC and recompiled/linked KCC +itself to use extended addressing. Some of the code I'm using is +reading a 40 page MONSYM.H file, which apparently eats more memory +than's available in section zero. + +BTW, I have an implementation of FLD() (a la MACSYM.MAC) which is real +handy for systems hacking. It's yours if you want it. In fact, being +all of one line long, here it is, just hit your D key if you don't +want it: + +#define FLD(bits,mask) ((((mask)&(-(mask)))*(bits))&(mask)) + +What are you using to generate your MACSYM.H and MONSYM.H files? I'm +using the MIDAS syntax output from CVTUNV and reformatting it with +TECO, but that's a bit of a pain. + +Thanks.... + +--Rob + +0,, +*** EOOH *** +Rcvd-Date: 30-Oct-86 03:08:49-EST +Return-Path: +Received: from SRI-NIC.ARPA by XX.LCS.MIT.EDU with TCP; Thu 30 Oct 86 03:08:43-EST +Date: Thu 30 Oct 86 00:06:28-PST +From: Ken Harrenstien +Subject: Re: What happened this evening +To: Vivian@SRI-NIC.ARPA, sys-staff@SRI-NIC.ARPA +cc: KLH@SRI-NIC.ARPA, sra@XX.LCS.MIT.EDU +In-Reply-To: <12250622833.16.VIVIAN@SRI-NIC.ARPA> +Message-ID: <12250864111.17.KLH@SRI-NIC.ARPA> + +KCCDIST on SS: is still trashed. In particular complains +about bad directory format, and at least one other subdirectory, <.LIB>, is +missing completely. This really makes me feel good when I just assured +someone at MIT that KCCDIST contained our complete, consistent distribution +and that he should get everything from there! + +Please flush that directory and completely restore it. It should not have +had any changes for at least a couple of weeks. 
+ +0, answered,, +*** EOOH *** +Rcvd-Date: 30-Oct-86 03:14:23-EST +Return-Path: +Received: from SRI-NIC.ARPA by XX.LCS.MIT.EDU with TCP; Thu 30 Oct 86 03:14:11-EST +Date: Thu 30 Oct 86 00:12:23-PST +From: Ken Harrenstien +Subject: Re: New KCC +To: SRA@XX.LCS.MIT.EDU +cc: Ian@SRI-NIC.ARPA, KLH@SRI-NIC.ARPA +In-Reply-To: +Message-ID: <12250865188.17.KLH@SRI-NIC.ARPA> + +I'll add you to INFO-KCC. I just found that KCCDIST got royally messed up +in a recent disk failure and someone neglected to restore it properly, so +hold off on your transfer attempt until further notice. Sorry... +I'll forward the NEWS.TXT file in another message so you'll know what +the stuff contains. + +I don't think we use anything to generate MACSYM.H or MONSYM.H; we just +took what Sierra had. Since most of our code is oriented towards portability +and uses STDIO whenever possible, we haven't had much need for most of +the OS symbols. As you noticed, the sheer number of them does cause problems. + +0,, +*** EOOH *** +Rcvd-Date: 30-Oct-86 03:18:39-EST +Return-Path: +Received: from SRI-NIC.ARPA by XX.LCS.MIT.EDU with TCP; Thu 30 Oct 86 03:18:36-EST +Date: Thu 30 Oct 86 00:16:47-PST +From: Ken Harrenstien +Subject: news.txt +To: sra@XX.LCS.MIT.EDU +cc: klh@SRI-NIC.ARPA, ian@SRI-NIC.ARPA +Message-ID: <12250865990.17.KLH@SRI-NIC.ARPA> + +On second thought you probably already have that (or have the latest C:CC.DOC) +so it isn't necessary; you couldn't have made the extended version otherwise. +Since you are presumably running with the stuff you got from our C: and +SYS:CC.EXE I would suggest you stick with it until the 3rd distribution is +ready, since there are a few bug fixes that will render the KCCDIST sources +inconsistent with your current binaries (that is the price of gobbling +things from C: instead of KCCDIST:). 
+ +0,, +*** EOOH *** +Rcvd-Date: 30-Oct-86 13:04:20-EST +Mail-From: SRA created at 30-Oct-86 13:04:13 +Date: Thu, 30 Oct 1986 13:04 EST +Message-ID: +From: Rob Austein +To: Ken Harrenstien +Cc: Ian@SRI-NIC.ARPA, SRA@XX.LCS.MIT.EDU +Subject: New KCC +In-reply-to: Msg of 30 Oct 1986 03:12-EST from Ken Harrenstien + +Ok, I will hold off on getting a new copy until I see a new +distribution notice. + +I may write up the few lines of TECO code necessary to make MACSYM.H +and MONSYM.H as an EMACS format library, since I'm tired of doing it +by hand. + +It would be nice if things like fflush() worked "right" on network +connections. This probably means using SOUTR% instead of SOUT%. +Similarly, fclose() for TCP should probably do the kludge of using +TCOPR% to send a FIN down the stream before closing it. In both of +these cases the right place to do this stuff is probably down in +close() and write() or someplace similar. Once I get a consistant set +of sources I will probably code this up. Right now I'm just using my +own network code with jsys() calls every four lines. + +Any changes I make you are of course welcome to. + +Thanks for the news, I'm relieved to know I wasn't halucinating +although I'm sorry that the problem turned out to be a trashed disk. + +--Rob + +0, answered,, +*** EOOH *** +Rcvd-Date: 30-Oct-86 14:49:14-EST +Return-Path: +Received: from SRI-NIC.ARPA by XX.LCS.MIT.EDU with TCP; Thu 30 Oct 86 14:48:59-EST +Date: Thu 30 Oct 86 11:43:17-PST +From: Ian Macky +Subject: Re: New KCC +To: SRA@XX.LCS.MIT.EDU +cc: klh@SRI-NIC.ARPA +In-Reply-To: +Message-ID: <12250990963.22.IAN@SRI-NIC.ARPA> + +i am using C code for a couple servers now. i used to use SOUTR's as +the general-case output call, but that didn't seem right. 
then i put +in a hook for "finish" at close time, that did the TCOPR; but then it +was unnecessary for my application, since the C code was running as an +inferior of the listening program, which did its own force and close +after the inferior halted. the inferior just got its primary I/O +redirected by the listener, and knew not to follow the re-direction and +try to close the file itself. + +but perhaps that finish call (or _finish if an internal hook) ought to +be put back in. dunno... + +--ian + +0,, +*** EOOH *** +Rcvd-Date: 30-Oct-86 15:17:56-EST +Mail-From: SRA created at 30-Oct-86 15:16:53 +Date: Thu, 30 Oct 1986 15:16 EST +Message-ID: +From: Rob Austein +To: Ian Macky +Cc: klh@SRI-NIC.ARPA, SRA@XX.LCS.MIT.EDU +Subject: fflush() and SOUTR% +In-reply-to: Msg of 30 Oct 1986 14:43-EST from Ian Macky + +The simplest sollution I can think of is to open the TCP connection in +.TCMWH mode and then use SOUTR% for fflush(). This shouldn't load +things too badly, and it has the advantage that the C code's idea of +the buffering state has some vauge resemblance to reality. + +Chaosnet it shouldn't hurt to do SOUTR% all the time. + +This doesn't address people hacking tapes, but then I don't think the +present code does either. + +I suppose another (really gross) way to deal with this would be a +global variable that gets set to one or zero depending on whether the +code wants SOUT% or SOUTR%. + +--Rob + +0,, +*** EOOH *** +Rcvd-Date: 30-Oct-86 16:50:10-EST +Mail-From: SRA created at 30-Oct-86 16:50:03 +Date: Thu, 30 Oct 1986 16:50 EST +Message-ID: +From: Rob Austein +To: bug-kcc@SRI-NIC.ARPA +cc: sra@XX.LCS.MIT.EDU +Subject: Bad generated code + +This might be the fault of the weird state my compiler/library etc are +in. Here's what I get. Note the lack of an ADJSP at label "foo:" and +the -2(P) offset in the LDB pointer. 
+ +---------------- +Source file: +---------------- +struct mumble { + unsigned a : 2; + unsigned b; +}; + +foo(h) + struct mumble *h; +{ + int c; + if(!h->a) + return; +} +---------------- +FAIL output: +---------------- + TITLE foo + .REQUEST C:LIBc.REL + $$CVER==<1,,1> + INTERN $$CVER + OPDEF ADJBP [IBP] +DEFINE %CHRBP(A,M) +< SETO A, + ADJBP A,M +> +IFNDEF ERJMP,< OPDEF ERJMP [JUMP 16,] > +OPDEF ERJMPA [ERJMP] +OPDEF XMOVEI [SETMI] + DEFINE IFIW + TWOSEG 400000 + RELOC 0 + RELOC 400000 + DEFINE %%CODE + DEFINE %%DATA +PURGE IFE,IFN,IFG,IFGE,IFL,IFLE,IFDEF,IFNDEF,IFIDN,IFDIF +foo: + LDB 4,[420237,,-2] + CAIN 4,0 + POPJ 17, + POPJ 17, + +$$CPKI==0 + INTERN $$CPKI +$$CPKA==0 + INTERN $$CPKA + + LIT + EXTERN $$$CPU + EXTERN $$$CRT + INTERN foo + END + +0,, RemindNow, +*** EOOH *** +Rcvd-Date: 31-Oct-86 03:42:17-EST +Return-Path: +Received: from SRI-NIC.ARPA by XX.LCS.MIT.EDU with TCP; Fri 31 Oct 86 03:42:14-EST +Date: Fri 31 Oct 86 00:41:14-PST +From: Ken Harrenstien +Subject: Re: Bad generated code +To: SRA@XX.LCS.MIT.EDU, bug-kcc@SRI-NIC.ARPA +cc: KLH@SRI-NIC.ARPA +In-Reply-To: +Message-ID: <12251132584.17.KLH@SRI-NIC.ARPA> + +Congratulations on finding a Real Bug. This has been fixed as of KCC 535. + +The problem was that whoever wrote the code to optimize out ADJSPs forgot +to have it check for indexing in byte-pointer literals. + +0, answered,, +*** EOOH *** +Rcvd-Date: 31-Oct-86 04:37:51-EST +Return-Path: +Received: from SRI-NIC.ARPA by XX.LCS.MIT.EDU with TCP; Fri 31 Oct 86 04:37:16-EST +Date: Fri 31 Oct 86 01:35:14-PST +From: Ken Harrenstien +Subject: Re: New KCC +To: SRA@XX.LCS.MIT.EDU +cc: Ian@SRI-NIC.ARPA, KLH@SRI-NIC.ARPA +In-Reply-To: +Message-ID: <12251142413.17.KLH@SRI-NIC.ARPA> + +As you say, the right place for any device-dependent stuff is in the +lower-level "kernel" routines such as write() and close(). STDIO is +supposed to use only those functions, in an attempt to keep it as +portable as possible for unix-type systems. 
There is already a +device-type index in the FD data structures, so special things can +be done. + +I'd like to minimize those special things, though. For example, if +you just open the connection in interactive mode, each SOUT% will +force out the data as if a SOUTR% was done. You can still have +complete control over the buffering, even with STDIO, by using +setbuffer() and/or fflush(). Along the same lines, I'm not sure why +the TCOPR% should be necessary to send a FIN, since CLOSF% of a TCP +JFN is supposed to do this (as long as CZ%ABT is not set). + + +As for MONSYM/MACSYM I'm trying to find out where those files came +from. They are clearly machine generated. I suppose it would not be +too hard to simply modify CVTUNV or (ideally) re-do it in C. + +0,, +*** EOOH *** +Rcvd-Date: 31-Oct-86 13:07:22-EST +Mail-From: SRA created at 31-Oct-86 13:07:20 +Date: Fri, 31 Oct 1986 13:07 EST +Message-ID: +From: Rob Austein +To: Ken Harrenstien +Cc: Ian@SRI-NIC.ARPA, SRA@XX.LCS.MIT.EDU +Subject: New KCC +In-reply-to: Msg of 31 Oct 1986 04:35-EST from Ken Harrenstien + +All I can say about SOUT%/SOUTR% is that back about a year ago when I +was (for some forgotten reason) doing FINGER protocol negotiations by +hand from DDT, SOUT% did not work right (foreign machine never got +query string) no matter what mode I used, but SOUTR% did work. + +The TCOPR% .TCFIN kludge is a well known screw in the TCP: device code +(not in the BBN interface, this is something weird that DEC did that +nobody has ever tracked down). Doing CLOSF% on a TCP: JFN does indeed +send a FIN. But you will hang forever in the CLOSF% (waiting for the +FIN on the input side of the bi-directional JFN). 
The three +cannonical ways of coping with this are: get the other end to send you +a FIN first (in which case CLOSF% doesn't hang), use TIMER% to break +out of the CLOSF% then do a second CLOSF% with CZ%ABT set (which +doesn't send a TCP RESET because the first CLOSF% already sent the +FIN), or use TCOPR% to send the FIN by hand before going into the +CLOSF% in the first place (which appears to work for some unfathomed +reason). Of the three methods I find the TCOPR% one the least painful +for use in generic programs. + +As you have probably noticed, the Twenex TCP code is somewhat, shall +we say, eccentric. + +--Rob + +0, answered,, +*** EOOH *** +Rcvd-Date: 31-Oct-86 13:24:35-EST +Return-Path: +Received: from SRI-NIC.ARPA by XX.LCS.MIT.EDU with TCP; Fri 31 Oct 86 13:23:55-EST +Date: Fri 31 Oct 86 10:18:25-PST +From: Ken Harrenstien +Subject: Re: New KCC +To: SRA@XX.LCS.MIT.EDU +cc: Ian@SRI-NIC.ARPA, KLH@SRI-NIC.ARPA +In-Reply-To: +Message-ID: <12251237658.17.KLH@SRI-NIC.ARPA> + +Well, inasmuch as SOUTR% is exactly like SOUT% except for certain devices +(tape and TCP) it's OK to use that in write(). In fact this is just how +UNIX handles raw mode output to magtape; each write() call outputs one +record. As for the close(), I guess we can insert a special case for that. +What bothers me is that the device type code for TCP: may differ depending +on the particular site; our monitor here appears to use a Stanford +conditional when it sets that value. + +Alternatively we could simply fix the case of CLOSF% on TCP:. I have looked +at the code and suspect it can be fixed fairly easily (if it hasn't +already been done for our version of the monitor). 
+ +0,, +*** EOOH *** +Rcvd-Date: 31-Oct-86 14:39:25-EST +Mail-From: SRA created at 31-Oct-86 14:39:22 +Date: Fri, 31 Oct 1986 14:39 EST +Message-ID: +From: Rob Austein +To: Ken Harrenstien +Cc: Ian@SRI-NIC.ARPA, SRA@XX.LCS.MIT.EDU +Subject: New KCC +In-reply-to: Msg of 31 Oct 1986 13:18-EST from Ken Harrenstien + + Date: Friday, 31 October 1986 13:18-EST + From: Ken Harrenstien + + Well, inasmuch as SOUTR% is exactly like SOUT% except for certain devices + (tape and TCP) it's OK to use that in write(). In fact this is just how + UNIX handles raw mode output to magtape; each write() call outputs one + record. + +Fine. + + As for the close(), I guess we can insert a special case for that. + What bothers me is that the device type code for TCP: may differ depending + on the particular site; our monitor here appears to use a Stanford + conditional when it sets that value. + +I think that's just paranoia. DEC didn't put the symbol .DVTCP into +MONSYM for some reason I have never figured out. I'm pretty sure all +existing monitors use 025 for the value. You can always use +#ifndef/#endif if you're worried about it. + + Alternatively we could simply fix the case of CLOSF% on TCP:. I + have looked at the code and suspect it can be fixed fairly easily + (if it hasn't already been done for our version of the monitor). + +If you are willing to do that it'd be a real win. You are probably +one of the few people in the universe who has a chance of touching +that code and living to tell the tale.... + +--Rob + +1,, +Rcvd-Date: 31-Oct-86 22:37:47-EST +Return-Path: <@Score.Stanford.EDU:KLH@SRI-NIC.ARPA> +Received: from Score.Stanford.EDU by XX.LCS.MIT.EDU with TCP; Fri 31 Oct 86 22:37:30-EST +Received: from SRI-NIC.ARPA by SU-SCORE.ARPA with TCP; Fri 31 Oct 86 13:45:04-PST +Date: Fri 31 Oct 86 13:44:19-PST +From: Ken Harrenstien +Subject: Device type 025? 
+To: tops-20@SU-SCORE.ARPA +cc: klh@SRI-NIC.ARPA +Message-ID: <12251275139.17.KLH@SRI-NIC.ARPA> + +*** EOOH *** +Date: Friday, 31 October 1986 16:44-EST +From: Ken Harrenstien +To: tops-20@SU-SCORE.ARPA +cc: klh@SRI-NIC.ARPA +Re: Device type 025? + +There seems to be a conflict here. In MONSYM.MAC, the symbol .DVADS is +defined as 025 for "AYDIN DISPLAY" (whatever that is). However, in the +INIDVT table in STG.MAC (which sets up DEVCHR), 025 is used as the device +type for the TCP: device. There is no .DVTCP symbol, although one would +reasonably expect this. I find it hard to believe that the INIDVT table +doesn't use symbolic .DVxxx constants, and even harder to believe that +there is no .DVxxx for TCP:. This is for the 6.1 sources from Stanford. + +Considering the mess things are in, I'm not sure whether it is safe to +write portable code which does a DVCHR% on a JFN to see whether it is +a TCP stream or not. + +0,, +*** EOOH *** +Rcvd-Date: 1-Nov-86 09:54:17-EST +Return-Path: <@Score.Stanford.EDU:budd@BU-CS.BU.EDU> +Received: from Score.Stanford.EDU by XX.LCS.MIT.EDU with TCP; Sat 1 Nov 86 09:54:10-EST +Received: from CSNET-RELAY.ARPA by SU-SCORE.ARPA with TCP; Sat 1 Nov 86 05:40:01-PST +Received: from bu-cs.bu.edu by CSNET-RELAY.ARPA id aa15685; 1 Nov 86 3:15 EST +Received: by bu-cs.bu.edu (5.31/4.7) + id AA16418; Sat, 1 Nov 86 01:46:54 EST +Return-Path: +Received: by bucsd.bu.edu (5.31/4.7) + id AA26345; Sat, 1 Nov 86 01:46:48 EST +Date: Sat, 1 Nov 86 01:46:48 EST +From: budd@BU-CS.BU.EDU +Message-Id: <8611010646.AA26345@bucsd.bu.edu> +To: KLH@SRI-NIC.ARPA, tops-20@SU-SCORE.ARPA +Subject: Re: Device type 025? + +I recall seeing that internal versions of 6.1 (pre release) +used .DVNET (ie; the old NCP NET device) for TCP. + +You can always do a STDEV on /TCP/ to get a device designator. +And determine the correct number from that. 
+ +-phil + +0,, +*** EOOH *** +Rcvd-Date: 1-Nov-86 04:03:29-EST +Return-Path: <@Score.Stanford.EDU,@SUMEX-AIM.ARPA:MRC@PANDA> +Received: from Score.Stanford.EDU by XX.LCS.MIT.EDU with TCP; Sat 1 Nov 86 04:03:11-EST +Received: from SUMEX-AIM.ARPA by SU-SCORE.ARPA with TCP; Sat 1 Nov 86 00:57:24-PST +Received: from PANDA by SUMEX-AIM.ARPA with Cafard; Sat 1 Nov 86 00:56:04-PST +Date: Sat 1 Nov 86 00:43:12-PST +From: Mark Crispin +Subject: Re: Device type 025? +To: KLH@SRI-NIC.ARPA +cc: TOPS-20@Score.Stanford.EDU +In-Reply-To: <12251275139.17.KLH@SRI-NIC.ARPA> +Postal-Address: 1802 Hackett Ave.; Mountain View, CA 94043-4431 +Phone: +1 (415) 968-1052 +Message-ID: <12251395085.7.MRC@PANDA> + +Ken - + + I agree that the absence of a .DVTCP symbol is a bug, but +I don't believe it is a good idea to have device dependence +unless it is absolutely necessary. That is, your routines that +may run on more than just TCP streams should only use the device +independent I/O jsi and *not* MTOPR%, TCOPR%, IPOPR%, etc. You +should only have TCP-dependent code at a level which already knows +that the stream is TCP. + + In case you didn't know, the preferred way to do a "push" on +a TCP stream is to use SOUTR% instead of SOUT% when you want to +push. This is device-independent. + + It requires a bit of discipline to be device-independent, but +it is definitely possible. My network software is all device- +independent. I have never needed to look up the device type via +DVCHR%. I suspect this is true of DEC's programmers as well, which +is probably why the DVCHR% types haven't been well-maintained. + +-- Mark -- + +0,, +*** EOOH *** +Rcvd-Date: 3-Nov-86 14:30:50-EST +Mail-From: SRA created at 3-Nov-86 14:30:45 +Date: Mon, 3 Nov 1986 14:30 EST +Message-ID: +From: Rob Austein +To: Mark Crispin +cc: klh@SRI-NIC.ARPA, sra@XX.LCS.MIT.EDU +Subject: Device type 025? 
+In-reply-to: Msg of 1 Nov 1986 03:43-EST from Mark Crispin + +As it happens the device dependent code KLH was talking about was +something I suggested for the KCC low-level support code. Specificly, +the kludge to do a TCOPR% .TCFIN call on a TCP JFN prior to closing +it. + +I've looked at your code. You work around this lossage by using a +TIMER% interrupt to get out of the hung CLOSF% call. That's not +really appropriate for KCC, I think, which is why I suggested to Ken +that he do it with TCOPR%. + +--Rob + +0,, +*** EOOH *** +Rcvd-Date: 3-Nov-86 16:26:09-EST +Mail-From: SRA created at 3-Nov-86 16:26:01 +Date: Mon, 3 Nov 1986 16:26 EST +Message-ID: +From: Rob Austein +To: Alan Bawden +Cc: KLH@SRI-NIC.ARPA, SRA@XX.LCS.MIT.EDU +Subject: winning C compiler? +In-reply-to: Msg of 3 Nov 1986 14:13-EST from Alan Bawden + +It's been distributed for Twenex. I have a (slightly inconsistant) +copy on XX. I don't think the ITS runtimes are finished. At this +point it looks like the fastest path to getting code running on ITS +would be to cross-compile C code to FAIL code on Twenex (with the +right libraries) then assemble the FAIL code on ITS. Ugly, but it'll +do to boostrap a copy of the compiler on ITS and debug the runtimes. +Once that's done we can think about nice things like using MIDAS +instead of FAIL. See XXSRC: KCCDIST.KCC; PORT DOC if you are +interested in details. + +I don't know what Ken and Ian have been doing with this lately other +than what's written in that file and in the comments alongside some of +the runtime code. + +--Rob + +0, answered,, +*** EOOH *** +Rcvd-Date: 3-Nov-86 17:45:59-EST +Return-Path: +Received: from SRI-NIC.ARPA by XX.LCS.MIT.EDU with TCP; Mon 3 Nov 86 17:45:51-EST +Date: Mon 3 Nov 86 13:43:49-PST +From: Ken Harrenstien +Subject: Re: winning C compiler? 
+To: SRA@XX.LCS.MIT.EDU, Alan@AI.AI.MIT.EDU +cc: KLH@SRI-NIC.ARPA, ian@SRI-NIC.ARPA +In-Reply-To: +Message-ID: <12252061482.14.KLH@SRI-NIC.ARPA> + +That's right, the ITS version of the runtimes isn't finished. There are +still a couple of places in KCC itself which need some work in order to +produce MIDAS code, so for the time being FAIL is the assembler of choice. + +If you read the last portion of PORT.DOC (about porting to ITS) you'll +have an idea of the issues. The most important one, in my opinion, is +STINK. I think the best way to proceed would be to modify STINK to +understand DECREL format. We could conceivably modify the DEC loader +to run on ITS, but I really want to avoid any source code legality +problems. STINK will have to be fixed up anyway so that KCC can +invoke it automatically with all the right arguments. + +One problem with MIDAS which I didn't mention is that MIDAS, unlike +FAIL, makes no distinction between opcodes, labels, and random +symbols (this is why OPDEFs are required in MACRO/FAIL). This means +that there is a considerably greater chance of encountering a symbol +conflict with MIDAS, for example if someone tries to write a function +called "blt" or "move" or "setz". MIDAS also has many more pseudo names. + +Finally, remember that if KCC is ever spiffed up to output .REL files +directly, those are going to be in DECREL format. + +Look around and think about it if you're interested. 
+ +1, answered,, +Rcvd-Date: 4-Nov-86 18:35:01-EST +Return-Path: +Received: from SRI-NIC.ARPA by XX.LCS.MIT.EDU with TCP; Tue 4 Nov 86 18:34:49-EST +Date: Tue 4 Nov 86 14:38:59-PST +From: Ken Harrenstien +Subject: ITS LINK +To: ALAN@AI.AI.MIT.EDU +cc: ian@SRI-NIC.ARPA, SRA@XX.LCS.MIT.EDU, KLH@SRI-NIC.ARPA +In-Reply-To: <[AI.AI.MIT.EDU].114261.861104.ALAN> +Message-ID: <12252333668.36.KLH@SRI-NIC.ARPA> + +*** EOOH *** +Date: Tuesday, 4 November 1986 17:38-EST +From: Ken Harrenstien +To: ALAN@AI.AI.MIT.EDU +cc: ian@SRI-NIC.ARPA, SRA@XX.LCS.MIT.EDU, KLH@SRI-NIC.ARPA +Re: ITS LINK + +If we are real lucky, STINKR might be a STINK modified to know about +DEC-type .REL files. I think it's worth looking into; at worst it might +turn out to have a little more documentation in it, since AS would have had +to learn something about STINK format. The obvious place to start +looking is in the DM backup tape catalog. I guess that would mean restoring +all of the .TAPE0 etc files somewhere and then searching them. + +The issue of the linking loader is really the most crucial problem. The +C library can easily be fixed up for ITS; that's straightforward. FAIL +already works on ITS, and there is even a STKTRN routine it can call which +translates FAIL output into STINK format. But I have never actually +tested it, and I'm not sure how reliable STINK is. I have looked at the +STINK code recently, and it is a perfect example of how not to write +programs -- pages of uncommented code, lots of funnies played with the +stack pointer, lots of numerical constants... sigh. If there were just +some description of STINK format somewhere it would be vastly easier to +decide what to do. In particular, whether to stay with STINK or switch to +DECREL. I wish we could load DECREL format files. It used to be possible +with the DECUUO program. + +I looked at the current DEC linking loader source. 
It has nigh upwards of +15-20 modules each of which starts with two pagefuls of dire warnings +about licensing and copyrighting and restrictions and stuff, which makes me +uncomfortable. If we could dig up the source for an older version of LINK +(say, from TENEX, or even from the ITS DECUUO stuff) then we could probably +adapt it easily and live with that. + +An even more radical suggestion would be to adopt the GNU relocatable file +format, and then simply run the GNU loader (written in C). This would +have the advantage of being much easier to maintain, and allowing us to +hack long symbol names, and that sort of thing. I suspect it is too much +work for now, however. + +1,, +Rcvd-Date: 4-Nov-86 20:02:09-EST +Mail-From: SRA created at 4-Nov-86 20:02:07 +Date: Tue, 4 Nov 1986 20:02 EST +Message-ID: +From: Rob Austein +To: Ken Harrenstien +Cc: ALAN@AI.AI.MIT.EDU, ian@SRI-NIC.ARPA, SRA@XX.LCS.MIT.EDU, + jtw@XX.LCS.MIT.EDU +Subject: ITS LINK +In-reply-to: Msg of 4 Nov 1986 17:38-EST from Ken Harrenstien + +*** EOOH *** +Date: Tuesday, 4 November 1986 20:02-EST +From: Rob Austein +To: Ken Harrenstien +cc: ALAN@AI.AI.MIT.EDU, ian@SRI-NIC.ARPA, SRA@XX.LCS.MIT.EDU, + jtw@XX.LCS.MIT.EDU +Re: ITS LINK + +I don't think STINKR groks DECREL format. Its strengths were things +like multiple automatically relocatable PSECTS, which would allow us to +have the high segment automatically get put right after the low segment +and thus leave much more of the limited one section address space for +use by malloc() and the stack. + +Again, JTW is probably the only person who knows about this anymore. +He also made it fairly clear that (1) he knows he's the logical person +to work on this project and (2) that he doesn't have time. I suspect +he's right on both counts. 
+ +1,, +Rcvd-Date: 4-Nov-86 23:00:40-EST +Received: from SPEECH.MIT.EDU by XX.LCS.MIT.EDU via Chaosnet; 4 Nov 86 23:00-EST +Date: Tue 4 Nov 86 23:01:39-EST +From: "John Wroclawski" +Subject: Re: ITS LINK +To: SRA@XX.LCS.MIT.EDU, KLH@SRI-NIC.ARPA +cc: ALAN@AI.AI.MIT.EDU, ian@SRI-NIC.ARPA +Message-ID: <12252392408.38.JTW@MIT-SPEECH> + +*** EOOH *** +Date: Tuesday, 4 November 1986 23:01-EST +From: "John Wroclawski" +To: SRA@XX.LCS.MIT.EDU, KLH@SRI-NIC.ARPA +cc: ALAN@AI.AI.MIT.EDU, ian@SRI-NIC.ARPA +Re: ITS LINK + +So, KLH is dead right as usual. + +STINKR is a Snyderism written in C some ages ago to support the +portable C compiler he did for his MS thesis. It groks STINK format +only, with the further kludge that it is able to do PSECT-like things +when used in conjunction with a set of macros he wrote for MIDAS. It +worked OK but not perfectly; since MIDAS doesn't -really- have +multiple program counters it used an expanded highseg-loseg approach, +with the obvious problems resulting if you made any one segment too +big or used large negative offsets. It had a couple of other nice +features, being able to run init code in the linked-image address +space, etc. + +The source for STINKR is locked up in an encrypted file on an XX backup +tape somewhere. We might find it but the only people that ever knew the +key were AS and Elliot Moss. + +So it's not a great idea. But if someone finishes up the MIDAS code +for KCC we could use it, at least for bootstrapping. + +I remember looking at STINK once a while back and deciding it was +hopeless. I suppose it would be a little more reasonable if we had +some documentation on the format. Apparently Alan has found some +internal doc of some sort which might help. + +Does anyone hack FAIL anymore? How hard would it be to get it to accept +long symbol names and spit out the appropriate "new" DEC Rel block +types? 
+ +1,, +Rcvd-Date: 5-Nov-86 03:00:52-EST +Received: from AI.AI.MIT.EDU by XX.LCS.MIT.EDU via Chaosnet; 5 Nov 86 03:00-EST +Date: Wed, 5 Nov 86 03:02:33 EST +From: Alan Bawden +Subject: ITS LINK +To: KLH@SRI-NIC.ARPA +cc: ian@SRI-NIC.ARPA, SRA@XX.LCS.MIT.EDU, JTW@AI.AI.MIT.EDU, + MOON@AI.AI.MIT.EDU +In-reply-to: Msg of Tue 4 Nov 86 14:38:59-PST from Ken Harrenstien +Message-ID: <[AI.AI.MIT.EDU].114530.861105.ALAN> + +*** EOOH *** +Date: Wednesday, 5 November 1986 03:02-EST +From: Alan Bawden +To: KLH@SRI-NIC.ARPA +cc: ian@SRI-NIC.ARPA, SRA@XX.LCS.MIT.EDU, JTW@AI.AI.MIT.EDU, + MOON@AI.AI.MIT.EDU +Re: ITS LINK + + Date: Tue 4 Nov 86 14:38:59-PST + From: Ken Harrenstien + ... FAIL already works on ITS, and there is even a STKTRN routine it + can call which translates FAIL output into STINK format. But I have + never actually tested it, and I'm not sure how reliable STINK is. I + have looked at the STINK code recently, and it is a perfect example of + how not to write programs -- pages of uncommented code, lots of funnies + played with the stack pointer, lots of numerical constants... sigh. If + there were just some description of STINK format somewhere it would be + vastly easier to decide what to do. In particular, whether to stay + with STINK or switch to DECREL. I wish we could load DECREL format + files. It used to be possible with the DECUUO program.... + +Currently in JTW's possession is a file of handwritten documents that Moon +and Penny found somewhere that seems to describe STINK format. It looks +like it is written in Linear A to me, but perhaps he can make something of +it. + +Is it really the case that DECUUO can no longer deal with DECREL files? +(I've never done this, but apparently this is what SYS1;TS CCL is all +about. It hasn't been changed since 1976.) + +[The DECSYS directory has been GFR'd severely while it lived on the KL. +Maybe I should take the time to restore it completely.] 
+ +1,, +Rcvd-Date: 5-Nov-86 06:04:03-EST +Return-Path: +Received: from SRI-NIC.ARPA by XX.LCS.MIT.EDU with TCP; Wed 5 Nov 86 06:03:59-EST +Date: Wed 5 Nov 86 03:03:36-PST +From: Ken Harrenstien +Subject: Re: ITS LINK +To: ALAN@AI.AI.MIT.EDU +cc: ian@SRI-NIC.ARPA, SRA@XX.LCS.MIT.EDU, JTW@AI.AI.MIT.EDU, MOON@AI.AI.MIT.EDU, + KLH@SRI-NIC.ARPA +In-Reply-To: <[AI.AI.MIT.EDU].114530.861105.ALAN> +Message-ID: <12252469222.36.KLH@SRI-NIC.ARPA> + +*** EOOH *** +Date: Wednesday, 5 November 1986 06:03-EST +From: Ken Harrenstien +To: ALAN@AI.AI.MIT.EDU +cc: ian@SRI-NIC.ARPA, SRA@XX.LCS.MIT.EDU, JTW@AI.AI.MIT.EDU, MOON@AI.AI.MIT.EDU, + KLH@SRI-NIC.ARPA +Re: ITS LINK + +I re-discovered the STKTRN routine (in SAIL;STKTRN 39) and it, at +least, is somewhat better commented, so it might help. Interestingly +enough it appears to work not by using FAIL data structures, but by +converting from ready-to-output DECREL format blocks into STINK format +blocks! So it could perhaps be extracted into a separate program. +Now all we need is the reverse-transformation routine! + +I looked at FAIL and I'd say that it is in good shape; it should be +possible to build a new version of ITS FAIL from the latest source +with little trouble. One thing to remember is that FAIL can +apparently output either a STINK or DECREL format file with full +relocatable hackery, whereas MIDAS can only do this for STINK format. +Oh, MIDAS can output a DECREL format file, but it does not have the +ability to do polish fixups etc in this format; the necessary code was +never written (in fact, DECREL inside MIDAS is almost the same thing +as an absolute assembly). Since all KCC needs is a fast assembler +without hairy macros or anything, it looks like FAIL might be the +better bet initially. + +About DECUUO -- I only meant that I had the impression DECUUO was +semi-broken owing to the severe GFR-age. I imagine that if everything +was restored, it would work as it used to. 
Of particular interest +would be restoration of the linking loader source code! I'm not even +sure if this ever existed, however; given the DECUUO simulation stuff, +it would only be necessary to snarf the binary from a DEC system, and +this may have been what MRC did. + +If anyone has contacts at DEC it may be possible to get permission to +use the current LINK source. I can't see what they have to lose. + +1,, +Rcvd-Date: 5-Nov-86 17:00:21-EST +Return-Path: +Received: from SRI-NIC.ARPA by XX.LCS.MIT.EDU with TCP; Wed 5 Nov 86 16:59:41-EST +Date: Wed 5 Nov 86 13:53:43-PST +From: Ken Harrenstien +Subject: Re: ITS LINK +To: JTW%MIT-SPEECH@XX.LCS.MIT.EDU, SRA@XX.LCS.MIT.EDU +cc: ALAN@AI.AI.MIT.EDU, ian@SRI-NIC.ARPA, KLH@SRI-NIC.ARPA +In-Reply-To: <12252392408.38.JTW@MIT-SPEECH> +Message-ID: <12252587571.36.KLH@SRI-NIC.ARPA> + +*** EOOH *** +Date: Wednesday, 5 November 1986 16:53-EST +From: Ken Harrenstien +To: JTW%MIT-SPEECH@XX.LCS.MIT.EDU, SRA@XX.LCS.MIT.EDU +cc: ALAN@AI.AI.MIT.EDU, ian@SRI-NIC.ARPA, KLH@SRI-NIC.ARPA +Re: ITS LINK + +To answer JTW's question about FAIL, nobody really hacks it any more, +in the sense that nobody hacks MIDAS any more; it is more or less mature +and it basically isn't worth adding new features. It would be more +useful to add direct .REL generation to KCC. + +The new long-symbol DEC block types aren't really used much, I suspect. +Certainly DDT has no idea what to do with the resulting symbols, anyway! 
+ +1, answered,, +Rcvd-Date: 7-Nov-86 22:18:39-EST +Received: from SPEECH.MIT.EDU by XX.LCS.MIT.EDU via Chaosnet; 7 Nov 86 22:18-EST +Date: Fri 7 Nov 86 22:18:18-EST +From: "John Wroclawski" +Subject: Re: ITS LINK +To: ALAN@AI.AI.MIT.EDU, KLH@SRI-NIC.ARPA +cc: ian@SRI-NIC.ARPA, SRA@XX.LCS.MIT.EDU, MOON@AI.AI.MIT.EDU +In-Reply-To: <[AI.AI.MIT.EDU].114530.861105.ALAN> +Message-ID: <12253170947.32.JTW@MIT-SPEECH> + +*** EOOH *** +Date: Friday, 7 November 1986 22:18-EST +From: "John Wroclawski" +To: ALAN@AI.AI.MIT.EDU, KLH@SRI-NIC.ARPA +cc: ian@SRI-NIC.ARPA, SRA@XX.LCS.MIT.EDU, MOON@AI.AI.MIT.EDU +Re: ITS LINK + +STINK is now somewhat documented; see .INFO.;STINK DOC. ITS FAIL +produces STINK format output by default; I tried things out on a +couple of very simple FAIL programs and it did the right thing. + +0,, +*** EOOH *** +Date: Fri, 14 Nov 86 15:48 EST +From: Rob Austein +Subject: 8 bit characters? +To: bug-kcc@SRI-NIC.ARPA +cc: sra@XX +Message-ID: <861114154833.1.SRA@WHORFIN.LCS.MIT.EDU> + +How difficult would it be to add support for 8-bit characters (-x=ch8 or +some such)? It would be Real Useful for two reasons: + +1) Certain things, eg UDP packets, have to be formatted into left + justified 8 bit bytes because of operating system limitations. + +2) It turns out to be extremely useful to have network data in 8 bit + bytes even when it is coming from some stream protocol, because it is + much easier to extract header fields by casting structs and + extracting bitfields. Such header fields often cross byte boundaries + but rarely cross word boundaries. + +In both of these cases I currently kludge things by using a five line +assembly routine to convert from 9 to 8 bit pointers. But it would be +real nice to just compile with the right switches and let the machine do +the dirty work. Lot more portable too. Seems like it shouldn't be much +trouble, especially compared to 7 bit bytes. 
+ +--Rob + +1,, +Rcvd-Date: 14-Nov-86 20:50:13-EST +Return-Path: +Received: from SRI-NIC.ARPA by XX.LCS.MIT.EDU with TCP; Fri 14 Nov 86 20:50:07-EST +Date: Fri 14 Nov 86 17:50:09-PST +From: Ken Harrenstien +Subject: Re: 8 bit characters? +To: sra@XX.LCS.MIT.EDU, bug-kcc@SRI-NIC.ARPA +cc: KLH@SRI-NIC.ARPA +In-Reply-To: <861114154833.1.SRA@WHORFIN.LCS.MIT.EDU> +Message-ID: <12254989909.16.KLH@SRI-NIC.ARPA> + +*** EOOH *** +Date: Friday, 14 November 1986 20:50-EST +From: Ken Harrenstien +To: sra@XX.LCS.MIT.EDU, bug-kcc@SRI-NIC.ARPA +cc: KLH@SRI-NIC.ARPA +Re: 8 bit characters? + +Well, this would not be portable, anyway, since even for 8-bit-byte +machines you cannot predict what the byte ordering is going to be! So +your struct declarations aren't going to work (at least, not portably) +and you will always need some machine dependent knowledge in order to +combine bytes into integer values, etc. + +I'm not too thrilled about the idea of -x=ch8 because code compiled +for 8-bit bytes won't necessarily match up properly with code compiled +for 9-bit bytes. To be safe, everything (including the library) +should be compiled with the same setting; but funny things will happen +with the library (eg STDIO won't be able to read 36-bit or 9-bit +files, and who knows what else). It is possible to win (eg as for +7-bit bytes) only if you know what you are doing and are real careful. +It's true that most KCC code will work with 7,8, or 9, but the catch +is "most". + +I'd say that using a hack routine to do your own casting conversion, +as you seem to be doing now, is the best idea. + +A more ambitious notion would be to define additional types such as "char7" +and "char8" (and I suppose "char9" for completeness). When using KCC these +would be significant; for other systems a typedef or macro would convert +them to simple "char". I'm not sure how useful these would be or whether it +is worth messing around with more incompatible extensions to C; comments +welcome. 
+ +1, answered,, +Rcvd-Date: 11-Feb-87 00:48:09-EST +Return-Path: +Received: from SRI-NIC.ARPA by XX.LCS.MIT.EDU with TCP; Wed 11 Feb 87 00:47:34-EST +Date: Tue 10 Feb 87 12:24:43-PST +From: Ken Harrenstien +Subject: Forthcoming KCC changes; comments? +To: info-kcc@SRI-NIC.ARPA +cc: klh@SRI-NIC.ARPA +Message-ID: <12277999337.15.KLH@SRI-NIC.ARPA> + +*** EOOH *** +Date: Tuesday, 10 February 1987 15:24-EST +From: Ken Harrenstien +To: info-kcc@SRI-NIC.ARPA +cc: klh@SRI-NIC.ARPA +Re: Forthcoming KCC changes; comments? + +First, the news. I am almost finished with a new version of KCC which +has some incompatible changes: + + (1) Functions that return structures now operate differently. + (2) The "short" data type is now 18 bits (halfword) instead of 36. + (3) Structure/union members of type "char" and "short" are compacted. + (4) String literals are now normally 9-bit byte strings (not 7-bit). + +and, of course, several improvements: + + * Structure copying and returning is much more efficient, using in-line + BLT or XBLT and sometimes avoiding the stack altogether. + * Integer narrowing and widening is done properly in all situations. + * Pointer constant initialization happens at load time rather than + run time, reducing the size and startup overhead for C programs. + * Unsigned integer arithmetic is completely implemented. + + +And second, a request for feedback: + + I'd like to propose an extension which recent internal changes +have now made possible, and see what you think. This extension would +amount to adding 5 new KCC-specific data types which would be called +"_KCCtype_charN" where N is one of 6, 7, 8, 9, or 18. These will act +exactly like "char" except that the bytesize would be N rather than +the default 9. 
Consider these declarations: + + _KCCtype_char8 packet[40]; /* An array of 8-bit bytes */ + _KCCtype_char7 *arg = "text"; /* A pointer to an ASCIZ string */ + _KCCtype_char18 useless; /* Same as "short useless;" */ + _KCCtype_char6 tmp[] = "tmp"; /* An array of SIXBIT chars */ + +In addition, there will be two special cases of interaction between these +types and string literals: + +(1) A 6-bit string literal will be stored as SIXBIT rather than + using the low 6 bits of the ASCII char values. + This does not affect integer values stored into 6-bit arrays. +(2) A cast of a string literal to (_KCCtype_charN *) will be interpreted + as a request to use N-bit bytes in the literal, rather than + just operating on the pointer to the literal (the strict C + interpretation). For example, + (_KCCtype_char7 *)"text" + Will produce a 7-bit pointer to a 7-bit (ASCIZ) string. + Strict C would otherwise make a 7-bit ptr to a 9-bit string, + which is kind of useless. + + +Here are the pros and cons I could think of: + +PRO: + * It will make it easier to write PDP-10 code which must + interface with the operating system or non-C software. + * It should be possible to completely flush the -x=ch7 switch, + which (in my opinion) has too much potential for trouble, as it + changes "char" to "_KCCtype_char7" EVERYWHERE, and certain things + cannot be made to work right. + +CON: + * It may cause problems when porting code from elsewhere which + uses those identifiers. + * It may tempt some people to write non-portable code when + portable code would work just as well. + +The last two points are the reason why the type names are +"_KCCtype_charN" instead of "charN". This almost eliminates the +chance of identifier conflict, and forces lazy people to think twice. +It also encourages them to use a #define or typedef instead (e.g. +"typedef _KCCtype_char7 char7;") which makes programs both more +readable and more amenable to porting. + +I would like comments on this proposed extension. 
In particular I +need to hear from anyone who depends on -x=ch7, because I would like +to flush that switch altogether. + +Thanks, +--Ken + +0, answered,, KCC, +*** EOOH *** +Rcvd-Date: 11-Feb-87 00:48:09-EST +Return-Path: +Received: from SRI-NIC.ARPA by XX.LCS.MIT.EDU with TCP; Wed 11 Feb 87 00:47:34-EST +Date: Tue 10 Feb 87 12:24:43-PST +From: Ken Harrenstien +Subject: Forthcoming KCC changes; comments? +To: info-kcc@SRI-NIC.ARPA +cc: klh@SRI-NIC.ARPA +Message-ID: <12277999337.15.KLH@SRI-NIC.ARPA> + +First, the news. I am almost finished with a new version of KCC which +has some incompatible changes: + + (1) Functions that return structures now operate differently. + (2) The "short" data type is now 18 bits (halfword) instead of 36. + (3) Structure/union members of type "char" and "short" are compacted. + (4) String literals are now normally 9-bit byte strings (not 7-bit). + +and, of course, several improvements: + + * Structure copying and returning is much more efficient, using in-line + BLT or XBLT and sometimes avoiding the stack altogether. + * Integer narrowing and widening is done properly in all situations. + * Pointer constant initialization happens at load time rather than + run time, reducing the size and startup overhead for C programs. + * Unsigned integer arithmetic is completely implemented. + + +And second, a request for feedback: + + I'd like to propose an extension which recent internal changes +have now made possible, and see what you think. This extension would +amount to adding 5 new KCC-specific data types which would be called +"_KCCtype_charN" where N is one of 6, 7, 8, 9, or 18. These will act +exactly like "char" except that the bytesize would be N rather than +the default 9. 
Consider these declarations: + + _KCCtype_char8 packet[40]; /* An array of 8-bit bytes */ + _KCCtype_char7 *arg = "text"; /* A pointer to an ASCIZ string */ + _KCCtype_char18 useless; /* Same as "short useless;" */ + _KCCtype_char6 tmp[] = "tmp"; /* An array of SIXBIT chars */ + +In addition, there will be two special cases of interaction between these +types and string literals: + +(1) A 6-bit string literal will be stored as SIXBIT rather than + using the low 6 bits of the ASCII char values. + This does not affect integer values stored into 6-bit arrays. +(2) A cast of a string literal to (_KCCtype_charN *) will be interpreted + as a request to use N-bit bytes in the literal, rather than + just operating on the pointer to the literal (the strict C + interpretation). For example, + (_KCCtype_char7 *)"text" + Will produce a 7-bit pointer to a 7-bit (ASCIZ) string. + Strict C would otherwise make a 7-bit ptr to a 9-bit string, + which is kind of useless. + + +Here are the pros and cons I could think of: + +PRO: + * It will make it easier to write PDP-10 code which must + interface with the operating system or non-C software. + * It should be possible to completely flush the -x=ch7 switch, + which (in my opinion) has too much potential for trouble, as it + changes "char" to "_KCCtype_char7" EVERYWHERE, and certain things + cannot be made to work right. + +CON: + * It may cause problems when porting code from elsewhere which + uses those identifiers. + * It may tempt some people to write non-portable code when + portable code would work just as well. + +The last two points are the reason why the type names are +"_KCCtype_charN" instead of "charN". This almost eliminates the +chance of identifier conflict, and forces lazy people to think twice. +It also encourages them to use a #define or typedef instead (e.g. +"typedef _KCCtype_char7 char7;") which makes programs both more +readable and more amenable to porting. + +I would like comments on this proposed extension. 
In particular I +need to hear from anyone who depends on -x=ch7, because I would like +to flush that switch altogether. + +Thanks, +--Ken + +1,, RemindNow, +Rcvd-Date: 8-Mar-87 04:01:33-EST +Return-Path: +Received: from SRI-NIC.ARPA by XX.LCS.MIT.EDU with TCP; Sun 8 Mar 87 04:01:12-EST +Date: Fri 6 Mar 87 19:41:51-PST +From: Ken Harrenstien +Subject: New KCC is ready! +To: info-kcc@SRI-NIC.ARPA +Message-ID: <12284370371.19.KLH@SRI-NIC.ARPA> + +*** EOOH *** +Date: Friday, 6 March 1987 22:41-EST +From: Ken Harrenstien +To: info-kcc@SRI-NIC.ARPA +Re: New KCC is ready! + + Finally, after a lot of sweat and blood, a new KCC is ready +with all of the features previously mentioned, plus a few more. This +should be the last incompatible version for a long time, although new +(compatible) versions will continue to spring up. Even as you read +this, some sources will already have changed; there is still a long +list of good and interesting things to do. This snapshot has been +made following the dictum that "better is the enemy of good enough" -- +a good distribution now is more useful than a better one later. + + A new, consistent set of sources is available from the usual +place, the KCCDIST: directory on SRI-NIC.ARPA. Likewise, the latest +binaries (if that is all you want) are SYS:CC.EXE, C:*.*.0, and +C:*.*.0. While things have been pretty thoroughly tested +here, there is always a small possibility that new bugs will emerge +when presented with the outside world; please send any comments, +problems, and suggestions to BUG-KCC@SRI-NIC.ARPA. + + Here follows the relevant excerpt from NEWS.TXT that +summarizes the changes: + ----------------------------- +03/06/87 KCC 557, LIBC 124: <2,,1> Third formal distribution snapshot + + IMPORTANT: this version of KCC is incompatible with previous +versions! The way that structures are returned from functions has +changed, and the layout of "char" and "short" objects in structures has +also changed. 
In order to enforce this, the symbol $$CVER has been +updated, and any attempt to load .REL modules which have been produced +by incompatible versions of KCC will cause LINK to complain with an +error message similar to this: + + %LNKMDS Multiply-defined global symbol $$CVER + Detected in module PRINTF from file C:LIBC.REL + Defined value = 1000001, this value = 2000001 + +This is easily remedied by re-compiling old modules. Fortunately, no +further incompatible changes are expected to be necessary. + + Nothing has really changed from the user's viewpoint. However, +there are several new features available, and some inefficiencies +corrected. The noteworthy changes are listed below, very briefly; +as usual, CC.DOC should be consulted for more complete and informative +details. + +KCC: --------------------------------------------------------------- + +KCC - Bug fixes: + A multitude of minor bug fixes too trivial to mention, almost +all having to do with incorrectly optimized code. One that wasn't +trivial was that {char c, *cp = &c;} used to produce an (int *)! + +KCC - Incompatible changes: + * "shorts" are now 18 bits long (halfwords), with sizeof(short) == 2. + * The mechanism for returning structure values from functions +is different. This is an internal change, invisible to the user, which +is much more efficient than the previous method. + * Structure members of type "char" and "short" are now packed +differently (more compactly). Any structure using these types will be +laid out differently in storage. + * Integer narrowing and widening is now done properly in all +situations. This may cause incorrectly written code to behave +differently. + * Implicit arithmetic conversions now follow the ANSI +value-preserving rules rather than the old K&R and H&S +unsigned-preserving rules. Ambiguous code may behave differently. + * "float" values are no longer automatically converted to "double", +except for function arguments. This conforms to the ANSI draft. 
+ * The "signed" keyword (introduced by ANSI) has been implemented. + * "volatile" and "const" (also new from ANSI) are now reserved +words (but unimplemented). + +KCC - Extension: New data types: + 5 new data types have been introduced, which act like "char" +but with different byte sizes. You can now manipulate signed or +unsigned bytes of 6, 7, 8, 9, or 18 bits. This is non-portable and +intended strictly for PDP-10 machine-dependent code where efficiency +is desirable. + +KCC - Efficiency improvements: + The change to the structure handling mechanism falls in this +category. Structure copies used to always take two subroutine calls +and two copies; they now use a single in-line BLT (or a series of +single-word moves, whichever is best), and are much faster than +element-by-element copying. + KCC's constant initialization code has been improved to the point +where almost all constants are now initialized at load time rather than +at run time; a similar mechanism eliminates the code that used to generate +string constant pointers. You will see a significant difference with code +that uses many string literals; both startup time and program size are +reduced. + KCC's pointer arithmetic for byte pointers is MUCH better. +Pointer comparison and subtraction formerly used subroutine calls and +many, many instructions; both now use a handful of in-line +instructions and some magic numbers. + There are no more calls to internal run-time subroutines. +All of the operations which used to require this are now compiled +in-line, including double-int and int-double conversion, pointer +operations, and structure copying. + +KCC - unsigned and signed data: + KCC now fully supports "unsigned int" operations. Some code +that uses unsigned integers will now compile differently. Division in +particular needs many more instructions. Any integer type, "char" in +particular, may be declared as "signed" and will behave accordingly. 
+ +KCC - Switch changes: + -L= Passes in the command string to the linking loader. + -v= (Verbosity) has been expanded; see CC.DOC. -v alone prints out + everything, including the loader command string. + -l Libraries loaded with the -l switch are now loaded in /SEARCH + mode (they evidently weren't before). + +KCC - Miscellaneous + -d=sym now produces a *.CYM file instead of *.SYM, to avoid +conflicts with LINK output files. + -P=ansi+kcc is now the default. The effects are minor and documented +in CC.DOC. The three new ANSI keywords of "signed", "const", and "volatile" +are recognized, although only the first has any real effect. + + +LIBC: --------------------------------------------------------------- + + More minor bug fixes to the LIBC stdio routines. + + open() now attempts to track down and expand logical device +names completely (thus performing what the monitor should be doing but +isn't). Thus, open("X:subdir/filename.ext",0) will work even if X is +a search path. Previously only the first device/directory could be tried. +This permits KCC #includes to work with C: defined as a search path. + + malloc() no longer allocates pages 770-777 (non-extended) or +37770-37777 (extended), so that obsolete forms of DDT can be mapped therein. + +0,, KCC, RemindNow, +*** EOOH *** +Rcvd-Date: 8-Mar-87 04:01:33-EST +Return-Path: +Received: from SRI-NIC.ARPA by XX.LCS.MIT.EDU with TCP; Sun 8 Mar 87 04:01:12-EST +Date: Fri 6 Mar 87 19:41:51-PST +From: Ken Harrenstien +Subject: New KCC is ready! +To: info-kcc@SRI-NIC.ARPA +Message-ID: <12284370371.19.KLH@SRI-NIC.ARPA> + + Finally, after a lot of sweat and blood, a new KCC is ready +with all of the features previously mentioned, plus a few more. This +should be the last incompatible version for a long time, although new +(compatible) versions will continue to spring up. Even as you read +this, some sources will already have changed; there is still a long +list of good and interesting things to do. 
This snapshot has been +made following the dictum that "better is the enemy of good enough" -- +a good distribution now is more useful than a better one later. + + A new, consistent set of sources is available from the usual +place, the KCCDIST: directory on SRI-NIC.ARPA. Likewise, the latest +binaries (if that is all you want) are SYS:CC.EXE, C:*.*.0, and +C:*.*.0. While things have been pretty thoroughly tested +here, there is always a small possibility that new bugs will emerge +when presented with the outside world; please send any comments, +problems, and suggestions to BUG-KCC@SRI-NIC.ARPA. + + Here follows the relevant excerpt from NEWS.TXT that +summarizes the changes: + ----------------------------- +03/06/87 KCC 557, LIBC 124: <2,,1> Third formal distribution snapshot + + IMPORTANT: this version of KCC is incompatible with previous +versions! The way that structures are returned from functions has +changed, and the layout of "char" and "short" objects in structures has +also changed. In order to enforce this, the symbol $$CVER has been +updated, and any attempt to load .REL modules which have been produced +by incompatible versions of KCC will cause LINK to complain with an +error message similar to this: + + %LNKMDS Multiply-defined global symbol $$CVER + Detected in module PRINTF from file C:LIBC.REL + Defined value = 1000001, this value = 2000001 + +This is easily remedied by re-compiling old modules. Fortunately, no +further incompatible changes are expected to be necessary. + + Nothing has really changed from the user's viewpoint. However, +there are several new features available, and some inefficiencies +corrected. The noteworthy changes are listed below, very briefly; +as usual, CC.DOC should be consulted for more complete and informative +details. 
+ +KCC: --------------------------------------------------------------- + +KCC - Bug fixes: + A multitude of minor bug fixes too trivial to mention, almost +all having to do with incorrectly optimized code. One that wasn't +trivial was that {char c, *cp = &c;} used to produce an (int *)! + +KCC - Incompatible changes: + * "shorts" are now 18 bits long (halfwords), with sizeof(short) == 2. + * The mechanism for returning structure values from functions +is different. This is an internal change, invisible to the user, which +is much more efficient than the previous method. + * Structure members of type "char" and "short" are now packed +differently (more compactly). Any structure using these types will be +laid out differently in storage. + * Integer narrowing and widening is now done properly in all +situations. This may cause incorrectly written code to behave +differently. + * Implicit arithmetic conversions now follow the ANSI +value-preserving rules rather than the old K&R and H&S +unsigned-preserving rules. Ambiguous code may behave differently. + * "float" values are no longer automatically converted to "double", +except for function arguments. This conforms to the ANSI draft. + * The "signed" keyword (introduced by ANSI) has been implemented. + * "volatile" and "const" (also new from ANSI) are now reserved +words (but unimplemented). + +KCC - Extension: New data types: + 5 new data types have been introduced, which act like "char" +but with different byte sizes. You can now manipulate signed or +unsigned bytes of 6, 7, 8, 9, or 18 bits. This is non-portable and +intended strictly for PDP-10 machine-dependent code where efficiency +is desirable. + +KCC - Efficiency improvements: + The change to the structure handling mechanism falls in this +category. 
Structure copies used to always take two subroutine calls +and two copies; they now use a single in-line BLT (or a series of +single-word moves, whichever is best), and are much faster than +element-by-element copying. + KCC's constant initialization code has been improved to the point +where almost all constants are now initialized at load time rather than +at run time; a similar mechanism eliminates the code that used to generate +string constant pointers. You will see a significant difference with code +that uses many string literals; both startup time and program size are +reduced. + KCC's pointer arithmetic for byte pointers is MUCH better. +Pointer comparison and subtraction formerly used subroutine calls and +many, many instructions; both now use a handful of in-line +instructions and some magic numbers. + There are no more calls to internal run-time subroutines. +All of the operations which used to require this are now compiled +in-line, including double-int and int-double conversion, pointer +operations, and structure copying. + +KCC - unsigned and signed data: + KCC now fully supports "unsigned int" operations. Some code +that uses unsigned integers will now compile differently. Division in +particular needs many more instructions. Any integer type, "char" in +particular, may be declared as "signed" and will behave accordingly. + +KCC - Switch changes: + -L= Passes in the command string to the linking loader. + -v= (Verbosity) has been expanded; see CC.DOC. -v alone prints out + everything, including the loader command string. + -l Libraries loaded with the -l switch are now loaded in /SEARCH + mode (they evidently weren't before). + +KCC - Miscellaneous + -d=sym now produces a *.CYM file instead of *.SYM, to avoid +conflicts with LINK output files. + -P=ansi+kcc is now the default. The effects are minor and documented +in CC.DOC. The three new ANSI keywords of "signed", "const", and "volatile" +are recognized, although only the first has any real effect. 
+ + +LIBC: --------------------------------------------------------------- + + More minor bug fixes to the LIBC stdio routines. + + open() now attempts to track down and expand logical device +names completely (thus performing what the monitor should be doing but +isn't). Thus, open("X:subdir/filename.ext",0) will work even if X is +a search path. Previously only the first device/directory could be tried. +This permits KCC #includes to work with C: defined as a search path. + + malloc() no longer allocates pages 770-777 (non-extended) or +37770-37777 (extended), so that obsolete forms of DDT can be mapped therein. + +0,, KCC, +*** EOOH *** +Rcvd-Date: 13-Mar-87 02:07:18-EST +Mail-From: SRA created at 13-Mar-87 02:07:17 +Date: Fri 13 Mar 87 02:07-EST +From: Rob Austein +Subject: KCC type extensions +To: SRA@XX.LCS.MIT.EDU + +_KCCtype_char6 +_KCCtype_char7 +_KCCtype_char8 +_KCCtype_char9 +_KCCtype_char18 + +0, answered,, KCC, +*** EOOH *** +Rcvd-Date: 16-Mar-87 16:45:33-EST +Return-Path: +Received: from SRI-NIC.ARPA by XX.LCS.MIT.EDU with TCP; Mon 16 Mar 87 16:45:28-EST +Date: Mon 16 Mar 87 13:46:47-PST +From: Ken Harrenstien +Subject: Re: RESET% +To: SRA@XX.LCS.MIT.EDU +cc: Bug-KCC@SRI-NIC.ARPA, KLH@SRI-NIC.ARPA +In-Reply-To: +Message-ID: <12286927173.16.KLH@SRI-NIC.ARPA> + +The RESET% isn't executed by jsys() as it is part of the low-level startup +(look at the SSTART: label). I suspect that what you are seeing is yet +another aspect of the brain-damaged way that TOPS-20 cleans things up; all +that RESET% really seems to do is say "hey, do this if you've got a spare +hour or two, will you" and then comes right back to the user program, +which then tries to do something that fails because the cleanup hasn't +yet actually been done! + +This often happens for JFNs. 
MIDAS, for example, is too fast for TOPS-20 +and if you give a new MIDAS command the same as a previous one (which was +interrupted) then you will often fail, because the old output-file JFN +has not yet been cleared away. Most programs don't encounter this problem +as it takes them a while to get around to doing whatever it is they are +supposed to do. I bet you never knew that DEC software was designed to be +slow! + +0,, KCC, +*** EOOH *** +Rcvd-Date: 16-Mar-87 20:57:17-EST +Mail-From: SRA created at 16-Mar-87 20:57:15 +Date: Mon, 16 Mar 1987 20:57 EST +Message-ID: +From: Rob Austein +To: Bug-KCC@SRI-NIC.ARPA +cc: sra@XX.LCS.MIT.EDU +Subject: Another bug for you (unions and structs, wrong code generated) + +I must admit that this is one of the more amusing compiler errors I +can recall seeing. It is clearly on drugs by the time it gets to the +return() statement.... + +Here's the C code: + +unsigned a,b,c,d; + +int atoina() +{ + union { + int number; + struct { + unsigned : 4; + unsigned a : 8; + unsigned b : 8; + unsigned c : 8; + unsigned d : 8; + } address; + } crock; + crock.number = 0; + crock.address.a = a; + crock.address.b = b; + crock.address.c = c; + crock.address.d = d; + return(crock.number); +} + +And here's the FAIL code: + + TITLE kccbug + .REQUEST C:LIBc.REL + $$CVER==<2,,1> + INTERN $$CVER + OPDEF ADJBP [IBP] +DEFINE %CHRBP(A,M) +< SETO A, + ADJBP A,M +> +IFNDEF ERJMP,< OPDEF ERJMP [JUMP 16,] > +OPDEF ERJMPA [ERJMP] +OPDEF XMOVEI [SETMI] + DEFINE IFIW +XBLT==<020000,,0> + TWOSEG 400000 + RELOC 0 + RELOC 400000 + DEFINE %%CODE + DEFINE %%DATA +PURGE IFE,IFN,IFG,IFGE,IFL,IFLE,IFDEF,IFNDEF,IFIDN,IFDIF + + %%DATA +a: BLOCK 1 +b: BLOCK 1 +c: BLOCK 1 +d: BLOCK 1 + + %%CODE +atoina: + PUSH 17,[0] + MOVE 4,a + DPB 4,[341017,,1] + MOVE 5,b + DPB 5,[241017,,1] + MOVE 6,c + DPB 6,[141017,,1] + MOVE 7,d + DPB 7,[41017,,1] + MOVEI 1,0 + ADJSP 17,-1 + POPJ 17, + +$$CPKI==0 + INTERN $$CPKI +$$CPKA==0 + INTERN $$CPKA + + LIT + EXTERN $$$CPU + EXTERN $$$CRT + 
INTERN a + INTERN b + INTERN c + INTERN d + INTERN atoina + END + + +--Rob + +0,, KCC, +*** EOOH *** +Rcvd-Date: 17-Mar-87 23:53:20-EST +Return-Path: +Received: from SRI-NIC.ARPA by XX.LCS.MIT.EDU with TCP; Tue 17 Mar 87 23:53:16-EST +Date: Tue 17 Mar 87 20:55:04-PST +From: Ken Harrenstien +Subject: Re: Another bug for you (unions and structs, wrong code generated) +To: SRA@XX.LCS.MIT.EDU, Bug-KCC@SRI-NIC.ARPA +cc: KLH@SRI-NIC.ARPA +In-Reply-To: +Message-ID: <12287267283.17.KLH@SRI-NIC.ARPA> + +Boy, that one was tough. It's fixed now in the latest installed CC on +SRI-NIC (see the INFO-KCC message). There were two separate problems, +one having to do with unnamed bitfields in structures (this fouled up +the too-clever trick that was being used to know whether struct/union +parsing was at the start of a word or not!), and the other having to +do with some more oversights in the peephole optimizer -- among other +things the common subexpression code only had a limited knowledge of +what instructions modified memory, and didn't realize that byte +pointers could point to the same thing as normal instruction +addressing. I am slowly fixing all of that by using tables which +completely describe the behavior of every instruction, but full +conversion will take quite a while. + +At least the fixed code now does better optimization than before! + +0, answered,, KCC, +*** EOOH *** +Rcvd-Date: 20-Mar-87 03:24:19-EST +Return-Path: +Received: from SRI-NIC.ARPA by XX.LCS.MIT.EDU with TCP; Fri 20 Mar 87 03:24:15-EST +Date: Fri 20 Mar 87 00:24:03-PST +From: Ken Harrenstien +Subject: KCC on ITS? +To: sra@XX.LCS.MIT.EDU +cc: klh@SRI-NIC.ARPA +Message-ID: <12287829617.25.KLH@SRI-NIC.ARPA> + +Just noticed in one of your recent messages on ITS that you mumbled +something about compiler conversion. I assume you were talking about KCC. +Have any promising hackers shown up who are willing to brave the +foul innards of STINK, etc. in return for fame and... well, just fame...? 
+ +Once again, the problem is not the ITS support, which is easy to spiff +up, or the assembler, since either MIDAS or FAIL can be used -- it is +the linking loader, or lack thereof. I suppose if people are willing +to always invoke STINK by hand, though, it would be workable. (ugh) + +1,, +Rcvd-Date: 25-Apr-87 03:46:58-EDT +Mail-From: SRA created at 25-Apr-87 03:46:56 +Date: Sat, 25 Apr 1987 03:46 EDT +Message-ID: +From: Rob Austein +To: JTW@XX.LCS.MIT.EDU, Alan@AI.AI.MIT.EDU, Ian@SRI-NIC.ARPA, + KLH@SRI-NIC.ARPA, Moon@AI.AI.MIT.EDU, SRA@XX.LCS.MIT.EDU +Subject: STINK for ITS KCC +In-reply-to: Msg of 7 Nov 1986 22:18-EST from "John Wroclawski" + +*** EOOH *** +Date: Saturday, 25 April 1987 03:46-EDT +From: Rob Austein +To: JTW@XX.LCS.MIT.EDU, Alan@AI.AI.MIT.EDU, Ian@SRI-NIC.ARPA, + KLH@SRI-NIC.ARPA, Moon@AI.AI.MIT.EDU, SRA@XX.LCS.MIT.EDU +Re: STINK for ITS KCC + +So I seem to recall that somebody (KLH?) thought there was still some +unsolved problem involved in having an ITS KCC invoke STINK. Maybe +I'm dense, but what's wrong with stuffing the link script down a +corelink then invoking STINK with JCL telling it to use that corelink +file as input? Or even just translating "TTY:" opened in read mode to +the corelink for STINK if for some reason the JCL thing doesn't work? +We're not trying to be tasteful here or anything, are we? + +I'm out of town for the next week, so don't expect a quick answer to any +reply. 
+ +--Rob + +1,, +Rcvd-Date: 25-Apr-87 05:03:41-EDT +Return-Path: +Received: from SRI-NIC.ARPA by XX.LCS.MIT.EDU with TCP/SMTP; Sat 25 Apr 87 05:03:29-EDT +Date: Sat 25 Apr 87 02:05:42-PDT +From: Ken Harrenstien +Subject: Re: STINK for ITS KCC +To: SRA@XX.LCS.MIT.EDU, JTW@XX.LCS.MIT.EDU, Alan@AI.AI.MIT.EDU, + Ian@SRI-NIC.ARPA, Moon@AI.AI.MIT.EDU +cc: KLH@SRI-NIC.ARPA +In-Reply-To: +Message-ID: <12297274383.19.KLH@SRI-NIC.ARPA> + +*** EOOH *** +Date: Saturday, 25 April 1987 05:05-EDT +From: Ken Harrenstien +To: SRA@XX.LCS.MIT.EDU, JTW@XX.LCS.MIT.EDU, Alan@AI.AI.MIT.EDU, + Ian@SRI-NIC.ARPA, Moon@AI.AI.MIT.EDU +cc: KLH@SRI-NIC.ARPA +Re: STINK for ITS KCC + +Well, there IS an unresolved problem, but it doesn't have much to do +with STINK invocation. As you point out, there are various hacks that +can be used to pass strings to STINK (and twiddling STINK to make this +simpler isn't much of an exercise). + +The problem has to do with how the C library routines are loaded. +I'm not sure how to do it. + +There is, I believe, no such thing as a library of .REL files; STINK +does not know how to search a library (the block type is there, but it +is a no-op). There seem to be only two ways to do it: First, load the +complete library .REL file, every time. Second, have a directory with +all .REL modules in it, and check each and every .REL file in that +directory to see if its definitions match any of the symbols needed by +the program. This requires that the JCL to STINK specify each one of +these filenames. + +I'm not even sure if the first approach is possible, since I'm not +sure how to construct that single .REL file in the first place. As +far as I know, ITS has no notion of a library file (containing more +than one library module). + +The second approach might be do-able by keeping a sort of @ file in +the directory, which can be gobbled by the invocation process, so that +KCC doesn't have to know about all the files itself. It will, +however, be rather slow. 
Whether this is unacceptable I don't know, +but it will certainly not be as fast as a traditional library file +search, and won't encourage people to use C. + +It would sure be nice if STINK knew how to do a real library search. +It would sure be nice if anyone could hold his nose long enough to +accomplish this or anything else. + +There is one other monkey wrench to consider. Namely, it is probably +not a good idea to use MIDAS. The reason for this is that MIDAS does +not distinguish between label symbols and opcode symbols. Thus you +will be in deep shit if the C programmer has routines called things +like "move" or "push". FAIL and MACRO differentiate the two usages +(this is why their OPDEF pseudo is needed to define new ops). ITS +FAIL is able to translate DECREL output into STINK-format output, but +whether it does the right thing for library entry point +linkages/requests I don't know. Actually, none of them is perfect +since there is always the danger of a pseudo-op name conflicting with +a user symbol (e.g. IFE, IFL, etc); only if KCC produced a binary REL +directly could this be avoided (and if KCC were to do this, what +format do you think it would use? Yep, DECREL.) For the time being +we may simply have to ignore this problem and hope there are not too +many conflicts; the UNIX AS assembler may not be immune to conflicts +either. + +If anybody has ideas, please speak up... + +To give you some notion of what we are talking about, here is a listing +of the current C library. The first name on each line is the module +name, and all other names on the line are entry points. Sorry about the +132 column format. 
+ + Listing of Modules and Entry points +Produced by MAKLIB Version 2B(104) on 23-Apr-87 at 12:15:55 + + ************************** + +DSK:LIBC.REL[4,116] Created on 23-Apr-87 at 12:13:00 + +ABORT ABORT +ATOI ATOF ATOI ATOL +BSEARC BSEARC +CLOCK CLOCK +CPU $$$CPU +CPUTM .CPUTM +CRT $$$CRT $KCCID $STACS $STOFF $ZERO ..EXIT .EALLO .EDATA .END .ETEXT +CTERMI CTERMI CUSERI +CTIME ASCTIM CTIME DIFFTI GMTIME LOCALT MKTIME TIMEZO +CTYPE .CTOIN .CTOLO .CTOUP .CTYP1 +GETCWD GETCWD GETWD +GETENV GETENV +JSYS JSYS +MALLOC CALLOC CFREE CLALLO FREE MALLOC MLALLO REALLO RELALL .FREE. .MEMOR .PALLO .PFREE +MEMSTR BCMP BCOPY BZERO MEMCHR MEMCMP MEMCPY MEMSET +MKTEMP MKTEMP +ONEXIT .N.EXI .EXIT. ONEXIT +PERROR PERROR +PFORK PFORK +QSORT QSORT +SETJMP LONGJM SETJMP +STRING STRCAT STRCHR STRCMP STRCPY STRCSP STRLEN STRNCA STRNCM STRNCP STRPBR STRPOS STRRCH STRRPB STRRPO STRSPN + STRTOK STRSTR STRTOD STRTOL STRTOU +STRUNG STR000 STR001 STR002 STR003 +SYSTEM SYSTEM +URT EXIT ERRNO .EXIT .RUNTM .VFRKF .NFORK .GETJC +URTSUD .URTSU +ACCESS ACCESS +BUFPOS .BUFPO +OPEN CREAT OPEN .GTJFN .OPENU .RLJFN .UIOCH .UIOCN .UIOCP .UIOEO .UIOFD .UIOFL .UIOFX .UIONO .UIOPB .UIOPO + .UIOTY .UIOUF +CLOSE CLOSE +READ READ READLN +STAT STAT XSTAT FSTAT XFSTAT .DVTYP .RCUSR .GTFDB .WHOAM +WRITE WRITE +LSEEK LSEEK TELL +DUP DUP DUP2 +FORK EXECL EXECLE EXECLP EXECV EXECVE EXECVP FORK VFORK +GETPID GETPID +PIPE PIPE +RENAME RENAME +SBRK BRK SBRK +SIGNAL KILL SIGNAL SIGSYS +SLEEP PAUSE SLEEP +TIME FTIME GETTIM TIME .GLTAD .TADL2 .TADU2 +UNLINK UNLINK +WAIT WAIT +CLEANU .CLEAN +FCLOSE FCLOSE +FDOPEN FDOPEN +FFLUSH FFLUSH .PRIME +FGETC FGETC .READA +FGETS FGETS +FILBUF .FILBU +FOPEN .SIOS .FILE. 
FOPEN .MAKEF .FREEF +FPUTC FPUTC .WRITE +FPUTS FPUTS +FREAD FREAD +FREOPE FREOPE .SIOFL .SETFI +FSEEK FSEEK +FTELL FTELL +FWRITE FWRITE +GETS GETS +GETW GETW +PRINTF FPRINT PRINTF SPRINT PRF.BI +PUTS PUTS +PUTW PUTW +REWIND REWIND +SCANF FSCANF SCANF SSCANF +SETBUF SETBUF .SETBU SETLIN .SOBUF SETVBU +SOPEN SOPEN +UNGETC UNGETC +ABS ABS +ACOS ACOS +ASIN ASIN +ATAN ATAN +ATAN2 ATAN2 +CEIL CEIL +COS COS +COSH COSH +EXP EXP +FABS FABS +FLOOR FLOOR +FMOD FMOD +FREXP FREXP +LABS LABS +LDEXP LDEXP +LOG LOG +LOG10 LOG10 +MODF MODF +POW POW +RAND SRAND RAND +SIN SIN +SINH SINH +SQRT SQRT +TAN TAN +TANH TANH +SIGN SIGN +XMANT XMANT +POLY POLY +XEXP XEXP +GETHST GTHENT GTHNAM GTHADR SETHOS ENDHOS GH.HOS GH.ADD GH.HEN + +1,, +Rcvd-Date: 25-Apr-87 10:04:13-EDT +Return-Path: +Received: from AI.AI.MIT.EDU by XX.LCS.MIT.EDU with Chaos/SMTP; Sat 25 Apr 87 10:04:11-EDT +Date: Sat, 25 Apr 87 10:07:06 EDT +From: Ed Schwalenberg +Subject: C symbols conflict with Midas pseudos +To: ALAN@AI.AI.MIT.EDU, sra@XX.LCS.MIT.EDU, jtw@XX.LCS.MIT.EDU, + klh@SRI-NIC.ARPA, ian@SRI-NIC.ARPA, + moon@SCRC-STONY-BROOK.ARPA +Message-ID: <191008.870425.ED@AI.AI.MIT.EDU> + +*** EOOH *** +Date: Saturday, 25 April 1987 10:07-EDT +From: Ed Schwalenberg +To: ALAN@AI.AI.MIT.EDU, sra@XX.LCS.MIT.EDU, jtw@XX.LCS.MIT.EDU, + klh@SRI-NIC.ARPA, ian@SRI-NIC.ARPA, + moon@SCRC-STONY-BROOK.ARPA +Re: C symbols conflict with Midas pseudos + +KCC could prepend an "underscore", but then we'd be reduced to 5-character +identifiers. So how about doing this only in the conflicting case? +Since the set of Midas symbols that might cause conflict is known, +and fairly short, it should be easy for a filter to clean up KCC's +assembly-language output by changing symbols that are in conflict: + DEFINE: PUSHJ P,AOS +would become + DEFIN%: PUSHJ P,AOS% +It would even be possible for the filter to save the translations and +later patch up the symbol table of the MIDAS output so you can conveniently +say DEFINE/ to DDT. 
(AOS/ wouldn't work, but hey...). + +On the library issue, there are compilers that work by simply loading +the entire library into one big .o file. This has two bugs: it wastes +core, and it promotes names like "printf" to reserved-word status. + +Does STINK have the capability, known as -r to Unix ld, which says: +"Link these objects together, but put out another object, which may still +have undefined symbols in it"? If so, it would be simple to link +all the user's .o files in that way, then examine the undefined symbol +table of the resultant .o file for names which are in the library index, +and run STINK again with the big .o file containing the user's code and +the necessary set of "library" objects. + +I suggest these approaches because they require no changes to existing +programs. They are simple, and can be written in any language of your +choice (Lisp, Snyder C, and Midas come to mind). I have long forgotten +what little PDP10 assembly language I knew, so I can't help with most of +the problems you folks are working on, but I can write C programs of the +sort mentioned above. If you agree it's the right thing, I'll be glad to +write them.. + +1,, +Rcvd-Date: 25-Apr-87 20:48:57-EDT +Return-Path: +Received: from AI.AI.MIT.EDU by XX.LCS.MIT.EDU with Chaos/SMTP; Sat 25 Apr 87 20:48:53-EDT +Date: Sat, 25 Apr 87 20:51:13 EDT +From: Ed Schwalenberg +Subject: C symbols conflict with Midas pseudos +To: ALAN@AI.AI.MIT.EDU, klh@SRI-NIC.ARPA, ian@SRI-NIC.ARPA, + Moon@SCRC-STONY-BROOK.ARPA, sra@XX.LCS.MIT.EDU, + jtw@XX.LCS.MIT.EDU +Message-ID: <191153.870425.ED@AI.AI.MIT.EDU> + +*** EOOH *** +Date: Saturday, 25 April 1987 20:51-EDT +From: Ed Schwalenberg +To: ALAN@AI.AI.MIT.EDU, klh@SRI-NIC.ARPA, ian@SRI-NIC.ARPA, + Moon@SCRC-STONY-BROOK.ARPA, sra@XX.LCS.MIT.EDU, + jtw@XX.LCS.MIT.EDU +Re: C symbols conflict with Midas pseudos + +Hmmm. 
Another way around the whole relocation mess would be to maintain +"objects" and libraries as Midas code, and just do an absolute assembly +of the whole thing. I remember claims from the distant past that Midas +could absolutely assemble all of ITS about an order of magnitude faster +than STINK (I think) could load all of WAITS. + +1,, +Rcvd-Date: 27-Apr-87 18:16:18-EDT +Return-Path: +Received: from STONY-BROOK.SCRC.Symbolics.COM by XX.LCS.MIT.EDU with TCP/SMTP; Mon 27 Apr 87 18:16:15-EDT +Received: from EUPHRATES.SCRC.Symbolics.COM by STONY-BROOK.SCRC.Symbolics.COM via CHAOS with CHAOS-MAIL id 125567; Mon 27-Apr-87 18:12:04 EDT +Date: Mon, 27 Apr 87 18:11 EDT +From: David A. Moon +Subject: Re: STINK for ITS KCC +To: Ken Harrenstien +cc: SRA@XX.LCS.MIT.EDU, JTW@XX.LCS.MIT.EDU, Alan@AI.AI.MIT.EDU, Ian@SRI-NIC.ARPA +In-Reply-To: <12297274383.19.KLH@SRI-NIC.ARPA> +Message-ID: <870427181149.3.MOON@EUPHRATES.SCRC.Symbolics.COM> + +*** EOOH *** +Date: Monday, 27 April 1987 18:11-EDT +From: David A. Moon +To: Ken Harrenstien +cc: SRA@XX.LCS.MIT.EDU, JTW@XX.LCS.MIT.EDU, Alan@AI.AI.MIT.EDU, Ian@SRI-NIC.ARPA +Re: STINK for ITS KCC + +I think we've successfully used DEC's TOPS-10 LOADER, under DECUUO, to +build programs runnable under ITS in the past. Unless someone has made +that stop working, it might be the way to go. Remember that for right +now we are only talking about making one, single C program run on ITS, +so issues that would make it inconvenient to deal with a broad spectrum +of C programs shouldn't be allowed to interfere. 
+ +1,, +Rcvd-Date: 27-Apr-87 19:13:22-EDT +Return-Path: +Received: from SRI-NIC.ARPA by XX.LCS.MIT.EDU with TCP/SMTP; Mon 27 Apr 87 19:13:06-EDT +Date: Mon 27 Apr 87 16:14:34-PDT +From: Ken Harrenstien +Subject: Re: STINK for ITS KCC +To: Moon@SCRC-STONY-BROOK.ARPA +cc: SRA@XX.LCS.MIT.EDU, JTW@XX.LCS.MIT.EDU, Alan@AI.AI.MIT.EDU, + Ian@SRI-NIC.ARPA, KLH@SRI-NIC.ARPA +In-Reply-To: <870427181149.3.MOON@EUPHRATES.SCRC.Symbolics.COM> +Message-ID: <12297953201.19.KLH@SRI-NIC.ARPA> + +*** EOOH *** +Date: Monday, 27 April 1987 19:14-EDT +From: Ken Harrenstien +To: Moon@SCRC-STONY-BROOK.ARPA +cc: SRA@XX.LCS.MIT.EDU, JTW@XX.LCS.MIT.EDU, Alan@AI.AI.MIT.EDU, + Ian@SRI-NIC.ARPA, KLH@SRI-NIC.ARPA +Re: STINK for ITS KCC + +Well, I thought about the T10 loader. Trouble is, I believe it is +very old, without some things we now need, and I don't think there is +any source, or anything. There is also the MAKLIB program to +consider. + +I'm not sure what "one program" this is that you are talking about, +but assume it is SRA's domain stuff. If true, you could simply +maintain software on a T20 and cross-compile it for ITS -- just need a +trivial hack to dump out a DEC .EXE program image into an ITS SBLK +image, with symtab mungage. No need to worry about any ITS-ification +other than run time issues for the program in question. KCC has a multitude +of switches and options for cross-compilations. + +On the other hand, that's not as "interesting". It's OK for bootstrapping, +and OK for dull serious stuff where time is important, but that's not really +the reason (I thought) we were having fun talking about how to port KCC, and +it doesn't buy us any interesting new capabilities. + +There is one other tack, which is to take the CURRENT sources for LINK and +MAKLIB and fake them into thinking they are running on a PDP-10, actually +ITS DECUUO. 
While this would be something of a license violation, I'm not +sure it's any worse than DECUUO's stuff, and I doubt DEC will care much +about ITS anyway. (I feel safe suggesting this because I'm comfortably +out of reach of any backfires.) Not only would this make everything work, +and considerably simplify the task of porting KCC, it also buys you lots of +loader bug fixes and many new features that were added since the time of +the last T10 hackery. + +Ed's message gave me an idea about how to resolve symbol conflict problems +with MIDAS. Simply invent a "user symbol space" block, called U or C or +whatever, and do all the assembling in some other, also standard, block. +Every KCC-generated symbol will always have C" prefixed to the symbol. Thus, +I hope, you can freely generate stuff like: + C"POP: BLOCK 1 + C"MOVE: MOVE 1,C"POP + ... +Viva MIDAS! + +1,, +Rcvd-Date: 28-Apr-87 01:19:40-EDT +Received: from SPEECH.MIT.EDU by XX.LCS.MIT.EDU via Chaosnet; 28 Apr 87 01:19-EDT +Date: Tue 28 Apr 87 01:20:38-EDT +From: "John Wroclawski" +Subject: Re: STINK for ITS KCC +To: KLH@SRI-NIC.ARPA, Moon@SCRC-STONY-BROOK.ARPA +cc: SRA@XX.LCS.MIT.EDU, Alan@AI.AI.MIT.EDU, Ian@SRI-NIC.ARPA +In-Reply-To: <12297953201.19.KLH@SRI-NIC.ARPA> +Message-ID: <12298019843.25.JTW@MIT-SPEECH> + +*** EOOH *** +Date: Tuesday, 28 April 1987 01:20-EDT +From: "John Wroclawski" +To: KLH@SRI-NIC.ARPA, Moon@SCRC-STONY-BROOK.ARPA +cc: SRA@XX.LCS.MIT.EDU, Alan@AI.AI.MIT.EDU, Ian@SRI-NIC.ARPA +Re: STINK for ITS KCC + + From: Ken Harrenstien + Subject: Re: STINK for ITS KCC + + + Well, I thought about the T10 loader. Trouble is, I believe it is + very old, without some things we now need, and I don't think there is + any source, or anything. There is also the MAKLIB program to + consider. + +I'm not even sure it handles library searches... We don't have the +sources, but the thing's so simple it could probably be disassembled. 
+MAKLIB sources we have; it runs under the emulator on twenex too, so +it'll probably run under DECUUO almost right off. + + I'm not sure what "one program" this is that you are talking about, + but assume it is SRA's domain stuff. If true, you could simply + maintain software on a T20 and cross-compile it for ITS -- just need a + trivial hack to dump out a DEC .EXE program image into an ITS SBLK + image, with symtab mungage. + +Actually we should create this technology anyway, it seems like an +easier way to port the compiler itself than copying FAIL files over +and assembling them on ITS, or whatever. + + On the other hand, that's not as "interesting".... + +Ah, a man after my own heart. But we do need that domain stuff. + + There is one other tack, which is to take the CURRENT sources for LINK and + MAKLIB and fake them into thinking they are running on a PDP-10, actually + ITS DECUUO. + +The current LINK sources comprise some twenty zillion pages of grotty +code conditionalized for TOPS10 and TOPS20 which assemble into a 100+ +page (tops20 page -> 50 ITS page) program that knows how to do +everything except scratch your back. (really. It can draw pretty +little graphs of the overlay structure of your program on a plotter...) +But it -can't- do the one thing that would be really nice, which is to +automatically relocate the high segment to give you the maximum possible +amount of free memory. I don't think attempting to port this crock is +worth the trouble. + +Lets get some sort of cross-compilation method working and write a simple +little loader in KCC. + +1,, +Rcvd-Date: 28-Apr-87 01:25:24-EDT +Return-Path: +Received: from STONY-BROOK.SCRC.Symbolics.COM by XX.LCS.MIT.EDU with TCP/SMTP; Tue 28 Apr 87 01:25:20-EDT +Received: from EUPHRATES.SCRC.Symbolics.COM by STONY-BROOK.SCRC.Symbolics.COM via CHAOS with CHAOS-MAIL id 125834; Tue 28-Apr-87 01:27:09 EDT +Date: Tue, 28 Apr 87 01:26 EDT +From: David A. 
Moon +Subject: Re: STINK for ITS KCC +To: John Wroclawski +cc: KLH@SRI-NIC.ARPA, SRA@XX.LCS.MIT.EDU, Alan@AI.AI.MIT.EDU, Ian@SRI-NIC.ARPA +In-Reply-To: <12298019843.25.JTW@MIT-SPEECH> +Message-ID: <870428012658.0.MOON@EUPHRATES.SCRC.Symbolics.COM> + +*** EOOH *** +Date: Tuesday, 28 April 1987 01:26-EDT +From: David A. Moon +To: John Wroclawski +cc: KLH@SRI-NIC.ARPA, SRA@XX.LCS.MIT.EDU, Alan@AI.AI.MIT.EDU, Ian@SRI-NIC.ARPA +Re: STINK for ITS KCC + + Date: Tue 28 Apr 87 01:20:38-EDT + From: "John Wroclawski" + + Lets get some sort of cross-compilation method working and write a simple + little loader in KCC. + +Now -there's- a smart suggestion! + +Not only is this practical, but it seems like the right point in the +lifetime of the pdp-10 architecture for it to acquire a linking loader +that isn't a piece of shit. + +0,, KCC, +*** EOOH *** +Return-Path: +Received: from SRI-NIC.ARPA by XX.LCS.MIT.EDU with TCP/SMTP; Mon 11 May 87 21:37:17-EDT +Date: Mon 11 May 87 18:14:17-PDT +From: Ken Harrenstien +Subject: New CC.EXE available +To: info-kcc@SRI-NIC.ARPA +cc: klh@SRI-NIC.ARPA +Message-ID: <12301645011.18.KLH@SRI-NIC.ARPA> + +A new version (561) of SYS:CC.EXE has been installed at SRI-NIC and can be +FTP'd. This mainly fixes a couple more unusual optimization bugs, and +is not a new source distribution. + +0,, KCC, +*** EOOH *** +Return-Path: +Received: from SRI-NIC.ARPA by XX.LCS.MIT.EDU with TCP/SMTP; Mon 11 May 87 21:17:04-EDT +Date: Mon 11 May 87 18:17:42-PDT +From: Ken Harrenstien +Subject: Re: ADJSP 17,1 in the middle of a function call? +To: SRA@XX.LCS.MIT.EDU, Bug-KCC@SRI-NIC.ARPA +cc: KLH@SRI-NIC.ARPA +In-Reply-To: +Message-ID: <12301645633.18.KLH@SRI-NIC.ARPA> + +This bug is now fixed by the new version (561). It was a real pain, +otherwise it would have been fixed sooner -- was busy with other things. +Next bug, please... 
+ +0,, KCC, +*** EOOH *** +Rcvd-Date: 27-May-87 11:56:12-EDT +Return-Path: +Received: from SRI-NIC.ARPA by XX.LCS.MIT.EDU with TCP/SMTP; Wed 27 May 87 11:56:05-EDT +Received: from SCIENCE.UTAH.EDU by SRI-NIC.ARPA with TCP; Wed 27 May 87 08:47:46-PDT +Date: Wed 27 May 87 09:48:17-MDT +From: "Nelson H.F. Beebe" +Subject: Screen management +To: roode@BIONET-20.ARPA +cc: BEEBE@SCIENCE.UTAH.EDU, INFO-KCC@SRI-NIC.ARPA +X-US-Mail: "Center for Scientific Computation, South Physics, University of Utah, Salt Lake City, UT 84112" +X-Telephone: (801) 581-5254 +Message-ID: <12305736278.21.BEEBE@SCIENCE.UTAH.EDU> + +Volume 1 of mod.sources (available at SEISMO for FTP) has +PCURSES, a public-domain version of CURSES, which is +probably the best way to go now for screen management, since +it is available on all Unix systems, in VAX VMS C, and at +least 2 commercial (~$125) versions on the IBM PC (see the +June 87 issue of Computer Language, p. 144). PCURSES needs +sgtty.h, which KCC does not yet have, but may soon. + +0,, KCC, +*** EOOH *** +Rcvd-Date: 29-May-87 15:44:10-EDT +Return-Path: +Received: from SRI-NIC.ARPA by XX.LCS.MIT.EDU with TCP/SMTP; Fri 29 May 87 15:44:02-EDT +Date: Thu 28 May 87 15:15:20-PDT +From: Ken Harrenstien +Subject: Re: KCC Libraries +To: ROODE@BIONET-20.ARPA +cc: Golub@BIONET-20.ARPA, KLH@SRI-NIC.ARPA, info-kcc@SRI-NIC.ARPA +In-Reply-To: <12305650663.15.ROODE@BIONET-20.ARPA> +Message-ID: <12306068881.50.KLH@SRI-NIC.ARPA> + +To add to Nelson's reply: we are working on s/gtty() and ioctl() now, so +it shouldn't be long. + +As for the idea of using a sharable runtime segment for the library routines, +this notion has been entertained for a while but the mechanics have yet to +be worked out; the library itself is changing all the time. It has been +suggested that we find a way to use the FORTRAN runtime library for access +to their versions of various math functions; doing both at once would be +an interesting problem. 
If you have a specific proposal to contribute, +please get in touch... + +0,, KCC, +*** EOOH *** +Rcvd-Date: 3-Jun-87 03:05:48-EDT +Return-Path: +Received: from SRI-NIC.ARPA by XX.LCS.MIT.EDU with TCP/SMTP; Wed 3 Jun 87 03:05:38-EDT +Date: Wed 3 Jun 87 00:04:55-PDT +From: Ken Harrenstien +Subject: [Ken Harrenstien : New plan for signal() and BSD sigvec()] +To: sra@XX.LCS.MIT.EDU +Message-ID: <12307476010.15.KLH@SRI-NIC.ARPA> + +You aren't on BUG-KCC but I thought you might have something to say about +this stuff... + --------------- + +Mail-From: KLH created at 2-Jun-87 23:40:50 +Date: Tue 2 Jun 87 23:40:50-PDT +From: Ken Harrenstien +Subject: New plan for signal() and BSD sigvec() +To: bug-kcc@SRI-NIC.ARPA +cc: klh@SRI-NIC.ARPA +Message-ID: <12307471625.15.KLH@SRI-NIC.ARPA> + +I have been wrestling with various ideas on how to most satisfactorily +implement the 4.3BSD Unix sigvec() mechanism (which includes the old +V7 signal() as a subset). The existing signal code in the KCC library +is only a partial implementation and has several deficiencies, e.g. only +one signal can be handled at a time, and the "system calls" cannot return +EINTR; moreover, their data structures can be horribly messed up. + +The details of the scheme I came up with are too long to relate in this +message, particularly when I'm not sure who has an interest in the outcome. +If you would like to read my current thinkpiece and comment on it, the +file is PS:SIGNAL.PLAN on SRI-NIC.ARPA and should be accessible via +anonymous FTP. + +A brief summary: I propose to have all PSIs at a single level, and +have the PSI handler do DEBRK%s as quickly as possible after adjusting +various global masks and variables. No signal handler will run at +interrupt level. All features of the V7 and BSD scheme can be +implemented, including signal masks and many independent handlers. +The primary difficulty is having no obvious way to transparently +resume execution of a user-code JSYS interrupted by a signal. 
+ +If this interests (or worries) you, please read the file and send +comments! + +--Ken + +1,, KCC, +Rcvd-Date: 30-Jul-87 09:08:02-EDT +Mail-From: SRA created at 30-Jul-87 09:08:00 +Date: Thu, 30 Jul 1987 09:08 EDT +Message-ID: +From: Rob Austein +To: Bug-KCC@SRI-NIC.ARPA +cc: sra@XX.LCS.MIT.EDU +Subject: So, you guys only use GENERATION-RETENTION-COUNT == 1, huh? + +*** EOOH *** +Date: Thursday, 30 July 1987 09:08-EDT +From: Rob Austein +To: Bug-KCC@SRI-NIC.ARPA +cc: sra@XX.LCS.MIT.EDU +Re: So, you guys only use GENERATION-RETENTION-COUNT == 1, huh? + +[PHOTO: Recording initiated Thu 30-Jul-87 8:58AM] + + MIT TOPS-20 Command Processor 5(312160)-2 +XX>ty t.c +#include +main(argc,argv) + int argc; + char *argv[]; +{ + int i; + for(i = 0; i < argc; ++i) + printf("argv[%d]=\"%s\"\n",i,argv[i]); +} +XX>v -oa + + XX: + -OA.CMD.1;P777700 1 1058(7) 15-Jun-87 02:00:41 SRA + .2;P777700 1 224(7) 15-Jun-87 23:55:51 SRA + .FLUSH-LIST.2;P777700 1 1098(7) 15-Jun-87 01:56:28 SRA + .3;P777700 1 1109(7) 15-Jun-87 23:54:24 SRA + + Total of 4 pages in 4 files +XX>t.exe -oa.*.* +argv[0]="T" +argv[1]="XX:-OA.CMD" +argv[2]="XX:-OA.CMD" +argv[3]="XX:-OA.FLUSH-LIST" +argv[4]="XX:-OA.FLUSH-LIST" +XX>pop + +[PHOTO: Recording terminated Thu 30-Jul-87 8:59AM] + \ No newline at end of file diff --git a/doc/kcc/libc.doc b/doc/kcc/libc.doc new file mode 100755 index 000000000..dc7948b14 --- /dev/null +++ b/doc/kcc/libc.doc @@ -0,0 +1,934 @@ + KCC Runtime Library documentation + + This file summarizes the overall contents of the KCC C +library, and is used by implementors as a status file to determine the +portability or availability of particular library functions. This +file does NOT document what the functions do or how to use them, because +this information is already available in published form (see [CARM] +and [UPM] at the end of this page). + + The organization of routines here follows that of the +descriptions in Part II of [CARM]. 
Note that as of this writing there +are two versions of CARM; the first (v1) appeared in 1984, and the +second (v2) in 1987. All references here are to the most recent (v2) +version, and the organization of this file follows sections 13-22 of +v2 rather than section 11 of v1. + + KCC implements all CARM routines. In addition, there are +other routines which are part of KCC's C library, either for +compatibility with Un*x systems like V7 and BSD, or to provide access to +certain operating system functions. For the most part these have been +listed in whichever CARM section below is most appropriate, right +after the "official" CARM functions. + +Additional information: + +[CARM] Book: "C: A Reference Manual", Second edition, ISBN 0-13-109802-0 + by Samuel P. Harbison & Guy L. Steele, Jr. + Also known as "H&S". This is a very good reference and + describes most of the library functions. +[UPM] Book: "Unix Programmer's Manual". + 4.2/4.3BSD version (Reference Guide) printed by the USENIX Association. +[MAN] Files: "man foo" on most Un*x systems. + +[TMX] File: LIBTMX.DOC - documents the extended time functions, time_*(). +[USYS] File: USYS.DOC - contains a summary of those particular + KCC functions which simulate UN*X system calls. This is + considerably more implementor-oriented. +[SIGS] File: SIGNAL.DOC - contains an overview of the KCC signal + implementation. +[LIBC] File: LIB/CODING.DOC - describes guidelines for writing KCC library + functions and identifies certain crucial files. This is primarily + for implementors. 
+ +Contents summary: + Section 13: Standard Language Additions + Section 14: Character Processing (V1: Sec 11.1) + Section 15: String Processing (V1: Sec 11.2) + Section 16: Memory Functions + Section 17: Input/Output Facilities (V1: Sec 11.5) + Section 18: Storage Allocation (V1: Sec 11.4) + Section 19: Mathematical Functions (V1: Sec 11.3) + Section 20: Time and Date Functions + Section 21: Control Functions + Section 22: Miscellaneous Functions + Section BSD(3N): BSD network functions + Section TRM(3X): TERMCAP terminal independent functions + Section TMX(3X): Time and Date Functions (Extended) + Section KCC-1: KCC-specific general-purpose functions + + +Library function listing format: + +Name Module Port Comments +(routine name) (source file) (see below) + Name: Name of the function, variable, or macro. + Module: Source file module. "XXX" means the pathname "lib/XXX.C" + unless the section identifies a different source directory, + such as "usys/" or "stdio/", etc. + Header files are shown as "". + Port: A status code indicating portability, as follows: + E = file #includes "c-env.h" for environment configuration. + - runs on the given sys, one of: T20,10X,T10,WAITS,ITS. + *10 = portable to all of the above PDP-10 systems. + * = fully portable (either no OS-dependent stuff, or a + fully-portable conditional exists) + +Section 13: Standard Language Additions + +Name Module Port Comments + +NULL , * +typedef ... ptrdiff_t; * +typedef ... 
size_t; * +int errno; usys/URT * (see USYS.DOC) +char *strerror(errnum); STRERR E T20,10X +void perror(s); stdio/PERROR E T20,10X +int sys_nerr; STRERR * +char *sys_errlist[]; STRERR * +constant EDOM; * +constant ERANGE; * +__DATE__ in KCC * +__FILE__ in KCC * +__LINE__ in KCC * +__TIME__ in KCC * +__STDC__ NOT YET (in KCC) +va_alist,va_dcl <varargs.h> * KCC Non-ANSI form +va_list,va_start,va_arg,va_end <varargs.h> * KCC Non-ANSI form +va_list,va_start,va_arg,va_end <stdarg.h> * KCC ANSI form + +Notes: + All CARM facilities are supported, but __STDC__ is not defined +and will not be until KCC can provide full ANSI support. This of course +must wait until the ANSI draft standard becomes more concrete. + The standard set of UN*X error codes are provided, in particular +EDOM and ERANGE. All others apply only to failing UN*X system call +simulations. + Both <varargs.h> and <stdarg.h> are provided. Since they are +incompatible, they cannot be used together and only one of them +can be included by any particular program. + +Section 14: Character Processing (V1: Sec 11.1) Src: <ctype.h>, lib/ctype.c + +Name Module Port Comments + +int isalnum(c); CTYPE * +int isalpha(c); CTYPE * +int isascii(i); CTYPE * CARM/BSD +int iscntrl(c); CTYPE * +int iscsym(c); CTYPE * CARM only +int iscsymf(c); CTYPE * CARM only +int isdigit(c); CTYPE * +int isodigit(c); CTYPE * CARM only +int isxdigit(c); CTYPE * +int isgraph(c); CTYPE * +int isprint(c); CTYPE * +int ispunct(c); CTYPE * DIFFERENT (no space!) CARM goofed. +int islower(c); CTYPE * +int isupper(c); CTYPE * +int isspace(c); CTYPE * +int iswhite(c); - - rare variant of isspace, not provided. +int toascii(i); CTYPE * CARM/BSD +int toint(c); CTYPE * CARM only +int tolower(c); CTYPE * allows any case +int toupper(c); CTYPE * allows any case +int _tolower(c); CTYPE * CARM only +int _toupper(c); CTYPE * CARM only + +Tests: LIB/TEST/TCTYPE.C tests all these functions. + +Notes: + All CARM facilities are supported; <ctype.h> must be included. 
+All work with any unsigned 9-bit character value and EOF; most are +macros and very fast. None evaluate their argument more than once. + The ispunct() function differs from the CARM description, +which claims that "space" is included in the set. Neither the BSD nor +the ANSI draft version of ispunct() does this however, so we have +assumed that H&S made a mistake here, and the KCC version excludes +"space". + BSD's implementation of tolower and toupper is incorrect +(corresponds to _tolower and _toupper). KCC's corresponds to CARM. + +Implementation notes: + The flag table is large enough that any unsigned 9-bit char +value can be safely used as index. On the PDP-10 the table is an +integer array for speed, and is fully portable, but the macro +_CT_TABTYPE can be defined during installation if a char array is +preferable. defines all of the macros. CTYPE.C defines the +_ctyp1 and _ctyp2 tables, plus some small auxiliary routines that the +macros may call. + +Section 15: String Processing (V1: Sec 11.2) Src: , lib/ + +Name Module Port Comments + +char *strcat(s1,s2); STRING * +char *strncat(s1,s2,n); STRING * +int strcmp(s1,s2); STRING * +int strncmp(s1,s2,n); STRING * +char *strcpy(s1,s2); STRING * +char *strncpy(s1,s2,n); STRING * +int strlen(s); STRING * +char *strchr(s,c); STRING * +char *index(s,c); STRING * synonym for "strchr" +int strpos(s,c); STRING * CARM only +char *strrchr(s,c); STRING * +char *rindex(s,c); STRING * synonym for "strrchr" +int strrpos(s,c); STRING * CARM only +int strspn(s,set); STRING * +int strcspn(s,set); STRING * +char *strpbrk(s,set); STRING * +char *strrpbrk(s,set); STRING * CARM only +char *strstr(src, sub); STRING * CARM/ANSI only +char *strtok(str, set); STRING * ANSI/BSD/S5/CARM (not V7) +double strtod(str, ptr); ATOI * +long strtol(str,ptr,base); ATOI * +unsigned long strtoul(str,p,b); ATOI * +double atof(str); ATOI * +int atoi(str); ATOI * +long atol(str); ATOI * + +Additional non-CARM functions: + Non-case-sensitive versions 
of the above functions are + provided by the STRUNG module, declared in <strung.h>: + +int strCMP(s1, s2); STRUNG * +int strnCMP(s1, s2, n); STRUNG * +char *strCHR(s, c); STRUNG * +char *strSTR(src, sub); STRUNG * + +Tests: LIB/TEST/TSTRIN.C partially tests these functions. + +Notes: + All CARM facilities are supported. <string.h> must be +included. <strings.h> also exists for BSD compatibility; it merely +includes <string.h>. + +Implementation notes: + This stuff can use more conditionalizing in order to optimize for +specific configurations. The routines are portable, but are coded to +encourage use of the PDP-10 ILDB/IDPB instructions (opposite of the optimal +PDP-11 order!) + +Section 16: Memory Functions Src: lib/ + +Name Module Port Comments + +char *memchr(ptr,val,len); MEMSTR E * ANSI/BSD/CARM/S5 +int memcmp(ptr1,ptr2,len); MEMSTR E * ANSI/BSD/CARM/S5 +int bcmp(ptr1,ptr2,len); MEMSTR E * BSD/CARM (calls memcmp) +char *memcpy(dest,src,len); MEMSTR E * ANSI/BSD/CARM/S5 +char *memccpy(dest,src,c,len); MEMSTR E * BSD/CARM/S5 +void *memmove(dest,src,len); MEMSTR E * ANSI/ CARM (needs optimiz) +char *bcopy(src,dest,len); MEMSTR E * BSD/CARM (calls memcpy) +char *memset(ptr,val,len); MEMSTR E * ANSI/BSD/CARM/S5 +void bzero(ptr,len); MEMSTR E * BSD/CARM (calls memset) + +Tests: LIB/TEST/TBCOPY.C tests bcopy() and bzero(). + +Notes: + All CARM facilities are supported. The header file for +declaring these routines is <string.h> as per ANSI, but <memory.h> +also exists for BSD compatibility. + These are guaranteed to work for all valid KCC byte sizes, +i.e. any of 6, 7, 8, 9, and 18-bit bytes. Remember that the +arguments must be of type (char *), rather than (int *). Both +memcpy() and memset() have special optimization built into them so +that they are very fast for large amounts of data. memmove() is much +slower. + +Section 17: Input/Output Facilities (V1: Sec 11.5) Src: lib/stdio/ + +Name Module Port Comments + +constant EOF; * +struct FILE; * +FILE *fopen(path,typ); FOPEN * See notes on next page. 
+int fclose(fp); FCLOSE * +int fflush(fp); FFLUSH * also works on input streams +FILE *freopen(path,typ,fp); FREOPE * +int setvbuf(fp,buf,type,size); SETBUF * +void setbuf(fp,buf); SETBUF * +FILE *stdin; (m) * +FILE *stdout; (m) * +FILE *stderr; (m) * +int fseek(fp,off,typ); FSEEK * +long ftell(fp); FTELL * +void rewind(fp); REWIND * +int fgetc(fp); FGETC * +int getc(fp); (m) * +int getchar(); (m) * +int ungetc(c,fp); UNGETC * +char *fgets(s,n,fp); FGETS * +char *gets(s); GETS * +int fscanf(fp,fmt,ptrs); SCANF * +int scanf(fmt,ptrs); SCANF * +int sscanf(s,fmt,ptrs); SCANF * +int fputc(c,fp); FPUTC * +int putc(c,fp); (m) * +int putchar(c); (m) * +int fputs(s,fp); FPUTS * +int puts(s); PUTS * +int fprintf(fp,fmt,args); PRINTF * +int printf(fmt,args); PRINTF * +int sprintf(s,fmt,args); PRINTF * +int vfprintf(fp,fmt,arg); PRINTF * +int vprintf(fmt,arg); PRINTF * +int vsprintf(s,fmt,arg); PRINTF * +int fread(ptr,siz,cnt,fp); FREAD * +int fwrite(ptr,siz,cnt,fp); FWRITE * +int feof(fp); (m) * +int ferror(fp); (m) * +void clearerr(fp); (m) * +int remove(filename); REMOVE * just calls unlink() +int rename(oldnam,newnam); RENAME * null file, uses USYS rename(). +FILE *tmpfile(); TMPFIL * +char *tmpnam(buf); TMPNAM E T20,10X +char *mktemp(buf); MKTEMP E T20,10X uses HPTIM%, not getpid + +These functions were recently added to the ANSI C draft (not yet implemented): + +int fgetpos(FILE *stream, fpos_t *pos); ?? +int fsetpos(FILE *stream, const fpos_t *pos); ?? 
+ + +Additional STDIO functions for V7/BSD compatibility: + +constant BUFSIZ; * V7/BSD +constant NULL; * V7/BSD +FILE *fdopen(fd,type); FDOPEN * V7/BSD open w/existing FD +int fileno(fp); (m) * V7/BSD +int getw(fp); GETW * V7/BSD Get word (int) +int putw(w,fp); PUTW * V7/BSD Put word (int) +void setbuffer(fp,buf,size); SETBUF * BSD +void setlinebuf(fp); SETBUF * BSD do linebuffering + +Additional KCC functions: + +FILE *sopen(s,type); SOPEN * (KCC only) open string for I/O + +Internal globals, not for user consumption: + +FILE _sios[]; * internal array +void _cleanup(); CLEANU * called by exit() for cleanup + +Tests: LIB/TEST/TFSEEK.C, TFTEL1.C, TFTEL2.C - tests fseek/ftell + LIB/TEST/TPRINT.C - tests printf. + +Notes for Section 17 (STDIO): + + See <.LIB.STDIO>-READ-.-THIS- for implementation-specific details. + + Note that some facilities, in particular putc and getc, are +implemented as macros. + + In general, the sequence CR-LF is converted to LF on 7-bit +input, and LF converted to CR-LF on 7-bit output. This conversion is +performed by the system call read/write functions and not by STDIO, +however. See the notes on fopen() below for details. + +[17.2] (V1 11.5.7) fflush(): + This should normally only be called on an output stream; +however, if called on an input stream, fflush() flushes any buffered +but unread data. This feature is probably not portable. + +[17.2] (V1 11.5.10, 11.5.15) fopen(), freopen(): + These implement all the H&S type specification characters, +with certain defaults and settings appropriate for the PDP-10 world: + +String Mode Start Description +"r", "rb" R 0 Open existing file for reading. Error if not found. +"w", "wb" W 0 Create a new file for writing. +"a", "ab" W EOF Append to existing file (create new if necessary). +"r+","r+b" R/W 0 Open existing file for updating. Error if not found. +"w+","w+b" R/W 0 Create a new file for updating. +"a+","a+b" R/W EOF Append to existing file (create new if necessary). 
+ + Note that on TOPS-20 and TENEX, files have version numbers, and +writing a file never truncates an existing one; "w" and "w+" always create +new versions. + A stream can be either "text" or "binary", as per the ANSI +draft description; a "b" in the string specifies binary. The +characteristics of the two types of streams are: + Bytesize(old) Bytesize(new) LF-conversion + TEXT or 7 7 yes if size 7 + BINARY or 9 9 no, never + + When an OLD, existing file is opened (for reading, appending, +or updating), normally the bytesize of the file is used as the +bytesize of the stream. If the file bytesize is 7, 8, or 9 then that +size is used. If the file bytesize is 0 or 36 then the default (7 or +9) is used instead. If the file bytesize is anything other than +0, 7, 8, 9, or 36 then the behavior is undefined. + When a NEW file is created, its bytesize will be that of the +stream, which is normally 7 for text, 9 for binary. Note that older +versions of a file may have a different bytesize -- the notion of +checking these to set the bytesize was considered, but rejected in the +interest of simplicity. + Whether LF conversion is performed on the stream is a little +simpler. A text stream is only converted if the stream bytesize is 7; +otherwise conversion does NOT happen. A binary stream is never +converted, regardless of the bytesize. + + The user can override either the bytesize or the conversion +by adding explicit specification characters, which should come after +any regular specification characters: + "C" Force LF-conversion. + "C-" Force NO LF-conversion. + "7" Force 7-bit bytesize. + "8" Force 8-bit bytesize. + "9" Force 9-bit bytesize. + "T" Open for thawed access (TOPS-10/TENEX only) + + These are KCC-specific however, and are not portable to other +systems. Note that the actual LF conversion is done by the USYS (Unix +simulation) level calls (read() and write()) rather than STDIO. 
+ +[17.5] fseek(), ftell(), rewind(): + For binary streams (no LF conversion), the I/O pointer value +returned by ftell() and used in fseek() is the same as the USYS (and +T20) pointer value; arithmetic can be done on this pointer to derive +new pointers as arguments to fseek(). However, for text streams when +LF conversion is being done, the I/O pointer value is a composite which +cannot be manipulated; the argument to fseek() can only be 0 or a +value previously returned by ftell() for that stream. This corresponds +to the restrictions described in H&S. + fseek() does not yet work for "+" text streams (i.e. +LF-converted streams open for both reading and writing). + +[17.6] (V1 11.5.34) ungetc(): + The number of characters that can be pushed back with ungetc is +a site-dependent option available at library compile-time. _SIO_NPBC +in STDIO.H defaults to 1. + +[17.8] (V1 11.5.16, 11.5.28, 11.5.30) fscanf(), scanf(), sscanf(): + Common sense was used in implementing the various conversion +routines when there was doubt about CARM's description: + For numeric conversions ('d', 'u', 'o', 'x', 'f'), there must +be at least one digit present for the parse to succeed, despite CARM's +claim that "some number" of digits, "possibly none" are allowed. For +string scanners ('s' and '['), at least one character must be read. + +[17.11] (V1 11.5.11, 11.5.23, 11.5.29) printf(), fprintf(), sprintf(): + An additional facility has been provided for the user to +assign his own conversion specification character to arbitrary +functions. This function is "prf_bind()" in module PRINTF, which should +be seen for details. Unfortunately this is not portable. + +[17.13] (V1 11.5.14,11.5.19) fread(), fwrite(): + These are implemented assuming that the input stream is open +in 9-bit binary mode, such that all 36 bits of an int can be read with +four successive bytes. No byte-size or mode checking is done, so it +is the user's responsibility to make sure the stream is open +correctly. 
+ +[17.16] tmpfile(), tmpnam(), mktemp(): + On TOPS-20/TENEX these are implemented using a time value from +the system rather than getpid() due to the difficulty of ensuring that +getpid() is unique. This may change. + +Additional STDIO routines from 4.2BSD: + See a BSD UPM section 3S for details on those routines. Some +of them existed in V7 as well, such as fdopen(), fileno(), getw(), putw(), +and setbuf(). + +Additional STDIO routines (KCC specific, not portable): + + sopen(): opens a string as a source or destination for I/O. +The first arg is a string pointer, second is a standard fopen type +specification. The implementation of this is not yet complete: only +"r" and "w" are implemented. "a" (append) mode does NOT do the +obvious thing; place has been kept for "w+" to automatically expand +the given string if the end is reached (assuming it was allocated by +malloc). If a NULL string pointer is given, a string buffer is +allocated starting at max_size characters. The file pointer cannot be +repositioned (e.g. a string can be scanned only once). These things +may be finished some day. + +Section 18: Storage Allocation (V1: Sec 11.4) Src: lib/ + +Name Module Port Comments + +char *malloc(size); MALLOC * +char *calloc(cnt,siz); MALLOC * (calls malloc) +char *mlalloc(lsize); MALLOC * CARM only. (calls malloc) +char *clalloc(lcnt,lsize); MALLOC * CARM only. (calls malloc) +void free(ptr); MALLOC * +void cfree(ptr); MALLOC * Not in ANSI. (calls free) +char *realloc(ptr,size); MALLOC * +char *relalloc(ptr,size); MALLOC * CARM only. (calls realloc) + +-------------------- +Additional non-CARM functions: + These are temporary only and their use in new code is not + advisable; they will probably go away soon. + +int _palloc(n); allocate n pages of memory +void _pfree(page); free allocated pages + +Tests: LIB/TEST/TMALL1.C, TMALL2.C + +Notes: + All CARM facilities supported. can be included. 
+Since "long" is the same size as "int" for KCC, the long and int forms +of calls are functionally identical. For portability the "long" forms +should not be used. + + Note that in ANSI these facilities can be declared with +, but in non-ANSI implementations there is no associated +header file. You should either include , or you should be +VERY careful about pre-declaring these functions properly, and be SURE +that routines which expect a char pointer argument are given one. + A common mistake is failing to declare malloc(), so that the +compiler is unaware of the proper conversions that must be applied to +the return value (which is a PDP-10 byte pointer). This sort of type +mismatch error can go undetected on some machines but will cause you +all kinds of mysterious grief on the PDP-10. + Using brk() and sbrk() is not prohibited, but doing so is +guaranteed to confuse the storage allocator and cause problems if you +also use malloc() and friends. + The KCC functions conform to the ANSI/CARM descriptions of how +they should behave, particularly when given strange arguments like +NULL pointers. This is different from the behavior on BSD, where a +zero size will still return something from malloc and realloc (rather +than ignoring and freeing). + +[18.1] (V1 11.4.1,11.4.3,11.4.5,11.4.6) calloc(),clalloc(),malloc(),mlalloc(): + clalloc() == calloc() on the PDP-10. These will return NULL +if either argument is zero (as per ANSI). + mlalloc() == malloc() on the PDP-10. These also return NULL +if given a zero argument (as per ANSI). + +[18.2] (V1 11.4.2, 11.4.4) free(), cfree(): + cfree() == free() on the PDP-10. CARM claims that for +maximum portability it is best to use cfree() only to deallocate +memory allocated by calloc(), and free() only to deallocate memory +allocated by malloc(). However, the ANSI draft has flushed cfree() +altogether. + free() does nothing if given a NULL argument (as per ANSI). 
+If given a bad pointer, free() calls abort() after sending the following +message to stderr: + "free(): tried to free invalid block" + +[18.3] (V1 11.4.7, 11.4.8) realloc(), relalloc(): + relalloc() == realloc() on the PDP-10. These behave as per +ANSI for unusual arguments: if the pointer is NULL, it acts like +malloc(); if the size is zero, it acts like free() and returns NULL. +If given a bad pointer, realloc() calls abort() after sending the +following message to stderr: + "realloc(): tried to reallocate invalid block". + +Section 19: Mathematical Functions (V1: Sec 11.3) Src: lib/math/ + +Name Module Port Comments + +int abs(x); ABS * PRIMITIVE: C code +double fabs(x); FABS * PRIMITIVE: C code +long labs(x); LABS * PRIMITIVE: C code +div_t div(n,d); DIV E * PRIMITIVE: C or PDP10 code +ldiv_t ldiv(n,d); DIV E * PRIMITIVE: C or PDP10 code +double ceil(x); CEIL * based on modf() +double floor(x); FLOOR * based on modf() +double fmod(x,y); FMOD * based on modf() +double exp(x); EXP *10 PRIMITIVE: uses _sign,fabs,modf,ldexp +double log(x); LOG *10 based on _xexp, _xmant, _poly +double log10(x); LOG10 * based on log() +double frexp(x,nptr); FREXP *10 PRIMITIVE: MACH DEP C code! +double ldexp(x,n); LDEXP *10 PRIMITIVE: MACH DEP C code! +double modf(x,nptr); MODF E *10 PRIMITIVE: MACH DEP asm code! 
double pow(x,y); POW * based on exp(), log(), modf() +double sqrt(x); SQRT * based on _xexp(), _xmant(), ldexp() +int rand(); RAND E *10 PRIMITIVE: mach dep C code +srand(seed); RAND E *10 PRIMITIVE: C code +double cos(x); COS *10 PRIMITIVE: uses fmod,sin,sqrt,_poly +double sin(x); SIN *10 PRIMITIVE: uses fmod,cos,sqrt,_poly +double tan(x); TAN * based on sin(), cos() +double acos(x); ACOS * based on atan() +double asin(x); ASIN * based on atan() +double atan(x); ATAN *10 PRIMITIVE: uses _sign, _poly +double atan2(y,x); ATAN2 * based on atan() +double cosh(x); COSH * based on exp() +double sinh(x); SINH * based on exp() +double tanh(x); TANH * based on exp() + +-------------------- +Additional support routines, NOT IN CARM: + These exist only to support the above routines and should not + be used by user code. + +double _sign(x, y); SIGN * PRIMITIVE: C code +double _poly(x, y, z); POLY * PRIMITIVE: C code +int _xexp(x); XEXP *10 PRIMITIVE: MACH DEP C code! +double _xmant(x); XMANT *10 PRIMITIVE: MACH DEP C code! + + +Tests: LIB/TEST/TMATH.C + (Why don't we have good precision for atan()?? E-9 only) + +Notes: + All CARM facilities are supported. <math.h> must be included. +These are mostly derived from the Portable Math Library written by +Fred Fish. + +[19.8] (V1 11.3.25) tan(): + According to CARM, "If the argument is so close to an odd +multiple of pi/2 that the correct result value is too large to be +represented, then the largest representable positive floating-point +number is returned and the error code ERANGE is stored into the +external variable errno". The actual error check done is to see if +for tan(x), cos(x) == 0. If so, the error behavior above is done. + +[19.9] (V1 11.3.5) atan2(): + For atan2(0, 0), the value 0 is returned and errno set to EDOM. + +[19.10] (V1 11.3.22) sinh(): + sinh() of a negative argument that is too large returns the +largest representable negative floating-point number. + +Other notes: + ANSI and CARM have the same functions. 
There are a few +differences about domain/range error specifications which are minor. +The BIG incompatibility is: + ANSI modf() is incompatible with CARM and BSD, because + the 2nd arg is (double *) instead of (int *)!!! + +The functions abs, labs, rand, and srand are declared in <stdlib.h> by +ANSI, in <math.h> by CARM. + +BSD appears to have all CARM functions except labs() and fmod(). +BSD has these functions which are not in ANSI or CARM: + (all return double unless otherwise indicated) + Documented in UPM: gamma, hypot, cabs, j0, j1, jn, y0, y1, yn + Undocumented: asinh, acosh, atanh, erf, erfc, expm1, log1p + rint, lgamma, copysign, drem, logb, scalb, cbrt + finite (returns int), infnan (VAX only) + +Section 20: Time and Date Functions Src: lib/, lib/usys/ + +Name Module Port Comments + +clock_t clock(); CLOCK E *10 +clock_t * +CLK_TCK * +struct tms * +void times(tmsbuf); USYS/TIMES * (see USYS.DOC) +time_t time(tptr); USYS/TIME E *10 (see USYS.DOC) +time_t * +char *asctime(ts); CTIME E *10 +char *ctime(timptr); CTIME E *10 +struct tm *gmtime(t); CTIME E *10 +struct tm *localtime(t); CTIME E *10 +time_t mktime(tmptr); CTIME E *10 +double difftime(t1,t0); CTIME E *10 + +-------------------- +Additional non-CARM functions: + +char *timezone(mwest, dstt); CTIME E *10 For BSD/V7 compatibility (!S5) + +typedef ... tadl_t; E *10 Type for local TAD value +tadl_t tadl_get(); USYS/TIME E *10 Get current local TAD value +tadl_t tadl_to_utime(time); USYS/TIME E *10 Convert time_t to tadl_t +time_t tadl_from_utime(tadl); USYS/TIME E *10 Convert tadl_t to time_t + +Internal globals, not for user consumption: + +struct tm *_lbrktime(); CTIME E ITS,T10,WAITS For use by USYS/TIME +int _tmisdst(); CTIME E ITS,T10,WAITS For use by USYS/TIME + +The latest ANSI C draft also includes this function (not implemented yet): + +size_t strftime(char *s, size_t maxsize, const char *format); ?? + +Tests: LIB/TEST/TTIME.C + +Notes: + All CARM facilities are supported. 
+ For additional time functions, see the TMX(3X) section. + +[20.1] clock(), times(): + CLK_TCK is uniformly 1000 (i.e. runtime is in milliseconds). The +BSD times() call is supported, although only crudely; it does not return +its children's runtime. It could if this was needed. + +[20.2] time_t, time(): + The type of time_t is "int". The value is the same as that +for a standard UN*X implementation, i.e. the number of seconds since +1/1/1970 GMT. This is NOT a TOPS-20 or TENEX GTAD format time and date. +If you wish to manipulate time-and-date (TAD) values of the local operating +system, use the (non-standard) tadl_t facilities. + +Section 21: Control Functions Src: lib/, lib/usys/ + +Name Module Port Comments + +macro assert(); * +int system(cmd); SYSTEM E T20,10X (partial implem) +int exec*(); USYS/FORKEX E T20,10X (partial implem) +void exit(status); USYS/URT E T20,10X +void _exit(status); USYS/URT E T20,10X +void abort(); ABORT *10 +typedef ... jmp_buf[]; E *10 KCC specific +int setjmp(env); SETJMP E *10 KCC specific +void longjmp(env,status); SETJMP E *10 KCC specific +typedef ... onexit_t; * +onexit_t onexit(func); ONEXIT * +constant SIG_IGN; * +constant SIG_DFL; * +constant SIG_ERR; * +constant SIGxxx; * +void (*signal(sig,func)); USYS/SIGNAL * See USYS.DOC +int raise(sig); USYS/SIGVEC E T20,10X See USYS.DOC +int kill(pid,sig); USYS/SIGVEC E T20,10X See USYS.DOC +int (*ssignal(softsig,func))(); SSIGNA * +int gsignal(softsig); SSIGNA * +void psignal(sig,prefix); PSIGNA * +void sleep(secs); USYS/SLEEP E *10 See USYS.DOC +unsigned alarm(secs); USYS/ALARM E T20 See USYS.DOC + +-------------------- +Additional functions: + +int _setjmp(env); SETJMP E *10 For BSD compatibility +int _longjmp(env,val); SETJMP E *10 For BSD compatibility +void longjmperror(); SETJMP E *10 For BSD compatibility +int forkexec(); USYS/FORKEX E T20,10X Combines fork & exec! 
+ +The latest ANSI C draft has replaced onexit() with: + +int atexit(void(*func)(void)) + +Notes: + All CARM facilities exist, although some may not be as +completely supported as for a real UN*X system. + +[21.2] exec(), system(): + The various forms of exec() all exist, but none of them do +anything with the "envp" environment pointer. For the very common +situation where a fork() is followed by an exec(), the forkexec() call +should be used instead; it is MUCH faster. + system() is not a full implementation. On TOPS-20 the command +string is parsed assuming that the first word is a system program +name, which is then invoked (using forkexec()) with the full +string as an RSCAN% argument. Unfortunately there is no convenient way +to feed input directly into an inferior EXEC. This call could be changed +to use a PTY, but this would be much slower. + +[21.3] abort(), exit(): + abort() does no cleanup actions whatsoever. It simply +attempts to execute a zero instruction, which generates an illegal +instruction fault on the PDP-10. This can be ignored by the signal +handler, but if so then the program will loop indefinitely; abort() +NEVER returns to its caller. + exit() cleans up by calling all functions registered with onexit(). +The STDIO buffers are the last thing cleaned up. + +[21.4] setjmp(), longjmp(): + In addition to the above, KCC also implements the BSD +facilities of _setjmp(), _longjmp(), and longjmperror(). A checksum +is stored in jmp_buf, and longjmp() checks this as well as +other things; if anything looks bad, it calls longjmperror(). + longjmperror() can be defined by the user (the default routine +simply prints "longjmp botch"). If it returns, abort() is called. + +[21.5] onexit(): + Up to 32 functions can be registered, as per CARM. + +[21.6] signal(), raise(), kill(), gsignal(), ssignal(), psignal(): + The implementation of signals is more complete than you might +expect. 
+ The software signal functions gsignal() and ssignal() operate as +described in CARM, as does the signal description function psignal(). +Note that psignal() outputs to the STDIO stream "stderr" rather than the +UN*X file descriptor 2. + raise(sig) is implemented simply as kill(getpid(),sig). + The signal() and kill() functions are "UN*X system calls" and +as such are treated specially. What KCC actually implements corresponds +to the 4.3BSD signal handling mechanism, using the sigvec() call and +a signal block mask. The main difference from standard UN*X is that +when a signal is caught, its handler is NOT reset to SIG_DFL; also, +most system calls are resumed rather than forced to fail with EINTR. + For a fuller description the file SIGNAL.DOC should be consulted. + +[21.7] sleep(), alarm(): + alarm() is implemented as described in CARM. On TOPS-20 this uses +the TIMER% jsys. A signal handler for SIGALRM must be defined before the +first call to alarm(), otherwise nothing will happen. + The TOPS-20 implementation of sleep() uses a timer separate from +that of alarm(). If any signal is handled, sleep() will return immediately +with errno set to EINTR; it does not return any value. + +Section 22: Miscellaneous Functions Src: lib/ + +Name Module Port Comments + +int main(argc,argv); * * User program! +char *ctermid(s); CTERMI E T20 +char *cuserid(s); CTERMI E T20 +char *getcwd(buf,size); GETCWD E T20 +char *getwd(path); GETCWD E T20 +char *getenv(name); GETENV E T20 Note 10X does not have LNMST%. +char *getlogin(); GETLOG E T20 +int getopt(argc,argv,optstr); GETOPT * Note other externals defined! +int putenv(namval); GETENV E T20 +char *bsearch(ky,bs,ct,sz,cmp); BSEARC * +void qsort(base,cnt,siz,cmp); QSORT * GNU version + +-------------------- +The latest ANSI C draft has added a new function and new header file: + +char *setlocale(int category, char *locale); + +Notes: + All CARM functions are supported insofar as possible. 
+ +[22.1] main(): + The runtime startup provides main() with argc and argv, as +parsed from the command line (on TOPS-20 this is the RSCAN% buffer). +However, it does NOT provide "env" or "environ". + +[22.2] ctermid(), cuserid(): + These are System V functions and are not present in BSD or ANSI. +KCC implements them as per the CARM description. + +[22.3] getcwd(), getwd(): + Implemented as per description. + +[22.4] getenv(), getlogin(), getopt(), putenv(): + CARM goofed by only describing getenv(). +KCC implements all of these, but the environment variables need further +explanation. + +[22.5] bsearch(): + This is a new function in ANSI. +[22.6] qsort(): + KCC uses the GNU (Free Software Foundation) implementation of +this function. + +Section BSD(3N): UPM(3N) BSD network functions Src: lib/network/ + +Name Module Port Comments + +struct hostent; Defs for following +struct hostent *gethostent(); GETHST - not yet done +struct hostent *gethostbyaddr(adr,l,t); GETHST E T20,10X +struct hostent *gethostbyname(nam); GETHST E T20,10X +void sethostent(flg); GETHST - not yet +void endhostent(); GETHST - not yet + +Section TRM(3X): TERMCAP functions Src: , lib/trmcap.c + +Name Module Port Comments + +int tgetent(bp,name); TRMCAP * +int tgetnum(id); TRMCAP * +int tgetflag(id); TRMCAP * +char *tgetstr(id, area); TRMCAP * +char *tgoto(cm, destcol, destline); TRMCAP * +int tputs(cp, affcnt, outc); TRMCAP * + +Tests: Compile lib/trmcap.c with -DTEST to generate a test program. + +Notes: + This should be a full TERMCAP emulation. The code is derived from +that of Gnuemacs (of the Free Software Foundation). + The functions are kept in a library separate from the normal C +library; to use them, the program must #include and the +"-ltrm" switch must be given on the KCC command line. + The terminal database file on TOPS-20 is kept in C:TRMCAP.DAT. 
+ Programs using these functions must include declarations +of the following variables, which TERMCAP expects to use: + char PC; + char *BC; + char *UP; + short ospeed; + +Section TMX(3X): Time and Date Functions (Extended) Src: , lib/ + +Name Module Port Comments + +struct tmx * includes +struct tmz * +int time_parse(str, tmx, endptr); TIMEPA * +time_t time_make(tmx); TIMEMK * +int time_lzone(tmz); TIMEZN E * +int time_tzset(); TIMEZN E * + +Notes: + Documentation is in the file C:LIBTMX.DOC. These functions +were written by Ken Harrenstien and are distributed in a quasi-public +fashion, similar to GNU software. They are not supported by any +specific C implementation but because the code is available and is +portable to any reasonable system, it should be safe to write code +using these functions. + + These functions are kept in a library separate from the normal C +library; to use them, the program must #include and the "-ltmx" +switch must be given on the KCC command line. + + +Section KCC-1: KCC-specific general-purpose functions Src: lib/ + +Name Module Port Comments + + T20,10X Defines JSYS nums and arg vals +jsys JSYS E T20,10X Support for T20/10X syscalls + +regex* ? GNU version. +regex* REGEX * GNU version. + Not used, problems with overly + long variable names. Foo. + +syscal ITS Defines ITS system-call macro. +_scall SYSCAL E ITS C support for ITS syscalls + +int muuo(ins,d,e); MUUO T10,WAITS Execute MUUO +int calli(n,ac,ret); MUUO T10,WAITS Execute CALLI + +Notes on jsys(): + + The jsys() function has been provided for ease in performing +simple TOPS-20/TENEX monitor calls without being forced to resort to asm(). +The calling convention is: + + #include + int jsys(num, acs); + int num, acs[5]; + +The jsys number is given in "num", and registers 1 through 4 are given +and returned in the "acs" array. Offsets in acs correspond to machine +registers; thus acs[1] goes into AC1 before the call and then takes +the value of AC1 after the call. 
acs[0] is not used unless the call fails. + The include file <jsys.h> defines all JSYS names, including +certain flags which tell the jsys() routine what behavior to expect from +that JSYS. This information allows jsys() to present completely regular +behavior to the C user, regardless of which JSYS is invoked. + +jsys() returns: + == 0 if it failed. The JSYS error code is returned in acs[0]. + If something was wrong with the arguments to jsys() itself, + so that no JSYS was done, the error code will be 0. + < 0 if it was interrupted. + This is only possible if the JSYS_OKINT flag was + OR'd into "num" for the call. The return value will be + -2 if the interrupt happened before the JSYS was invoked, + and -1 if it actually interrupted the JSYS. + > 0 if it succeeded. The return value will be one of 1, 2, or 3 + depending on whether the JSYS returned to .+1, .+2, or .+3. + + Note that interruption is ONLY allowed if the JSYS_OKINT flag +is set in the "num" argument. Thus, for example, the following call +can be interrupted: + jsys(WAIT|JSYS_OKINT, acs); +but this call will NOT be interrupted: + jsys(PMAP, acs); + + If the user program does direct JSYS calls itself with asm() +then the signal handling code permits these to be interrupted, and +tries to restart the JSYS when the signal handler returns, but this is +not guaranteed to work for all possible cases. The jsys() call by +contrast is always guaranteed to behave in a predictable way. + + Things are not yet completely normalized for TENEX, because +the need to handle .ICILI (illegal instruction) interrupts and emulate ERJMP +complicates matters.
diff --git a/doc/kcc/libtmx.doc b/doc/kcc/libtmx.doc new file mode 100755 index 000000000..76b20fc9d --- /dev/null +++ b/doc/kcc/libtmx.doc @@ -0,0 +1,269 @@ +NAME + time_parse - parse free-format date/time string + +SYNOPSIS + #include + + int time_parse(char *str, struct tmx *tmx, char **endptr); + + struct tmx { + struct tm tm; /* See ctime(3) for definition */ + struct tmz tmz; /* See time_lzone(3X) for definition */ + char *tmx_err; /* NULL or pointer to error message */ + int tmx_flags; /* Flags for possible future uses */ + }; + #define TMX_NULL (-1) /* Unspecified items are given this value */ + + +DESCRIPTION + + time_parse() accepts a null-terminated date/time string, +"str", and parses it to fill out the specified TMX structure, "tmx". +If "endptr" is not null, it is always set to point to the remainder +(if any) of the string not consumed by time_parse(). The "time_make()" +function can be used to derive a UNIX time word of type "time_t" from the +resulting TMX structure. + + The integer return value is used to help indicate overall +success or failure. A negative value means that the parse stopped due +to conflicting specifications or ambiguous keywords, which usually +means an error; otherwise, the value is the number of non-break tokens +successfully scanned, which may be 0. Whenever the parse did not +reach the end of the string for whatever reason, the char pointer +"tmx_err" will be non-NULL and will point to a short constant message +string describing the reason. + +INPUT + + "time_parse()" is intended to parse all reasonable date and/or +time formats, specifically including everything that the TOPS-20/TENEX +IDTNC JSYS will accept. The function is actually much more flexible +than this since it will tolerate most cases of arbitrarily ordered, +delimited, and duplicated items; it makes use of contextual clues to +heuristically determine the meaning of numerical tokens. 
+ + The string is parsed into tokens which are either numbers or +keywords, separated by punctuation break characters. Keywords may be +in any case and need only have enough of the word to uniquely identify +it. Whitespace serves to delimit tokens but is otherwise ignored. +The parse will stop when it encounters a keyword which cannot be +identified (unknown or ambiguous), or encounters a character that is +not one of the valid date/time punctuation chars: "()-,/:." plus space +and tab. It will also stop if the only plausible interpretation of +a token leads to a clash with already parsed values. + For maximum flexibility, the parsing is deliberately as +forgiving as possible, and will accept ANY construct as long as it can +make some sense out of it, regardless of how bizarre the string looks +to a human. It is guaranteed to correctly parse all strings which are +"well formatted", but it cannot be used to enforce a specific syntax +and its interpretation of some inherently ambiguous strings may not be +what the user expects. + + The following are examples of acceptable formats (most taken +from the TOPS-20/TENEX documentation). They are shown as dates and +times; a string may have only one of these, or it may have both in +either order, and sometimes their tokens can intermingle. 
+ Dates Times + 6-Feb-76 12:34:56 or 1234:56 or 123456 + Feb-6-76 12:34 or 1234 + Feb 6 76 16:30 or 1630 or 4:30pm + Feb 6, 1976 1:23 or 123 or 0123 + 6 Feb 76 1:56AM + 6.2.1976 1:56-EST or 1:56 EST or 0156AM EST + 2/6/76 NOON + 1976-02-06 12:00AM or 12 AM or 12-am or MIDNIGHT + Wedn Febru 6 1976 5-STD or 0500-STANDARD + 1976 We Fe 6 12:30-DAYLIGHT + + Combined: + Fri Nov 13 13:08:02 PDT 1981 (output of ctime()) + Monday, May 24, 1987 3:22:23am-PST (03:22:23) (output of T20 daytime) + Wed 2 Dec 87 09:18:44 PST (RFC822 format) + 10-Nov-52 12:34-CDT + 2:25 AM, December 25, 2025 + Tues-PDT,(1988)8PM..AugUS 8 Tu (free-format example) + + Note that any USA timezone abbreviation is allowed, plus GMT, UT, + and whatever else has been added to the keyword table. + +DIAGNOSTICS + To summarize all possible return situations: + +Value tmx_err **endptr Meaning +< 0 set start of bad token Parse stopped due to clashing specs + or ambiguous keyword. +>= 0 NULL '\0' String was completely parsed. +>= 0 set start of invalid token String was not completely parsed. + (if isalpha(**endptr) is + true, this is an unknown + keyword) + +Return-value: + The exact quantity expressed by a positive return +value is not useful for most purposes; it represents the number of +valid numerical or alpha (keyword) tokens that the scan parsed. This +often but not always corresponds to the number of entries in "tmx" +that were set. A zero return value always indicates that the scan +halted before finding any such valid tokens, and nothing in the "tmx" +structure was set. + +tmx_err: + The "tmx_err" member of "tmx" will be NULL if the parse scanned +the entire string, otherwise it will be set. Note that the setting of +this variable, as well as the sign of the return value, does not +necessarily reflect either success or failure; it is up to the +application program to interpret the results. 
+ +endptr: + The "endptr" return value provides another way to see what +terminated the scan, since it points to the first character of +whatever token stopped it. This will be '\0' at the end of the +string, and if **endptr is an alpha character then the token was an +unidentified or ambiguous keyword. + +tmx: + NOTE!! Although a "tm" structure is included in the "tmx" +structure, the values it contains are not completely consistent with +the way "tm" structures are used by the C library functions. There +are two differences: + + (1) If a structure member was not specified by the date/time string, + it is given the value TMX_NULL, defined in , which matches + no valid value for any member. In particular the "tm.tm_yday" + member will always be set to TMX_NULL since there is no standard + convention for specifying it in a text string. + + (2) The "tm.tm_year" member is set to whatever year value the + string specifies. This may be 87, or 1987, or any other number. + This conflicts with the C library interpretation where 0 is the + year 1900. + + In fact, time_parse() can be used to merely initialize a +"tmx" structure, by invoking it like this: + time_parse((char *)0, &tmx, (char *)0); + The time_make() function can be used to completely +canonicalize the structure so that the "tm" substructure is then +acceptable to the standard C library functions. + + It is important to remember that time_parse() does not, in +general, attempt to verify the correctness of the resulting "tmx" +structure. Cross-checking is limited primarily to ensuring that duplicate +specifications are identical, e.g. if the weekday is seen twice, it +must have the same value or the parse fails. time_make() will perform +the necessary content checking to derive a valid "time_t" time. 
+ +SEE ALSO + time_make(3X), time_lzone(3X), ctime(3), time(2) + +AUTHOR + Ken Harrenstien, SRI International + 415/859-6552 + +NAME + time_make - derive a "time_t" time value from a parsed time + +SYNOPSIS + #include + + time_t time_make(struct tmx *tmx); + + +DESCRIPTION + + "time_make" can be considered the inverse of "localtime"; it +takes a broken-down time description and returns a UNIX-format time +value of type "time_t". Note that it works only on "tmx" structures, +not "tm" structures. + + It is possible to have unspecified (TMX_NULL) values in the +structure, which time_make() will attempt to default reasonably: + + Member Default value if unspecified + tm.tm_year Current year. + tm.tm_mon 0 (Jan) if year specified, else current month. + tm.tm_mday 1 if month specified, else current mday. + tm.tm_hour 0 + tm.tm_min 0 + tm.tm_sec 0 + tmz.tm_secwest tmz.tm_minwest * 60 + tmz.tm_minwest local timezone + tm.tm_isdst 0 if timezone not local. If local, + applies local DST algorithm. + + For example, "July 10" will default the year to its current +value (e.g. 1987), and the time to 00:00:00. Just giving "1980" will +produce "Jan 1, 1980 00:00:00". + The timezone, which is specified by tmz.tm_secwest, defaults +to tmz.tm_minwest*60 which in turn defaults to the local timezone. A +value of 1 for either variable is interpreted as an explicit request to +use the local timezone. + If tm.tm_isdst is set to 0, daylight savings time (DST) is never +applied. If set to 1 (to represent USA DST) then DST is always applied +(by subtracting one hour). If not specified, no DST is applied unless +the timezone is that of the local timezone, in which case DST is applied +if appropriate (determined by consulting localtime()). + + The "tm.tm_yday" (day of year) entry is not used to compute +the time unless neither the month nor day are specified. The +"tm.tm_wday" (day of week) entry is never used. 
However, specifying +either of these values will cause them to be checked, and time_make() +will fail if the values conflict with the other date specifications. + +DIAGNOSTICS + -1 is returned on failure, which is usually due to a parameter +being out of range. A failure return will always set "tmx_err" in the +tmx structure to point to a short constant error message. This variable +is cleared to NULL for a successful return. + +SEE ALSO + time_parse(3X), time_lzone(3X), ctime(3), time(2) + +AUTHOR + Ken Harrenstien, SRI International + 415/859-6552 + +NAME + time_lzone - get local timezone information + time_tzset - set local timezone information from system + +SYNOPSIS + #include + + int time_lzone(struct tmz *tmz); + + int time_tzset(void); + + struct tmz { + long tmz_secwest; /* Secs west of GMT (-12/+12 hrs) */ + int tmz_minwest; /* Same, in mins. (tmz_secwest/60) */ + int tmz_dsttype; /* DST type if any (0 = none, 1 = USA) */ + char *tmz_name; /* Standard timezone name */ + char *tmz_dname; /* DST timezone name (if any) */ + }; + +DESCRIPTION + + "time_lzone()" fills out the structure pointed to. It +does this by copying the contents of the static area indicated by +time_tzset(), which it calls if necessary. + + "time_tzset()" attempts to get the local timezone information +from the system or environment (this is system dependent) and uses +this to set (or re-initialize) its static TMZ structure. If +successful, a pointer to this structure is returned; if it could not +be initialized, NULL is returned. It is not necessary to invoke this +function explicitly unless the program has some reason to believe that +the timezone may have changed during execution. + +SEE ALSO + time_make(3X), time_parse(3X), ctime(3), time(2) + +BUGS + Time zones are a big mess, especially on Unix where there are + several incompatible (and unportable) methods of tracking them. + These functions attempt to standardize the information more sensibly. 
+ +AUTHOR + Ken Harrenstien, SRI International + 415/859-6552 + diff --git a/doc/kcc/libtrm.doc b/doc/kcc/libtrm.doc new file mode 100755 index 000000000..ab48f2479 --- /dev/null +++ b/doc/kcc/libtrm.doc @@ -0,0 +1,24 @@ +libtrm is the library for fancy terminal stuff. currently, libtrm +only contains trmcap, the package for slurping in termcap entries, +returning requested portions, creating cursor movement strings, and so +forth. for full trmcap documentation see termcap(3X); for termcap +entry documentation see termcap(5); + +trmcap contains these functions: + + int tgetent(bp, name); get entry for terminal name into buf bp + int tgetnum(cap); get the numeric value of capability cap + int tgetflag(cap); returns 1 if the given cap is present, else 0 + char *tgetstr(cap, area); return string value for capability cap + char *tgoto(cm, h, v); return cursor addressing string to get to h,v + void tputs(string, nlines, outfun); + output string with padding. see man page. + char *tparam(string, outstring, len, arg0, arg1, arg2, arg3); + expand termcap entry into outstring + +hideous global constants which trmcap defines: + + short ospeed; output speed of the terminal + char PC; character to use for padding to terminal? + char *BC; pointer to "bc" capability in termcap entry + char *UP; pointer to "up" capability in termcap entry diff --git a/doc/kcc/news.txt b/doc/kcc/news.txt new file mode 100755 index 000000000..5ce44fcd5 --- /dev/null +++ b/doc/kcc/news.txt @@ -0,0 +1,205 @@ +03/17/87 KCC 560, LIBC 124: <2,,1> Binary KCC update + + A copy of CC.EXE.560 is included which fixes a number of +annoying bugs that users encountered with KCC 557. The sources are +still those for 557, however. The library is unchanged. + +03/06/87 KCC 557, LIBC 124: <2,,1> Third formal distribution snapshot + + IMPORTANT: this version of KCC is incompatible with previous +versions! 
The way that structures are returned from functions has +changed, and the layout of "char" and "short" objects in structures has +also changed. In order to enforce this, the symbol $$CVER has been +updated, and any attempt to load .REL modules which have been produced +by incompatible versions of KCC will cause LINK to complain with an +error message similar to this: + + %LNKMDS Multiply-defined global symbol $$CVER + Detected in module PRINTF from file C:LIBC.REL + Defined value = 1000001, this value = 2000001 + +This is easily remedied by re-compiling old modules. Fortunately, no +further incompatible changes are expected to be necessary. + + Nothing has really changed from the user's viewpoint. However, +there are several new features available, and some inefficiencies +corrected. The noteworthy changes are listed below, very briefly; +as usual, CC.DOC should be consulted for more complete and informative +details. + +KCC: --------------------------------------------------------------- + +KCC - Bug fixes: + A multitude of minor bug fixes too trivial to mention, almost +all having to do with incorrectly optimized code. One that wasn't +trivial was that {char c, *cp = &c;} used to produce an (int *)! + +KCC - Incompatible changes: + * "shorts" are now 18 bits long (halfwords), with sizeof(short) == 2. + * The mechanism for returning structure values from functions +is different. This is an internal change, invisible to the user, which +is much more efficient than the previous method. + * Structure members of type "char" and "short" are now packed +differently (more compactly). Any structure using these types will be +laid out differently in storage. + * Integer narrowing and widening is now done properly in all +situations. This may cause incorrectly written code to behave +differently. + * Implicit arithmetic conversions now follow the ANSI +value-preserving rules rather than the old K&R and H&S +unsigned-preserving rules. Ambiguous code may behave differently. 
+ * "float" values are no longer automatically converted to "double", +except for function arguments. This conforms to the ANSI draft. + * The "signed" keyword (introduced by ANSI) has been implemented. + * "volatile" and "const" (also new from ANSI) are now reserved +words (but unimplemented). + +KCC - Extension: New data types: + 5 new data types have been introduced, which act like "char" +but with different byte sizes. You can now manipulate signed or +unsigned bytes of 6, 7, 8, 9, or 18 bits. This is non-portable and +intended strictly for PDP-10 machine-dependent code where efficiency +is desirable. + +KCC - Efficiency improvements: + The change to the structure handling mechanism falls in this +category. Structure copies used to always take two subroutine calls +and two copies; they now use a single in-line BLT (or a series of +single-word moves, whichever is best), and are much faster than +element-by-element copying. + KCC's constant initialization code has been improved to the point +where almost all constants are now initialized at load time rather than +at run time; a similar mechanism eliminates the code that used to generate +string constant pointers. You will see a significant difference with code +that uses many string literals; both startup time and program size are +reduced. + KCC's pointer arithmetic for byte pointers is MUCH better. +Pointer comparison and subtraction formerly used subroutine calls and +many, many instructions; both now use a handful of in-line +instructions and some magic numbers. + There are no more calls to internal run-time subroutines. +All of the operations which used to require this are now compiled +in-line, including double-int and int-double conversion, pointer +operations, and structure copying. + +KCC - unsigned and signed data: + KCC now fully supports "unsigned int" operations. Some code +that uses unsigned integers will now compile differently. Division in +particular needs many more instructions. 
Any integer type, "char" in +particular, may be declared as "signed" and will behave accordingly. + +KCC - Switch changes: + -L= Passes in the command string to the linking loader. + -v= (Verbosity) has been expanded; see CC.DOC. -v alone prints out + everything, including the loader command string. + -l Libraries loaded with the -l switch are now loaded in /SEARCH + mode (they evidently weren't before). + +KCC - Miscellaneous + -d=sym now produces a *.CYM file instead of *.SYM, to avoid +conflicts with LINK output files. + -P=ansi+kcc is now the default. The effects are minor and documented +in CC.DOC. The three new ANSI keywords of "signed", "const", and "volatile" +are recognized, although only the first has any real effect. + + +LIBC: --------------------------------------------------------------- + + More minor bug fixes to the LIBC stdio routines. + + open() now attempts to track down and expand logical device +names completely (thus performing what the monitor should be doing but +isn't). Thus, open("X:subdir/filename.ext",0) will work even if X is +a search path. Previously only the first device/directory could be tried. +This permits KCC #includes to work with C: defined as a search path. + + malloc() no longer allocates pages 770-777 (non-extended) or +37770-37777 (extended), so that obsolete forms of DDT can be mapped therein. + +12/07/86 KCC 537, LIBC 93: <1,,1> Informal distribution snapshot + Various additional bug fixes. +There may be some stray files and other cruft as this was made just +so that Systems Concepts could get the latest stuff; some things haven't +been checked out. + +10/21/86 KCC 534 + Fixed a register allocation bug which got tickled by very large +floating-point expressions. +LIBC: fixed a minor scanf bug. Fixed system/vfork/exit to work properly. 
+ +09/28/86 KCC 533, LIBC 14: <1,,1> Second formal distribution snapshot + + This is called a "snapshot" to emphasize that while the sources +in this distribution should be consistent and working, the compiler and +library are still under active development to remove known quirks and +deficiencies, and have already changed. + As before, all .REL files must be recompiled; the STDIO +structures are different and there are new C runtime hooks. Most +importantly, the symbol $$CVER is now defined in every module +(currently it is <1,,1>) so as to detect any future conflicts when +loading modules that were incompatibly compiled. + Various change notes follow. For all of them, see the CC.DOC +file for more details. + +KCC: --------------------------------------------------------------- + +KCC - Command line + There are several new switches, and the way KCC interprets +filenames is slightly different. A file with a .REL extension is +always ignored, but is passed on to the loader. A file without any +extension is special if the -q switch is given; it is only compiled if +the .C source is more recent than the .REL binary. + New switches: + -A Specify nonstandard assembler header file (old meaning of -H) + -H Specify nonstandard path for #include <> files. + -i Loader: load program to run with extended addressing. + -L Loader: nonstandard path for library files. + -l Loader: search specified library + -o Loader: specify .EXE filename. + -P Set portability level. + -q Compile extension-less files conditionally. + +KCC - General + There are no real changes in code generation. A couple of +over-optimization bugs were fixed, and a couple of other optimizations +added. + KCC now generates its own assembler header based on the target +CPU, assembler, and system; the file C-HDR.FAI no longer exists. + Two more KCC extensions were added: the `ident` quoting syntax, +and the asm() in-line code mechanism. #asm and #endasm must now appear +only within a function body. 
+ KCC identifiers are now unique up to 31 characters, as per the +ANSI draft (external symbols are still only unique up to 6). + The runtime variable $EXADF no longer exists. The decision on +whether to run extended is now made at load time, either with the -i switch +or by loading C:LIBCKX as the first module. + +LIBC: -------------------------------------------------------------- + +LIBC - CHAR + ispunct() was "fixed" to exclude space. CARM claims space is +included, but neither ANSI nor BSD does so. We assume this is a mistake +in CARM. + +LIBC - STRING + The routines memchr, memcmp, memcpy, memset were added from ANSI. + +LIBC - STORAGE + free(), malloc(), and realloc() now behave as per ANSI when given +NULL or zero as arguments. + +LIBC - STDIO + The "update" mode is now supported for streams. In addition +to this, the library implements the ANSI concept of text vs binary +streams. The 'R' specification was flushed; 'b', '7', '8', '9', 'C', +and "C-" were added. + +LIBC - other + system() was added. + +07/26/86 KCC 512, CLIB 225: First formal distribution version. + + If you already had a version of KCC on your system, you will need +to recompile any .REL modules generated by the old version, because the +new KCC uses a different STDIO package. .EXE files do not need to be +recompiled. diff --git a/doc/kcc/port.doc b/doc/kcc/port.doc new file mode 100755 index 000000000..684eacd9b --- /dev/null +++ b/doc/kcc/port.doc @@ -0,0 +1,302 @@ + INSTALLING AND PORTING KCC + + This file tries to document various things about porting KCC to other +systems; including how to bring up new versions on systems which already have +KCC. + + A "port" involves two major things: the KCC compiler itself, and +the C library (LIBC). They are interdependent, and since the compiler is used +to compile itself, things have to be done carefully. + + KCC supports all CPUs: KA, KI, KS, KL, KX. System is irrelevant. + LIBC supports only TOPS-20. 
There is partial (but not complete) + support for TOPS-10, WAITS, and ITS. + +The usual situations are these: + +(1) Installing KCC and the library for the first time. +(2) Installing a new version of the C library or KCC. +(3) Using KCC as a cross-compiler for another CPU or system. +(4) Porting KCC and the library to a different CPU or system. + +Other references: + For (1) see the distribution INSTAL.DOC file. + The KCC USER.DOC section about cross-compiling. + The C library file LIBC.DOC. + +Building and installing a compatible new version of KCC/LIBC for your current +system: + + If the new version has exactly the same site-dependent parameters +(C-ENV.H and CCSITE.H) then you can simply install the new binary, without +bothering to recompile from the sources. + + If your site-dependent parameters are different from that of the +distributed KCC then you will need to recompile from the sources. To do +this, + 1. Make sure that the C-ENV.H in the KCC source directory is correct + (or, if it doesn't exist, that the version in the include directory + is correct). Ditto your CCSITE.H or whatever is different. + 2. On TOPS-20, simply invoke CC.MIC, as in "DO CC". Or do it by hand. + Then install the resulting NCC.EXE on your system as "CC". + + NOTE: if the new version of KCC makes some changes in its runtime +assumptions, you may need to change some other things. The most that may be +required is to follow the cross-compilation procedure as if porting to a new +system. If you know what is going on, you may be able to get by with less +than a full new installation. + +Using KCC as a cross-compiler: + + This assumes you want to set things up on your source system so +that you can compile programs into binaries which can then be moved to +a target system and executed there. This can sometimes be useful if the +target system is not really able to run KCC itself for whatever reason. 
+Also, we assume that the source system is a TOPS-20, since KCC is fully +supported for that system and the existence of logical names makes things +easier. + +[1] Set up a new directory to serve as the standard include (and library) +directory for the target system's files. For the sake of the following +discussion, call this directory NC:. + +[2] Install in NC: an appropriately edited copy of C-ENV.H. + In particular, CPU_xxx and SYS_xxx must be set properly. + +[3] Build a program called, for example, XCC which passes its arguments on + to an invoked CC with the addition of appropriate switches. + See the program TCC.C in the KCC source directory for a sample; + this program runs on TOPS-20 and compiles code for TENEX. + The following switches should be set: + -L to specify the library directory path (NC:) + -H to specify the standard include file directory path (NC:) + -x to specify the target CPU, system, and assembler + And others such as -A if necessary. + +[4] Use XCC to recompile and rebuild the entire C library. Install the + new LIBC.REL in NC:. + +[5] That's it. KCC does not need to be rebuilt, since XCC will simply + invoke KCC with the appropriate switches set. Whenever making + a program to run on the target machine, just use XCC. + +KCC/LIBC Cross-compilation (using logical names): + + These are the complete steps to follow when building a new KCC and +LIBC for another CPU or system. You may be able to get by with doing less, +if you know what is going on and what the changes are. You may have to +do more, if the new CPU or system is not fully supported. + + This procedure for cross-compiling KCC/LIBC for a different CPU/system +relies on TOPS-20 logical names, and assumes that the logical name C: is used +by the source machine's KCC to refer to both the standard include directory +and the library directory. If some other logical name is used, substitute +that for C: in the following discussion.
+ +[0] Redefine C: to be a search path pointing FIRST to a "new" standard + directory, and SECOND to KCC's standard directory (normally + this is the system-wide logical name C:). + This requires the user to: + @define C: , C: + +[1] Edit C:C-ENV.H for new configuration; write out to C: (new dir). + In particular make sure that the SYS_xxx and CPU_xxx defaults are set. + Make sure that any local copies of C-ENV.H (in the KCC or LIBC + source directories) are identical. + The same applies to any other .H files that need to be changed; new + versions should be put in C: (new dir) and local copies made identical + or flushed. + +[2] Connect to the LIBC source directory. + Delete all .REL files (@DEL *.REL,<.*>*.REL) + +[3] Rebuild entire library with the LIBC.MIC file (@DO LIBC). + Edit LIBC.MIC beforehand to add any -x= switches that are necessary, + for example -x=ki, -x=klx. These switches should match with + the definitions in C-ENV.H!!! + +[4] Install new LIBC.REL into C: (@COPY LIBC.REL.0 C:*.*.*) + +[5] Also install the new LIBCKX.REL file into C:. This is necessary in + order to allow loading of extended-addressing programs. If your + machine does not support extended addressing, of course, you can + ignore this file. + + --- At this point the new library is ready! --- + +[6] Build a new KCC: + Connect to the KCC source directory. + Make sure that the site-dependent defaults in CCSITE.H are okay. + Other parameters can be changed in CCPARM.H if necessary. + Make sure that the local copy of C-ENV.H is either flushed or + is the same as the version in the new C:. + +[7] Delete all the old .REL files (@DEL *.REL). +[8] Compile and load all the C modules with the CC.MIC file (@DO CC). + Edit CC.MIC beforehand to add any -x= switches that are necessary, + for example -x=ki, -x=klx. + This will automatically load up some library routines from the new + LIBC. You now have a binary KCC for the target system. 
+ +[9] To install on your own system: + you can replace the old C: by new C: in either of two ways: + A. (quick) Copy all new C: files over to old C: + Then undefine your job-wide C:. + B. (cautious) Copy all remaining files from old C: to new C:. + Test it out. If no problems, then revise the system-wide + logical name. If problems, swap the names back. + +[10] To install on another system: + On target system, set up the include directory. + Copy all files from new C: to target (.H files plus LIBC.REL), + then all remaining files from old C: to target. + Copy the new CC binary to wherever the target's system progs live. + If desired, copy the LIBC and KCC sources too. + +Cross-compilation without logical names: + + This is a little harder because the source and target directories have +to be more explicitly specified. In the following discussion, the strings OLD +and NEW are assumed to represent the paths for the old (existing) standard +directory, and the new directory where the new .H files and library will be +written and stored. + +[1] Edit C-ENV.H for new configuration; write out to NEW. + In particular make sure that the SYS_xxx and CPU_xxx defaults are set. + Make sure that any local copies of C-ENV.H (in the KCC or LIBC + source directories) are identical. + The same applies to any other .H files that need to be changed; new + versions should be put in NEW (new dir) and local copies made identical + or flushed. + +[2] Connect to the LIBC source directory. + Delete all .REL files. + +[3] Rebuild entire library. The LIBC.MIC file shows which files need + to be compiled and loaded into a library. + All compilations should be of this form: + CC -c -LNEWLIB -INEW -IOLD -x=ki module.C + Where: + "-x=ki" illustrates how a specific CPU must be selected. + "module" is the library module being compiled. + "NEWLIB" is the path for library modules in NEW, e.g.
-LC:LIB=.REL + The need to specify this is an unfortunate artifact of + the way library load requests are inserted into the code. + +[4] Build all the .REL files together into a library; the first two + files must be CPU.REL and CRT.REL. Move this file into the NEW + directory. + + --- Now start building the new KCC --- + +[5] Build a new KCC: + Connect to the KCC source directory. + Make sure that the site-dependent defaults in CCSITE.H are okay. + In particular make sure that LIBPREFIX and LIBPOSTFIX match + what was given in the -LNEWLIB switch while compiling LIBC. + Other parameters can be changed in CCPARM.H if necessary. + Make sure that the local copy of C-ENV.H is either flushed or + is the same as the version in NEW. + +[6] Delete all the old .REL files. +[7] Compile and load all the C modules specified by the CC.MIC file. + Use the same form as for compiling the LIBC modules, including + the -L, -I, -x, and -A switches. + You will then have a binary KCC suitable for the target system. + +[8] To install on your own system: + you can replace OLD by NEW in either of two ways: + A. (quick) Copy all NEW files over to OLD. Install the KCC binary. + B. (cautious) Copy all remaining files from OLD to NEW. + Test it out. If OK, install the KCC binary. + +[9] To install on another system: + On target system, set up the include directory. + Copy all files from NEW to target (.H files plus LIBC.REL), + then all remaining files from OLD to target. + Copy the new CC binary to wherever the target's system progs live. + If desired, copy the LIBC and KCC sources too. + +Thoughts on future port to ITS: + +Main problem is that the only linking loader available on ITS is +STINK, which only understands STINK format .REL files. There may be +an old version of the DEC linker available, but this is non-supported +and painful to use. Finally, MIDAS can produce either STINK or DECREL format; +the ITS version of FAIL can do the same.
This gives us only three +possibilities, in increasing order of generality: + (1) Use ITS FAIL, producing STINK format. + (2) Make KCC produce MIDAS output. Use MIDAS, with STINK format. + (3) Make STINK understand DECREL format. Use MIDAS or FAIL. + +Note that if KCC is ever improved to bypass the assembler phase by +outputting .REL files directly, it will either have to also know about +STINK format, or STINK will have to know about DECREL format. The latter +is most general. + +Actual porting: + +Approach 1: build a KCC elsewhere, dump in old DECSAV format. FTP over + and convert to SBLK executable format. +Approach 2: generate complete set of .FAI files and then FTP them. + Run ITS FAIL to generate STINK format rels, + then STINK them together to produce KCC and CLIB. +Approach 3: generate complete set of .MID files and FTP them. + Run MIDAS and then STINK the results together. +Approach 4: generate complete set of .REL files and FTP them. + Modify STINK to understand DECREL format, load files with STINK. + +Fixing KCC to produce valid MIDAS output will require: + (1) Modifying KCC to produce common assembler subset. Can do. + (2) Modifying the syntax of all assembler runtimes. This is + the real problem. Only hope for dealing with this reasonably + is to change everything to use new #ASM feature; then KCC + can conditionalize the syntax depending on system. 
+ +Library routines required by KCC: + +(this list can be generated by loading all .REL modules and then +giving the /UNDEF switch to LINK) + + $$$CPU 0 + $$$CRT 0 ; * C Runtime stuff, in CRT.C + $BPCNT 413166 + $BYTE 517604 + $DFIX 437543 + $NSPOP 472623 + $NSPUS 472615 + $START 403165 + $SUBBP 413303 + $ZERO 463273 + + Sys-dependent routines + .CPUTM 403072 ; * All systems supported + EXIT 441516 ; * + GETPID 517376 ; * CCASMB for temp files + JSYS 517315 ; T20 - CCASMB for PRARG% + PFORK 517675 ; T20,10X - CCASMB for asm/link invoke +****> UNLINK 517225 ; T20,10X,ITS - CC to flush asm files + ; Needs LOOKUP+RENAME for T10/WAITS + STDIO ; * STDIO stuff. Should not be sys-dep + .SIOS 0 + FCLOSE 517443 + FFLUSH 517666 + FGETC 517212 + FOPEN 517417 + FPRINT 516663 + FPUTC 515464 + FPUTS 517440 + FWRITE 516627 + SPRINT 517407 + UNGETC 412045 + Stg alloc ; * Depends on S/BRK, all sys supported. + FREE 442156 + MALLOC 517154 + REALLO 517235 + STRING ; * (no sys-dep stuff) + STRCAT 414767 + STRCMP 417712 + STRCPY 517371 + STRLEN 517335 + Misc library + QSORT 457226 ; * (no sys-dep stuff) diff --git a/doc/kcc/signal.doc b/doc/kcc/signal.doc new file mode 100755 index 000000000..9f91458ba --- /dev/null +++ b/doc/kcc/signal.doc @@ -0,0 +1,240 @@ + KCC UN*X SIGNAL SIMULATION + + This file provides some user-oriented information on how to +use the KCC simulation of the Un*x signal mechanism. + + Unfortunately, there is no single consistent signal mechanism +used by all UN*X-type systems. The currently known schemes fall into +three basic categories: + + (1) Traditional: V7, SYS V, ANSI. 
[signal()] + (2) Better: 4.2 BSD / SUN [signal(), sigvec(), sig*()] + KCC=> (3) Best: 4.3 BSD [signal(), sigvec(), sig*()] + +Attributes: + Handler reset Signal mask Calls restarted + (1) yes no no + (2) no yes no + KCC=> (3) no yes yes + +Handler Reset: + In traditional Un*x implementations, the call of a signal handler +automatically resets that signal's handler to SIG_DFL (default handling, +normally termination). The handler has to invoke signal() again if +it wishes to catch additional signals. 4.3BSD and KCC do not do this +reset. + +Signal mask: + BSD introduced a signal mask which allows signals to be kept +pending until the mask no longer blocks them from being handled. +Whenever a handler is called, the corresponding bit in its mask (at +least) is always set; thus there is no need to reset the handler to +SIG_DFL. This is much more robust as there is no gap during which +quickly repeated asynchronous signals can mistakenly kill a process. + BSD added a number of new calls to handle this mask. They are +sigvec (general-purpose replacement for signal), sigsetmask, sigblock, and +sigpause. KCC implements all of these. + +Call restart: + In both (1) and (2) a signal during certain system calls would +cause those calls to return -1 with errno set to EINTR. In 4.3BSD this +was changed so that normally such calls are restarted automatically when +a signal handler returns. A flag bit with sigvec() allows the old action +of EINTR to still be taken. A new call, sigreturn(), was added to permit +true context restoration. KCC implements this too. + +More on system call interruption: + On Un*x signals may only interrupt the following calls: + read(), write() - slow devices only (never DSK:) + wait() + ioctl() on a slow device (esp. TTY:) + any call that locks an inode - open(), creat()? + Calls which are not restarted by 4.3BSD if interrupted: + pause(), sigpause() + + KCC permits only the above USYS calls to be interrupted.
For all +but pause() and sigpause(), the calls will be restarted automatically +unless specifically requested by the SV_INTERRUPT bit in a sigvec call. + +Changing the signal mechanism: + To accommodate cases where it is difficult to change the user +code (e.g. during initial stages of porting some software), the signal +mechanism can be changed from 4.3BSD to 4.2BSD or V7/SYSV by including +the following code in the module where "main" is defined: + + #define _URTSUD_DEFAULT_SIGS _URTSUD_xxx_SIGS + #include + +where "xxx" is one of SYSV, BSD42, or BSD43. + +For additional information more detailed than that provided in this +file, consult the files CODSIG.DOC and SIGVEC.C in the source directory. + +KCC-supported signals: + + /--------------- (A)synchronous or (S)ynchronous. + | /------------ (P)anic channel or not. + | | /--- Only seen for (J)SYS or (U)ser; * = both. + Code: [AS][P-][O-][JU*] + \------- O means if interrupt PC is user-mode, the + PC is that of the offending instruction and + not the next one (as for all other cases). + +Signal PSI Code + +SIGINT x A--* TTY Interrupt (interactive) +SIGQUIT x A--* TTY Quit (interactive) +SIGALRM x A--* Alarm Clock (TIMER%) + +SIGCHLD .ICIFT A--* Inferior fork termination +SIGFPE .ICFOV S--U Floating Point overflow +SIGFPE .ICAOV S--U Arithmetic overflow +SIGSEGV .ICPOV SP-U PDL overflow +SIGILL .ICILI SP-* Illegal Instruction +SIGBUS .ICIRD SPO* Illegal memory read +SIGBUS .ICIWR SPO* Illegal memory write +SIGPIPE .ICDAE SPO* Device or data error +SIGXFSZ .ICQTA SPO* Quota exceeded +SIGXFSZ .ICMSE SPO* Machine resources exhausted + +SIGT20EOF .ICEOF S--J EOF condition on input +SIGT20NXP .ICNXP S-O* Ref to non-ex page + + If a panic signal occurs during execution of a USYS call then +the program will be halted with an error message, even if a handler is +defined for that signal. It is possible to ignore panic signals with +SIG_IGN although this is unwise.
+ The default action (SIG_DFL) for a particular signal varies. +As long as a signal is not set to anything, its action remains +whatever the TOPS-20 system action is; panic signals will cause the +process to be halted, and all other signals are ignored. If a signal is +explicitly set to SIG_DFL then its default action will become whatever +the Un*x default action is. See the include file for a listing +of all signals with their default actions. + A "core" file is never made, since this is unnecessary and unhelpful +on TOPS-20/TENEX. + +Use of SIGINT and SIGQUIT: + + At startup there are no interrupt characters. That is, +t_intrc and t_quitc of the "tchars" ioctl structure are both -1. +Simply setting these characters does not cause either to generate +signals unless the signal handler has been explicitly set to something +by signal(). If explicitly set to SIG_DFL then the signal will +terminate the process, since this is the default Un*x action. + + These interrupt characters can only be some form of control character, +including DEL. + +Signal Handlers: + + When a signal handler is invoked, it is called with the following +arguments: + void sighandler(sig, chn, scp) + int sig; /* Signal # */ + int chn; /* PSI channel # (T20/10X) */ + struct sigcontext *scp; /* Pointer to context */ + +Since more than one PSI channel is mapped into a single signal, the +"chn" variable allows the handler to distinguish between them if +necessary. The signal context structure is defined in and +contains the complete context of the signal, including the interrupt +PC and flags, saved ACs, and old signal mask. Code which references +this structure is not portable to machines other than the PDP-10, but +at this level things are non-portable anyway. + + Returning from the handler will resume the process where it was +interrupted. longjmp() to some other location will work as long as the +signal handlers are not nested. 
+ +WARNING: + Any time you write a signal handler routine, you must be aware +of what you might be interrupting and how the handler actions may +affect the rest of the program. There are too many subtleties to go +into more than a few of the important aspects here. For example, +while it should be okay to use USYS calls within the handler (the +interrupt system ensures that these are treated as atomic), it is +almost NEVER okay to invoke any library routine which alters static +data, such as "ctime". In particular, none of the storage allocation +facilities such as "malloc" should be called, because the program might +have been interrupted out of a call to malloc, and the data structures +will be in an inconsistent state. It is also risky to use any of +the standard I/O library routines, for similar reasons. + +WARNING: + The signal code goes to a great deal of trouble to ensure that +if a user program JSYS is interrupted, it can be continued on return +from the handler. But the TOPS-20/TENEX PSI scheme is so complex and +messed up that it is impossible to guarantee that this will always +work. To be COMPLETELY safe, you can use the jsys() facility, which will +never permit its JSYS to be interrupted unless the JSYS_OKINT flag is +set, and even then will provide you with a definite indication that +a signal was handled. + + +Additional notes: + +What "a/synchronous" means: + + A SYNCHRONOUS interrupt is one that happens at a specific PC +due to the instruction at that location. Typical examples: illegal +instruction interrupts (which can include JSYS calls), floating-point +exceptions, and illegal memory references. For these types of +interrupts the PC is significant and it or the contents it points to +may need to be checked to determine what to do, because simply +continuing the process at that PC will very likely just generate +another such interrupt. 
+ + An ASYNCHRONOUS interrupt is one that may happen at ANY time, +regardless of place; these are generated by events external to the +program. Typical examples: TTY input interrupts, timer interrupts. +For these, the PC is unimportant except that it should be preserved +and restored if the interrupt handler wishes to continue whatever was +interrupted. + + No UN*X C signal handler has the capability of returning from +handling a synchronous interrupt. In fact there is no mechanism +provided for a signal handler to find out what its return PC is. +(it's possible, with trickery, but I've never seen an example). +4.3BSD (as opposed to 4.2 or any other Un*x) now makes this possible +by providing the handler with a pointer to a saved-context structure! + + Note that some signal handlers return to normal code by +means of longjmp(); this is particularly true for alarm() handlers. +ANSI specifies that longjmp should restore the environment properly +even from within a signal handler, but is not required to do anything +meaningful if called from a nested signal handler. KCC supports this +use of longjmp(). + +Extensions to sigvec(): + + For additional flexibility, the "sigvec" structure has been +extended to include additional parameters. Some new flags in sv_flags +are used to indicate when the additional structure members are +significant. + +The things that can be specified, independently of each other: + SV_XINTLEV: ON If handler should run at interrupt level. + SV_XASMHAN: ON If handler is special assembler routine (ACs not saved, + no args given). Otherwise, normal C handler. + SV_XOS: ON If the sigvec structure should be checked for: + (1) Exact PSI channel # to use for this signal (0 = existing). + (2) What PSI level to use (0 = existing). + (3) .TICxx code (plus 1) to ATI% to this channel (0 = none). + +Not all of the flags work yet: + + Currently only SV_XINTLEV is implemented. It works to use +longjmp() within handlers called with this flag! 
+ + SV_XASMHAN is not yet used. If added, it will be an error to +specify SV_XASMHAN without SV_XINTLEV; that is, if the handler is an +assembly routine, then it MUST run at interrupt level. + + SV_XOS is not yet used. If added, specification of a positive +.TIC code will always replace any existing code by the new one, and +use of -1 will always clear any existing code. If the value is 0, +however, then the meaning depends on whether a channel # was +specified. If the channel # was given, 0 is the same as -1. +Otherwise, if no channel # was given, then 0 means leave any existing +code alone. diff --git a/doc/kcc/usys.doc b/doc/kcc/usys.doc new file mode 100755 index 000000000..a7b98d41f --- /dev/null +++ b/doc/kcc/usys.doc @@ -0,0 +1,450 @@ + USYS.DOC - KCC Un*x System-call simulation + + This file documents various things about the USYS library routines, +which are intended to provide simulation and support for Un*x system +call functions. + + Specifications for the interfaces were taken from the March +1984 4.2BSD Unix Programmer's Manual (UPM), plus the 4.3BSD man pages, +and all code here works as described in those references unless +otherwise documented. + + Implementors should also read the CODSYS.DOC file in the +source directory. + +Contents: + Intro - listing format + Summary of USYS routines + Definitions + Individual Routine Descriptions (as needed) + +Library function listing format: + +Name Module Port-status Comments +(routine name) (source file) (see below) + + Port-status code: + E = file #includes "c-env.h" for environment config. + - runs on the given sys, one of: T20,10X,T10,WAITS,ITS. + *10 = portable to all PDP-10 OS (T20,10X,T10,WAITS,ITS) + * = fully portable (either no OS-dependent stuff, or a + fully-portable conditional exists) + + Comments: + "U" means the call is USYS_macro enclosed and cannot be + interrupted by signals. + "-" means it isn't enclosed + (this better be cuz interrupts don't affect the call!) 
+ "I" means it is interruptible (i.e. can return EINTR error). + "IC" means interrupts may be continued within the call. + +USYS function summary: Src: lib/usys/ + +Name Module Port-status Comments + +access ACCESS T20,10X U 10X only partial. +alarm ALARM T20 U +brk SBRK *10 U +chdir CHDIR T20,10X U +chmod CHMOD T20,10X U +chown CHOWN T20,10X U +close CLOSE T20,10X,ITS U +creat OPEN T20,10X U +dup DUP *10 U +dup2 DUP *10 U +errno (data) URT *10 - +exec[lv][ep] FORK T20,10X U +exit EXIT *10 - +fchmod CHMOD T20,10X U +fchown CHOWN T20,10X U +fcntl FCNTL *10 U +fork FORKEX T20,10X U +forkexec FORKEX T20,10X U KCC-specific routine. +fstat STAT T20,10X U (also: xfstat) +ftime TIME *10 - +geteuid GETUID T20,10X U +getpid GETPID *10 U see format note. +gettimeofday TIME *10 - +getuid GETUID T20,10X U +gtty SGTTY T20 U +ioctl IOCTL T20 UIC Partial. +kill SIGVEC T20,10X U +lseek LSEEK T20,10X U +open OPEN T20,10X U (Uses BSD flags; mode not supported) +pause PAUSE *10 -I Always returns with EINTR +pipe PIPE T20 U (monitor must have PIP: device) +psignal PSIGNA *10 - +raise SIGVEC T20,10X U (ANSI function, not syscall) +read READ T20,10X U +rename RENAME T20,10X U +sbrk SBRK *10 U +sigblock SIGVEC T20,10X U +signal SIGNAL T20,10X U +sigpause SIGVEC T20,10X UI Always returns with EINTR +sigreturn SIGVEC T20,10X U +sigsetmask SIGVEC T20,10X U +sigstack SIGVEC T20,10X U +sigvec SIGVEC T20,10X U +sleep SLEEP *10 -I (returns no value) +stat STAT T20,10X U (also: xstat) +stty SGTTY T20 U +tell LSEEK T20,10X U +time TIME *10 - (also: tadl_xxx routines) +times TIMES *10 - +unlink UNLINK T20,10X,ITS U (10X doesn't expunge) +utime UTIME T20,10X U +vfork FORK T20,10X U +wait WAIT T20,10X UIC +write WRITE T20,10X,ITS UIC +_exit EXIT *10 U +_runtm URT *10 (internal) C programs start here. +_urtsud (data) URTSUD *10 (internal) Runtime startup defs. 
+ +DEFINITIONS: + + The UPM introduction contains some definitions which provide +a convenient way to start describing how the KCC simulations differ from +Un*x; some concepts are supported and others are not. + +Process ID (PID): Supported, see long description. +Parent Process ID: Supported, see long description. + +Process Group ID: not implemented +TTY Group ID: not implemented + +Real User ID: Supported. + The user ID on T20/10X is the "user number". + +Real Group ID: not implemented +Effective UID, GID, and Access Groups: not implemented +Super-user: not implemented +Special Processes: not implemented + +File Descriptor (FD): Supported. + Small non-negative integers in the range 0 to 63 inclusive. + FDs 0, 1, and 2 correspond to standard input, output, and error output. + On T20/10X these are initially set to the JFNs .PRIIN, .PRIOU, .CTTRM. + You can obtain the JFN for a FD by using the fcntl() call. + +File Name: Supported. + On T20/10X, up to 39 chars per component. Must be 7-bit ASCII. + Can quote with ^V. + +Path Name: Supported. + Un*x style / paths are permitted, where foo/ is taken to mean foo + is a subdirectory of the current directory. + + If the monitor worked right, a filespec of the form "C:foo/bar.h" could + be turned into "C:<.foo>bar.h", assuming standard-type relative + directory fixes, and we would win. But no. Instead, we have to do + the work manually: the logical device is recursively expanded until + an end device-directory pair is found, at which point the file + lookup is tried. If it fails, the expansion/traversal continues. + +Directory: Supported. + <.> and <..> work on some T20 systems (Stanford mods). + +Root Directory and Current Working Directory: + A directory of the form "/foo" is taken to mean "". + The notion of a current working directory is supported. + +File Access Permissions: + Each set of T20/10X owner, group, and world access bits + corresponds to a set of Un*x owner, group, and other bits. 
+ Un*x T20/10X + 04 040 Read access + 02 020 Write access + 01 010 Execute access + - 04 Append access + - 02 Directory listing access + - 01 - + Thus, a call such as "chmod(foo.bar, 0644)" will set the T20/10X + protection of "foo.bar" to 604040. + There is no way a user program can either read or set the last + three T20/10X protection bits, except by doing a CHFDB% itself. + Finally, there is no T20/10X counterpart to the set-UID or set-GID + bits. + +Sockets and Address Families: not implemented (yet!) + +ERRORS: + + The global "errno" is set by all failing USYS calls. +The standard UN*X error numbers from are used where possible, +although a few T20-specific error codes have been defined. See +for details. + There is currently no easy way to find out what TOPS-20/TENEX +error (if any) caused errno to be set. The best one can do is find the +most recent error for the process with GETER%. Perhaps someday this will +be improved. + +Exceptions: + ftime, time, gettimeofday, times + getpid + _exit (never returns) + +PROCESS ID (PID): Long Description + +PID values are generated by: + (1) getpid() - self process ID. This must not change over + the lifetime of the process! + (2) fork() - to identify the child process. + (3) wait() - to identify the child process that stopped. + This should match the value returned by fork(). + +PID values are used by: + (1) kill() - to send signals to self, child, or parent. + (It is rare to send them anywhere else.) + (2) Code that checks the return value of wait(). + (3) Code that generates unique filenames, port numbers, or the like + which should not conflict with those of any other + active process. + + For ITS, TOPS-10, and WAITS the PID is simply the job #, a +small positive integer; zero and negative PIDs will never be seen, as +job 0 is the monitor itself and no system can support 2**35 jobs. 
+ + However, TENEX (and hence TOPS-20) has never had a notion of a +unique process identifier, except internally inside the monitor; this +fork ID is simply not accessible to the user. Fork handles are all +relative, in an obscene attempt to prevent programs from referencing +any process they shouldn't. This makes it impossible to implement +getpid() in a straightforward way. The subterfuge I have resorted to +is as follows: + + T20 PID = ,, + - Left half of a PID generated by MUTIL% for process + This is guaranteed by system to be unique. + - low 9 bits of relative fork handle, in 0777000. + - low 9 bits of job number, in 0777. + + getpid() remembers the value generated on first call and +returns that thereafter. This satisfies the uniqueness and constancy +criterion, as well as being efficient. + fork() and wait() convert the relative fork handles from +CFORK% and GFRKS% to a child PID with a zero LH but with the other +fields set. Since relative fork handles are from 400000 to 400777, we +only need the low 9 bits. + fork() in the new child process copies the saved getpid() +value, if any; this is its parent's PID and may be used by kill(). The +saved value is then cleared so if the child calls getpid() it will generate +its own unique value. + kill() checks its PID argument first against the saved +getpid() value to see if a signal is being sent to itself. If not it then +sees whether it matches that of its parent (if any) and sends a signal +to .FHSUP if so. Otherwise, if the LH is 0 it assumes the signal is +being sent to a child, and generates the appropriate relative fork +handle from the 9 bits in the PID value. Note: There is no good way +to identify "miscellaneous" signals generated by another process (PSIs +on the "CHNmisc" channel); only those PSIs uniquely mapped to a single +signal can be successfully sent between processes. 
+ + This scheme fails only if PIDs are somehow passed from one +process to another either via pipe, file, or vfork() shared memory, since +the result of a child's getpid() won't match what its parent's fork() +returned. But this should practically never happen. + +TENEX: + On TENEX, which doesn't have IPCF, we just use GFRKS% to +locate our fork within the job fork structure and hope the resulting +number, which we stick in the LH, doesn't change. At least TENEX +doesn't have extended addressing either so we can munch the GFRKS% +data on the stack. + +OPEN() - Some I/O details: + + The open() call has several additional flags which are intended to +help specify the proper actions on TOPS-20/TENEX systems, since the +defaults assumed by open() may not always be correct. + + Standard BSD flags: + O_RDONLY open for reading only + O_WRONLY open for writing only + O_RDWR open for reading and writing + O_NDELAY do not block on open (not supported) + O_APPEND append on each write + O_CREAT create file if it does not exist + O_TRUNC truncate size to 0 + O_EXCL error if create and file exists + + KCC-specific flags (not portable) + O_BINARY Open in binary (9-bit byte) mode + O_CONVERTED Force LF-conversion + O_UNCONVERTED Force NO LF-conversion + O_BSIZE_7 Force 7-bit bytesize + O_BSIZE_8 Force 8-bit bytesize + O_BSIZE_9 Force 9-bit bytesize + + TOPS-20 and TENEX specific flags + O_T20_WILD Allow wildcards on GTJFN% + O_T20_WROLD For writes, do NOT use GJ%FOU + O_T20_SYS_LOG logical device is system-wide! + O_T20_THAWED Open file for thawed access + + The BSD flags behave as per the UPM documentation, and the T20 +flags are fairly straightforward. The KCC flags however are more subtle; +they affect the characteristics of the I/O that will be done, rather than +how a file will be found or created. The two decisions that must be made +are: (1) Bytesize, and (2) LF-conversion. These are explained below. 
+ +BYTESIZE: + The decision of which bytesize to use for I/O is somewhat +complicated. The bytesize on UN*X is always 8 bits, but on PDP-10s it +can be anything from 0 to 36 bits. The algorithm we use is as follows: + If a byte size (one of 7, 8, or 9) is explicitly requested, use that. + Otherwise, for a new file, use 9 if O_BINARY, else 7. + for an old file, use the file's bytesize. + A size of 0 or 36 is treated as for a new file. + Any other size is simply used. If this is not + one of 7, 8, or 9 then the results are unpredictable. + +LF-CONVERSION: + UN*X text files use the convention that a LF alone is a +"newline", whereas PDP-10 systems use CRLF together. Thus, the normal +mode of I/O uses LF-conversion, wherein read() converts an input CRLF +sequence to LF, and write() converts an output LF to CRLF. The algorithm +used to determine whether LF-conversion should be done is: + Conversion is normally only done if the bytesize is 7. + Any other size implies NO conversion. + However, this default can be overridden by certain flags: + If O_CONVERTED is set, conversion is ALWAYS done. + If O_UNCONVERTED or O_BINARY is set, conversion is NEVER done. + +LSEEK() - Problems with LF-conversion + + lseek() deals only with system-level file pointers. When no +LF-conversion is being done, this corresponds exactly to the UN*X notion +of a file position, namely the # of bytes offset from the start of the file, +and it is possible to create your own file positions arithmetically. + + However, when LF-conversion is being done then this is not possible; +the position returned by lseek will correspond to the system's position, +rather than to the number of bytes fed through read() or write(). In this +case you can only lseek to a position previously returned by lseek() itself. +(Note that 0 is a special case that always works).
Typically the pointer +returned will be larger than the number of bytes read or written thus far, +since the system is aware of the CR's in the file even though the C program +isn't. + +STAT() - file status information + + This section describes the correspondence between the components +of the stat() structure (as defined for Un*x) and the TOPS-20 file system +information. + +struct stat +{ + dev_t st_dev; /* The .DVxxx device type */ + ino_t st_ino; /* .FBADR - Disk address of file index blk */ + unsigned int st_mode; /* Un*x-style mode bits */ + int st_nlink; /* 1 (always) */ + int st_uid; /* T20: User #, 10X: directory # */ + int st_gid; /* 0 (always, for now) */ + dev_t st_rdev; /* 0 (always, for now) */ + off_t st_size; /* .FBSIZ - size in bytes (any bytesize) */ + time_t st_atime; /* .FBREF - last ref (Un*x format time) */ + int st_spare1; + time_t st_mtime; /* .FBWRT - last write (Un*x format time) */ + int st_spare2; + time_t st_ctime; /* .FBCRE - last mod (Un*x format time) */ + int st_spare3; + long st_blksize; /* # bytes in a page (derived from FB%BSZ) */ + long st_blocks; /* # pages in file (FB%PGC of .FBBYV) */ + long st_spare4[2]; +}; + + +FORKEXEC() - New KCC-specific call + + This call is intended to combine the functions of fork() and +exec() so that a user program that wants to perform the very common +procedure of first calling fork() and then having the child call exec() +can now simply use forkexec() and accomplish the same thing much faster. + + The calling sequence is simply: + #include + int forkexec(fxp); + struct frkxec *fxp; + + See the include file for details on the contents of the frkxec +structure and the flags that can be provided. + + All of the exec*() functions call forkexec() with FX_NOFORK set. + +TTY Handling: + + The library supports many (though not all) of the Un*x TTY +functions. The primary means of getting information about the TTY and +setting TTY parameters is with the ioctl() call. 
All 4.3BSD TTY-related +ioctl functions are recognized, although not all are completely supported. +In particular, all requests to "get" data structures will always return +as much information as is available; attempting to "set" some elements of +these structures may or may not work, as described in the following comments. + +IOCTL function comments: + +FIONREAD - Get # bytes to read on FD. Supported. + +TIOCGETP - Get sgttyb parameters, same as V6/V7 gtty(). Supported. +TIOCSETP - Set sgttyb parameters, same as V6/V7 stty(). Supported. + sg_ispeed, sg_ospeed Can read and set. + sg_erase Cannot set to anything but DEL (fails if you try). + sg_kill Cannot set to anything but ^U (fails if you try). + sg_flags The following flags are used: + RAW, CRMOD, ECHO, CBREAK + All other flags are ignored, esp. LCASE and TANDEM. + +TIOCSETN - V7: same as TIOCSETP, but without flushing TTY input. Supported. +TIOCEXCL - V7: set exclusive use of tty (not implemented). +TIOCNXCL - V7: reset exclusive use of tty (not implemented). +TIOCHPCL - V7: Hang up on last close (not implemented). +TIOCFLUSH - V7: Flush TTY input and output buffers. Supported. + +All other functions are for BSD4.3. + +TIOCSTI - Simulate terminal input. Supported. +TIOCSBRK - Set break bit. (not implemented) +TIOCCBRK - Clear break bit. (not implemented) +TIOCSDTR - Set data terminal ready. (not implemented) +TIOCCDTR - Clear data terminal ready. (not implemented) +TIOCGPGRP - Get pgrp of tty. (not implemented) +TIOCSPGRP - Set pgrp of tty. (not implemented) + +TIOCGETC - Get special characters (tchars). Supported. +TIOCSETC - Set special characters (tchars). Supported (sort of). + t_intrc and t_quitc (for SIGINT and SIGQUIT) are initially -1 but + can be set to any control character. Note that because + chars are unsigned, the initial value when converted to + an integer is 0777, not -1! 
+ No other elements of tchars can be set to anything but what they + already are: + t_stopc = ^S, t_startc = ^Q, t_eofc = ^Z, t_brkc = -1 + + +TIOCLBIS - Set bits in local mode word. (not implemented) +TIOCLBIC - Clear bits in local mode word. (not implemented) +TIOCLGET - Get local mode mask. (not implemented) +TIOCLSET - Set local mode mask. (not implemented) + +TIOCSLTC - Set local special chars (ltchars). Supported. +TIOCGLTC - Get local special chars (ltchars). Supported (sort of). + None of these chars can be set to anything but what they already are: + t_suspc = ^C, t_dsuspc = ^C, t_rprntc = ^R, t_flushc = ^O, + t_werasc = ^W, t_lnextc = ^V + +TIOCGETD - Get line discipline. Supported. +TIOCSETD - Set line discipline. Supported (sort of). + The line discipline is always NTTYDISC and cannot be set otherwise. + +TIOCGWINSZ - Get window size info. Supported. +TIOCSWINSZ - Set window size info. Supported. + ws_col and ws_row correspond to the terminal's width and height. + Both can be read and set. + ws_xpixel and ws_ypixel are initially 0 but can be set and then read. + +Signals: + + Signals are complicated, both on Un*x and T20/10X. The KCC +implementation by default attempts to support the 4.3BSD signal +mechanism, which uses a variety of system calls. For those planning +to use signals, the file SIGNAL.DOC should be consulted.
diff --git a/src/c/c.defs b/src/c/c.defs new file mode 100755 index 000000000..9284f9dd7 --- /dev/null +++ b/src/c/c.defs @@ -0,0 +1,59 @@ +/* + + C Standard Definitions + +*/ + +# define ITS ITS + +/* data types */ + + +struct _filespec {int dev, fn1, fn2, dir;}; +# define filespec struct _filespec + +# define channel int + +struct _cal {int year, month, day, hour, minute, second;}; +# define cal struct _cal + +struct _tag {int *pc, *fp, *ap, *sp;}; +# define tag struct _tag + + +/* common values */ + +# define TRUE 1 +# define FALSE 0 + +# define OPENLOSS -1 /* returned by COPEN if lose */ +# define EOF_VALUE 0 /* returned by CGETC if EOF */ + +/* C interrupts */ + +# define INT_DEFAULT 0 +# define INT_IGNORE 1 + +# define realt_interrupt 0 +# define mpv_interrupt 1 +# define ioc_interrupt 2 +# define ilopr_interrupt 3 +# define mar_interrupt 4 +# define utrap_interrupt 5 +# define pure_interrupt 6 +# define wiro_interrupt 7 + +# define sys_down_interrupt 8 +# define clock_interrupt 9 +# define timer_interrupt 10 +# define pdlov_interrupt 11 +# define ttyi_interrupt 12 +# define cli_interrupt 13 +# define overflow 14 +# define float_overflow 15 + +# define channel0_interrupt 16 +# define inferior0_interrupt 32 + +# define ctrls_interrupt 41 +# define ctrlg_interrupt 42 diff --git a/src/c/clib.stinkr b/src/c/clib.stinkr new file mode 100755 index 000000000..e3217dbdb --- /dev/null +++ b/src/c/clib.stinkr @@ -0,0 +1,7 @@ +; xfile for loading basic C library +; this file must be loaded first +; segment 0 must start at 100 + +s 100,n,p,n +i sinit +l c;[crel] > diff --git a/src/c/ctype.h b/src/c/ctype.h new file mode 100755 index 000000000..0faa30650 --- /dev/null +++ b/src/c/ctype.h @@ -0,0 +1,15 @@ +#define _U 01 +#define _L 02 +#define _A 03 +#define _N 04 +#define _S 010 + +extern char _ctype[]; + +#define isalpha(c) (_ctype[c]&_A) +#define isupper(c) (_ctype[c]&_U) +#define islower(c) (_ctype[c]&_L) +#define isdigit(c) (_ctype[c]&_N) +#define isspace(c) 
(_ctype[c]&_S) +#define toupper(c) ((c)-'a'+'A') +#define tolower(c) ((c)-'A'+'a') diff --git a/src/c/nc.insert b/src/c/nc.insert new file mode 100755 index 000000000..ae5bc6305 --- /dev/null +++ b/src/c/nc.insert @@ -0,0 +1,61 @@ +; C;NC INSERT + +; THIS FILE IS NEEDED TO ASSEMBLE MIDAS PROGRAMS PRODUCED BY +; THE C COMPILER AS WELL AS HAND-CODED MIDAS PROGRAMS DESIGNED +; TO BE LOADED WITH C PROGRAMS + +RELOCATABLE +.INSRT SYSENG;MULSEG INSERT +.MSEG 200000',600000',700000' + +IF1,[ +.MLLIT==1 + +A=1 +B=2 +C=3 +D=4 +P=15. +.CCALL=1_27. +GO=JRST + +EQUALS ENTRY .GLOBAL +EQUALS EXTERN .GLOBAL + +DEFINE .IDATA +.SEG 0 +TERMIN + +DEFINE .UDATA +.SEG 1 +TERMIN + +DEFINE .CODE +.SEG 2 +TERMIN + +DEFINE .PDATA +.SEG 3 +TERMIN + +; STACK HACKING FOR VARIABLE REFERENCES + +%P==0 +DEFINE PPUSH [A] + PUSH P,A + %P==%P+1 + TERMIN +DEFINE PPOP [A] + POP P,A + %P==%P-1 + TERMIN +DEFINE CCALL N,F + .CCALL N,F + %P==%P-N + TERMIN + +];END IF1 + +IF2,[IFDEF FS1,[ + .KILL %A,%P,A,B,C,D,P,GO,.CCALL + ]] diff --git a/src/c/stdio.h b/src/c/stdio.h new file mode 100755 index 000000000..336cca962 --- /dev/null +++ b/src/c/stdio.h @@ -0,0 +1,21 @@ +/* STDIO.H for DEC20 implementation */ + +/* actual code is in C20STD.C */ + +# define BUFSIZ 512 /* this number is irrelevant */ +# define FILE int /* the actual structure is irrelevant */ +# define NULL 0 /* null file pointer for error return */ +# define EOF (-1) /* returned on end of file */ + +# define peekchar pkchar /* rename to avoid name conflict */ +# define fopen flopen /* " */ +# define getc fgetc /* " */ +# define getchar fgeth /* " */ +# define fprintf ffprintf /* " */ +# define calloc fcalloc /* " */ + +# define feof ceof /* direct translation */ +# define putc cputc /* " */ +# define fputc cputc /* " */ + +extern FILE *stdin, *stdout, *stderr; diff --git a/src/kcc_sy/fsdefs.h b/src/kcc_sy/fsdefs.h new file mode 100755 index 000000000..48c3cbd11 --- /dev/null +++ b/src/kcc_sy/fsdefs.h @@ -0,0 +1,104 @@ +/* -*-C-*- + * ITS 
filesystem definitions + * + * Defines the format of binary MFDs and UFDs for use by C programs. + */ + +/* +** MFD INFO +*/ + + /* RANDOM INFO IN MFD */ + +#define MDNUM 0 /* ASCENDING DIR NUM */ +#define MDNAMP 1 /* PNTR TO ORG OF USER NAME BLOCK AREA */ +#define MDYEAR 2 /* CURRENT YEAR */ +#define MPDOFF 3 /* DE-CORIOLIS CLOCK OFFSET */ +#define MPDWDK 4 /* PREFERRED WRITING DISK (PHYSICAL DRIVE #) */ +#define MDCHK 5 /* THIS WORD MUST BE M.F.D. (FOR CHECKING) */ +#define MDNUDS 6 /* NUMBER USER DIRECTORIES (FOR CHECKING ONLY) */ +#define LMIBLK 7 /* TOTAL STG USED BY HACKS LIKE THIS */ + + /* USER NAME BLKS FROM C(MDNAMP) TO END */ + +#define LMNBLK 2 /* # WDS/BLK */ +#define MNUNAM 0 /* 6BIT USER NAME */ + +/* +** UFD INFO +*/ + +#define UFDBYT 6 /* SIZE OF BYTES */ +#define UFDBPW (36 / UFDBYT) /* NUMBER OF BYTES PER WORD */ + + /* RANDOM INFO IN UFD */ + +#define UDESCP 0 /* FS PNTR TO DESC AREA */ +#define UDNAMP 1 /* PNTR TO ORG OF NAME AREA */ +#define UDNAME 2 /* USER NAME (FOR CHECKING) */ +#define UDBLKS 3 /* LEFT HALF HAS AMOUNT OF SPACE ALLOCATED (NOT */ + /* USED CURRENTLY BY SYSTEM), RIGHT HALF HAS */ + /* NUMBER OF BLOCKS USED. */ +#define UDALLO 4 /* IF NONZERO, LEFT HALF HAS DISK NUMBER, RIGHT */ + /* HALF HAS AMOUNT OF SPACE ALLOCATED */ +#define UDDESC 11 /* FIRST LOC AVAIL FOR DESC */ + + /* UFD DESCRIPTORS + ** 0 => FREE 1-UDTKMX => TAKE NEXT N + ** UDTKMX+1 THRU UDWPH-1 => SKIP N-UDTKMX AND TAKE ONE + ** UDWPH => WRITE-PLACE-HOLDER + ** 040 BIT SET => LOAD ADDRESS. LOWER 5 BITS PLUS NEXT NXLBYT (2) + ** CHARS (17 BITS IN ALL) + ** 040 BIT & 020 BIT => "FUNNY" BLOCK IF DMDSK. WHAT IS THIS, ANYWAY? + ** END BY 0 + ** + ** IF LINK DESCR + ** 6 CHAR OR UNTIL ; = SYS NAME. 
MUST HAVE NO CHAR = 0 IN THIS OR + ** NEXT 2 NAMES + ** NEXT CHAR QUOTED BY : (FOR NAMES WITH : OR ;) + ** NEXT CHAR N1 + ** NEXT CHAR N2 + ** END BY 0 + */ + +#define UDTKMX 12 /* HIGHEST "TAKE N" CODE */ +#define UDWPH 31 /* PLACE HOLDER ON WRITE (OR NULL FILE) */ +#define UDSKMX (UDWPH - UDTKMX - 1) /* # BLOCKS THAT CAN BE SKIPPED */ +#define NXLBYT 2 /* # ADDITIONAL BYTES FOR LOAD ADDR */ + + /* NAME AREA DATA */ + +#define LUNBLK 5 /* WDS/NAME BLK */ +#define UNFN1 0 /* FIRST FN */ +#define UNFN2 1 /* SECOND FN */ +#define UNRNDM 2 /* ALL KINDS OF RANDOM INFO */ +#define UNDSCP 0001500000000 /* PNTR TO DESC */ +#define UNPKN 0150500000000 /* PACK # */ +#define UNLINK 01000000 /* LINK BIT */ +#define UNREAP 02000000 /* IF 1, DONT REAP FILE */ +#define UNWRIT 04000000 /* OPEN FOR WRITING */ +#define UNMARK 010000000 /* GC MARK BIT */ +#define UNCDEL 020000000 /* DEL WHEN CLOSED */ +#define DELBTS 020000000 /* DELETED -- IGNORE */ +#define UNIGFL 024000000 /* BITS TO IGNORE FILE */ +#define UNWRDC 0301200000000 /* WORD COUNT OF LAST BLOCK MOD 2000 */ +#define UNDUMP 0400000000000 /* HAS BEEN DUMPED */ +#define UNDATE 3 /* DATE ETC. */ +#define UNTIM 0002200000000 /* COMPACTED TIME OF CREATION */ +#define UNYMD 0222000000000 /* Y,M,D OF CREATION */ +#define UNMON 0270400000000 /* MONTH */ +#define UNDAY 0220500000000 /* DAY */ +#define UNYRB 0330700000000 /* YEAR */ +#define UNREF 4 /* REFERENCE DATE SAME AS LEFT HALF OF UNDATE */ +#define UNREFD 0222000000000 /* REFERENCE DATE */ +#define UNAUTH 0111100000000 /* MFD INDEX OF AUTHOR, ALL 1=> NO */ + /* DIRECTORY */ +#define UNBYTE 0001100000000 /* FILE BYTE SIZE AND LENGTH INFO. */ + /* LET S=BITS PER BYTE, C=COUNT OF UNUSED BYTES + ** IN LAST WD + ** 400+100xS+C S=1 TO 3 C=0 TO 35. + ** 200+20xS+C S=4 TO 7 C=0 TO 8 + ** 44+4xS+C S=8 TO 18. C=0 TO 3 + ** 44-S S=19. TO 36. C=0 + ** NOTE THAT OLD FILES HAVE UNBYTE=0 => S=36. 
+ */ diff --git a/src/kcc_sy/humble.h b/src/kcc_sy/humble.h new file mode 100755 index 000000000..aee29ccfa --- /dev/null +++ b/src/kcc_sy/humble.h @@ -0,0 +1,11 @@ +/* -*-C-*- + * HUMBLE header file + */ + +extern int j_create(), j_kill(); +extern int j_read(), j_write(); +extern int j_dump(), j_load(); +extern int j_vread(), j_vwrite(); +extern int j_atty(), j_dtty(); + +#define SIXBIT(name) (* ((int *) ((_KCCtype_char6 *) name)))