Emacs NYC talk
[arxana.git] / latex / arxana-reboot-backend.tex
blob2056e1634d0eb5647e8534d22fb68c155ddc02ae
1 \section{SQL tables} \label{sql-code}
3 \begin{notate}{Objects and codes} \label{objects-and-codes}
4 Every object in the system is identified by an ordered
5 pair: a \emph{code} and a \emph{reference}. The codes say
6 which table contains the indicated object, and references
7 provide that object's id. To identify a specific element of a list
8 or n-tuple, a third number, that element's \emph{offset},
9 is required. The codes are as follows:
11 \begin{center}
12 \begin{tabular}{|l|l|}
13 \hline
14 0 & list \\ \hline
15 1 & string \\ \hline
16 2 & pair \\ \hline
17 3 & triple \\ \hline
18 \end{tabular}
19 \end{center}
20 \end{notate}
22 \begin{idea}
-- Immutable, de-duplicated text.
CREATE TABLE strings (
  id SERIAL PRIMARY KEY,
  text TEXT NOT NULL UNIQUE
);

-- Ordered pairs of (code, ref) coordinates; each pair is stored once.
CREATE TABLE pairs (
  id SERIAL PRIMARY KEY,
  code1 INT NOT NULL,
  ref1 INT NOT NULL,
  code2 INT NOT NULL,
  ref2 INT NOT NULL,
  UNIQUE (code1, ref1,
          code2, ref2)
);

-- Ordered triples of (code, ref) coordinates; each triple is stored once.
CREATE TABLE triples (
  id SERIAL PRIMARY KEY,
  code1 INT NOT NULL,
  ref1 INT NOT NULL,
  code2 INT NOT NULL,
  ref2 INT NOT NULL,
  code3 INT NOT NULL,
  ref3 INT NOT NULL,
  UNIQUE (code1, ref1,
          code2, ref2,
          code3, ref3)
);
50 \end{idea}
52 \begin{notate}{A list of lists}\label{models-of-theories}
53 As a central place to manage our collections, we first
54 create a list of lists. The `heading' is the list's name,
55 and its `header' is metadata.
56 \end{notate}
58 \begin{idea}
-- The list of lists.  `heading' (the list's name) and `header' (its
-- metadata) are both ids of rows in `strings'.
CREATE TABLE lists (
  id SERIAL PRIMARY KEY,
  heading INT UNIQUE REFERENCES strings(id),
  header INT REFERENCES strings(id)
);
64 \end{idea}
66 \begin{notate}{Lists on demand}\label{lists-on-demand}
67 Whenever we want to create a new list, we first add to the
68 `lists' table, and then create a new table ``listk''
69 (where k is equal to the new maximum id on `lists').
70 \end{notate}
72 \begin{idea}
-- Template for the per-list table; `k' stands for the list's id in `lists'.
-- Each row places the object with coordinates (code, ref) at `offset'.
CREATE TABLE listk (
  offset SERIAL PRIMARY KEY,
  code INT NOT NULL,
  ref INT NOT NULL
);
78 \end{idea}
80 \begin{notate}{Side-note on containers via triples} \label{containers-using-triples}
81 To model a basic container, we can just use triples like
82 ``(A in B)''. This is useful, but the elements of B are
83 of course unordered. In Section \ref{importing}, we make
84 extensive use of triples like (B 1 $\alpha$), (B 2
85 $\beta$), etc., to indicate that B's first component is
86 $\alpha$, second component is $\beta$, and so on; so we
87 can make ordered list-like containers as well.
89 This is an example of the difference in expressive power
90 of tags (which only provide a sense of unordered
91 containment in ``virtual baskets'') and triples (which
92 here are seen to at least provide the additional sense of
93 ordered containment in ``virtual filing cabinets'',
94 although they have much more in store for us); cf. Note
95 \ref{prog-lit-review}.
97 As useful as models based on these two principles are in
98 principle, the user could easily be overloaded by looking
99 at lots of different containers encoded in raw triples,
100 all at once.
101 \end{notate}
103 \begin{notate}{Sense of containment}
104 Note that every element of a list is in the list in the
105 same ``sense'' -- for example, we can't instantly
106 distinguish elements that are ``halfway in'' from those
107 that are ``all the way in'', the same way we could with
108 pure triples.
109 \end{notate}
111 %% \begin{notate}{References into theories}
112 %% Since at the moment we have less than 10 basic codes, we
113 %% can uniquely reference contents of theory $k$ with ordered
114 %% pairs $10k+\mathit{basic\ code}$ and $\mathit{reference}$.
115 %% \end{notate}
117 \begin{notate}{Uniqueness of strings and triples} \label{unique-things}
118 An attempt to create duplicate contents in a string or
119 triple generates a warning. This saves storage, given
120 possible repetitive use -- and avoids confusion. We can,
121 however, reference duplicate ``copies'' on the lists.
122 \end{notate}
124 \begin{notate}{Change} \label{change}
125 Notice also that since neither strings nor triples
126 ``change'', we have to account for change in other ways.
127 In particular, the contents of lists can change. (We may
128 subsequently add some metadata to indicate that certain lists are
129 ``locked'', or that they can only be changed by
130 adding, etc., so that their contents can be cited stably
131 and reliably.)
132 \end{notate}
134 %% \begin{notate}{Each place contains one object} \label{places}
135 %% It is obvious from the table definition that I want each
136 %% place to contain precisely one thing; perhaps it is less
137 %% obvious why I want to use a database table to maintain
138 %% this relationship between ``places'' and ``things''. This
139 %% is largely a matter of convenience, but in particular it
140 %% makes it easy for places to change.
141 %% \end{notate}
143 \begin{notate}{Provenance and other metadata} \label{provenance}
144 We could of course add much more structure to the
145 database, starting with simple adjustments like adding
146 provenance metadata or versioning into the records for
147 each stored thing. For the time being, I assume that such
148 metadata will appear in the application or content layer,
149 as triples. (The exceptions are the ``headings'' and
150 ``headers'' associated with lists.)
151 \end{notate}
153 \section{Common Lisp-side}
155 \subsection{Preliminaries}
157 \subsubsection*{System definition}
159 \begin{common}{arxana.asd}
;; ASDF system definition for Arxana.
;; Load order: packages -> utilities -> database -> queries.
(defsystem "arxana"
  :version "1"
  :author "Joe Corneli <holtzermann17@gmail.com>"
  :licence "Public Domain"
  :components
  ((:file "packages")
   (:file "utilities" :depends-on ("packages"))
   (:file "database" :depends-on ("utilities"))
   ;; queries.lisp calls `massage' (database.lisp) and the `isolate-*'
   ;; helpers (utilities.lisp), so it has to load after both of them;
   ;; depending on "database" pulls in "utilities" and "packages" too.
   (:file "queries" :depends-on ("database"))))
169 \end{common}
171 \subsubsection*{Package definition}
173 \begin{common}{packages.lisp}
;; The one package that holds all Arxana code.  It uses CLSQL so that
;; `select', `connect', `insert-records' etc. can be written unqualified.
(defpackage #:arxana
  (:use #:common-lisp
        #:clsql
        #:clsql-sys))
176 \end{common}
178 \subsubsection*{Utilities}
180 \begin{notate}{Useful things} \label{useful}
181 These definitions are either necessary or useful for
182 working with the database and manipulating triple-centric
183 and/or theory-situated data. The implementation of
184 theories given here is inspired by Lisp's streams. This
185 is perhaps the most gnarly part of the code; the pay-off
186 of doing things the way we do them here is that
187 subsequently theories can sit ``transparently'' over other
188 structures.
189 \end{notate}
191 \begin{common}{utilities.lisp}
192 (in-package arxana)
193 (locally-enable-sql-reader-syntax)
195 ;; (defun connect-to-database ()
196 ;; (connect `("localhost" "joe" "joe" "")
197 ;; :database-type :postgresql-socket))
(defun connect-to-database (&key (host "localhost")
                                 (database "joe")
                                 (user "joe")
                                 (password "joe")
                                 (database-type :mysql))
  "Open a CLSQL connection and return whatever `connect' returns.
Called with no arguments this behaves exactly as before (MySQL on
localhost, with \"joe\" as database, user and password).  The keyword
arguments let a different installation or backend be selected without
editing this function; e.g. pass :database-type :postgresql-socket."
  ;; Connection spec layout is (host database user password).
  (connect (list host database user password)
           :database-type database-type))
(defmacro select-one (&rest args)
  "Expand into the `car' of a `select' over ARGS with `:flatp t' added,
i.e. the first item of the flattened result."
  (list 'car (append (list 'select) args (list :flatp t))))

(defmacro select-flat (&rest args)
  "Expand into a `select' over ARGS with `:flatp t' added."
  (append (list 'select) args (list :flatp t)))
(defun resolve-ambiguity (stuff)
  "Pick one item out of the list STUFF: simply the first one.
Returns nil when STUFF is empty."
  (car stuff))
(defun isolate-components (content i j)
  "Return a two-element list holding the Ith and Jth items of CONTENT.
I and J are 1-based positions."
  (mapcar (lambda (position)
            (nth (1- position) content))
          (list i j)))

;; The three functions below take a triple whose first element is
;; dropped (via `rest') before the positions are counted; they return
;; positions 1-2, 3-4 and 5-6 of what remains.

(defun isolate-beginning (triple)
  "Return the 1st and 2nd items of TRIPLE after its first element."
  (isolate-components (rest triple) 1 2))

(defun isolate-middle (triple)
  "Return the 3rd and 4th items of TRIPLE after its first element."
  (isolate-components (rest triple) 3 4))

(defun isolate-end (triple)
  "Return the 5th and 6th items of TRIPLE after its first element."
  (isolate-components (rest triple) 5 6))
225 (defvar *read-from-heading* nil)
227 (defvar *write-to-heading* nil)
228 \end{common}
230 \begin{notate}{On `datatype'}
231 Just translate coordinates into their primary dimension.
232 (How should this change to accommodate codes 4, 5, 6,
233 possibly etc.?)
234 \end{notate}
236 \begin{common}{utilities.lisp}
(defun datatype (data)
  "Translate the code of the coordinates DATA into a table-name string.
DATA is a list whose first element is an integer code.  Returns
\"strings\", \"places\", \"triples\" or \"theories\" for codes 0-3,
and nil for anything else (including nil DATA)."
  ;; `case' compares with `eql'; `eq' on integers is not guaranteed to
  ;; be true even for equal values.
  ;; NOTE(review): this mapping differs from the code table given in
  ;; the ``Objects and codes'' note (0 list, 1 string, 2 pair,
  ;; 3 triple) -- confirm which scheme is intended.
  (case (car data)
    (0 "strings")
    (1 "places")
    (2 "triples")
    (3 "theories")))
247 (locally-disable-sql-reader-syntax)
248 \end{common}
250 \begin{notate}{Resolving ambiguity}
251 Often it will eventuate that there will be more than one
252 item returned when we are only truly prepared to deal with
253 one item. In order to handle this sort of ambiguity, it
254 would be great to have either a non-interactive notifier
255 that says that some ambiguity has been dealt with, or an
256 interactive tool that will let the user decide which of
257 the ambiguous options to choose from. For now, we provide
258 the simplest non-interactive tool: just choose the first
259 item from a possibly ambiguous list of items.
260 \end{notate}
262 \begin{notate}{Using a different database}
263 See Note \ref{backend-variant} for instructions on changes
264 you will want to make if you use a different database.
265 \end{notate}
267 \begin{notate}{Use of the ``count'' function}
268 The SQL count function is thought to be inefficient with
269 some backends; workarounds exist. (And it's considered to
270 be efficient with MySQL.)
271 \end{notate}
273 \begin{notate}{Abstraction} \label{abstraction}
274 While it might be in some ways ``nice'' to allow people to
275 chain together ever-more-abstract references to elements
276 from other theories, I actually think it is better to
277 demand that there just be \emph{one} layer of abstraction
278 (since we can then quickly translate back and forth,
279 rather than running through a chain of translations).
281 This does not imply that we cannot have a theory
282 superimposed over another theory (or over multiple
283 theories) that draws input from throughout a massively
284 distributed interlaced system -- rather, just that we
285 assume we will need to translate to ``base coordinates''
286 when building such structures. However, we'll certainly
287 want to explore the possibilities for running links
288 between theories (abstractly similar in some sense to
289 pointing at a component of a triple, but here there's no
290 uniform beg, mid, end scheme to refer to).
291 \end{notate}
293 \subsection{Main table definitions}
295 \begin{notate}{Defining tables from within Lisp}
296 This is Lisp code to define the permanent SQL tables
297 described in Section \ref{sql-code}.
298 \end{notate}
300 \begin{common}{tabledefs.lisp}
;; Variant of the `strings' definition with a plain UNIQUE column
;; constraint, kept for reference:
;; (execute-command "CREATE TABLE strings (
;;   id SERIAL PRIMARY KEY,
;;   text TEXT NOT NULL UNIQUE
;; );")

;; Variant actually used: uniqueness is enforced through an index on
;; the first 255 characters of `text'.
(execute-command "CREATE TABLE strings (
  id SERIAL PRIMARY KEY,
  text TEXT,
  UNIQUE INDEX (text(255))
);")

(execute-command "CREATE TABLE places (
  id SERIAL PRIMARY KEY,
  code INT NOT NULL,
  ref INT NOT NULL
);")

(execute-command "CREATE TABLE triples (
  id SERIAL PRIMARY KEY,
  code1 INT NOT NULL,
  ref1 INT NOT NULL,
  code2 INT NOT NULL,
  ref2 INT NOT NULL,
  code3 INT NOT NULL,
  ref3 INT NOT NULL,
  UNIQUE (code1, ref1,
          code2, ref2,
          code3, ref3)
);")

(execute-command "CREATE TABLE theories (
  id SERIAL PRIMARY KEY,
  name INT UNIQUE REFERENCES strings(id)
);")

;; The list of lists.  `list-to-id', `add-list', `get-lists',
;; `save-to-list' and `list-lookup' all query this table, so it has to
;; exist before any of them is called.
(execute-command "CREATE TABLE lists (
  id SERIAL PRIMARY KEY,
  heading INT UNIQUE REFERENCES strings(id),
  header INT REFERENCES strings(id)
);")
335 \end{common}
337 \begin{notate}{Eliminating tables}
338 In case you ever need to redefine these tables, you can
339 run code like this first, to delete the existing copies.
340 (Additional tables are added whenever a theory is created;
341 code for deleting theories or their contents will appear
342 in Section \ref{processing-theories}.)
343 \end{notate}
345 \begin{idea}
346 (dolist (view (list-views)) (drop-view view))
347 (execute-command "DROP TABLE strings")
348 (execute-command "DROP TABLE triples")
349 (execute-command "DROP TABLE places")
350 (execute-command "DROP TABLE theories")
351 \end{idea}
353 \subsection{Modifying the database}
355 \begin{common}{database.lisp}
356 (in-package arxana)
357 (locally-enable-sql-reader-syntax)
358 \end{common}
360 \subsection*{Processing strings}
362 \begin{notate}{On `string-to-id'}
363 Return the id of `text', if present, otherwise nil.
365 There was a segmentation fault with clisp here at one
366 point, maybe because I hadn't gotten the clsql sql reader
367 syntax loaded up properly. Note that calling the code
368 without the function wrapper did not produce the same
369 segfault.
370 \end{notate}
372 \begin{common}{database.lisp}
(defun string-to-id (text)
  "Return the id of the row of `strings' whose text is TEXT, or nil
if there is no such row."
  ;; `select-one' (utilities.lisp) adds `:flatp t' and takes the `car',
  ;; so callers get a bare id rather than a list of one-element rows.
  (select-one [id]
              :from [strings]
              :where [= [text] text]))
377 \end{common}
379 \begin{notate}{On `add-string'} \label{add-string}
380 Add the argument `text' to the list of strings. If the string
381 is successfully created, its coordinates are returned.
382 Otherwise, and in particular, if the request was to create
383 a duplicate, nil is returned.
385 Should this give a message ``Adding \meta{text} to the
386 strings table'' when the string is added by an indirect
387 function call, such as through `massage'?
388 (Note \ref{massage}.)
389 \end{notate}
391 \begin{common}{database.lisp}
(defun add-string (text)
  "Insert TEXT into the `strings' table.
On success return a two-element list whose second element is the
result of `string-to-id' for TEXT.  If the insert signals
`sql-database-data-error' (in particular for a duplicate), issue a
warning and return nil."
  (handler-case
      ;; `insert-records' is CLSQL's insert function (the same one
      ;; `add-triple' uses); there is no plain `insert'.
      (progn (insert-records :into [strings]
                             :attributes '(text)
                             :values (list text))
             `(1 ,(string-to-id text)))
    (sql-database-data-error ()
      ;; `warn' returns nil, which is what callers see for a duplicate.
      (warn "\"~a\" already exists."
            text))))
401 \end{common}
403 \begin{notate}{Error handling bug}
404 The function `add-string' (Note \ref{add-string}) exhibits
405 the first of several error handling calls designed to
406 ensure uniqueness (Note \ref{unique-things}).
407 Experimentally, this works, but I'm observing that, at
408 least sometimes, if the user tries to add an item that's
409 already present in the database, the index tied to the
410 associated table increases even though the item isn't
411 added. This is annoying. I haven't checked whether this
412 happens on all possible installations of the underlying
413 software.
414 \end{notate}
416 \subsection*{Parsing general input}
418 \begin{notate}{On `massage'} \label{massage}
419 User input to functions like `add-triple' and so on and so
420 forth can be strings, integers (which the function
421 ``serializes'' as the string versions of themselves), or
422 as \emph{coordinates} -- lists of the form (code ref).
423 This function converts all of these input forms into the
424 last one! It takes an optional argument `addstr' which,
425 if supplied, says to add string data to the database if it
426 wasn't there already.
427 \end{notate}
429 \begin{common}{database.lisp}
(defun massage (data &optional addstr)
  "Normalize DATA to coordinates, i.e. a list of the form (code ref).
- An integer is converted to its printed form and handled as a string.
- A string is looked up with `string-to-id'; if found, (0 id) is
  returned.  If it is not found and ADDSTR is non-nil, the string is
  added with `add-string' and (0 id) is returned for the new row;
  otherwise nil.
- A two-element list is returned unchanged.
- Anything else yields nil."
  (cond
    ((integerp data)
     (massage (format nil "~a" data) addstr))
    ((stringp data)
     (let ((id (string-to-id data)))
       (cond
         (id (list 0 id))
         (addstr
          ;; Return the same code (0) for a freshly added string as for
          ;; an existing one, whatever code `add-string' puts first;
          ;; `datatype', `switchboard' and `print-system-object' all
          ;; treat code 0 as ``string''.
          (let ((added (add-string data)))
            (when added
              (list 0 (second added))))))))
    ((and (listp data)
          (= (length data) 2))
     data)
    (t nil)))
444 \end{common}
447 \subsection*{Processing triples}
449 \begin{notate}{On `triple-to-id'}
450 Return the id of the triple (beg mid end),
451 if present, otherwise nil.
452 \end{notate}
454 \begin{common}{database.lisp}
(defun triple-to-id (beg mid end)
  "Return the id of the triple (BEG MID END), or nil if it is absent.
Each argument is normalized with `massage' (without adding strings),
so strings, integers and (code ref) coordinates are all accepted."
  (let ((b (massage beg))
        (m (massage mid))
        (e (massage end)))
    ;; If any component is unknown to `massage' the triple cannot
    ;; exist, so skip the query instead of comparing columns with nil.
    (when (and b m e)
      ;; `select-one' yields the bare id rather than a list of rows.
      (select-one [id]
                  :from [triples]
                  :where [and [= [code1] (first b)]
                              [= [ref1] (second b)]
                              [= [code2] (first m)]
                              [= [ref2] (second m)]
                              [= [code3] (first e)]
                              [= [ref3] (second e)]]))))
467 \end{common}
469 \begin{notate}{On `add-triple'} \label{add-triple}
470 Elements of triples are parsed by `massage'
471 (Note \ref{massage}). If the triple
472 is successfully created, its coordinates are returned.
473 Otherwise, and in particular, if the request was to create
474 a duplicate, nil is returned.
475 \end{notate}
477 \begin{common}{database.lisp}
(defun add-triple (beg mid end)
  "Add a triple comprised of BEG MID and END."
  ;; Each component goes through `massage' with ADDSTR set, so unknown
  ;; strings are added on the way.  Nothing is inserted unless all
  ;; three components resolve to coordinates.
  (let ((coords (list (massage beg t)
                      (massage mid t)
                      (massage end t))))
    (when (notany #'null coords)
      (destructuring-bind (b m e) coords
        (handler-case
            (progn
              (insert-records
               :into [triples]
               :attributes '(code1 ref1 code2 ref2 code3 ref3)
               :values (list (first b) (second b)
                             (first m) (second m)
                             (first e) (second e)))
              ;; Coordinates of the new triple: code 2 plus its id.
              (list 2 (triple-to-id b m e)))
          (sql-database-data-error ()
            ;; `warn' returns nil, so a rejected insert yields nil.
            (warn "\"~a\" already entered as [~a ~a ~a]."
                  (list beg mid end) b m e)))))))
497 \end{common}
499 \subsection*{Processing theories} \label{processing-theories}
501 \begin{notate}{Things to do with theories}
502 For the record, we want to be able to create a theory, add
503 elements to that theory, remove or change elements in the
504 theory, and, for convenience, zap everything in a theory.
505 Perhaps we will also want functions to remove the tables
506 associated with a theory as well, swap the position of two
507 theories, or change the name of a theory. We will also
508 want to be able to export and import theories, so they can
509 be ``beamed'' between installations. At appropriate
510 places in the Emacs interface, we'll need to set
511 `*write-to-heading*' and `*read-from-heading*'.
512 \end{notate}
514 \begin{notate}{What can go in a theory} \label{what-can-go-in}
515 Notice that there is no rule that says that a triple or
516 place that's part of a theory needs to point only at
517 strings that are in the same theory.
518 \end{notate}
520 \begin{notate}{On `list-to-id'}
521 Return the id of the theory with given `heading', if present,
522 otherwise, nil.
523 \end{notate}
525 \begin{common}{database.lisp}
(defun list-to-id (heading)
  "Return the id of the row of `lists' whose heading is the string
HEADING, or nil if HEADING is not a known string or names no list."
  (let ((string-id (string-to-id heading)))
    ;; An unknown string cannot be the heading of any list.
    (when string-id
      ;; `select-one' yields the bare id rather than a list of rows.
      (select-one [id]
                  :from [lists]
                  :where [= [heading] string-id]))))
531 \end{common}
533 \begin{notate}{On `add-theory'} \label{add-theory}
534 Add a theory to the theories table, and all the new
535 dimensions of the frame that comprise this theory.
536 (Theories have names that are strings -- it seems a
537 little funny to always have to translate submitted
538 strings to ids for lookup, but this is what we do.)
539 \end{notate}
541 \begin{common}{database.lisp}
(defun add-list (heading)
  "Register a new list named HEADING and create its contents table.
HEADING is normalized with `massage' (adding it to `strings' if
needed) and inserted into `lists'; a table named list<k> is then
created, where <k> is the id `list-to-id' reports for HEADING.
Returns (0 k) on success.  If the insert signals
`sql-database-data-error', issues a warning and returns nil."
  (let ((string-id (second (massage heading t))))
    (handler-case
        (progn (insert-records :into [lists]
                               :attributes '(heading)
                               :values (list string-id))
               (let ((k (list-to-id heading)))
                 ;; The table is named ``list<k>'' because that is the
                 ;; name `get-lists' reads from.
                 (execute-command
                  (format nil "CREATE TABLE list~A (
  offset SERIAL PRIMARY KEY,
  code INT NOT NULL,
  ref INT NOT NULL
);" k))
                 `(0 ,k)))
      (sql-database-data-error ()
        (warn "The list \"~a\" already exists."
              heading)))))
560 \end{common}
562 \begin{notate}{On `get-lists'}
563 Find all lists that contain `symbol'.
564 \end{notate}
566 \begin{common}{database.lisp}
(defun get-lists (symbol)
  "Find all lists that contain SYMBOL.
SYMBOL is normalized with `massage'.  Returns a list with one entry
(0 k present) for every list table list<k> holding the object, where
PRESENT is the query result listing the offsets at which it occurs;
the entry for the highest k comes first.  Returns nil when SYMBOL
cannot be resolved to integer coordinates."
  (let* ((data (massage symbol))
         (code (first data))
         (id (second data))
         results)
    ;; The SQL below is assembled as text, so only plain integers are
    ;; allowed into it.
    (when (and (integerp code) (integerp id))
      ;; NOTE(review): the row count is used as the highest list id;
      ;; this presumably assumes ids 1..n with no gaps -- confirm.
      (let ((n (caar (query "select count(*) from lists"))))
        (loop for k from 1 upto n
              do (let ((present
                         ;; `code' is an INT column, so compare it with
                         ;; the numeric code of the coordinates.
                         (query (format nil "select offset from list~A where ((code = ~A) and (ref = ~A))"
                                        k code id))))
                   (when present
                     ;; bit of a problem if there are multiple
                     ;; entries of that item on the given
                     ;; list.
                     (push (list 0 k present) results))))))
    results))
592 \end{common}
594 \begin{notate}{On `save-to-list'}
595 Record `symbol' on list named `name'.
596 \end{notate}
598 \begin{common}{database.lisp}
(defun save-to-list (symbol name)
  "Record SYMBOL on the list named NAME.
SYMBOL is normalized with `massage' (adding it to `strings' if
needed) and its (code ref) coordinates are inserted as a new row of
the table list<k>, where <k> is the id of the row of `lists' whose
heading is NAME.  Does nothing and returns nil when SYMBOL cannot be
resolved or no list is named NAME."
  (let* ((data (massage symbol t))
         (string-id (string-to-id name))
         ;; The name of a list is stored in the `heading' column.
         (k (when string-id
              (select-one [id]
                          :from [lists]
                          :where [= [heading] string-id]))))
    (when (and data k)
      ;; list<k> has the columns (offset, code, ref); `offset' is
      ;; SERIAL, so only the coordinates are supplied.
      (insert-records :into (sql-expression :table (format nil "list~A" k))
                      :attributes '(code ref)
                      :values (list (first data) (second data))))))
611 \end{common}
613 \subsection*{Lookup by id or coordinates}
615 \begin{notate}{The data format that's best for Lisp} \label{what-is-best-for-lisp}
616 It is a reasonable question to ask whether or not an
617 item's id should be considered part of that item's
618 defining data when that data is no longer in the database.
619 For the functions defined here, the id is an input, and so
620 by default I'm not including it in the output here,
621 because it is already known. However, for functions like
622 `triples-given-beginning' (See Note
623 \ref{graph-like-data}), the id is \emph{not} part of the
624 known data, and so it is returned. Therefore I am
625 providing the `retain-id' flag here, for cases where
626 output should be consistent with that of these other
627 functions.
628 \end{notate}
630 \begin{common}{database.lisp}
(defun string-lookup (id &optional retain-id)
  "Return the text of the string with the given ID, or nil if absent.
With RETAIN-ID non-nil, return the list (ID text) instead."
  ;; `select-one' yields the text itself rather than a list of rows;
  ;; `print-system-object' passes this value to (concatenate 'string ...),
  ;; which needs an actual string.
  (let ((text (select-one [text]
                          :from [strings]
                          :where [= [id] id])))
    (if retain-id
        (list id text)
        text)))
(defun triple-lookup (id &optional retain-id)
  "Return the row (code1 ref1 code2 ref2 code3 ref3) of the triple
with the given ID, or nil if absent.  With RETAIN-ID non-nil, ID is
consed onto the front, giving the seven-element form that
`print-system-object' and the `isolate-*' helpers work on."
  ;; `select-one' takes the first row.  Without it the result is a
  ;; list of rows, and (cons id rows) has length 2, which
  ;; `print-system-object' would treat as (code ref) coordinates.
  (let ((row (select-one [code1] [ref1]
                         [code2] [ref2]
                         [code3] [ref3]
                         :from [triples]
                         :where [= [id] id])))
    (if retain-id
        (cons id row)
        row)))
(defun list-lookup (id &optional retain-id)
  "Return the heading stored in `lists' for the list with the given
ID, or nil if absent.  The heading is the id of a row in `strings'.
With RETAIN-ID non-nil, return the list (ID heading) instead."
  ;; The name of a list lives in the `heading' column; `lists' has no
  ;; `name' column.  `select-one' yields the value, not a list of rows.
  (let ((heading (select-one [heading]
                             :from [lists]
                             :where [= [id] id])))
    (if retain-id
        (list id heading)
        heading)))
656 \end{common}
658 \begin{notate}{Succinct idioms for following pointers}
659 Here are some variants on the functions above which save
660 us from needing to extract the id of the item from its
661 coordinates.
662 \end{notate}
664 \begin{common}{database.lisp}
;; Each function takes coordinates (code ref) and hands the ref -- the
;; second element -- to the matching lookup function.

(defun string-contents (coords)
  "Call `string-lookup' on the second element of COORDS."
  (string-lookup (cadr coords)))

(defun place-contents (coords)
  "Call `place-lookup' on the second element of COORDS."
  (place-lookup (cadr coords)))

(defun triple-contents (coords)
  "Call `triple-lookup' on the second element of COORDS."
  (triple-lookup (cadr coords)))
673 \end{common}
675 \begin{notate}{Switchboard} \label{switchboard}
676 Even more succinctly, one function that can get
677 the object indicated by any set of coordinates.
678 \end{notate}
680 \begin{common}{database.lisp}
(defun switchboard (coords)
  "Fetch the object that the coordinates COORDS point at.
Dispatches on the code (first element): 0 goes to `string-contents',
1 to `place-contents', 2 to `triple-contents'.  Any other code
yields nil."
  ;; `case' compares with `eql'; `eq' on integers is not guaranteed to
  ;; be true even for equal values.
  (case (first coords)
    (0 (string-contents coords))
    (1 (place-contents coords))
    (2 (triple-contents coords))))
688 \end{common}
690 \begin{notate}{Anti-pasti}
691 The readability of this code could perhaps be improved if
692 we used functions like `switchboard' more frequently.
693 (More to the point, it seems it's not currently used.) In
694 particular, it would be nice if we could sweep idioms like
695 \verb+`(2 ,(car triple))+ under the rug.
696 \end{notate}
698 \begin{common}{database.lisp}
699 (locally-disable-sql-reader-syntax)
700 \end{common}
702 \subsection{Queries} \label{queries}
704 \begin{notate}{The use of views} \label{use-of-views}
705 It is easy enough to select those triples which match
706 simple data, e.g., those triples which have the same
707 beginning, middle, or end, or any combination of these.
708 It is a little more complicated to find items that match
709 criteria specified by several different triples; for
710 example, to \emph{find all the books by Arthur C. Clarke
711 that are also works of fiction}.
713 Suppose our collection of triples contains a portion as
714 follows:
715 \begin{center}
716 \begin{tabular}{lll}
717 Profiles of the Future & is a & book \\ 2001: A Space
718 Odyssey & is a & book \\ Ender's Game & is a & book
719 \\ Profiles of the Future & has genre & non-fiction
720 \\ 2001: A Space Odyssey & has genre & fiction \\ Ender's
721 Game & has genre & fiction \\ Profiles of the Future & has
722 author & Arthur C. Clarke \\ 2001: A Space Odyssey & has
723 author & Arthur C. Clarke \\ Ender's Game & has author &
724 Orson Scott Card
725 \end{tabular}
726 \end{center}
728 One way to solve the given problem would be to find those
729 items that \emph{are written by Arthur C. Clarke} (* ``has
730 author'' and ``Arthur C. Clarke''), that \emph{are books}
731 (* ``is a'' ``book''), and \emph{that are classified as
732 fiction} (* ``has genre'' ``fiction''). We are looking
733 for items that match \emph{all} of these conditions.
735 Our implementation strategy is: collect the items matching
736 each criterion into a view, then join these views. (See
737 the function `satisfy-conditions'
738 \ref{satisfy-conditions}.)
740 If we end up working with large queries and a lot of data,
741 this use of views may not be an efficient way to go -- but
742 we'll cross that bridge when we come to it.
743 \end{notate}
745 \begin{notate}{Search queries}
746 In Note \ref{sphinx-setup} et seq., we give some
747 instructions on how to set up the Sphinx search engine to
748 work with Arxana. However, a much tighter integration of
749 Sphinx into Arxana is possible, and will be coming soon.
750 \end{notate}
752 \begin{common}{queries.lisp}
753 (in-package arxana)
754 (locally-enable-sql-reader-syntax)
755 \end{common}
757 \subsection*{Printing}
759 \begin{notate}{On `print-system-object'} \label{print-system-object}
760 The function `print-system-object' bears some resemblance
761 to `massage', but is for printing instead,
762 and therefore has to be recursive (because triples and
763 places can point to other system objects, printing can be
764 a long and drawn out ordeal).
765 \end{notate}
767 \begin{common}{queries.lisp}
(defun print-system-object (data &optional components)
  "Return a printed representation of DATA.
DATA is dispatched on its shape:
- a string is returned as is;
- a 2-element list is read as coordinates (code ref) and the object
  is fetched: code 0 via `string-lookup', code 1 via `place-lookup',
  code 2 via `triple-lookup'; code 3 gives a placeholder message;
- a 3-element list is printed as a place, \"P<first>|...|\";
- a 7-element list is printed as a triple, \"T<first>[b.m.e]\".
Anything else yields nil.  When COMPONENTS is non-nil a triple is
returned as a list of its three printed parts instead of one string."
  (flet ((render-triple (id triple)
           ;; ID is what follows the \"T\"; TRIPLE is the 7-element
           ;; form that `print-beginning' and friends pick apart.
           (if components
               (mapcar (lambda (printer) (funcall printer triple))
                       (list #'print-beginning
                             #'print-middle
                             #'print-end))
               (concatenate 'string
                            (format nil "T~a[" id)
                            (print-beginning triple) "."
                            (print-middle triple) "."
                            (print-end triple) "]"))))
    (cond
      ;; just return strings
      ((stringp data) data)
      ((not (listp data)) nil)
      (t
       (case (length data)
         ;; printing from coordinates (code, ref)
         ;;
         ;; we'll need some hack to deal with elements-of-theories,
         ;; which, right now, are two elements long but are not
         ;; (code, ref) pairs but rather (local_id, ref) pairs, or
         ;; maybe actually if we take context into consideration,
         ;; they're actually (k, table, local_id, ref) quadruplets.
         ;; Obviously with *that* data we can translate to (code,
         ;; ref).  On the other hand, if we *don't* take it into
         ;; consideration, we probably can't do much of anything.  So
         ;; we should be careful to be aware of just what sort of
         ;; information we're passing around.
         (2 (let ((ref (second data)))
              (case (first data)
                (0 (string-lookup ref))
                (1 (print-system-object (place-lookup ref t)))
                (2 (render-triple ref (triple-lookup ref t)))
                (3 (concatenate 'string
                                "List printing not implemented yet.")))))
         ;; place
         (3 (concatenate 'string
                         (format nil "P~a|" (first data))
                         (print-system-object (cdr data))
                         "|"))
         ;; triple
         (7 (render-triple (first data) data))
         (t nil))))))
;; Each function prints one component of TRIPLE: the pair of items at
;; positions 1-2, 3-4 or 5-6 counted after TRIPLE's first element.

(defun print-beginning (triple)
  "Print the component at positions 1-2 of TRIPLE's tail."
  (print-system-object (isolate-components (cdr triple) 1 2)))

(defun print-middle (triple)
  "Print the component at positions 3-4 of TRIPLE's tail."
  (print-system-object (isolate-components (cdr triple) 3 4)))

(defun print-end (triple)
  "Print the component at positions 5-6 of TRIPLE's tail."
  (print-system-object (isolate-components (cdr triple) 5 6)))
838 \end{common}
840 \begin{notate}{Depth}
841 If we are going to have complicated recursive references,
842 our printer, and anything else that gives the system some
843 semantics, should come with some sort of ``layers'' switch
844 that can be used to limit the amount of recursion we do in
845 any given computation.
846 \end{notate}
848 \begin{notate}{Printing objects as they appear in Lisp} \label{printing-objects-in-lisp}
849 With the following functions we provide facilities for
850 printing an object, either from its id or from the
851 expanded form of the data that represents it in Lisp.
852 (This is one good reason to have one standard form for
853 this data; compare Note \ref{what-is-best-for-lisp}.
854 These functions assume that the id \emph{is} part of
855 what's printed, so if using functions like `triple-lookup'
856 to retrieve data for printing, you'll have to graft the id
857 back on before printing with these functions.)
858 \end{notate}
860 \begin{notate}{Printing theories}
861 We'll want to both print all of the content of a theory,
862 and print \emph{from} the theory in a more limited way.
863 (Perhaps we get the second item for free, already?)
864 \end{notate}
866 \begin{common}{queries.lisp}
;; Printing from the expanded Lisp form of an object: each function
;; passes its arguments straight on to `print-system-object'.

(defun print-string (string &optional components)
  "Print STRING via `print-system-object'."
  (print-system-object string components))

(defun print-place (place &optional components)
  "Print PLACE via `print-system-object'."
  (print-system-object place components))

(defun print-triple (triple &optional components)
  "Print TRIPLE via `print-system-object'."
  (print-system-object triple components))

;; Printing from an id: build the coordinates (code id) first.

(defun print-string-from-id (id &optional components)
  "Print the object with coordinates (0 ID)."
  (let ((coords (list 0 id)))
    (print-system-object coords components)))

(defun print-place-from-id (id &optional components)
  "Print the object with coordinates (1 ID)."
  (let ((coords (list 1 id)))
    (print-system-object coords components)))

(defun print-triple-from-id (id &optional components)
  "Print the object with coordinates (2 ID)."
  (let ((coords (list 2 id)))
    (print-system-object coords components)))
884 \end{common}
886 \begin{notate}{Printing some stuff but not other stuff} \label{printing-some}
887 These functions are good for printing lists as come out of
888 the database. See Note \ref{strings-and-ids} on printing
889 strings.
890 \end{notate}
892 \begin{common}{queries.lisp}
(defun print-strings (strings)
  "Return the second element of every item of STRINGS."
  (loop for row in strings
        collect (second row)))

(defun print-places (places &optional components)
  "Print every item of PLACES with `print-system-object'."
  (loop for item in places
        collect (print-system-object item components)))

(defun print-triples (triples &optional components)
  "Print every item of TRIPLES with `print-system-object'."
  (loop for item in triples
        collect (print-system-object item components)))

(defun print-theories (theories &optional components)
  "Print every item of THEORIES with `print-system-object'."
  (loop for item in theories
        collect (print-system-object item components)))
910 \end{common}
912 \begin{notate}{Printing everything in each table} \label{printing-everything}
913 These functions collect human-readable versions of
914 everything in each table. Notice that `all-strings' is
915 written differently.
916 \end{notate}
918 \begin{common}{queries.lisp}
(defun all-strings ()
  "Return the second column of every row of `strings'."
  (loop for row in (select [*] :from [strings])
        collect (second row)))

(defun all-places ()
  "Print every row of `places' with `print-system-object'."
  (loop for row in (select [*] :from [places])
        collect (print-system-object row)))

(defun all-triples ()
  "Print every row of `triples' with `print-system-object'."
  (loop for row in (select [*] :from [triples])
        collect (print-system-object row)))

(defun all-theories ()
  "Print every row of `theories' with `print-system-object'."
  (loop for row in (select [*] :from [theories])
        collect (print-system-object row)))
933 \end{common}
935 \begin{notate}{Printing on particular dimensions}
936 One possible upgrade to the printing functions would be to
937 provide the built-in to ``curry'' the printout -- for
938 example, just print the source nodes from a list of
939 triples. However, it should of course also be possible to
940 do processing like this in Lisp after the printout has been
941 made (the point is that it is presumably more efficient
942 only to retrieve and format the data we're actually
943 looking for).
944 \end{notate}
946 \begin{notate}{Strings and ids} \label{strings-and-ids}
947 Unlike other objects, strings don't get printed with their
948 ids. We should probably provide an \emph{option} to print
949 with ids (this could be helpful for subsequent work with
950 the strings in question; on the other hand, since strings
951 are being kept unique, we can immediately exchange a
952 string and its id, so I'm not sure if it's necessary to
953 have an explicit ``option'').
954 \end{notate}
956 \subsection*{Functions that establish basic graph structure}
958 \begin{notate}{Thinking about graph-like data} \label{graph-like-data}
959 Here we have in mind one or more objects (e.g. a
960 particular source and sink) that is associated with
961 potentially any number of triples (e.g. all the possible
962 middles running between these two identified objects).
963 These functions establish various forms of locality or
964 neighborhood within the data.
966 The results of such queries can be optionally cached in a
967 view, which is useful for further processing
968 (cf. \ref{satisfy-conditions}).
970 These functions take input in the form of strings and/or
971 coordinates (cf. Note \ref{massage}).
972 \end{notate}
974 \begin{common}{queries.lisp}
(defun triples-given-beginning (node &optional view)
  "Get the triples outbound from NODE, i.e. those whose first slot
\(code1, ref1) matches NODE.

NODE is passed through `massage'; the first two elements of the
result are used as the code and the reference to match.  When
`massage' returns nil, nothing is created and nil is returned.

Optional argument VIEW causes the results to be selected into a
view with that name, which is left in place for the caller.
Without VIEW a scratch view is used and dropped again before
returning.  The value returned is whatever `select' reads back
from the view."
  (let ((data (massage node))
        (window (or view "interal-view")))
    (when data
      ;; The :as argument of `create-view' is a symbolic query
      ;; expression, so the query is written with the bracketed
      ;; [select ...] syntax.  Calling the function `select' here
      ;; would run the query and pass its rows instead.
      (create-view
       window
       :as [select [*]
                   :from [triples]
                   :where [and [= [code1] (first data)]
                               [= [ref1] (second data)]]])
      ;; Drop the scratch view even when reading it back fails.
      (unwind-protect
           (select [*] :from window)
        (unless view
          (drop-view window))))))
(defun triples-given-end (node &optional view)
  "Get the triples inbound into NODE, i.e. those whose third slot
\(code3, ref3) matches NODE.

NODE is passed through `massage'; the first two elements of the
result are used as the code and the reference to match.  When
`massage' returns nil, nothing is created and nil is returned.

Optional argument VIEW causes the results to be selected into a
view with that name, which is left in place for the caller.
Without VIEW a scratch view is used and dropped again before
returning.  The value returned is whatever `select' reads back
from the view."
  (let ((data (massage node))
        (window (or view "interal-view")))
    (when data
      ;; The :as argument of `create-view' is a symbolic query
      ;; expression, so the query is written with the bracketed
      ;; [select ...] syntax.  Calling the function `select' here
      ;; would run the query and pass its rows instead.
      (create-view
       window
       :as [select [*]
                   :from [triples]
                   :where [and [= [code3] (first data)]
                               [= [ref3] (second data)]]])
      ;; Drop the scratch view even when reading it back fails.
      (unwind-protect
           (select [*] :from window)
        (unless view
          (drop-view window))))))
(defun triples-given-middle (edge &optional view)
  "Get the triples that run along EDGE, i.e. those whose second slot
\(code2, ref2) matches EDGE.

EDGE is passed through `massage'; the first two elements of the
result are used as the code and the reference to match.  When
`massage' returns nil, nothing is created and nil is returned.

Optional argument VIEW causes the results to be selected into a
view with that name, which is left in place for the caller.
Without VIEW a scratch view is used and dropped again before
returning.  The value returned is whatever `select' reads back
from the view."
  (let ((data (massage edge))
        (window (or view "interal-view")))
    (when data
      ;; The :as argument of `create-view' is a symbolic query
      ;; expression, so the query is written with the bracketed
      ;; [select ...] syntax.  Calling the function `select' here
      ;; would run the query and pass its rows instead.
      (create-view
       window
       :as [select [*]
                   :from [triples]
                   :where [and [= [code2] (first data)]
                               [= [ref2] (second data)]]])
      ;; Drop the scratch view even when reading it back fails.
      (unwind-protect
           (select [*] :from window)
        (unless view
          (drop-view window))))))
(defun triples-given-middle-and-end (edge node &optional
                                     view)
  "Get the triples that run along EDGE into NODE, i.e. those whose
second slot (code2, ref2) matches EDGE and whose third slot
\(code3, ref3) matches NODE.

EDGE and NODE are each passed through `massage'; the first two
elements of each result are used as the code and the reference
to match.  When either call returns nil, nothing is created and
nil is returned.

Optional argument VIEW causes the results to be selected into a
view with that name, which is left in place for the caller.
Without VIEW a scratch view is used and dropped again before
returning.  The value returned is whatever `select' reads back
from the view."
  (let ((edgedata (massage edge))
        (nodedata (massage node))
        (window (or view "interal-view")))
    (when (and edgedata nodedata)
      ;; The :as argument of `create-view' is a symbolic query
      ;; expression, so the query is written with the bracketed
      ;; [select ...] syntax.  Calling the function `select' here
      ;; would run the query and pass its rows instead.
      (create-view
       window
       :as [select [*]
                   :from [triples]
                   :where [and [= [code2] (first edgedata)]
                               [= [ref2] (second edgedata)]
                               [= [code3] (first nodedata)]
                               [= [ref3] (second nodedata)]]])
      ;; Drop the scratch view even when reading it back fails.
      (unwind-protect
           (select [*] :from window)
        (unless view
          (drop-view window))))))
(defun triples-given-beginning-and-middle (node edge
                                           &optional view)
  "Get the triples that run from NODE along EDGE, i.e. those whose
first slot (code1, ref1) matches NODE and whose second slot
\(code2, ref2) matches EDGE.

NODE and EDGE are each passed through `massage'; the first two
elements of each result are used as the code and the reference
to match.  When either call returns nil, nothing is created and
nil is returned.

Optional argument VIEW causes the results to be selected into a
view with that name, which is left in place for the caller.
Without VIEW a scratch view is used and dropped again before
returning.  The value returned is whatever `select' reads back
from the view."
  (let ((nodedata (massage node))
        (edgedata (massage edge))
        (window (or view "interal-view")))
    (when (and nodedata edgedata)
      ;; The :as argument of `create-view' is a symbolic query
      ;; expression, so the query is written with the bracketed
      ;; [select ...] syntax.  Calling the function `select' here
      ;; would run the query and pass its rows instead.
      (create-view
       window
       :as [select [*]
                   :from [triples]
                   :where [and [= [code1] (first nodedata)]
                               [= [ref1] (second nodedata)]
                               [= [code2] (first edgedata)]
                               [= [ref2] (second edgedata)]]])
      ;; Drop the scratch view even when reading it back fails.
      (unwind-protect
           (select [*] :from window)
        (unless view
          (drop-view window))))))
(defun triples-given-beginning-and-end (node1 node2
                                        &optional view)
  "Get the triples that run from NODE1 to NODE2, i.e. those whose
first slot (code1, ref1) matches NODE1 and whose third slot
\(code3, ref3) matches NODE2.

NODE1 and NODE2 are each passed through `massage'; the first two
elements of each result are used as the code and the reference
to match.  When either call returns nil, nothing is created and
nil is returned.

Optional argument VIEW causes the results to be selected into a
view with that name, which is left in place for the caller.
Without VIEW a scratch view is used and dropped again before
returning.  The value returned is whatever `select' reads back
from the view."
  (let ((node1data (massage node1))
        (node2data (massage node2))
        (window (or view "interal-view")))
    (when (and node1data node2data)
      ;; The :as argument of `create-view' is a symbolic query
      ;; expression, so the query is written with the bracketed
      ;; [select ...] syntax.  Calling the function `select' here
      ;; would run the query and pass its rows instead.
      (create-view
       window
       :as [select [*]
                   :from [triples]
                   :where [and [= [code1] (first node1data)]
                               [= [ref1] (second node1data)]
                               [= [code3] (first node2data)]
                               [= [ref3] (second node2data)]]])
      ;; Drop the scratch view even when reading it back fails.
      (unwind-protect
           (select [*] :from window)
        (unless view
          (drop-view window))))))
;; This one uses `select-one' instead of `select' to read the
;; view back.
(defun triple-exact-match (node1 edge node2 &optional
                           view)
  "Look up the triple that runs from NODE1 along EDGE to NODE2, i.e.
the one whose three slots match NODE1, EDGE and NODE2 in that
order.

Each argument is passed through `massage'; the first two
elements of each result are used as the code and the reference
to match.  When any of the three calls returns nil, nothing is
created and nil is returned.

Optional argument VIEW causes the results to be selected into a
view with that name, which is left in place for the caller.
Without VIEW a scratch view is used and dropped again before
returning.  The value returned is whatever `select-one' reads
back from the view (presumably a single row rather than a list
of rows -- confirm against the definition of `select-one')."
  (let ((node1data (massage node1))
        (edgedata (massage edge))
        (node2data (massage node2))
        (window (or view "interal-view")))
    (when (and node1data edgedata node2data)
      ;; The :as argument of `create-view' is a symbolic query
      ;; expression, so the query is written with the bracketed
      ;; [select ...] syntax.  Calling the function `select' here
      ;; would run the query and pass its rows instead.
      (create-view
       window
       :as [select [*]
                   :from [triples]
                   :where [and [= [code1] (first node1data)]
                               [= [ref1] (second node1data)]
                               [= [code2] (first edgedata)]
                               [= [ref2] (second edgedata)]
                               [= [code3] (first node2data)]
                               [= [ref3] (second node2data)]]])
      ;; Drop the scratch view even when reading it back fails.
      (unwind-protect
           (select-one [*] :from window)
        (unless view
          (drop-view window))))))
1127 \end{common}
1129 \begin{notate}{Becoming flexible about a string's status}
1130 One possible upgrade would be to provide versions of these
1131 functions that will flexibly accept either a string or a
1132 ``placed string'' as input (since frequently we're
1133 interested in content of that sort; see
1134 \ref{importing-sketch}).
1135 \end{notate}
1137 \subsection*{Finding places that satisfy some property}
1139 \begin{notate}{On `get-places-subject-to-constraint'}
1140 Like `get-places' (Note \ref{get-places}), but this
1141 time takes an extra condition of the form (A B C)
1142 where one of A, B, and C is `nil'. We test each
1143 of the places in place of this `nil', to see if a
1144 triple matching that criterion exists.
1145 \end{notate}
1147 \begin{common}{queries.lisp}
(defun get-places-subject-to-constraint (symbol condition)
  "Filter the places returned by (get-places SYMBOL) through CONDITION.

CONDITION is a sequence of arguments for `triple-relaxed-match'
in which the nil entries mark the slot to be tested.  For each
candidate place, every nil entry is replaced by the list
\(1 PLACE) and `triple-relaxed-match' is applied to the result;
the place is kept when that call returns non-nil.

Accepted places are accumulated at the front of the result, so
they come back in the reverse of the order `get-places'
produced them."
  (let (accepted-places)
    (dolist (place (get-places symbol) accepted-places)
      (let ((filled-condition
              (map 'list
                   (lambda (slot)
                     (if slot slot (list 1 place)))
                   condition)))
        (when (apply #'triple-relaxed-match filled-condition)
          (push place accepted-places))))))
1161 \end{common}
1163 \subsection*{Logic}
1165 \begin{notate}{Caution: compatibility with theories?}
1166 For the moment, I'm not sure how compatible this function
1167 is with the theories apparatus we've established, or with
1168 the somewhat vaguer notion of trans-theory questions or
1169 concerns. Global queries should work just fine, but
1170 theory-local questions may need some work. Before getting
1171 into compatibility of these questions with the theory
1172 apparatus, I want to make sure that apparatus is working
1173 properly. Note that the questions here do rely on
1174 functions for graph-like thinking (Note
1175 \ref{graph-like-data} et seq.), and it would certainly
1176 make sense to port to ``subgraphs'' as represented by
1177 theories.
1178 \end{notate}
1180 \begin{notate}{On `satisfy-conditions'} \label{satisfy-conditions}
1181 This function finds the items which match constraints.
1182 Constraints take the form (A B C), where precisely one of
1183 A, B, or C should be `nil', and any of the others can be
1184 either input suitable for `massage', or
1185 `t'. The `nil' entry stands for the object we're
1186 interested in. Any `t' entries are wildcards.
1188 The first thing that happens as the function runs is that
1189 views are established exhibiting each group of triples
1190 satisfying each predicate. The names of these views are
1191 then massaged into a large SQL query. (It is important to
1192 ``typeset'' all of this correctly for our SQL `query'.)
1193 Finally, once that query has been run, we clean up,
1194 dropping all of the views we created.
1195 \end{notate}
1197 \begin{common}{queries.lisp}
(defun satisfy-conditions (constraints)
  "Run the SQL query that finds the triples matching all CONSTRAINTS.

CONSTRAINTS is a list of constraints in the form accepted by
`generate-views' and `generate-where-condition'.  One view per
constraint is created by `generate-views'; the view names are
spliced into the FROM clause by `format-views', and the WHERE
clause comes from `generate-where-condition'.  The columns
selected are those of the first view, v1.

Returns whatever `query' returns for that statement, or nil when
CONSTRAINTS is empty (there is then no view to select from).
The views are dropped again before returning, including when
building or running the query signals an error."
  (when constraints
    (let ((views (generate-views constraints)))
      (unwind-protect
           (query
            (concatenate
             'string
             "select v1.id, v1.code1, v1.ref1, "
             "v1.code2, v1.ref2, "
             "v1.code3, v1.ref3 "
             "from "
             (format-views views)
             "where "
             (generate-where-condition views constraints)
             ";"))
        ;; A view is only created when its constraint could be
        ;; resolved, so some of the names may not exist; skip
        ;; those quietly instead of aborting the cleanup.
        (dolist (name views)
          (drop-view name :if-does-not-exist :ignore))))))
1220 \end{common}
1222 \begin{notate}{Subroutines for `satisfy-conditions'}
1223 The functions below produce bits and pieces of the SQL
1224 query that `satisfy-conditions' submits. The point of
1225 `generate-views' is to create a series of views centered
1226 on the term(s) we're interested in (the `nil' slots in
1227 each submitted constraint). With
1228 `generate-where-condition', we insist that all of these
1229 interesting terms should, in fact, be equal to one
1230 another.
1231 \end{notate}
1233 \begin{notate}{On `generate-views'}
1234 In a `cond' form, for each constraint we must select the
1235 appropriate function to generate the view; at the very end
1236 of the cond form, we spit out the viewname (for `mapcar'
1237 to add to the list of views).
1238 \end{notate}
1240 \begin{common}{queries.lisp}
(defun generate-views (constraints)
  "Create one database view per element of CONSTRAINTS.

Each constraint is a list (A B C) of beginning, middle and end.
A nil entry marks the slot being asked about, a t entry is a
wildcard, and any other entry is handed to the matching
`triples-given-...' function together with the view name.

The views are named \"v1\", \"v2\", ... in the order of
CONSTRAINTS, and the list of these names is returned.  A name is
returned for every constraint, even when no branch below matches
or when the `triples-given-...' function called creates nothing."
  (loop
    for constraint in constraints
    for counter from 1
    collect
    (let ((a (first constraint))
          (b (second constraint))
          (c (third constraint))
          (viewname (format nil "v~a" counter)))
      (cond
        ;; A * ?  or  A ? *
        ((or (and (eq b t) (null c))
             (and (null b) (eq c t)))
         (triples-given-beginning a viewname))
        ;; * B ?  or  ? B *
        ((or (and (eq a t) (null c))
             (and (null a) (eq c t)))
         (triples-given-middle b viewname))
        ;; * ? C  or  ? * C
        ((or (and (eq a t) (null b))
             (and (null a) (eq b t)))
         (triples-given-end c viewname))
        ;; ? B C
        ((null a)
         (triples-given-middle-and-end b c viewname))
        ;; A ? C -- the middle is the unknown, so match on the
        ;; beginning and the end.
        ((null b)
         (triples-given-beginning-and-end a c viewname))
        ;; A B ? -- the end is the unknown, so match on the
        ;; beginning and the middle.
        ((null c)
         (triples-given-beginning-and-middle a b viewname)))
      ;; The view name, not the query result, is what gets
      ;; collected.
      viewname)))
(defun format-views (views)
  "Return the names in VIEWS as one string for a FROM clause.

All names but the last are each followed by a comma; the last
name is followed by a single space, e.g. \"v1,v2,v3 \"."
  (format nil "~{~a,~}~a "
          (butlast views)
          (car (last views))))
(defun generate-where-condition (views conditions)
  "Build the WHERE clause that ties the views in VIEWS together.

VIEWS is the list of view names and CONDITIONS the list of
constraints they were generated from, in the same order.  For
each constraint `select-component' gives the slot number of its
nil entry.  The clause states that this slot of the first view
\(always referred to as v1) equals the corresponding slot of
every other view, on both the code and the ref column, with the
comparisons joined by \" and \".

With N views there are N-1 such comparisons.  With a single view
there is nothing to compare against, so v1's slot is compared
with itself to keep the clause non-empty."
  (let* ((c (select-component (first conditions)))
         (comparisons
           ;; Pair every view after the first with its own
           ;; constraint; each pairing is visited exactly once.
           (loop
             for view in (rest views)
             for condition in (rest conditions)
             for comp = (select-component condition)
             collect (format nil
                             "(v1.code~a = ~a.code~a) and (v1.ref~a = ~a.ref~a)"
                             c view comp
                             c view comp))))
    (format nil "~{~a~^ and ~}"
            (or comparisons
                (list (format nil
                              "(v1.code~a = v1.code~a) and (v1.ref~a = v1.ref~a)"
                              c c c c))))))
(defun select-component (condition)
  "Return \"1\", \"2\" or \"3\" according to which of the first three
elements of CONDITION is the first to be nil, or nil when none
of them is."
  (loop
    for accessor in (list #'first #'second #'third)
    for label in '("1" "2" "3")
    when (null (funcall accessor condition))
      return label))
1347 \end{common}
1349 \begin{common}{queries.lisp}
1350 (locally-disable-sql-reader-syntax)
1351 \end{common}
1353 \begin{notate}{Even more complicated logic}
1354 In order to conveniently manage complex queries, it would
1355 be nice if we could store the results of earlier queries
1356 into views, so that we can combine several such views for
1357 further processing.
1358 \end{notate}