<!DOCTYPE style-sheet PUBLIC "-//James Clark//DTD DSSSL Style Sheet//EN">

<style-sheet>
<style-specification>
<style-specification-body>

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; This file is part of refdb
;; Markus Hoenicka 010918
;; markus@mhoenicka.de
;; $Id: db2ris.dsl,v 1.2 2002/12/13 20:33:50 mhoenicka Exp $
;;
;;
;;
;;; The purpose of this stylesheet is to convert a DocBook Bibliography
;;; element into a list of RIS datasets
;;
;;; Limitations: currently only BiblioEntry elements are supported, i.e.
;;;              raw bibliography entries. Cooked would be a PITA.

;;; Comments: this stylesheet contains an implementation of assoc. assoc is
;;;           not defined as a primitive in Jade, but it is defined
;;;           in OpenJade. If you use the latter, comment out the
;;;           definition to gain a little speed
;;
;;;           the reference type is determined by the function
;;;           reftype-heuristic. The default implementation checks
;;;           the role attribute of BiblioEntry. This should contain
;;;           the RIS reference type, e.g. "BOOK" or "JOUR". If the
;;;           role attribute is missing, the value of DEFAULTREFTYPE is
;;;           used instead. You can set the value of DEFAULTREFTYPE on
;;;           the Openjade command line with the -V switch (Jade
;;;           unfortunately does not support setting arbitrary
;;;           variable values, it just supports setting variables
;;;           to #t. So if you use Jade, you have to change the value
;;;           in this script). The implementation of this function can of
;;;           course be changed or extended to your needs
;;
;;; Processing: <!-- jade -t sgml -d db2ris.dsl [sgml-declaration] sgml-file | sed 'N;s%^>*<db2ris\n>\(.*\)</db2ris$%\1%g' | sed 's%^>$%%'g -->

(declare-flow-object-class element
			   "UNREGISTERED::James Clark//Flow Object Class::element")
(declare-flow-object-class formatting-instruction
			   "UNREGISTERED::James Clark//Flow Object Class::formatting-instruction")

;; variables
(define AUTHORLONG "f")
(define DEFAULTREFTYPE "JOUR")

;; the association list to access the above variable values by a string name
(define db2ris-params
  (list
   (cons 'AUTHORLONG AUTHORLONG)
   (cons 'DEFAULTREFTYPE DEFAULTREFTYPE)))


;; make sure we get only biblioentry elements
(element BOOK
  (process-node-list (select-elements (descendants (current-node)) "bibliography")))

(element ARTICLE
  (process-node-list (select-elements (descendants (current-node)) "bibliography")))

(element BIBLIOGRAPHY
  (process-node-list (select-elements (descendants (current-node)) "biblioentry")))

(element BIBLIOENTRY
  (let ((direct-list (node-list (select-elements (descendants (current-node)) (normalize "abbrev"))
				(select-elements (descendants (current-node)) (normalize "abstract"))
				(select-elements (descendants (current-node)) (normalize "address"))
				(select-elements (descendants (current-node)) (normalize "author"))
				(select-elements (descendants (current-node)) (normalize "corpauthor"))
				(select-elements (descendants (current-node)) (normalize "edition"))
				(select-elements (descendants (current-node)) (normalize "editor"))
				(select-elements (descendants (current-node)) (normalize "isbn"))
				(select-elements (descendants (current-node)) (normalize "issn"))
				(select-elements (descendants (current-node)) (normalize "issuenum"))
				(select-elements (descendants (current-node)) (normalize "pagenums"))
				(select-elements (descendants (current-node)) (normalize "pubdate"))
				(select-elements (descendants (current-node)) (normalize "publisher"))
				(select-elements (descendants (current-node)) (normalize "title"))
				(select-elements (descendants (current-node)) (normalize "volumenum"))))
	(note-list (node-list (select-elements (descendants (current-node)) (normalize "bibliomisc"))
				(select-elements (descendants (current-node)) (normalize "collab"))
				(select-elements (descendants (current-node)) (normalize "confgroup"))
				(select-elements (descendants (current-node)) (normalize "contractnum"))
				(select-elements (descendants (current-node)) (normalize "contractsponsor"))
				(select-elements (descendants (current-node)) (normalize "copyright"))
				(select-elements (descendants (current-node)) (normalize "corpname"))
				(select-elements (descendants (current-node)) (normalize "invpartnumber"))
				(select-elements (descendants (current-node)) (normalize "orgname"))
				(select-elements (descendants (current-node)) (normalize "othercredit"))
				(select-elements (descendants (current-node)) (normalize "printhistory"))
				(select-elements (descendants (current-node)) (normalize "productname"))
				(select-elements (descendants (current-node)) (normalize "productnumber"))
				(select-elements (descendants (current-node)) (normalize "pubsnumber"))
				(select-elements (descendants (current-node)) (normalize "releaseinfo"))
				(select-elements (descendants (current-node)) (normalize "revhistory"))
				(select-elements (descendants (current-node)) (normalize "seriesvolnums"))
				(select-elements (descendants (current-node)) (normalize "subtitle"))
				(select-elements (descendants (current-node)) (normalize "titleabbrev"))
				(select-elements (descendants (current-node)) (normalize "citetitle"))))
	(reftype (reftype-heuristic (current-node))))

    (make sequence
      ;; each RIS dataset starts with a newline
      (make element
	gi: "db2ris"
	(empty-sosofo))
      (make element
	gi: "db2ris"
	(literal "TY  - " reftype))
      ;; first process the elements that directly translate into a RIS tag
      (process-node-list direct-list)
      ;; next, process the elements that we pool into the notes tag
      (if (node-list-empty? note-list)
	  (empty-sosofo)
	  (make element
	    gi: "db2ris"
	    (literal "N1  - ")
	    (process-node-list note-list)))
      (make element
	gi: "db2ris"
	(literal "ER  - ")))))

;;++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
;; elements of direct-list
(element TITLE
  (let* ((reftype (reftype-heuristic (ancestor (normalize "biblioentry") (current-node))))
	 (tag-string (cond ((string=? reftype (normalize "book")) "BT  - ")
			   ((string=? reftype (normalize "jour")) "TI  - ")
			   (else "TI  - "))))
    (make element
      gi: "db2ris"
      (literal tag-string)
      (process-children)
)))

(element (SERIESINFO TITLE)
  (make element
    gi: "db2ris"
    (literal "T3  - ")
    (process-children)
))

(element (BIBLIOSET TITLE)
  ;; Unfortunately (for our purpose) both the article title and the 
  ;; name of a journal are encoded in TITLE elements which cannot be
  ;; easily distinguished by their position in the node list. The current
  ;; implementation assumes that the journal-related information is within a
  ;; BiblioSet with the RELATION attribute set to either "journal"
  ;; "pub", "abbrev", or "full" and that for the article-related information
  ;; the attribute value is something else. If you use a different mechanism
  ;; to distinguish title types you should modify the titletype-heuristic
  ;; function accordingly
  (let* ((titletype (titletype-heuristic (current-node)))
	 (tag-string (cond ((string=? titletype (normalize "journal")) "JO  - ")
			   ((string=? titletype (normalize "pub")) "JO  - ")
			   ((string=? titletype (normalize "abbrev")) "JO  - ")
			   ((string=? titletype (normalize "full")) "JF  - ")
			   ((string=? titletype (normalize "article")) "TI  - ")
			   (else "TI  - "))))
    (make element
      gi: "db2ris"
      (literal tag-string)
      (process-children)
)))

(element AUTHOR
  (let ((surname (select-elements (descendants (current-node)) "surname"))
	(firstname (select-elements (descendants (current-node)) "firstname"))
	(othername (select-elements (descendants (current-node)) "othername"))
	(lineage (select-elements (descendants (current-node)) "lineage")))
    (make element
      gi: "db2ris"
      (literal "AU  - ")
      (if (node-list-empty? surname)
	  (literal "Anonymous")
	  (make sequence
	    (process-node-list (node-list-first surname))
	    (process-first-descendant 'FIRSTNAME)
	    (process-first-descendant 'OTHERNAME)
	    (process-first-descendant 'LINEAGE)))
)))
    
(element EDITOR
  (let ((surname (select-elements (descendants (current-node)) (normalize "surname")))
	(firstname (select-elements (descendants (current-node)) (normalize "firstname")))
	(othername (select-elements (descendants (current-node)) (normalize "othername")))
	(lineage (select-elements (descendants (current-node)) (normalize "lineage"))))
    (make element
      gi: "db2ris"
      (literal "A2  - ")
      (if (node-list-empty? surname)
	  (literal "Anonymous")
	  (make sequence
	    (process-node-list (node-list-first surname))
	    (process-first-descendant 'FIRSTNAME)
	    (process-first-descendant 'OTHERNAME)
	    (process-first-descendant 'LINEAGE)))
)))
    
(element (SERIESINFO EDITOR)
  (let ((surname (select-elements (descendants (current-node)) (normalize "surname")))
	(firstname (select-elements (descendants (current-node)) (normalize "firstname")))
	(othername (select-elements (descendants (current-node)) (normalize "othername")))
	(lineage (select-elements (descendants (current-node)) (normalize "lineage"))))
    (make element
      gi: "db2ris"
      (literal "A3  - ")
      (if (node-list-empty? surname)
	  (literal "Anonymous")
	  (make sequence
	    (process-node-list (node-list-first surname))
	    (process-first-descendant 'FIRSTNAME)
	    (process-first-descendant 'OTHERNAME)
	    (process-first-descendant 'LINEAGE)))
)))
    
(element FIRSTNAME
; we have to consider the following common cases. Some of these may be
; considered markup errors, but they should be processed anyway:
; Franklin
; Franklin D.
; Franklin Delano
; F. D.
; F.D.
; F.
; F
  (let* ((namestring (data (current-node)))
	 (nametokens (match-split namestring " "))
	 (numtokens (length nametokens)))
    (if (> numtokens 0)
	(make sequence
	  (literal ",")
	  (if (= 1 (string-length namestring))
	      (literal namestring ".")
	      (literal (process-namestring nametokens numtokens))))
	(empty-sosofo))))

(element SURNAME
  (make sequence
    (process-children)))

(element OTHERNAME
  (let* ((namestring (data (current-node)))
	 (nametokens (match-split namestring " "))
	 (numtokens (length nametokens)))
    (if (> numtokens 0)
	(if (= 1 (string-length namestring))
	    (literal namestring ".")
	    (literal (process-namestring nametokens numtokens)))
	(empty-sosofo))))

(element LINEAGE
  (make sequence
    (literal ",")
    (process-children)))

(element CORPAUTHOR
  (make element
    gi: "db2ris"
    (literal "AU  - ")
    (process-children)
))

(element VOLUMENUM
  (make element
    gi: "db2ris"
    (literal "VL  - ")
    (process-children)
))

(element ISSUENUM
  (make element
    gi: "db2ris"
    (literal "IS  - ")
    (process-children)
))

(element PAGENUMS
  (let* ((string (data (current-node)))
	 (dash (strchr string #\- 0)))
    (if dash
	(make sequence
	  (make element
	    gi: "db2ris"
	    (literal "SP  - " (substring string 0 dash)))
	  (make element
	    gi: "db2ris"
	    (literal "EP  - " (substring string (+ dash 1) (string-length string)))))
	(make element
	  gi: "db2ris"
	  (make sequence
	    (literal "SP  - ")
	    (process-children)))
)))

(element PUBDATE
  (let ((datestring (process-date (data (current-node)))))
    (make element
      gi: "db2ris"
      (literal "PY  - " datestring)
)))

(element PUBLISHER
  (make element
    gi: "db2ris"
    (literal "PB  - ")
    (process-children)
))

(element ADDRESS
  (make element
    gi: "db2ris"
    (literal "AD  - ")
    (process-children)
))

(element ABBREV
  (make element
    gi: "db2ris"
    (literal "ID  - ")
    (process-children)
))

(element ABSTRACT
  (make element
    gi: "db2ris"
    (literal "N2  - ")
    (process-children)
))

(element ISSN
  (make element
    gi: "db2ris"
    (literal "SN  - ")
    (process-children)
))

(element ISBN
  (make element
    gi: "db2ris"
    (literal "SN  - ")
    (process-children)
))

;;++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
;; elements of note-list

(element COLLAB
  (make sequence
    (literal "COLLAB:")
    (process-children)))

(element CONFGROUP
  (make sequence
    (literal "CONFGROUP:")
    (process-children)))

(element CONTRACTNUM
  (make sequence
    (literal "CONTRACTNUM:")
    (process-children)))

(element CONTRACTSPONSOR
  (make sequence
    (literal "CONTRACTSPONSOR:")
    (process-children)))

(element COPYRIGHT
  (make sequence
    (literal "COPYRIGHT:")
    (process-children)))

(element CORPNAME
  (make sequence
    (literal "CORPNAME:")
    (process-children)))

(element INVPARTNUMBER
  (make sequence
    (literal "INVPARTNUMBER:")
    (process-children)))

(element ORGNAME
  (make sequence
    (literal "ORGNAME:")
    (process-children)))

(element OTHERCREDIT
  (make sequence
    (literal "OTHERCREDIT:")
    (process-children)))

(element PRINTHISTORY
  (make sequence
    (literal "PRINTHISTORY:")
    (process-children)))

(element PRODUCTNAME
  (make sequence
    (literal "PRODUCTNAME:")
    (process-children)))

(element PRODUCTNUMBER
  (make sequence
    (literal "PRODUCTNUMBER:")
    (process-children)))

(element PUBSNUMBER
  (make sequence
    (literal "PUBSNUMBER:")
    (process-children)))

(element RELEASEINFO
  (make sequence
    (literal "RELEASEINFO:")
    (process-children)))

(element REVHISTORY
  (make sequence
    (literal "REVHISTORY:")
    (process-children)))

(element SERIESVOLNUMS
  (make sequence
    (literal "SERIESVOLNUMS:")
    (process-children)))

(element SUBTITLE
  (make sequence
    (literal "SUBTITLE:")
    (process-children)))

(element TITLEABBREV
  (make sequence
    (literal "TITLEABBREV:")
    (process-children)))

(element CITETITLE
  (make sequence
    (literal "CITETITLE:")
    (process-children)))

;;++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
;; library functions

(define (strchr thestring thechar start-offset)
  ;; REFENTRY strchr
  ;; PURP returns the offset of the first occurrence of thechar in
  ;; thestring starting at start-offset
  ;; DESC
  ;; sorta inspired by the libc function of the same name
  ;; will return -1 if start-offset is beyond the end of the string
  ;; if start-offset is less than zero, it will silently be changed
  ;; to zero. If thechar is not found, the return value is #f
  ;; ARGS
  ;; ARG 'thestring'
  ;; the string to search
  ;; /ARG
  ;; ARG 'thechar'
  ;; the character to search for
  ;; /ARG
  ;; ARG 'start-offset'
  ;; the offset where the search should start
  ;; /ARG
  ;; /ARGS
  ;; /DESC
  ;; EXAMPLE
  ;; (strchr "foo" \#o 0) will return 1
  ;; /EXAMPLE
  ;; /REFENTRY
  (let loop ((count (max 0 start-offset))
	     (end (- (string-length thestring) 1)))
    (if (> count end) ; could happen in the first round due to wrong offset value
	-1
	(if (char=? (string-ref thestring count) thechar)
	    count
	    (if (= count end)
		#f
		(loop (+ count 1) end))))))


(define (db2ris-symbol-value varname)
  ;; REFENTRY db2ris-symbol-value
  ;; PURP returns the value of a variable with the name of a given string
  ;; DESC
  ;; uses the associative list refdb-styleparams to access the values
  ;; of the above variables by their names in a string
  ;; ARGS
  ;; ARG 'varname'
  ;; the string that holds the name of a variable
  ;; /ARG
  ;; /ARGS
  ;; /DESC
  ;; EXAMPLE
  ;; (refdb-symbol-value 'foo) where foo is a string. This will return
  ;; the value of the variable foo
  ;; /EXAMPLE
  ;; /REFENTRY
  (let ((a (assoc (string->symbol varname) db2ris-params)))
    (if a (cdr a) #f)))

(define (match-split string target)
  ;; Splits string at every occurrence of target and returns the result
  ;; as a list. "this is a test" split at "is" returns
  ;; ("th" "is" " " "is" " a test")
  ;; http://www.mulberrytech.com/dsssl/dsssldoc/procedures/c0203.html
  (let loop ((result '()) (current "") (rest string))
    (if (< (string-length rest) (string-length target))
	(append result (if (equal? (string-append current rest) "")
				  '()
				  (list (string-append current rest))))
	(if (equal? target (substring rest 0 (string-length target)))
	    (loop (append result
			  (if (equal? current "")
			      '()
			      (list current))
			  (list target))
		  ""
		  (substring rest (string-length target) (string-length rest)))
	    (loop result
		  (string-append current (substring rest 0 1))
		  (substring rest 1 (string-length rest)))))))


(define (reftype-heuristic node)
  ;; REFENTRY reftype-heuristic
  ;; PURP tries to determine the RIS publication type and returns it as
  ;; a string
  ;; DESC
  ;; This implementation expects the RIS publication type in the role
  ;; attribute of BiblioEntry. If you use a different mechanism to 
  ;; distinguish reference types, you should modify this implementation
  ;; ARGS
  ;; ARG 'node'
  ;; the BiblioEntry node
  ;; /ARG
  ;; /ARGS
  ;; /DESC
  ;; EXAMPLE
  ;; (reftype-heuristic (current-node)) will return "BOOK" if the
  ;; ROLE attribute of the BIBLIOENTRY element was set to "BOOK"
  ;; /EXAMPLE
  ;; /REFENTRY
  (let ((reftype (attribute-string (normalize "role") node)))
    (if reftype
	reftype
	(db2ris-symbol-value "DEFAULTREFTYPE")	)))


(define (titletype-heuristic node)
  ;; REFENTRY titletype-heuristic
  ;; PURP tries to determine the type of a title and returns it as
  ;; a string
  ;; DESC
  ;; Unfortunately (for our purpose) both the article title and the 
  ;; name of a journal are encoded in TITLE elements which cannot be
  ;; easily distinguished by their position in the node list. This
  ;; function tries to do this anyway. The current implementation
  ;; assumes that the journal-related information is within a
  ;; BiblioSet with the RELATION attribute set to either "journal"
  ;; "pub", "abbrev", or "full" and that
  ;; for the article-related information the attribute value is something
  ;; else. If you use a different mechanism to distinguish title types
  ;; you should modify this function accordingly
  ;; ARGS
  ;; ARG 'node'
  ;; the BiblioSet node
  ;; /ARG
  ;; /ARGS
  ;; /DESC
  ;; EXAMPLE
  ;; (titletype-heuristic (current-node)) will return "journal" if the
  ;; RELATION attribute of the parent BIBLIOSET element was set to "journal"
  ;; /EXAMPLE
  ;; /REFENTRY
  (normalize (attribute-string (normalize "relation") (ancestor (normalize "biblioset") node))))


(define (process-namestring nametokens numtokens)
  ;; REFENTRY process-namestring
  ;; PURP processes a tokenized namestring
  ;; DESC
  ;; this function sends each token to process-nametoken and appends
  ;; the returned string to a result string. This assembled string
  ;; is the return value. If there are no tokens, the result will be
  ;; an empty string
  ;; ARGS
  ;; ARG 'nametokens'
  ;; a list of strings (a tokenized namestring)
  ;; /ARG
  ;; ARG 'numtokens'
  ;; the number of tokens in nametokens
  ;; /ARG
  ;; /ARGS
  ;; /DESC
  ;; EXAMPLE
  ;; (process-namestring tokens 5) could return "Franklin D." if the
  ;; original namestring was "Franklin Delano". The exact formatting
  ;; of each token depends on the called function process-nametoken
  ;; /EXAMPLE
  ;; /REFENTRY
  (let loop ((count 0)
	     (result ""))
      (let ((result (string-append result (process-nametoken (list-ref nametokens count) count (- numtokens 1)))))
	(if (= count (- numtokens 1))
	    result
	    (loop (+ count 1) result)))))

(define (process-nametoken token count maxcount)
  ;; REFENTRY process-nametoken
  ;; PURP formats a token from a tokenized namestring
  ;; DESC
  ;; formats the token according to its position in the string and
  ;; according to some global settings
  ;; ARGS
  ;; ARG 'token'
  ;; the string that holds the name part to format
  ;; /ARG
  ;; ARG 'count'
  ;; the position of the current token in the tokenlist
  ;; /ARG
  ;; ARG 'maxcount'
  ;; the highest possible value of count in the tokenlist
  ;; /ARG
  ;; /ARGS
  ;; /DESC
  ;; EXAMPLE
  ;; (process-nametoken "Franklin" 0 3) could return "F." if the
  ;; global settings call for abbreviating the name parts
  ;; /EXAMPLE
  ;; /REFENTRY
  (let* ((tokenlength (string-length token))
        ;; if we're not othername but othername exists, we must be firstname
        ;; if authorlong=t, we should put a space after token even if
        ;; it is the last token in the tokenlist
	(othernode (node-list-first (select-elements (parent (current-node)) (normalize "OTHERNAME"))))
	(appendspace (cond ((and (not (node-list-empty? othernode)) (node-list=? othernode (current-node))) #f)
			   (else #t))))
    (if (string=? token " ")
	""
	(if (= 1 tokenlength)
	    (string-append token ".")
	    (if (= tokenlength 2)
		(if (char=? #\. (string-ref token 1))
		    token
		    (string-append token "."))
		(if (string=? (db2ris-symbol-value "AUTHORLONG") "t")
		    (if (or (< count maxcount) appendspace)
			(string-append token " ")
			token)
		    (string-append (substring token 0 1) ".")))))))


(define (process-date datestring)
  ;; REFENTRY process-date
  ;; PURP formats a date string according to the RIS spec
  ;; DESC
  ;; the current implementation uses the first 4 characters, assuming
  ;; that this is a 4-digit year, and appends "///" to turn it into
  ;; a valid RIS date spec
  ;; ARGS
  ;; ARG 'datestring'
  ;; the string that holds the date information
  ;; /ARG
  ;; /ARGS
  ;; /DESC
  ;; EXAMPLE
  ;; (process-date "1999 and whatnot") returns "1999///"
  ;; /EXAMPLE
  ;; /REFENTRY
  (string-append (substring datestring 0 4) "///"))

(define (assoc obj alist)
;; assoc is not a primitive in Jade. You can comment out this definition
;; to gain some speed if you use OpenJade (>=1.3?)
  (cond ((null? alist) #f)
        ((equal? obj (car (car alist)))
         (car alist))
        (else (assoc obj (cdr alist)))))

(define (normalize str)
  ;; REFENTRY normalize
  ;; PURP Normalize the str according to the SGML declaration in effect
  ;; DESC
  ;; Performs SGML general name normalization on the string;
  ;; used to compare attribute names and generic identifiers correctly
  ;; according to the SGML declaration in effect; this is necessary
  ;; since XML is case-sensitive but the reference concrete syntax and
  ;; many SGML DTDs are not.
  ;; /DESC
  ;; AUTHOR Chris Maden
  ;; /REFENTRY
  (if (string? str)
      (general-name-normalize str
			      (current-node))
      str))


</style-specification-body>
</style-specification>
</style-sheet>


