;;; -*- Mode: LISP; Package: CGP; BASE: 10; Syntax: ANSI-Common-Lisp; -*-
;;
;; Copyright (C) Paul Meurer 1999, 2000. All rights reserved.
;; paul.meurer@hit.uib.no
;; HIT-centre, University of Bergen
;;
;; Version 0.95
;;
;; Constraint Grammar Parser editing functions
;;
;;-------------------------------------------------------------------------------------
;; TO DO: handle comments properly when reloading a stored grammar
;;
;;-------------------------------------------------------------------------------------
(in-package "CGP")
(defun %write-constraint-to-string (constraint &key (alternate-case-p t))
(with-output-to-string (stream)
(write-char #\( stream)
(loop with upcasep = alternate-case-p
for (sym . rest) on constraint
do (cond ((eq sym 'not)
(write-string "NOT" stream))
(upcasep
(format stream "~:@(~s~)" sym)
(setf upcasep nil))
(t
(format stream "~(~s~)" sym)
(setf upcasep alternate-case-p)))
when rest do (write-char #\Space stream))
(write-char #\) stream)))
(defun %feature-to-string-list (f-list language &key (to-platform :sgml) additional-conversions)
(declare (ignore language))
(mapcar (lambda (ct)
(delete #\| (%write-constraint-to-string ct))
#+ignore
(convert-string (delete #\| (%write-constraint-to-string ct))
:mac to-platform additional-conversions))
f-list))
;; make this nicer!
(defun %labels-to-string-list (labels language &key (to-platform :sgml) additional-conversions)
(declare (ignore language))
(delete #\| (%write-constraint-to-string labels :alternate-case-p nil))
#+old
(convert-string (delete #\| (%write-constraint-to-string labels :alternate-case-p nil))
:mac to-platform additional-conversions))
(defmethod write-rule-body ((rule rule) stream &key language html-p (to-platform :sgml)
(end-parenthesis t))
(with-slots (type domain target constraints heuristic-level) rule
(let ((*print-pretty* nil))
(if html-p
(progn
#m #S(format nil "(~s ~s ~a"
(or domain '@w)
type
(delete #\\ (write-to-string (if (consp target) (car target) (list target)))))
(dolist (string (%feature-to-string-list constraints language :to-platform nil))
#m (br/)
#m #s (format nil " ~a" string)))
(format stream "(~s ~s ~a~{~% ~a~}"
(or domain '@w)
#+old(if domain (convert-string domain $encoding to-platform) '@w)
type
(delete #\\ (write-to-string (if (consp target) (car target) (list target)))
#+old
(convert-string (write-to-string (if (consp target) (car target) (list target)))
:mac to-platform))
(%feature-to-string-list constraints language :to-platform nil #+old to-platform))))
(when end-parenthesis (write-char #\) stream))))
(defmethod write-rule-body ((rule mapping-rule) stream &key language html-p (to-platform :sgml)
(end-parenthesis t))
(with-slots (target constraints labels) rule
(let ((*print-pretty* nil))
(if html-p
(progn
#m #s(format nil "(~a" (delete #\\ (write-to-string target)))
#m(br/)
(write-string " (" stream)
(loop for (string rest) on (%feature-to-string-list constraints language :to-platform nil)
do #m #s string
when rest
do #m(br/)
(write-string " " stream))
#m(br/)
#m #s(format nil " ~a" (%labels-to-string-list labels language)))
(format stream "(~a~% (~{~a~^~% ~})~% ~a"
(delete #\\ #+old(convert-string (write-to-string target) :mac to-platform)
(write-to-string target))
(%feature-to-string-list constraints language :to-platform to-platform)
(%labels-to-string-list labels language :to-platform to-platform))))
(when end-parenthesis (write-char #\) stream))))
#+old
(defmethod write-rule-body ((rule mapping-rule) stream &key language html-p (to-platform :sgml)
(end-parenthesis t))
(with-slots (target constraints labels) rule
(let ((*print-pretty* nil))
(format stream (if html-p "(~a
(~{~a~^
~})
~a" "(~a~% (~{~a~^~% ~})~% ~a")
(delete #\\ (convert-string (write-to-string target)
:mac to-platform
(when html-p '(#\< "<" #\> ">" #\& "&"))))
(%feature-to-string-list constraints language :to-platform to-platform
:additional-conversions
(when html-p '(#\< "<" #\> ">" #\& "&")))
(%labels-to-string-list labels language :to-platform to-platform
:additional-conversions
(when html-p '(#\< "<" #\> ">" #\& "&")))))
(when end-parenthesis (write-char #\) stream))))
(defmethod rule-table ((cg constraint-grammar) constraint-type domain)
(cond ((find constraint-type '(=m :syntactic-map))
(morphosyntactic-mappings cg))
(domain
(gethash domain (domain-rules cg)))
(t
(rules cg))))
(defmethod rule-table ((cg ne-constraint-grammar) constraint-type domain)
(declare (ignore domain))
(case constraint-type
((=nm :named-entity--map)
(named-entity-mappings cg))
(otherwise
(call-next-method))))
(defmethod get-rules ((cg constraint-grammar) &key id type heuristic-niveau domain features rule-table
constraint-node keep-groups-together-p)
(when (and type (not (eq heuristic-niveau :alle)))
(setf type (or (type+level-to-type type (or heuristic-niveau 0)) t)))
;(print (list type heuristic-niveau))
(unless (eq type t)
(let ((rules
(cond (id
(list (aref (rule-array cg) id)))
((eq heuristic-niveau :alle)
(reduce #'append
(mapcar (lambda (heuristic-niveau)
(get-rules cg :type type :heuristic-niveau heuristic-niveau
:domain domain :features features))
'(0 1 2 3))))
((and (null type) (null rule-table) (null constraint-node))
(get-rules cg :type '(=0 =! =s0 =s!) :domain domain :features features))
((consp type)
(reduce #'append
(mapcar (lambda (type) (get-rules cg :type type :domain domain :features features))
type)))
(domain
(if (eq domain :alle)
(collecting
(maphash (lambda (dom rule-table)
(declare (ignore dom))
(collect-append
(get-rules cg :features features :type type :rule-table rule-table)))
(domain-rules cg)))
(when-let (rule-table (gethash domain (domain-rules cg)))
(get-rules cg :features features :type type :rule-table rule-table))))
(type
(case type
((=0 =! =!! =0h =!h =!!h)
(when-let (rule-table (gethash type (or rule-table (rules cg))))
(get-rules cg :features features :rule-table rule-table)))
(=m
(when-let (rule-table (morphosyntactic-mappings cg))
(get-rules cg :features features :rule-table rule-table)))
(=nm
(when-let (rule-table (named-entity-mappings cg))
(get-rules cg :features features :rule-table rule-table)))
((=s0 =s! =s0h1 =s!h1 =s0h2 =s!h2 =s0h3 =s!h3
=n0 =n! =n0h =n!h =n0h1 =n!h1 =n0h2 =n!h2 =n0h3 =n!h3)
(if features
(when-let (rule-table (gethash type (or rule-table (rules cg))))
(when-let (constraint-node (gethash (car features) rule-table))
(get-rules cg :features features :constraint-node constraint-node)))
(collecting
(when-let (rule-table (gethash type (or rule-table (rules cg))))
(maphash (lambda (feature constraint-node)
(collect-append
(get-rules cg :features (list feature)
:constraint-node constraint-node)))
rule-table)))))))
((and (null features) rule-table)
(collecting
(maphash (lambda (feature rules)
(declare (ignore feature))
(dolist (features+rule rules)
(collect (cdr features+rule))))
rule-table)))
(constraint-node
(collecting
(labels ((walk (node)
(dolist (rule (constraint-rules node))
(collect rule))
(mapc #'walk (child-constraints node))))
(walk constraint-node))))
(t
(u:collecting
(let ()
(dolist (features+rule (gethash (car features) rule-table))
(when (subsetp (cdr features) (car features+rule))
(u:collect (cdr features+rule))))))))))
(if keep-groups-together-p
(let ((used-groups ()))
(collecting
(dolist (rule rules)
(when (and rule (not (equal rule 0))) ; *** fix this! initialize copied rule array with nil
(let ((group-comment (aref (rule-group-comments cg) (rule-id rule))))
;(print group-comment)
(cond ((null group-comment)
(collect rule))
((null (car group-comment)) ;; bug somewhere!
(collect rule))
((find group-comment used-groups)
nil)
(t
(push group-comment used-groups)
(dolist (rule-id (cdr group-comment))
(collect (aref (rule-array cg) rule-id))))))))))
rules))))
(defun %copy-seq (obj)
(if (listp obj) (copy-seq obj) obj))
(defun copy-hash-table (table &key (value-copy-function #'identity))
(let ((copied-table (make-hash-table :test (hash-table-test table)
:size (hash-table-size table))))
(maphash (lambda (key value)
(setf (gethash key copied-table) (funcall value-copy-function value)))
table)
copied-table))
(defmethod copy-constraint-node ((node constraint-node) &key parent)
(let ((copied-node (make-instance (class-of node)
:constraint (constraint node)
:parent parent
:rules (copy-seq (constraint-rules node)))))
(setf (child-constraints copied-node)
(mapcar (lambda (child) (copy-constraint-node child :parent copied-node))
(child-constraints node)))
copied-node))
(defmethod deep-copy-hash-table (obj)
(typecase obj
(cons
(copy-tree obj))
(constraint-node
(copy-constraint-node obj))
(hash-table
(copy-hash-table obj :value-copy-function 'deep-copy-hash-table))
(t
obj)))
(defmethod copy-cg ((cg constraint-grammar) &key name)
(let ((copied-cg (make-instance (class-of cg)
:name name
:operations (parse-operations cg)
:feature-precedence (feature-precedence cg)
:multi-tagger (multi-tagger cg)
:parent-cg (name cg))))
(with-slots (sentence-delimiters
set-declarations encoded-set-declarations templates syntactic-functions principal-functions
barrier-elements
rules domain-rules
%mapping-features %syntactic-function-codes
morphosyntactic-mappings morphological-heuristics rule-array careful-p ; constraint-tree
language documentation group-comments)
copied-cg
(setf sentence-delimiters (copy-seq (sentence-delimiters cg))
set-declarations (set-declarations cg)
encoded-set-declarations (encoded-set-declarations cg)
templates (templates cg)
syntactic-functions (copy-hash-table (syntactic-functions cg))
principal-functions (copy-hash-table (principal-functions cg))
barrier-elements (copy-hash-table (barrier-elements cg))
rules (deep-copy-hash-table (rules cg))
domain-rules (deep-copy-hash-table (domain-rules cg))
%mapping-features (%copy-seq (%mapping-features cg))
%syntactic-function-codes (%copy-seq (%syntactic-function-codes cg))
morphosyntactic-mappings (deep-copy-hash-table (morphosyntactic-mappings cg))
morphological-heuristics (deep-copy-hash-table (morphological-heuristics cg))
rule-array (make-array (length (rule-array cg)) :initial-contents (rule-array cg) :adjustable t :fill-pointer t)
careful-p (careful-p cg)
;;constraint-tree (constraint-tree cg)
language (language cg)
documentation (cg-documentation cg)
group-comments (make-array (length (rule-array cg)) :adjustable t :fill-pointer 0))
(loop for grc across (rule-group-comments cg)
with grc-list = nil and copy-grc = nil
do (cond ((null grc)
(vector-push nil group-comments))
((eq grc grc-list)
(vector-push copy-grc group-comments))
(t
(setf grc-list grc copy-grc (copy-seq grc))
(vector-push copy-grc group-comments)))))
copied-cg))
(defmethod copy-cg :around ((cg ne-constraint-grammar) &key &allow-other-keys)
(let ((copied-cg (call-next-method)))
(with-slots (%named-entity-mapping-features %named-entity-tag-codes named-entity-mappings named-entity-tags)
copied-cg
(setf %named-entity-mapping-features (%copy-seq (%named-entity-mapping-features cg))
%named-entity-tag-codes (%copy-seq (%named-entity-tag-codes cg))
named-entity-mappings (deep-copy-hash-table (named-entity-mappings cg))
named-entity-tags (copy-hash-table (named-entity-tags cg))))
copied-cg))
#+test
(inspect (copy-cg *nbo-cg*))
#+test
(make-named-cg-copy *nbo-cg* "NBO-test")
;(defparameter *cg-table* (make-hash-table :test #'equal))
(defmethod make-named-cg-copy ((cg constraint-grammar) name)
(setf (gethash name *cg-table*) (copy-cg cg :name name)))
(defun save-constraint-grammars (&key (cg-table *cg-table*))
(maphash (lambda (name cg)
(write-cg-to-file cg
:path (concat "projects:cgp;rules;" name ".lisp")
:temp-path (concat "projects:cgp;rules;" name ".temp")))
cg-table))
#+test
(save-constraint-grammars)
#+test
(let ((path "projects:cgp;rules;xx.lisp"))
(list (setf (pathname-type path) "temp") path))
(defmethod write-cg-to-file ((cg constraint-grammar) &key path temp-path default-p version-comment)
(print :path)
(print temp-path)
(let ((temp-path (or temp-path #-allegro(setf (pathname-type path) "temp")
#+allegro(merge-pathnames ".temp" path)))
(success-p nil))
(print :temp-path)
(print temp-path)
(print path)
(unwind-protect
(progn
(with-open-file (stream temp-path :direction :output :if-exists :supersede :if-does-not-exist :create)
(write-cg-to-stream cg :stream stream :default-p default-p :version-comment version-comment))
(setf success-p t))
(if success-p
(rename-file temp-path path)
(delete-file temp-path)))))
#+test
(defun write-comment (string &optional (stream *standard-output*))
(dolist (line (split string #\Newline))
(format stream ";; ~a~%" line)))
#+test
(write-cg-to-stream (gethash "nbo" *cg-table*))
#+test
(defmethod write-cg-to-stream ((cg constraint-grammar) &key (stream *standard-output*)
(encoding $encoding) version-comment)
(let ((*package* (find-package :cgp))
(*print-case* :downcase))
(format stream "~s~%" '(foo bar))))
;; changed 7.2.2001
(defmethod write-cg-to-stream ((cg constraint-grammar) &key (stream *standard-output*)
(encoding $encoding) default-p version-comment)
(let ((*package* (find-package :cgp))
(*print-case* :downcase))
(with-slots (group-comments creation-date change-date) cg
(setf change-date (get-universal-time)) ;; set change-date when writing
(labels ((encode (obj) obj
#+ignore-yet
(cond ((eq encoding $encoding)
obj)
((stringp obj)
(if (eq $encoding encoding)
obj
(convert-string (copy-seq obj) $encoding encoding)))
((consp obj)
(mapcar #'encode obj))
(t
obj))))
(write-line ";;;-*- Mode: Lisp; Package: CGP -*-" stream) ; mode line
(terpri stream)
(when version-comment (format stream "~{;;*;; ~a~%~}~%" (split version-comment #\Newline)))
(terpri stream)
(write-string ";;; Denne filen er automatisk generert. " stream)
(u::format-universal-time (get-universal-time) stream :timestamp :nbo)
(terpri stream)
(terpri stream)
(write-line "(in-package :cgp)" stream)
(terpri stream)
(let* ((language (language cg))
(name (if default-p (string-downcase language) (name cg))))
(format stream
"(let* ((*tagger* *~a-tagger*)
(*cg* (make-instance '~s
:language ~s
:name \"~a\"
:multi-tagger *~a-tagger*
:locked-p ~s
:operations
(list ;#'analize-unknown-words
#'disambiguate-sentence
#'h-disambiguate-sentence
#'disambiguate-sentence
#'map-sentence
#'s-disambiguate-sentence
(lambda (cg sentence) (h-s-disambiguate-sentence cg sentence 1))
(lambda (cg sentence) (h-s-disambiguate-sentence cg sentence 3))
#+ignore #'h-s-disambiguate-sentence)
:feature-precedence (mapcar #'feature-code '(det adj verb subst pron adv fork konj clb)))))
(setf (gethash ~s *cg-table*) *cg*)~%~%"
language (type-of cg) language
(if default-p (string-downcase language) name)
language (cg-locked-p cg)
(if default-p (string-downcase language) name)
))
(format stream "(admin-info~% :language ~s~% :created ~s~% :last-change ~s~% :locked ~s~% :parent ~s)~%~%"
(language cg)
(u::format-universal-time creation-date nil :timestamp :nbo)
(u::format-universal-time change-date nil :timestamp :nbo)
(cg-locked-p cg)
(parent-cg cg))
(terpri stream)
(format stream ";; Information section~%(define-information-section~% ~s)~%~%"
(if (eq $encoding encoding)
(cg-documentation cg)
(convert-string (copy-seq (cg-documentation cg)) $encoding encoding)))
(format stream ";; Sentence delimiters~%(define-sentence-delimiters~% '~s)~%~%"
(sentence-delimiters cg))
(write-line ";; set declarations" stream)
(write-line ";; \"strings\" are converted to %symbols% for faster lookup" stream)
(format stream "(define-sets~% '(~%")
(let ((definitions
(sort (collecting
(maphash (lambda (symbol def)
(collect (cons symbol (encode def))))
(set-declarations cg)))
#'string< :key #'car)))
(dolist (def definitions)
(format stream " ~s~%" def)))
(write-line "))" stream)
(terpri stream)
(write-line ";; syntactic function declarations" stream)
(format stream "(define-syntactic-functions~% '~s)~%~%"
(sort (collecting
(maphash (lambda (symbol val)
(declare (ignore val))
(collect symbol))
(syntactic-functions cg)))
#'string<))
(terpri stream)
(when (typep cg 'ne-constraint-grammar)
(write-line ";; named entity tags" stream)
(format stream "(define-named-entity-tags~% '~s)~%~%"
(sort (collecting
(maphash (lambda (symbol val)
(declare (ignore val))
(collect symbol))
(named-entity-tags cg)))
#'string<))
(terpri stream))
(write-line ";; Reglene er listet i rekkefølgen de brukes av taggeren." stream)
(terpri stream)
(write-line ";; Morphological disambiguation rules" stream)
(format stream "(define-rules~% '(~%")
(let* ((written-groups ())
(group-id 0)
(domains
(sort (collecting ; sort the features first
(maphash (lambda (domain rules-table)
(declare (ignore rules-table))
(collect domain))
(domain-rules cg)))
#'string<))
(rule-type-tables
(append (mapcar (lambda (domain) (gethash domain (domain-rules cg))) domains)
(list (rules cg)))))
(dolist (rule-type-table rule-type-tables)
(dolist (type '(=! =!! =0 =!h =!!h =0h))
(when-let (rule-table (gethash type rule-type-table))
(let ((features
(sort (collecting ;; sort the features first
(maphash (lambda (feature features+rule)
(declare (ignore features+rule))
(collect feature))
rule-table))
#'string<)))
(dolist (feature features)
(dolist (features+rule (gethash feature rule-table))
(let* ((rule (cdr features+rule))
(rule-group (aref group-comments (rule-id rule))))
(when (and rule-group (not (find rule-group written-groups :key #'cdr)))
(format stream "(group-comment ~d ~s)~%~%" (incf group-id) (car rule-group))
(push (cons group-id rule-group) written-groups))
(format stream ";; ~d~%" (rule-id rule))
(write-rule-body rule stream :to-platform encoding :end-parenthesis nil)
(format stream "~% (")
(when (rule-comment rule)
(format stream ":comment ~s" (encode (rule-comment rule)))
(when rule-group (format stream "~% ")))
(when rule-group
(format stream ":group ~a" (car (find rule-group written-groups :key #'cdr))))
(write-line "))" stream))
(terpri stream)))))))
(write-line ";; Morphosyntactic mapping rules" stream)
(let* ((rule-table (morphosyntactic-mappings cg))
(features
(sort (collecting ; sort the features first
(maphash (lambda (feature features+rule)
(declare (ignore features+rule))
(collect feature))
rule-table))
#'string<)))
(dolist (feature features)
(dolist (features+rule (gethash feature rule-table))
(let* ((rule (cdr features+rule))
(rule-group (aref group-comments (rule-id rule))))
(when (and rule-group (not (find rule-group written-groups :key #'cdr)))
(format stream "(group-comment ~d ~s)~%~%" (incf group-id) (car rule-group))
(push (cons group-id rule-group) written-groups))
(format stream ";; ~d~%" (rule-id rule))
(write-rule-body rule stream :to-platform encoding :end-parenthesis nil)
(format stream "~% (")
(when (rule-comment rule)
(format stream ":comment ~s" (encode (rule-comment rule)))
(when rule-group (format stream "~% ")))
(when rule-group
(format stream ":group ~a" (car (find rule-group written-groups :key #'cdr))))
(write-line "))" stream))
(terpri stream))))
(write-line ";; Syntactic disambiguation rules" stream)
(write-line ";; Syntactic disambiguation rules")
(dolist (rule-type-table rule-type-tables)
(dolist (type '(=s! =s0 =s!h1 =s0h1 =s!h2 =s0h2 =s!h3 =s0h3))
(when-let (rule-table (gethash type rule-type-table))
(let ((features
(sort (collecting ; sort the features first
(maphash (lambda (feature rule-tree)
(declare (ignore rule-tree))
(collect feature))
rule-table))
#'string<)))
(dolist (feature features)
(map-tree-rules
cg (gethash feature rule-table)
(lambda (rule)
(let ((rule-group (aref group-comments (rule-id rule))))
(when (and rule-group (not (find rule-group written-groups :key #'cdr)))
(format stream "(group-comment ~d ~s)~%~%" (incf group-id) (car rule-group))
(push (cons group-id rule-group) written-groups))
(format stream ";; ~d~%" (rule-id rule))
(write-rule-body rule stream :to-platform encoding :end-parenthesis nil)
(format stream "~% (")
(when (rule-comment rule)
(format stream ":comment ~s" (encode (rule-comment rule)))
(when rule-group (format stream "~% ")))
(when rule-group
(format stream ":group ~a" (car (find rule-group written-groups :key #'cdr))))
(write-line "))" stream))
(terpri stream))))))))
(when (typep cg 'ne-constraint-grammar)
(write-line ";; Named entity mapping rules" stream)
(let* ((rule-table (named-entity-mappings cg))
(features
(sort (collecting ; sort the features first
(maphash (lambda (feature features+rule)
(declare (ignore features+rule))
(collect feature))
rule-table))
#'string<)))
(dolist (feature features)
(dolist (features+rule (gethash feature rule-table))
(let* ((rule (cdr features+rule))
(rule-group (aref group-comments (rule-id rule))))
(when (and rule-group (not (find rule-group written-groups :key #'cdr)))
(format stream "(group-comment ~d ~s)~%~%" (incf group-id) (car rule-group))
(push (cons group-id rule-group) written-groups))
(format stream ";; ~d~%" (rule-id rule))
(write-rule-body rule stream :to-platform encoding :end-parenthesis nil)
(format stream "~% (")
(when (rule-comment rule)
(format stream ":comment ~s" (encode (rule-comment rule)))
(when rule-group (format stream "~% ")))
(when rule-group
(format stream ":group ~a" (car (find rule-group written-groups :key #'cdr))))
(write-line "))" stream))
(terpri stream))))
(write-line ";; Named entity disambiguation rules" stream)
(dolist (type '(=n! =n0 =n!h =n0h =n!h1 =n0h1 =n!h2 =n0h2 =n!h3 =n0h3))
(when-let (rule-table (gethash type (rules cg)))
(let ((features
(sort (collecting ; sort the features first
(maphash (lambda (feature rule-tree)
(declare (ignore rule-tree))
(collect feature))
rule-table))
#'string<)))
(dolist (feature features)
(map-tree-rules
cg (gethash feature rule-table)
(lambda (rule)
(let ((rule-group (aref group-comments (rule-id rule))))
(when (and rule-group (not (find rule-group written-groups :key #'cdr)))
(format stream "(group-comment ~d ~s)~%~%" (incf group-id) (car rule-group))
(push (cons group-id rule-group) written-groups))
(format stream ";; ~d~%" (rule-id rule))
(write-rule-body rule stream :to-platform encoding :end-parenthesis nil)
(format stream "~% (")
(when (rule-comment rule)
(format stream ":comment ~s" (encode (rule-comment rule)))
(when rule-group (format stream "~% ")))
(when rule-group
(format stream ":group ~a" (car (find rule-group written-groups :key #'cdr))))
(write-line "))" stream))
(terpri stream))))))))
(write-line " ))" stream)
(terpri stream)
(write-line ")" stream)
(write-line ";;; EOF")
nil)))))
#+old
(defmethod write-cg-to-stream ((cg constraint-grammar) &key (stream *standard-output*)
(encoding $encoding))
(with-slots (group-comments creation-date change-date) cg
(labels ((encode (obj)
(cond ((eq encoding $encoding)
obj)
((stringp obj)
(if (eq $encoding encoding)
obj
(convert-string (copy-seq obj) $encoding encoding)))
((consp obj)
(mapcar #'encode obj))
(t
obj))))
(write-line ";;;-*- Mode: Lisp; Package: CGP -*-" stream) ; mode line
(terpri stream)
(write-line ";;; Denne filen er automatisk generert." stream)
(terpri stream)
(write-line "(in-package :cgp)" stream)
(terpri stream)
(format stream "(admin-info~% :language ~s~% :created ~s~% :last-change ~s~% :locked ~s~% :parent ~s)~%~%"
(language cg)
(u::format-universal-time creation-date nil :timestamp :nbo)
(u::format-universal-time change-date nil :timestamp :nbo)
(cg-locked-p cg)
(if (parent-cg cg)
(name (parent-cg cg))
nil))
(write-line ";; Morphological disambiguation" stream)
(terpri stream)
(format stream ";; Information section~%(define-information-section~% ~s)~%~%"
(if (eq $encoding encoding)
(cg-documentation cg)
(convert-string (copy-seq (cg-documentation cg)) $encoding encoding)))
(format stream ";; Sentence delimiters~%(define-sentence-delimiters~% '~s)~%~%"
(sentence-delimiters cg))
(write-line ";; set declarations" stream)
(write-line ";; \"strings\" are converted to %symbols% for faster lookup" stream)
(format stream "(define-sets~% '(~%")
(let ((definitions
(sort (u:collecting
(maphash (lambda (symbol def)
(u:collect (cons symbol (encode def))))
(set-declarations cg)))
#'string< :key #'car)))
(dolist (def definitions)
(format stream " ~s~%" def)))
(write-line "))" stream)
(terpri stream)
(write-line ";; syntactic function declarations" stream)
(format stream "(define-syntactic-functions~% '~s)~%~%"
(sort (u:collecting
(maphash (lambda (symbol val)
(declare (ignore val))
(u:collect symbol))
(syntactic-functions cg)))
#'string<))
(terpri stream)
(write-line ";; disambiguation constraints" stream)
(format stream "(define-disambiguation-rules~% '(~%")
(let ((written-groups ()))
(loop for rule across (rule-array cg)
do (when (and rule
(typep rule 'disambiguation-rule)
(= 0 (heuristic-level rule))
(not (find (aref group-comments (rule-id rule)) written-groups)))
(let ((rule-group (aref group-comments (rule-id rule))))
(cond (rule-group
(push rule-group written-groups)
(format stream "(comment ~s~%" (car rule-group))
(dolist (rule (cdr rule-group))
(format stream ";; ~d~%" (rule-id rule))
(write-rule-body rule stream :to-platform encoding :end-parenthesis nil)
(if (rule-comment rule)
(format stream "~%~s)~%" (encode (rule-comment rule)))
(write-line ")" stream))
(terpri stream))
(write-line ")" stream))
(t
(format stream ";; ~d~%" (rule-id rule))
(write-rule-body rule stream :to-platform encoding :end-parenthesis nil)
(if (rule-comment rule)
(format stream "~%~s)~%" (encode (rule-comment rule)))
(write-line ")" stream))
(terpri stream)))))))
(write-line ") :clearp t)" stream)
(terpri stream)
(write-line ";; heuristic disambiguation constraints" stream)
(format stream "(define-heuristic-disambiguation-rules~% '(~%")
(loop for rule across (rule-array cg)
do (when (and rule
(typep rule 'disambiguation-rule)
(= 1 (heuristic-level rule)))
(format stream ";; ~d~%" (rule-id rule))
(write-rule-body rule stream :to-platform encoding :end-parenthesis nil)
(if (rule-comment rule)
(format stream "~%~s)~%" (encode (rule-comment rule)))
(write-line ")" stream))
(terpri stream)))
(write-line ") :clearp t)" stream)
(terpri stream)
(write-line ";; morphosyntactic mappings" stream)
(format stream "(define-mapping-rules~% '(~%")
(loop for rule across (rule-array cg)
do
(when (and rule
(typep rule 'mapping-rule))
(format stream ";; ~d~%" (rule-id rule))
(write-rule-body rule stream :to-platform encoding :end-parenthesis nil)
(if (rule-comment rule)
(format stream "~%~s)~%" (encode (rule-comment rule)))
(write-line ")" stream))
(terpri stream)))
(write-line ") :clearp t)" stream)
(terpri stream)
(write-line ";; syntactic constraints" stream)
(format stream "(define-syntactic-rules~% '(~%")
(loop for rule across (rule-array cg)
do
(when (and rule
(typep rule 'syntactic-disambiguation-rule)
(or (null (heuristic-level rule)) ; preliminary
(= 0 (heuristic-level rule))))
(format stream ";; ~d~%" (rule-id rule))
(write-rule-body rule stream :to-platform encoding :end-parenthesis nil)
(if (rule-comment rule)
(format stream "~%~s)~%" (encode (rule-comment rule)))
(write-line ")" stream))
(terpri stream)))
(write-line ") :clearp t)" stream)
(terpri stream)
(dotimes (i 3)
(format stream ";; heuristic syntactic constraints, level ~d~%" (1+ i))
(format stream "(define-heuristic-syntactic-rules ~% '(~%")
(loop for rule across (rule-array cg)
do
(when (and rule
(typep rule 'syntactic-disambiguation-rule)
(heuristic-level rule)
(= (1+ i) (heuristic-level rule)))
(format stream ";; ~d~%" (rule-id rule))
(write-rule-body rule stream :to-platform encoding :end-parenthesis nil)
(if (rule-comment rule)
(format stream "~%~s)~%" (encode (rule-comment rule)))
(write-line ")" stream))
(terpri stream)))
(format stream ") :clearp t :level ~d)~%~%" (1+ i)))
nil)))
;;(stored-cg-versions)
(defun stored-cg-versions (&key (path "projects:cgp;rules;*.lisp") (html-p t))
"returns a list of (file-name cg-name universal-time comment) lists"
(let ((versions
(collecting
(dolist (file (directory path))
(let* ((file-name (pathname-name file))
(ut-start (- (length file-name) 10))
(universal-time-string (when (> ut-start 0) (subseq file-name ut-start)))
(cg-name (when universal-time-string (subseq file-name 0 ut-start))))
(when universal-time-string
(multiple-value-bind (universal-time end) (parse-integer universal-time-string :junk-allowed t)
(when (= end 10)
(let ((comment nil))
(block comment
(with-file-lines (line file)
(cond ((and (> (length line) 6)
(string= line ";;*;;" :end1 5))
(setf comment
(if html-p
(if comment
(concat comment "
" (utf-8-encode #+old chars-to-entities (subseq line 6)))
(subseq line 6))
(if comment
(concat comment #.(format nil "~%") (subseq line 6))
(subseq line 6)))))
((and (> (length line) 12)
(string= line "(in-package " :end1 12))
(return-from comment))
(t
nil))))
(collect (list file-name cg-name universal-time comment)))))))))))
(sort versions #'> :key #'caddr)))
#+test
(write-cg-to-stream *nbo-cg*)
#+test
(write-cg-to-file *nbo-cg* :path "projects:cgp;rules;nbo-all.lisp")
;;(print (%parse-query-response-key :nob "hidden_value_Newnbo3177_0"))
(defun %parse-query-response-key (language key)
(let ((str (string key))
(lang-length (length (symbol-name language))))
(cond ((and (> (length str) 29)
(string-equal str "hidden_value_rulegroupcomment" :end1 29))
(values (parse-integer str :start (+ 29 lang-length)) :group-comment t))
((and (> (length str) 27)
(string-equal str "hidden_value_rulecommentnew" :end1 27))
(values (parse-integer str :start (+ 27 lang-length)) :comment t))
((and (> (length str) 24)
(string-equal str "hidden_value_rulecomment" :end1 24))
(values (parse-integer str :start (+ 24 lang-length)) :comment))
((and (> (length str) 21)
(string-equal str "rule_rulegroupcomment" :end1 21))
(values (parse-integer str :start (+ 21 lang-length)) :group-comment))
((and (> (length str) 19)
(string-equal str "rule_rulecommentnew" :end1 19))
(values (parse-integer str :start (+ 19 lang-length)) :comment t))
((and (> (length str) 16)
(string-equal str "rule_rulecomment" :end1 16))
(values (parse-integer str :start (+ 16 lang-length)) :comment))
((and (> (length str) 16)
(string-equal str "hidden_value_new" :end1 16))
;;(values (parse-integer str :start (+ 16 lang-length)) :rule t)
(let ((_pos (position #\_ str :start (+ 16 lang-length))))
(if _pos
(values (parse-integer str :start (1+ _pos)) ; id
:rule ; type
t ; new-rule-p
(parse-integer str :start (+ 16 lang-length) :end _pos) ; cloned-from
)
(values (parse-integer str :start (+ 16 lang-length)) :rule t))))
((and (> (length str) 13)
(string-equal str "hidden_value_" :end1 13))
(values (parse-integer str :start (+ 13 lang-length)) :rule))
((and (> (length str) 8)
(string-equal str "rule_new" :end1 8))
(let ((_pos (position #\_ str :start (+ 8 lang-length))))
(if _pos
(values (parse-integer str :start (1+ _pos)) ; id
:rule ; type
t ; new-rule-p
(parse-integer str :start (+ 8 lang-length) :end _pos) ; cloned-from
)
(values (parse-integer str :start (+ 8 lang-length)) :rule t))))
((and (> (length str) 5)
(string-equal str "rule_" :end1 5))
(values (parse-integer str :start (+ 5 lang-length)) :rule))
(t
nil))))
(defun valid-constraint-p (constraint)
(print constraint)
(when (consp constraint)
(when (eq (car constraint) 'NOT)
(setf constraint (cdr constraint)))
(unless (or (not (cdr constraint))
(and (cdr constraint)
(cddr constraint)
(cdddr constraint)))
(destructuring-bind (pos-op set &optional link) constraint
(and (or (integerp pos-op)
(not (find-if-not (lambda (c)
(find c "LRC*-+0123456789" :test #'char-equal))
(string pos-op))))
(gethash set (set-declarations *cg*))
(or (null link)
(integerp link)
(not (find-if-not (lambda (c)
(find c "LRC*-+0123456789" :test #'char-equal))
(string link)))))))))
#+test
(let ((*cg* *nbo-cg*)
(*tagger* *nbo-tagger*))
(print (valid-constraint-p '(LR0 (subst subst) *R))))
(defun check-rule-consistency (rule)
(labels ((test-features (target)
(when (find-if-not #'feature-code target)
(error "Det finnes ugyldig trekk i ~a" rule))))
(destructuring-bind (first . rest) rule
(cond ((consp first) ;; mapping-regel
(unless (and (listp (car rest))
(listp (cdr rest))
(listp (cadr rest))
(null (cddr rest)))
(error "ugyldig kropp i mappingregel ~a" rule))
(let ((target (if (stringp (car first))
(cdr first)
first)))
(test-features target))
(let ((constraint (find-if-not #'valid-constraint-p (car rest))))
(when constraint
(error "~a er ikke en velformet føring" constraint)))
(test-features (cadr rest)))
((or (eq first '@w)
(stringp first))
(destructuring-bind (rule-type target . constraints) rest
(unless (operator-to-rule-class *cg* rule-type)
(error "~a er ukjent regeltype" rule-type))
(test-features (if (stringp (car target))
(cdr target)
target))
(let (( constraint (find-if-not #'valid-constraint-p constraints)))
(when constraint
(error "~a er ikke velformet føring" constraint)))))
(t
(error "Kan ikke analysere første del av regelen: ~a" rule))))))
#+test
(let ((*cg* *nbo-cg*)
(*tagger* *nbo-tagger*))
(check-rule-consistency '("fast" =! (adj fl)
(*-1 setn-gr R+1)
(NOT R+1 prep/adv)
(NOT 0 farlige-adj)
(*-1 %seg% *R)
(NOT *R ikke-adv)
(*-1 %seg% L-1)
(L-1C %fast%-verb *L)
(*L nom *R)
(LR0 nom-fl)
(NOT -1 det)
(NOT 1 subst/adj))))
(defmethod %update-rules ((cg constraint-grammar) query-alist)
(let ((rules-to-delete ())
(rules-to-add ())
(changed-p nil))
;;(print query-alist)
(with-slots (rule-array group-comments change-date) cg
(dolist (rule-def query-alist)
(destructuring-bind (key . value) rule-def
(multiple-value-bind (rule-id type new-rule-p group-rule-id)
(%parse-query-response-key (language cg) key)
(when rule-id
;;(print (list rule-id type new-rule-p group-rule-id))
(let ((rule (unless new-rule-p (aref rule-array rule-id))))
(when (eq type :rule)
(cond ((string-equal value "deleted")
(unless new-rule-p
(push rule rules-to-delete)))
(t
(let* ((*package* (find-package :cgp))
(definition (read-from-string (utf-8-decode value)
#+ignore(convert-string value :win :mac)))
(*cg* cg)
(*tagger* (multi-tagger cg)))
(check-rule-consistency definition)
(setf rules-to-add ;; ensures order
(append rules-to-add (list (if new-rule-p rule-id rule) nil)))
(cond (new-rule-p
(push definition (getf rules-to-add rule-id))
(push :definition (getf rules-to-add rule-id))
(when group-rule-id
(push group-rule-id (getf rules-to-add rule-id))
(push :insert-before (getf rules-to-add rule-id))))
(t
(push rule rules-to-delete)
(push definition (getf rules-to-add rule))
(push :definition (getf rules-to-add rule))
;; makes sure that the modified rule is going be at the same
;; place as the original one if possible
(push (rule-id rule) (getf rules-to-add rule))
(push :insert-before (getf rules-to-add rule)))))))))))))
;; comments
(dolist (rule-def query-alist)
(destructuring-bind (key . value) rule-def
(multiple-value-bind (rule-id type new-rule-p #+ignore group-rule-id)
(%parse-query-response-key (language cg) key)
(case type
(:comment
(when rule-id
(let ((rule (aref rule-array rule-id)))
(cond (new-rule-p
(push #+old(convert-string value :win :mac)
(utf-8-decode value)
(getf rules-to-add rule-id))
(push :comment (getf rules-to-add rule-id)))
(t
(push rule rules-to-delete)
(push #+old(convert-string value :win :mac)
(utf-8-decode value)
(getf rules-to-add rule))
(push :comment (getf rules-to-add rule))
;; makes sure that the modified rule is going be at the same place
;; as the original one if possible
(push (rule-id rule) (getf rules-to-add rule))
(push :insert-before (getf rules-to-add rule)))))))
(:group-comment
(when rule-id
(setf (car (aref group-comments rule-id))
(utf-8-decode value)
#+old(convert-string value :windows $encoding)
changed-p t)))))))
(print "rules-to-add:") (print rules-to-add)
(print "rules-to-delete:") (print rules-to-delete)
(let ((already-stored-rules
(loop for (rule definition) on rules-to-add by #'cddr
collect (apply #'add-rule cg (unless (integerp rule) rule) definition))))
(dolist (rule rules-to-delete)
(when (and rule (not (find rule already-stored-rules)))
(delete-rule cg rule :delete-from-array-p (not (find rule rules-to-add))))))
(when (or rules-to-delete rules-to-add changed-p)
(setf change-date (get-universal-time)))))
(with-slots (%feature-codes) cg
(clrhash %feature-codes)))
(defmethod rule-definition ((rule rule))
(with-slots (domain type target constraints) rule
(list* (or domain '@w) type
;; in syntactic rules target is an atom; in md-rules, a list of a list of features.
(if (consp target)
(car target)
target)
constraints)))
(defmethod rule-definition ((rule mapping-rule))
(with-slots (target constraints labels) rule
(list target constraints labels)))
;(inspect (aref (rule-array (gethash "nbo-test" *cg-table*)) 2305))
;(rule-definition (aref (rule-array (gethash "nbo-test" *cg-table*)) 244))
;(rule-definition (aref (rule-array (gethash "nbo-test" *cg-table*)) 2305))
(defmethod add-rule ((cg constraint-grammar) rule-previous-version &key definition comment insert-before)
;(print (list definition comment insert-before))
(when (null definition)
;; get the definition from the previous version
(setf definition (rule-definition rule-previous-version)))
(when (and (null comment) rule-previous-version)
;; get the definition from the previous version
(setf comment (rule-comment rule-previous-version)))
(let ((group-comment (when insert-before (aref (rule-group-comments cg) insert-before))))
(define-rule cg definition (when rule-previous-version (rule-id rule-previous-version)) ; nil
;; :rule-class (when rule-previous-version (class-of rule-previous-version))
:previous-version rule-previous-version :comment comment ; :group-comment group-comment
:insert-before insert-before)))
(defmethod delete-rule ((cg constraint-grammar) (rule disambiguation-rule) &key (delete-from-array-p t))
(print delete-from-array-p)
(with-slots (target language type domain boundary-mode heuristic-level id) rule
(let* ((type-table (if (stringp domain)
(gethash domain (domain-rules cg))
(rules cg)))
(rule-table (gethash type type-table)))
(Print (list rule-table domain))
(labels ((base-form-error? (feature features) ; (9.1.)
(when (stringp feature)
(error "The first feature of a target may not be a base form: ~s"
features))))
(with-slots (rule-array group-comments) cg
;; delete old rule and store rule in rule array
(when delete-from-array-p
(setf (aref rule-array id) nil))
;; update group-comments list for rule
(when (and delete-from-array-p (aref group-comments id))
(setf (cdr (aref group-comments id))
(delete (rule-id rule) (cdr (aref group-comments id))))
;; delete old rule from group comments
(setf (aref group-comments id) nil))
;; delete rule
(dolist (features target)
(cond ((consp features) ; a feature set
(setf (gethash (car features) rule-table)
(delete (cons (cdr features) rule)
(gethash (car features) rule-table)
:test #'stored-rules-equal-p))
(when (null (gethash (car features) rule-table))
(remhash (car features) rule-table)))
(t
(setf (gethash features rule-table)
(delete (cons nil rule)
(gethash features rule-table)
:test #'stored-rules-equal-p))
(when (null (gethash features rule-table))
(remhash features rule-table)))))
rule)))))
(defmethod delete-rule ((cg constraint-grammar) (rule mapping-rule) &key (delete-from-array-p t))
(with-slots (rule-array group-comments) cg
(with-slots (type target id) rule
(let ((rule-table (morphosyntactic-mappings cg)))
(when delete-from-array-p
(setf (aref rule-array id) nil))
;; delete rule
(setf (gethash (car target) rule-table)
(delete (cons (cdr target) rule)
(gethash (car target) rule-table)
:test #+ignore #'equal #-ignore #'stored-rules-equal-p
:count 1))
;; update group-comments list for rule
(when (and delete-from-array-p (aref group-comments id))
(setf (cdr (aref group-comments id))
(delete (rule-id rule) (cdr (aref group-comments id))))
;; delete old rule from group comments
(setf (aref group-comments id) nil))
(when (null (gethash (car target) rule-table))
(remhash (car target) rule-table))
rule))))
(defmethod delete-rule ((cg constraint-grammar) (rule named-entity-mapping-rule) &key (delete-from-array-p t))
(with-slots (rule-array group-comments) cg
(with-slots (type target id) rule
(let ((rule-table (named-entity-mappings cg)))
(when delete-from-array-p
(setf (aref rule-array id) nil))
;; delete rule
(setf (gethash (car target) rule-table)
(delete (cons (cdr target) rule)
(gethash (car target) rule-table)
:test #+ignore #'equal #-ignore #'stored-rules-equal-p
:count 1))
;; update group-comments list for rule
(when (and delete-from-array-p (aref group-comments id))
(setf (cdr (aref group-comments id))
(delete (rule-id rule) (cdr (aref group-comments id))))
;; delete old rule from group comments
(setf (aref group-comments id) nil))
(when (null (gethash (car target) rule-table))
(remhash (car target) rule-table))
rule))))
(defmethod delete-rule ((cg constraint-grammar) (rule syntactic-disambiguation-rule) &key (delete-from-array-p t))
(with-slots (rule-array group-comments) cg
(with-slots (type target domain id) rule
(let* ((type-table (if (stringp domain) (domain-rules cg) (rules cg)))
(rule-table (gethash type type-table)))
(when rule-table
(remove-rule-from-tree cg rule (gethash target rule-table))
(when delete-from-array-p
(setf (aref rule-array id) nil))
;; update group-comments list for rule
(when (and delete-from-array-p (aref group-comments id))
(setf (cdr (aref group-comments id))
(delete (rule-id rule) (cdr (aref group-comments id))))
;; delete old rule from group comments
(setf (aref group-comments id) nil)))))))
(defun %parse-declaration-query-response-key (key)
(let ((str #-cl-http key #+cl-http(symbol-name key)))
(cond ((and (> (length str) 27)
(string-equal str "hidden_value_declarationset" :end1 27))
(values (parse-integer str :start 27) :set))
((and (> (length str) 28)
(string-equal str "hidden_value_declarationname" :end1 28))
(values (parse-integer str :start 28) :name))
((and (> (length str) 14)
(string-equal str "declarationset" :end1 14))
(values (parse-integer str :start 14) :set))
((and (> (length str) 15)
(string-equal str "declarationname" :end1 15))
(values (parse-integer str :start 15) :name)))))
#+test
(let ((*tagger* *nbo-tagger*))
(print (feature-code 'adv)))
#+test
(maphash (lambda (key val)
(print (list key val)))
(feature-table *nbo-tagger*))
(defmethod %encode-definition ((cg constraint-grammar) definition)
(let ((*tagger* (multi-tagger cg)))
(cond ((listp definition)
(remove-if #'null (mapcar (lambda (def) (%encode-definition cg def)) definition)))
((stringp definition)
definition)
(t ;; *** should depend on language!
(or (feature-code definition)
(error "The feature \"~a\" is not defined." definition))))))
(defmethod %update-set-declarations ((cg constraint-grammar) dec-count query-alist)
(with-slots (set-declarations encoded-set-declarations change-date) cg
(let ((declarations ())
(changed-p nil)
(*package* (find-package :cgp)))
(dolist (declaration query-alist)
(multiple-value-bind (id type) (%parse-declaration-query-response-key (car declaration))
(print (list :id id :type type))
(when id
(push
(unless (string= (cdr declaration) "")
(read-from-string (utf-8-decode (subst-substrings (cdr declaration)
'("<" "<" ">" ">" "&" "&")))))
(getf declarations id))
(print (list :value (subst-substrings (cdr declaration) '("<" "<" ">" ">"))))
(push type (getf declarations id)))))
(print declarations)
(loop for (id value) on declarations by #'cddr
do
(destructuring-bind (&key name set) value
(let ((sorted-set (when set (sort-definition (definition-remove-stars set)))))
(if (<= id dec-count) ; update an old declaration
(cond (set
(setf changed-p t)
(setf (gethash name encoded-set-declarations) (%encode-definition cg sorted-set)
(gethash name set-declarations) sorted-set))
(t
(setf changed-p t)
(remhash name set-declarations)
(remhash name encoded-set-declarations)))
(when (and name set)
(setf changed-p t)
(setf (gethash name encoded-set-declarations) (%encode-definition cg sorted-set)
(gethash name set-declarations) sorted-set))))))
(when changed-p
(setf change-date (get-universal-time))))))
;;; EOF