;; -*- Mode: lisp; Syntax: ansi-common-lisp; Package: cgp; Base: 10; Readtable: augmented-readtable -*-
;;
;; Copyright (C) Paul Meurer 2001 - 2005. All rights reserved.
;; paul.meurer@aksis.uib.no
;; Aksis, University of Bergen
;;
;; Web-interface to the Common Lisp-reimplementation of the CG parser/tagger system
;; (Oslo-tagger) developed at UiO (Dokumentasjonsprosjektet; Tekstlaboratoriet)
;;
;;-------------------------------------------------------------------------------------
;; TO DO: make fancier interface
;;-------------------------------------------------------------------------------------
;; PROBLEMS:
;;
;; Does not work in IE 5.00.2920.0000 (problems with persistent connections)
;;
;;-------------------------------------------------------------------------------------
;; The Perl script cgp.pl accesses the URL "/cl/cgp/oslo-bergen-tagger.html"
;; which calls tag-text-form()
(in-package :cgp)
#+test
(cl-user::run-shell-command
(concat "rsync --archive --rsh=ssh "
"paul@decentius.hit.uib.no:/home/paul/lisp/projects/cgp/rules/ "
"/home/paul/lisp/projects/cgp/rules/"))
(defconstant $max-idle-time (* 60 10))
(defmethod grab-lock ((cg constraint-grammar) (obj t))
nil)
(eval-when (:compile-toplevel :load-toplevel :execute)
(setf lxml::*encoding* :utf-8 ;;:entities
lxml::*newline-after-endtag-p* '(p td tr table title script head
|p| |td| |tr| |table| |title| |script| |head|)))
(defun cg-name-list ()
(collecting (maphash (lambda (name cg)
(declare (ignore cg))
(collect name))
*cg-table*)))
#+allegro-xx
(defmethod request-remote-host ((request http-request))
(socket::remote-host (request-socket request)))
(defmethod grab-lock ((cg constraint-grammar) (request http-request))
(let ((lock (cg-edit-lock cg))
(now (get-universal-time)))
(with-slots (user host last-access) lock
(cond ((null user)
(setf user (get-basic-authorization request)
host (request-remote-host request)
last-access now)
lock)
((and (equal user (get-basic-authorization request))
(equal host (request-remote-host request)))
(setf last-access now)
lock)
((< (+ last-access $max-idle-time) now)
(setf user (get-basic-authorization request)
host (request-remote-host request)
last-access now))
(t
nil)))))
(defmethod release-lock ((cg constraint-grammar) (request http-request))
(let ((lock (cg-edit-lock cg)))
(with-slots (user host last-access) lock
(when (or (null user)
(and (eq user (get-basic-authorization request))
(equal host (request-remote-host request))))
(setf user nil host nil last-access nil)
lock))))
(defun write-analyser-page-doc (stream)
#m((font :face "Arial, Helvetica, sans-serif")
((h2 :align "center")
"Oslo-Bergen-taggeren (for bokmål og nynorsk)")
((p :align "center")
((font :face "Arial, Helvetica, sans-serif")
"Et samarbeidsprosjekt mellom UiO og UiB."))
(p
((font :face "Arial, Helvetica, sans-serif")
"Taggeren består av en preprosessor, en multitagger og en Constraint Grammar-modul for morfologisk og syntaktisk disambiguering."))
(p ((span :style "font-weight: bold") "Preprosessor. ")
"Finner blant annet setningsgrenser. (Utviklet av Taggerprosjektet, ved Dokumentasjonsprosjektet og Tekstlaboratoriet, begge UiO. Nyprogrammert ved Aksis, UiB.)")
(p ((span :style "font-weight: bold") "Multitagger. ")
"Setter grammatiske tagger på ord. Basert på")
(ul (li (b "Norsk ordbank") ", som er satt sammen av:"
(ul (li "ordlister og lister over bøyningsmønstre for bokmål og nynorsk laget ved IBM Norge A/S")
(li "materiale fra Bokmålsordboka og Nynorskordboka laget ved Seksjon for leksikografi, INL, UiO.")
(li "opplysninger om hvilke argumentstruktur verb kan ta. Opplysningene er systematisert og samlet av NorKompLeks ved Universitetet i Trondheim."))
"Materialet er tilpasset og videreutviklet av Taggerprosjektet (ved Dokumentasjonsprosjektet og Tekstlaboratoriet (UiO)).")
(li "En "(b "sammensetningsanalysator") " utviklet ved Tekstlaboratoriet, UiO."))
(p "Multitaggeren er nyprogrammert ved Aksis, UiB, men ble opprinnelig laget av Dokumentasjonsprosjektet. ")
(p ((span :style "font-weight: bold") "Disambigueringsdel. ")
"Fjerner overflødige tagger v.h.a. morfologiske og syntaktiske Constraint Grammar- (føringsbaserte) regler. Utviklet ved Taggerprosjektet, ved Tekstlaboratoriet, UiO. Regeltolker utviklet ved Aksis, UiB.")
(p "Kontakt:"
(br/)
((a :href "http://www.hf.uio.no/tekstlab/index.html") "Tekstlaboratoriet") ", UiO: "
"Janne Bondi Johannessen."
(br/)
((a :href "http://www.aksis.uib.no") "Aksis") ", UiB: "
((a :href "http://www.aksis.uib.no/people/12") "Paul Meurer")
(br/)
((a :href "http://www.dokpro.uio.no/index.html") "Dokumentasjonsprosjektet") ", UiO."
(br/) "Seksjon for leksikografi, UiO.")
(hr/)
(p "Her kan du teste taggeren:")
))
#+copy
(defmethod request-uri-string ((request http-request))
(with-output-to-string (stream)
(net.uri:render-uri (request-raw-uri request) stream)))
(defun ensure-string (object)
(typecase object
(string object)
(symbol (string-downcase (string object)))
(t (with-output-to-string (stream) (write object :stream stream)))))
;; should be the same as the function defined in aserve/utilties.lisp, but this
;; one uses entity encoding, whearas HTML-SELECT uses utf-8. Should be fixed.
#+obsolete
(defun %html-select (stream &key name options default onchange)
(if onchange
#m((select :name #L name :onchange #L onchange)
#L(dolist (option options)
(if (consp option)
(let ((value (ensure-string (car option))))
(if (string-equal value (string default))
#m((option :value #L value :selected "true")
#L(write-string (cdr option) stream))
#m((option :value #L value)
#L(write-string (cdr option) stream))))
(let ((value (ensure-string option)))
(if (string-equal value (ensure-string default))
#m((option :value #L value :selected "true")
#L(write-string value stream))
#m((option :value #L value)
#L(write-string value stream)))))))
#m((select :name #L name)
#L(dolist (option options)
(if (consp option)
(let ((value (ensure-string (car option))))
(if (string-equal value (string default))
#m((option :value #L value :selected "true")
#L(write-string (cdr option) stream))
#m((option :value #L value)
#L(write-string (cdr option) stream))))
(let ((value (ensure-string option)))
(if (string-equal value (ensure-string default))
#m((option :value #L value :selected "true")
#L(write-string value stream))
#m((option :value #L value)
#L(write-string value stream)))))))))
(defmethod analyser-page-body ((request http-request) stream
&key (text "") (language :nbo)
(version "nbo") (compare-version "nbo")
compare-p eagles-p expand-tokens-p (print-rules-p t)
print-scarrie-styles-p
(tagging-niveau :morphological-disambiguation)
(mode :test) (file "") (encoding :macintosh))
(when (eq mode :edit)
#m(link/ :rel "stylesheet" :type "text/css" :href "/cl/cgp/styles/analyze-text.css"))
(when (and (eq mode :test) (equal text ""))
(write-analyser-page-doc stream))
(if *www-interface-disabled-p*
#m(p (b "Tagger-siden er midlertidig tatt ut av drift, men er snart oppe igjen."))
#m((form :method "post"
:name "form"
;;:enctype "multipart/form-data"
:action #L(request-uri-string request))
(p ((font :face "Verdana, Arial, Helvetica, Geneva, sans-serif;")
#L(write-string (if (eq mode :test) "Analyser tekst (maks 500 tegn):" "Analyser tekst:")
stream)))
((textarea :name "text" :rows "6" :cols "80"
:onkeydown "if (window.event.keyCode == 13) { window.event.keyCode = false; window.event.cancelBubble = false; form.submit() } ")
#L(write-string text stream))
(br/)
((table :id "tblChoicesTable")
#L(when (eq mode :edit)
#+ignore-yet
#m(tr ((td :align "right" :class "clsChoicesLabel") "Analyser fil ")
(td (accept-input 'file "FILE" :default file :size 54)))
#+ignore-yet
#m(tr ((td :align "right" :class "clsChoicesLabel") "Filformat ")
(td #L(html-select stream :name "encoding"
:options '("Windows" "Macintosh" "Unix")
:default encoding)))
#m(tr ((td :align "right" :class "clsChoicesLabel") "CG-versjon ")
(td #L(html-select stream :name "version"
:options (cg-name-list)
:default version)))
#m(tr ((td :align "right" :class "clsChoicesLabel")
((input :type "checkbox" :name "compare-p" :checked #L(when compare-p "on"))
"Sammenlign med "))
(td
#L(html-select stream
:name "compare-version"
:options (cg-name-list)
:default compare-version))))
#L(when (eq mode :test)
#m(tr ((td :align "right" :class "clsChoicesLabel") "Språk ")
(td #L(html-select stream
:name "language"
:options '(("nbo" . "Bokmål")
("nny" . "Nynorsk"))
:default language))))
(tr ((td :align "right" :class "clsChoicesLabel") "Nivå ")
(td
#L(html-select
stream
:name "tagging-niveau"
:options
(if (eq mode :edit)
`(("multi-tagging" . "Multitagging")
("morphological-disambiguation" . "Morfologisk disambiguering")
("syntactic-mapping" . "Syntaktisk mapping")
("syntactic-disambiguation" . "Syntaktisk disambiguering")
("named-entity-mapping" . "Morfologisk disambiguering og navnemapping")
("named-entity-disambiguation" . "Morfologisk og navnedisambiguering")
("syntactic-named-entity-mapping"
. "Syntaktisk disambiguering og navnemapping")
("syntactic-named-entity-disambiguation"
. "Syntaktisk og navnedisambiguering"))
`(("multi-tagging" . "Multitagging")
("morphological-disambiguation" . "Morfologisk disambiguering")
("syntactic-disambiguation" . "Syntaktisk disambiguering")
("syntactic-named-entity-disambiguation" . "Syntaktisk og navnedisambiguering")))
:default tagging-niveau))))
(p (input/ :type "submit" :name "analyse" :value "Analyser")
" "
((input :type "checkbox" :name "eagles-p" :checked #L(when eagles-p "t"))
" bruk Eagles-tagsett ")
((input :type "checkbox" :name "expand-tokens-p" :checked #L(when expand-tokens-p "t"))
" ekspander flerordsnavn ")
((input :type "checkbox" :name "print-rules-p" :checked #L(when print-rules-p "t"))
" vis brukte regler ")
((input :type "checkbox" :name "print-scarrie-styles-p"
:checked #L(when print-scarrie-styles-p "t"))
" vis "
((a :href "http://www.ling.uib.no/~~desmedt/scarrie/lexdb.html") "Scarrie")
"-stilnivåer"))
(hr/))))
(defun analyser-page-documentation-links (stream &key show-documentation-p)
(when show-documentation-p
#m(p (b (a :href "/cl/cgp/source/") "Kildekode og dokumentasjon")
" (for autoriserte brukere)" ))
#m (hr/)
#+cl-http
(p "Siden er generert av "
(ns4.0:note-anchor
"CL-HTTP"
:reference "http://www.ai.mit.edu/projects/iiip/doc/cl-http/home-page.html")))
#+ignore
(defmethod compute-cg-description-form ((request http-request) stream)
(destructuring-bind (version) url:search-keys
(compute-cg-description-form "/cl/cgp/constraint-grammars.html"
stream `((version ,version)))))
#+test
(let ((stream *standard-output*))
(js/edit-rule-popup-menu stream))
(defmethod cg-description-form ((request http-request) ent)
(with-http-response (request ent)
(with-http-body (request ent)
(let ((stream (request-reply-stream request)))
(bind-query-values
(version submit-form cg-clone-name cg-locked-p
backup-cg default-cg comment delete-cg documentation)
request nil t t t
;;(print version)
(unless version (setf version "nbo"))
#m(!DOCTYPE "HTML PUBLIC \"-//W3C//DTD HTML 3.2 Final//EN\"")
#m(html
(head (title #L(format stream "Description of the Constraint Grammar \"~a\"" version))
(meta/ :http-equiv "Content-Type" :content "text/html; charset=utf-8")
((script :language "JavaScript") #L(js/edit-cg stream))
#+ignore
(ns4.0::write-script (ns4.0:intern-script :edit-cg :java-script) stream))
(link/ :rel "stylesheet" :type "text/css"
:href "/cl/cgp/styles/analyze-text.css") ;; change stylesheet!
(link/ :rel "stylesheet" :type "text/css"
:href "/cl/cgp/styles/edit-rules.css")
#L(if *www-interface-disabled-p*
#m(p (b "Siden er midlertidig tatt ut av drift, men er snart oppe igjen."))
(handler-case
(let ((cg (gethash version *cg-table*)))
(when (null cg) (error "The CG ~a seems to be deleted." version))
;; actions
(when submit-form
(when cg-clone-name
(when (gethash cg-clone-name *cg-table*)
(error "A Constraint Grammar named ~s exists already. Please choose another name"
cg-clone-name))
(make-named-cg-copy cg cg-clone-name))
(when (and (or delete-cg documentation (not (equal (cg-locked-p cg) cg-locked-p)))
(not (grab-lock cg request)))
(error "The CG \"~a\" is in use by user ~a@~a and can't be updated."
version (user-name request) (request-remote-host request)))
(setf (cg-locked-p cg) cg-locked-p)
(when backup-cg
(let ((now (get-universal-time)))
(write-cg-to-file
cg
:path (format nil "projects:cgp;rules;~a~s.lisp" version now)
:temp-path (format nil "projects:cgp;rules;~a~s.temp" version now)
:version-comment comment)))
(when default-cg
(let ((name (string-downcase (language cg)))
(now (get-universal-time)))
(write-cg-to-file
cg
:default-p t
:path (format nil "projects:cgp;rules;~a.lisp" name)
:temp-path (format nil "projects:cgp;rules;~a~s.temp" name now)
:version-comment "Default-versjon")
(load (concat "projects:cgp;rules;" name ".lisp"))))
(when delete-cg
(if cg-locked-p
(error "The CG \"~a\" is locked and can't be deleted." version)
(remhash version *cg-table*)))
(when documentation
(if cg-locked-p
(error "The CG \"~a\" is locked and can't be edited." version)
(setf (cg-documentation cg)
documentation
#+obsolete
(convert-string documentation :windows $encoding)
(change-date cg) (get-universal-time)))))
#m((form :method "post" :action "/cl/cgp/constraint-grammars.html")
((table :id "tblChoicesTable")
(tr ((td :align "right" :class "clsChoicesLabel") "CG-versjon: ")
(td
#L(html-select stream
:name "version"
:options (cg-name-list)
:onchange "ChangeCG()"
:default version)
#+ignore
((select :name "VERSION" :size "1" :onchange "ChangeCG()")
#L(maphash (lambda (name cg)
(declare (ignore cg))
(if (equal name version)
#m((option :selected nil) #L(write-string name stream))
#m(option #L(write-string name stream))))
*cg-table*))))
#+ignore
(td ((div :id "oDownload" :style "behavior:url(#default#download)")
((a :href #L(concat
"javascript:oDownload.startDownload('/cl/cgp/download-cg?version="
version "', onDownloadDone)"))
"Last ned CG som fil"))))
(br/) (hr/)
((table :bgcolor "#ffffef" :id "tblChoicesTable")
(tr ((td :align "right" :class "clsChoicesLabel") "Opprettet ")
(td #L(u::format-universal-time (creation-date cg) stream :timestamp :nbo)))
(tr ((td :align "right" :class "clsChoicesLabel") "Siste endring ")
(td #L(u::format-universal-time (change-date cg) stream :timestamp :nbo)))
#L(when (parent-cg cg)
#m(tr ((td :align "right" :class "clsChoicesLabel") "Klonet fra ")
(td
#L(let ((name (parent-cg cg)))
#m((a :href #L(concat "/cl/cgp/constraint-grammar?" name))
#L(write-string name stream))))))
(tr ((td :align "right" :class "clsChoicesLabel") "Skrivebeskyttet ")
(td #L(if (cg-locked-p cg)
#m(input/ :type "checkbox" :name "CG-LOCKED-P" :checked "true")
#m(input/ :type "checkbox" :name "CG-LOCKED-P"))))
(tr ((td :align "right")
((a :href #L(concat "/cl/cgp/download-cg?version=" version))
"Last ned CG som fil"))
(td "(OBS: høyreklikk på lenken)"))
(tr ((td :align "right" :class "clsChoicesLabel") "Dupliser CG som: ")
(td (input/ :type "text" :name "cg-clone-name" :size "20")))
(tr ((td :align "right" :class "clsChoicesLabel") "Slett fra minnet ")
(td (input/ :type "checkbox" :name "DELETE-CG")))
(tr ((td :align "right" :class "clsChoicesLabel")
"Lag sikkerhetskopi ")
(td (input/ :type "checkbox" :name "BACKUP-CG")
" Kommentar: "
(input/ :type "text" :name "comment" :size "30")
#+ignore
(accept-input 'string "COMMENT" :size "30")))
(tr ((td :align "right" :class "clsChoicesLabel")
"Lagre som default ")
(td (input/ :type "checkbox" :name "DEFAULT-CG")))
(tr ((td :align "right" :valign "top" :class "clsChoicesLabel")
"Dokumentasjon ")
(td ((font :face "arial, helvetica, geneva, sans-serif" :size "3")
((div :id "cgDocumentation" :onclick "EditDocumentation()")
#L(write-line (convert-string (cg-documentation cg)
$encoding :sgml '(#\newline "
"))
stream)))))
(tr (td (input/ :type "submit" :name "SUBMIT-FORM"
:value "Oppdater")))))
#m (hr/)
#m((a :href "/cl/cgp/site-map.html" :target "help-page") "Til hovedsiden"))
#-ignore
(error (cond)
#m(p #S(format nil "Error: ~a" cond)))))))))))
(defmethod download-cg ((request http-request) ent)
(bind-query-values
(version) request nil t t t
(with-http-response (request ent)
(with-http-body (request ent :headers `((content-disposition . ,(concat "attachment; filename="
version ".lisp"))))
(let ((stream (request-reply-stream request)))
(let ((cg (gethash version *cg-table*)))
(when cg
(let ((*print-pretty* nil))
(write-cg-to-stream cg :stream stream ; :encoding :windows
)))))))))
(defmethod cg-versions-form ((request http-request) ent)
(bind-query-values
(submit) request nil t t t
(with-http-response (request ent)
(with-http-body (request ent)
(let ((stream (request-reply-stream request))
(query-alist (request-query request)))
#m(!DOCTYPE "HTML PUBLIC \"-//W3C//DTD HTML 3.2 Final//EN\"")
#m(html
(head (title "Liste over lagrete versjoner")
(meta/ :http-equiv "Content-Type" :content "text/html; charset=utf-8"))
(body
(link/ :rel "stylesheet" :type "text/css"
:href "/cl/cgp/styles/analyze-text.css") ;; change stylesheet!
(link/ :rel "stylesheet" :type "text/css"
:href "/cl/cgp/styles/edit-rules.css")
#L(if *www-interface-disabled-p*
#m(p (b "Siden er midlertidig tatt ut av drift, men er muligens snart oppe igjen."))
(progn ;;handler-case
#m((font :face "Arial, Helvetica, sans-serif" :size "3")
(h3 "Gå tilbake til en lagret CG-versjon")
#L(when submit
(loop for (filename . value) in query-alist
when (string-equal value "on")
do (load (concat "projects:cgp;rules;" filename ".lisp"))))
((form :method "post" :action #L(request-uri-string request))
(br/)
(input/ :type "submit" :name "submit" :value "Importer valgte")
(hr/)
((table :border "0" :cellpadding "2" :bgcolor "#ffffef" :id "tblChoicesTable")
((tr :bgcolor "#fafae8")
(td "Velg for import")
((td :align "center") "Navn")
((td :align "center") "Lagringsdato")
((td :align "center") "Kommentar"))
#L(dolist (cg-version (stored-cg-versions))
#m(tr ;; importp / name / store-date / comment
#L(destructuring-bind (file-name cg-name store-date comment) cg-version
#m((td :align "center")
(input/ :type "checkbox" :name #L file-name))
#m((td :align "right" :class "clsChoicesLabel")
#L(write-string cg-name stream))
#m(td #L(u::format-universal-time store-date stream :timestamp :nbo))
#m(td #L(when comment (write-string comment stream)))))))
)
(hr/)
((a :href "/cl/cgp/site-map.html" :target "help-page") "Til hovedsiden"))
#+ignore(error (cond)
#m(p #S(format nil "Error: ~a" cond))))))))))))
(defmethod compute-analyser-form ((request http-request) ent)
(%compute-analyser-form request ent :print-rules :test))
(defmethod compute-edit-analyser-form ((request http-request) ent)
(%compute-analyser-form request ent :print-rules :edit))
#+test
(defmethod tag-text-form ((request http-request) ent)
(bind-query-values
(text tagging-niveau language version) request nil t t t
(with-http-response (request ent)
(with-http-body (request ent)
(let ((stream (request-reply-stream request)))
(unwind-protect
(let* ((tagging-niveau (intern (string-upcase (or (print tagging-niveau)
"morphological-disambiguation"))
:keyword))
(language (intern (string-upcase (or language "NBO")) :keyword))
(cg (if version
(gethash version *cg-table*)
(ecase language
(:nbo *nbo-cg*)
(:nny *nny-cg*)))))
(when (consp text)
(labels ((concat-with-nl (strings)
(if (cdr strings)
(concat (car strings)
#.(format nil "~%")
(concat-with-nl (cdr strings)))
(car strings))))
(setf text (concat-with-nl text))))
(when text
(handler-case
(disambiguate-from-string
text #+test(convert-string text :win :mac) ; *** ??
:stream stream
:cg cg
:print-function
(lambda (sentence &key stream platform)
(print-sentence sentence
:stream stream
:platform platform
:print-rules nil))
:tagging-niveau (or tagging-niveau :morphological-disambiguation))
(error (cond)
(format stream "~a" (format nil "Error: ~a" cond))))))))))))
(defclass multipart-buffer ()
((request :initform nil :initarg :request)
(buffer :initform (make-array 4096 :element-type 'character))
(index :initform 0 :accessor buffer-index)
(size :initform 0 :accessor buffer-size)))
(defclass prefilled-multipart-buffer (multipart-buffer)
((lines :initform :empty :accessor multipart-lines)))
;; debug
(defparameter *request* nil)
(defmethod read-next-line ((stream multipart-buffer))
(let ((line :eof))
(with-slots (request buffer index size) stream
(setf *request* request)
(labels ((read-chunk ()
(cond ((null size)
:eof)
((= size index)
(setf size (get-multipart-sequence request buffer)
index 0)
(if size
(read-chunk)
line))
(t
(when (eq line :eof) (setf line ""))
(let* ((nl-pos (position-if (lambda (c)
(member c '(#\Newline #\Return)))
buffer :start index :end size)
#+ignore(position #\Return buffer :start index :end size))
(pos (or nl-pos size)))
(setf line (concat line
(subseq buffer index
(if (and (> pos index)
(char= (char buffer (1- pos))
#\Return))
(1- pos)
pos)))
index (if nl-pos (1+ nl-pos) size))
(if nl-pos
line
(read-chunk)))))))
(read-chunk)))))
(defmethod read-next-line ((stream prefilled-multipart-buffer))
(when (eq (multipart-lines stream) :empty)
(setf (multipart-lines stream)
(loop for line = (call-next-method)
;; do (print line)
collect line ;; (if (eq line :eof) :eof "disabled") ;; line
until (eq line :eof)))
(format t "~d lines read.~%" (length (multipart-lines stream))))
(pop (multipart-lines stream)))
#+test
(net.aserve::debug-on :info)
#+test
(net.aserve::debug-on :notrap)
#+test
(net.aserve::debug-off :notrap)
#+test
(with-open-file (stream "~/test.xml" :direction :output :if-exists :supersede)
#m(text (s "Dette er en setning.")
(s "Dette er en annen.")))
(defmethod tag-text-form ((request http-request) ent)
(bind-query-values
(lang mode in-format out-format version show-rules print-lc-features in-file) request nil t t t
#+debug(print (list lang mode in-format out-format show-rules in-file))
(let* ((mode (intern (string-upcase mode) :keyword))
(print-lc-features (or print-lc-features (find mode '(:sndl :neol))))
(tagging-niveau
(ecase mode
(:mt :multi-tagging)
(:md :morphological-disambiguation)
(:sm :syntactic-mapping)
(:sd :syntactic-disambiguation)
(:sdr :syntactic-disambiguation-regexp)
((:neo :neol) :named-entity-recognition-only)
(:mnd :named-entity-disambiguation)
(:snm :syntactic-named-entity-mapping)
(:sdnm :syntactic-disambiguation-named-entity-mapping)
((:snd :sndl) :syntactic-named-entity-disambiguation)))
(in-format (intern (string-upcase in-format) :keyword))
(out-format (intern (string-upcase out-format) :keyword))
(tokenizer-class
(ecase in-format
(:text (if (find mode '(:neo :neol)) 'pretagged-tokenizer 'tokenizer))
(:xml 'xml-tokenizer) ;; does not work yet
(:cwb 'pretokenized-tokenizer))) ;; change to cwb-tokenizer!
)
(cond ((eq out-format :oslo)
(let* ((language (intern (string-upcase (or lang "NBO")) :keyword))
(cg (if version
(gethash version *cg-table*)
(newest-cg language)))
(*cg* cg))
(when (null cg)
(error "Grammar ~a not loaded." (or version language)))
(progn ;;handler-case
(labels ((disambiguate (in-stream stream)
(disambiguate-stream
tokenizer-class
in-stream
:cg cg
:context-size
(when (find tagging-niveau
'(:named-entity-disambiguation
:named-entity-recognition-only
:syntactic-named-entity-mapping
:syntactic-disambiguation-named-entity-mapping
:syntactic-named-entity-disambiguation
:syntactic-disambiguation-regexp))
*context-size*)
:print-function
(lambda (sentence &key platform &allow-other-keys)
(let ((*package* (find-package :cgp)))
#+debug
(print-sentence
sentence
:stream *standard-output*
:expand-tokens-p (eq out-format :cwp)
:platform (or platform $encoding)
:print-rules show-rules)
(print-sentence
sentence
:stream stream
:expand-tokens-p (or (eq out-format :cwb)
(find mode '(:neol :sndl)))
:print-lc-features print-lc-features
:platform (or platform $encoding)
:print-rules show-rules)))
:tagging-niveau
(or tagging-niveau :morphological-disambiguation))))
(with-open-file (in-stream in-file)
(with-open-file (stream "/home/paul/cgp-out.txt"
:direction :output
:if-exists :supersede)
(disambiguate in-stream stream))))
#+debug
(error (cond)
(with-open-file (stream "/home/paul/cgp-out.txt"
:direction :output
:if-exists :supersede)
(format stream "Error: ~a" cond))
(error "~a" cond)
))))
(t
(with-http-response (request ent :timeout (* 60 60 2))
(with-http-body (request ent :headers
`((content-disposition
. ,(concat "attachment; filename=" "dis" ".txt"))))
(get-multipart-header request)
(let ((stream (request-reply-stream request))
(in-stream (make-instance 'prefilled-multipart-buffer :request request)))
(unwind-protect
(let* ((language (intern (string-upcase (or lang "NBO")) :keyword))
(cg (if version
(gethash version *cg-table*)
(newest-cg language))))
(handler-case
(labels ((disambiguate (stream)
(disambiguate-stream
tokenizer-class
in-stream
:cg cg
:context-size
(when (find tagging-niveau
'(:named-entity-disambiguation
:syntactic-named-entity-mapping
:syntactic-disambiguation-named-entity-mapping
:syntactic-named-entity-disambiguation))
*context-size*)
:print-function
(lambda (sentence &key platform &allow-other-keys)
;;(print sentence)
(let ((*package* (find-package :cgp)))
(case out-format
((:text :cwb :oslo)
(print-sentence
sentence
:stream stream
:expand-tokens-p (eq out-format :cwp)
:print-lc-features print-lc-features
:platform (or platform $encoding)
:print-rules show-rules))
(:html
(print-sentence-html
sentence
:stream stream
:print-lc-features print-lc-features
:print-rules show-rules))
(:xml
(print-sentence-xml
sentence
:print-lc-features print-lc-features
:stream stream)))))
:tagging-niveau
(or tagging-niveau :morphological-disambiguation))))
(cond ((and (eq in-format :text) (eq out-format :xml))
#m(?xml :version "1.0" :encoding "utf-8" :standalone "yes")
#m((text :date #L(now :format :date)
:tagging #L (string tagging-niveau))
#L(terpri stream)
#L(disambiguate stream)))
((eq out-format :oslo)
(with-open-file (stream "/home/paul/cgp-out.txt"
:direction :output
:if-exists :supersede)
(disambiguate stream)))
(t
(disambiguate stream))))
(error (cond)
(if (eq out-format :oslo)
(with-open-file (stream "/home/paul/cgp-out.txt"
:direction :output
:if-exists :supersede)
(format stream "Error: ~a" cond))
(format stream "Error: ~a" cond))))))))
(loop while (get-multipart-header request))))))))
(defmethod %compute-analyser-form ((request http-request) ent &key print-rules)
(setf *request* request)
;; *** change key to mode
(with-http-response (request ent)
(with-http-body (request ent)
(let ((stream (request-reply-stream request)))
(bind-query-values ;; *** put this into with-html-response!
(text file remote-file encoding tagging-niveau language
version compare-version compare-p eagles-p expand-tokens-p
print-rules-p print-scarrie-styles-p)
request nil t t t
#-debug
(print (list text file remote-file encoding tagging-niveau language
version compare-version compare-p eagles-p))
#+debug(print (map 'list #'identity (utf-8-decode text)))
(unwind-protect
(let ((tagging-niveau (intern (string-upcase tagging-niveau) :keyword))
(language (intern (string-upcase (or language "NBO")) :keyword)))
(when (consp text)
;; When encoding type is MULTIPART, text with newlines is returned as a list
;; of lines (see "file-uploading.lisp").
;; For the parser, the lines have to be joined again.
(labels ((concat-with-nl (strings)
(if (cdr strings)
(concat (car strings)
#.(format nil "~%")
(concat-with-nl (cdr strings)))
(car strings))))
(setf text (concat-with-nl text))))
#m(!DOCTYPE "HTML PUBLIC \"-//W3C//DTD HTML 3.2 Final//EN\"")
#m(html
(head (title "Oslo-Bergen-taggeren")
(meta/ :http-equiv "Content-Type" :content "text/html; charset=utf-8"))
((body :onload "document.form.text.focus()")
#L(analyser-page-body request stream :text (or text "")
:tagging-niveau tagging-niveau
:language language :mode print-rules :version version
:compare-version compare-version :compare-p compare-p
:file remote-file :encoding encoding
:eagles-p eagles-p :expand-tokens-p expand-tokens-p
:print-rules-p print-rules-p
:print-scarrie-styles-p print-scarrie-styles-p)
#L(when (or text remote-file)
(progn ;;handler-case
(cond ((and remote-file (not (equal remote-file "")))
#m(pre
(teletype
#L(let ((cg (if version
(gethash version *cg-table*)
(or (ecase language
;; newest ones!
(:nbo (gethash "nbo" *cg-table*))
(:nny (gethash "nny" *cg-table*)))
(newest-cg language)))))
(with-open-file (file-stream file :direction :input)
(disambiguate-stream
*tokenizer*
file-stream ;; stream
:encoding
(when encoding
(intern (string-upcase encoding) :keyword))
:cg cg
:context-size
(when (find
tagging-niveau
'(:named-entity-disambiguation
:syntactic-named-entity-mapping
:syntactic-disambiguation-named-entity-mapping
:syntactic-named-entity-disambiguation))
*context-size*)
:compare-cg
(when compare-p (gethash compare-version *cg-table*))
:print-function
(lambda (sentence &key platform &allow-other-keys)
(when print-scarrie-styles-p
(add-scarrie-styles sentence))
(print-sentence-html sentence
:stream stream
:platform platform
:print-rules print-rules
:expand-tokens-p expand-tokens-p
:eagles-p eagles-p
:print-rules-p print-rules-p))
:tagging-niveau tagging-niveau))))))
((null text)
nil)
((string= text "")
nil)
((and (> (length text) 500) (eq print-rules :test))
nil)
(t
#m(pre
(teletype
#L(let ((cg (if version
(gethash version *cg-table*)
(or (ecase language
;; newest ones!
(:nbo (gethash "nbo" *cg-table*))
(:nny (gethash "nny" *cg-table*)))
(newest-cg language)))))
(disambiguate-from-string
(if remote-file text (utf-8-decode text))
:stream stream
:cg cg
:context-size
(when (find tagging-niveau
'(:named-entity-disambiguation
:syntactic-named-entity-mapping
:syntactic-disambiguation-named-entity-mapping
:syntactic-named-entity-disambiguation))
*context-size*)
:compare-cg (when compare-p
(gethash compare-version *cg-table*))
:print-function
(lambda (sentence &key stream platform &allow-other-keys)
(when print-scarrie-styles-p
(add-scarrie-styles sentence))
(print-sentence-html sentence
:stream stream
:platform platform
:print-rules print-rules
:expand-tokens-p expand-tokens-p
:eagles-p eagles-p
:print-rules-p print-rules-p))
:tagging-niveau tagging-niveau))))))
#+ignore
(error (cond)
#m(p #S(format nil "Error: ~a" cond))))
(when (eq print-rules :edit)
(analyser-page-documentation-links stream)))
(hr/)
"Tagger-programvaren og vev-tjeneren er skrevet i "
((a :href "http://www.lisp.org/alu/home")
"Common Lisp") ".")))
(when file (delete-file file))))))))
(defun respond-to-regexp-filter-test-form (url stream query-alist)
(%respond-to-regexp-filter-test-form url stream query-alist :print-rules :edit))
(defmethod show-match ((token token) stream)
(declare (ignore stream)))
(defmethod show-match ((token regexp-token) stream)
#m((font :color "green")
#L(case (match token)
(:match-start (write-string "> " stream))
(:match-end (write-string "< " stream))
(:match (write-string "| " stream))
(:whole-match (write-string "* " stream))
(otherwise nil))))
(defmethod print-rules ((token token) (mode t) str stream &key &allow-other-keys)
(format stream "\"<~a>\""
(convert-string str :mac :sgml)))
(defmethod print-rules ((token token) (mode (eql :print)) str stream &key &allow-other-keys)
(format stream "\"<~a>\"~{ ~a~}"
(utf-8-encode str)
;;(convert-string str :mac :sgml)
(reverse (token-used-rules token))))
(defmethod print-rules ((token token) (mode (eql :edit)) str stream &key print-rules-p &allow-other-keys)
(let ((sentence (token-chain token)))
(with-slots (cg) sentence
(write-string "\"<" stream)
#m(b ((font :color "blue") #s str))
(write-string ">\"" stream)
(when (and print-rules-p (token-used-rules token))
(destructuring-bind (mt-rule &rest rules)
(reverse (token-used-rules token))
(format stream " ~a" mt-rule)
(dolist (rule rules)
(unless (symbolp rule)
(write-char #\Space stream)
#m((a :href #L(format nil "/cl/cgp/show-rules.html?~a"
(query-to-form-urlencoded
(list (cons "show-rules" "yes")
(cons "lang" (name cg))
(cons "rule-id" (rule-id rule)))))
:target "rule-pane")
#L(write-string (rule-string rule) stream)))))))))
(defmethod print-rules ((token compare-token) (mode (eql :edit)) str stream &key &allow-other-keys)
(let ((sentence (token-chain token)))
(with-slots (cg compare-cg) sentence
(format stream "\"<")
#m(b ((font :color "blue")
#L(write-string (u:convert-string str :mac :sgml) stream)))
(destructuring-bind (mt-rule &rest rules)
(reverse (token-used-rules token))
(let ((compare-rules (cdr (reverse (compare-used-rules token)))))
(format stream ">\" ~a" mt-rule)
(labels ((walk (rules c-rules)
(let ((rule (car rules))
(c-rule (car c-rules)))
(cond ((and (null rule) (null c-rule))
nil)
((eq rule c-rule)
(write-char #\Space stream)
#m((a :href #L(format nil "/cl/cgp/show-rule?version=~a&rule-id=~d"
(name cg) (rule-id rule))
:target "rule-pane")
#L(rule-string rule))
(walk (cdr rules) (cdr c-rules)))
((< (length c-rules) (length rules))
(write-char #\Space stream)
#m((font :color "red") "[1]")
#m((a :href #L(format nil "/cl/cgp/show-rule?version=~a&rule-id=~d"
(name cg) (rule-id rule))
:target "rule-pane")
#L(rule-string rule))
(walk (cdr rules) c-rules))
(t
(write-char #\Space stream)
#m((font :color "green") "[2]")
#m((a :href #L(format nil "/cl/cgp/show-rule?version=~a&rule-id=~d"
(name compare-cg) (rule-id c-rule))
:target "rule-pane")
#L(rule-string rule))
(walk rules (cdr c-rules)))))))
(walk rules compare-rules)))))))
(defun stringify (str-or-symbol)
(typecase str-or-symbol
(string
str-or-symbol)
(integer ;; temporary fix until lexin-search works
(format nil "~a" str-or-symbol))
(t
(string-downcase (string str-or-symbol)))))
(defmethod print-features ((token token) stream &key lc-features-p &allow-other-keys)
(with-slots (features lc-features chain) token
(let ((features (remove-feature-inclusions features)))
(dolist (fl features)
(when (car fl)
(format stream "~% \"")
#m((font :color "red")
#S(car fl))
(write-char #\" stream)
(mapcar (lambda (f)
(write-string " " stream)
(if (consp f)
(mapcar (lambda (f) #m #s(stringify f)) f)
#m #s (stringify f)))
(code-features (cdr fl) (feature-vector chain)))))
(when lc-features-p
(dolist (fl (remove-feature-inclusions lc-features))
(when (car fl)
(format stream "~% \"")
#m((font :color "orange")
#S(car fl))
(write-char #\" stream)
(mapcar (lambda (f)
(write-string " " stream)
(if (consp f)
(mapcar (lambda (f) #m #s(stringify f)) f)
#m #s (stringify f)))
(code-features (cdr fl) (feature-vector chain)))))))))
;; rewrite using feature-filter!!
(defmethod print-eagles-features ((token token) stream)
(with-slots (features) token
(dolist (fl features)
(when (car fl)
(let* ((cg-features (code-features (cdr fl)))
(eagles-features (cg-to-eagles-features (car cg-features) (cdr cg-features))))
(format stream "~% \"")
#m((font :color "red")
#S(car fl)
#+ignore
#L(write-string (chars-to-entities (car fl)) stream))
(let ((firstp t))
(labels ((convert (f)
(if firstp
(progn (setf firstp nil)
(string-upcase (utf-8-encode (stringify f))))
(utf-8-encode (stringify f)))))
(format stream "\" ~{~a~^ ~}"
(mapcar (lambda (f)
(if (consp f)
(mapcar #'convert f)
(convert f)))
eagles-features)))))))))
(defmethod print-features ((token compare-token) stream &key &allow-other-keys)
(with-slots (features compare-features chain) token
(labels ((walk (features c-features)
(let ((fl (car features))
(c-fl (car c-features)))
(cond ((and (null fl) (null c-fl))
nil)
((and fl (null (car fl)))
(walk (cdr features) c-features))
((and c-fl (null (car c-fl)))
(walk features (cdr c-features)))
((equal fl c-fl)
(format stream "~% \"")
#m((font :color "green")
#S (car fl)
#+ignore
#L(write-string (chars-to-entities (car fl)) stream))
(format stream "\" ~{~a~^ ~}"
(mapcar (lambda (f)
(if (consp f)
(mapcar #'utf-8-encode #+ignore #'chars-to-entities f)
(utf-8-encode #+ignore chars-to-entities f)))
(code-features (cdr fl) (feature-vector chain))))
(walk (cdr features) (cdr c-features)))
((< (length c-features) (length features))
(format stream "~% ")
#m((font :color "red")
#L(write-string "[1]" stream))
(write-char #\" stream)
#m((font :color "green")
#S(car fl)
#+ignore
#L(write-string (chars-to-entities (car fl)) stream))
(format stream "\" ~{~a~^ ~}"
(mapcar (lambda (f)
(if (consp f)
(mapcar #'utf-8-encode #+ignore #'chars-to-entities f)
(utf-8-encode #+ignore chars-to-entities f)))
(code-features (cdr fl) (feature-vector chain))))
(walk (cdr features) c-features))
(t
(format stream "~% ")
#m((font :color "red")
#L(write-string "[2]" stream))
(write-char #\" stream)
#m((font :color "green")
#S(car c-fl)
#+ignore
#L(write-string (chars-to-entities (car c-fl)) stream))
(format stream "\" ~{~a~^ ~}"
(mapcar (lambda (f)
(if (consp f)
(mapcar #'utf-8-encode #+ignore #'chars-to-entities f)
(utf-8-encode #+ignore chars-to-entities f)))
(code-features (cdr c-fl) (feature-vector chain))))
(walk features (cdr c-features)))))))
(walk features compare-features))))
(defmethod token-expansion-end ((token token))
(with-slots (expansion) token
(if expansion
(token-expansion-end (cdr expansion))
token)))
(defmethod print-sentence-html ((sentence sentence)
&key stream (print-features t) print-rules
eagles-p expand-tokens-p print-lc-features print-rules-p &allow-other-keys)
"outputs a CG style tagged sentence as html"
(setf *sentence* sentence)
#+debug(print (list :printing sentence))
(labels ((walk (token concat-token)
#+debug
(unless (null token)
(print (list token :expansion (token-expansion token))))
(cond ((null token)
nil)
((and expand-tokens-p (token-expansion token)
;; avoid nesting
(not concat-token))
(labels ((expand (token first last)
(if (token-expansion first)
(expand token
(car (token-expansion first))
(cdr (token-expansion first)))
(walk first token))
(unless (eq first last)
(expand token (token-next first) last))))
(expand token
(car (token-expansion token))
(cdr (token-expansion token))))
(walk (token-next token) nil))
(t
(let ((str (token-value token))
(features (token-features token)))
(unless (symbolp str)
(if print-features
(let ((*package* (find-package :cgp)))
(terpri stream)
(show-match token stream)
(print-rules token print-rules str stream :print-rules-p print-rules-p))
(write-string str stream))
(when (and features print-features)
(if eagles-p
(print-eagles-features token stream)
(print-features token stream :lc-features-p (or expand-tokens-p print-lc-features))))
(write-char #\Space stream))
(unless (or (eq token (last-token sentence)) concat-token)
(walk (token-next token) nil)))))))
(walk (first-token sentence) nil)))
;; *sentence*
;;; -------------------------------------- rule editing -----------------------------------------
(defmethod show-rule-form ((request http-request) ent)
(bind-query-values
(lang version rule-id domain all-domains type heuristic-niveau features show-rules)
request nil t t t
(let ((version (or version "NBO"))
(cg (name-to-cg version))
(id (when rule-id (parse-integer rule-id :junk-allowed t)))
(type (intern (string-upcase type) :keyword))
(heuristic-niveau (cond ((equal heuristic-niveau "0") 0)
((equal heuristic-niveau "1") 1)
((equal heuristic-niveau "2") 2)
((equal heuristic-niveau "3") 3)
(t :alle)))
(features (string-parse features :whitespace '(#\Space)))
(error nil))
#+debug(print (list version show-rules ))
;; TO DO: put handler-case around here etc.!
(handler-case
(when (and cg (null show-rules)) ; save-changes
(%update-rules cg (request-query request)))
(error (cond)
(setf error cond)))
(show-rules request ent
:version (or lang version) :id id :rule-id rule-id
:heuristic-niveau heuristic-niveau
:type type
:domain domain
:all-domains all-domains
:features features
:keep-groups-together-p (not version)
:error error))))
(defmethod show-rules-page-body ((request http-request) stream &key version domain all-domains type
heuristic-niveau features rule-id)
(labels ((concat-with-space (strings)
(cond ((cdr strings)
(concat (car strings) " " (concat-with-space (cdr strings))))
((null strings) "")
(t
(car strings)))))
#m(p ((font :face "Verdana, Arial, Helvetica, Geneva, sans-serif;")
"Velg reglene som skal vises:"))
#m((table :id "tblQueryTable")
(tr
((td :align "right" :class "clsQueryLabel") "Versjon ")
(td #L(html-select stream
:name "version"
:options (collecting (maphash (lambda (name cg)
(declare (ignore cg))
(collect name))
*cg-table*))
:default version)
" "
((a :href #L(concat "/cl/cgp/constraint-grammars.html?version=" version))
"vis"))
(td (input/ :type "submit" :name "show-rules" :value "Vis reglene")))
(tr
((td :align "right" :class "clsQueryLabel") "Regel-ID ")
((td :valign "top")
(input/ :type "text" :name "rule-id" :size "10" :value #L(or rule-id ""))
((b :class "clsQueryLabel") " eller:")))
(tr
((td :align "right" :class "clsQueryLabel") " Regeltype ")
(td
#L(html-select stream
:name "type"
:options '(:select :strong-select :discard
:syntactic-map :syntactic-select :syntactic-discard
:named-entity-map :named-entity-select :named-entity-discard)
:default type)))
(tr
((td :align "right" :class "clsQueryLabel") " Heur. nivå ")
(td
#L(html-select stream
:name "heuristic-niveau"
:options '(:alle 0 1 2 3)
:default heuristic-niveau)
#+ignore
(accept-input 'select-choices "HEURISTIC-NIVEAU"
:choices '(:alle 0 1 2 3)
:default heuristic-niveau :size :pull-down-menu)))
(tr
((td :align "right" :class "clsQueryLabel") "Domene ")
(td
(input/ :type "text" :name "domain" :size "30" :value #L(or domain ""))
" alle"
#L(if all-domains
#m(input/ :type "checkbox" :name "ALL-DOMAINS" :checked "true")
#m(input/ :type "checkbox" :name "ALL-DOMAINS"))))
(tr
((td :align "right" :class "clsQueryLabel") "Trekk ")
(td (input/ :type "text" :name "features" :size "30" :value #L(concat-with-space features)))))))
;;;; ------------------------------------- Set declarations editing ------------------------------------------
(defmethod show-set-declarations-form ((request http-request) ent)
(with-http-response (request ent)
(with-http-body (request ent)
(let ((stream (request-reply-stream request)))
(bind-query-values
(version dec-count declaration-name-substring) request nil t t t
(show-set-declarations request stream
:version version
:substring (utf-8-decode (or declaration-name-substring ""))
:count (parse-integer dec-count :junk-allowed t)))))))
(defmethod show-set-declarations-page-body ((request http-request) stream &key version (substring ""))
#m(p ((font :face "Verdana, Arial, Helvetica, Geneva, sans-serif;")
"Velg deklarasjonene som skal vises:"))
#m((table :id "tblQueryTable")
(tr
((td :align "right" :class "clsQueryLabel") "Versjon ")
(td #L(html-select stream
:name "version"
:options (collecting (maphash (lambda (name cg)
(declare (ignore cg))
(collect name))
*cg-table*))
:default version)
" "
((a :href #L(concat "/cl/cgp/constraint-grammar?version=" version)) "vis"))
(td (input/ :type "submit" :name "show-set-declarations" :value "Vis deklarasjonene")))
(tr
((td :align "right" :class "clsQueryLabel") "Delstreng ")
(td (input/ :type "text" :name "declaration-name-substring" :size "30" :value #L substring)))))
(defmethod show-set-declarations ((request http-request) stream &key version substring count)
#m(!DOCTYPE "HTML PUBLIC \"-//W3C//DTD HTML 3.2 Final//EN\"")
#m(html
(head (title "Redigeringsside for mengdedeklarasjonene")
(meta/ :http-equiv "Content-Type" :content "text/html; charset=utf-8"))
(body
#L(progn;handler-case
(progn
#m((form :method "post" :action #L(request-uri-string request))
(link/ :rel "stylesheet" :type "text/css" :href "/cl/cgp/styles/edit-declarations.css")
#L(show-set-declarations-page-body request stream :version version :substring substring)
#L(when version
(let* ((*cg* (name-to-cg version))
(*package* (find-package :cgp))
(*print-case* :downcase)
(%substring (string-upcase substring))
(declarations
(sort (collecting (maphash (lambda (name set)
(declare (ignore set))
(when (search %substring (symbol-name name))
(collect name)))
(set-declarations *cg*)))
#'string<)))
#-ignore
(when t ; update-alist ; put this here to get error message into browser
(%update-set-declarations (name-to-cg version) count (request-query request)))
#m((script :language "JavaScript")
#L(js/edit-declaration (length declarations) stream))
#m(input/ :type "hidden" :name "dec-count"
:value #L(ensure-string (length declarations)))
#m(br/)
#m((table :width "100%" :id "tblEditMenu")
(tr (td "|")
(td ((div :id "newDeclarations" :onclick "InsertNewDeclaration()")
"Lag ny deklarasjon"))
(td "|")
#L(unless (cg-locked-p *cg*)
#m(td ((div :id "saveChanges" :onclick "SaveChanges()") "Lagre endringer"))
#m(td "|"))
((td :id "testPage")
((a :href #L(concat "/cl/cgp/tagger.html?version="
version
"&tagging-niveau=morphological-disambiguation")
:target "test-rules")
"Test regelsettet")
#+ignore
((a :href #L(concat "/cl/cgp/test-rules?~a" version) :target "test-rules")
"Test regelsettet"))
(td "|")
(td ((div :id "mainPage")
((a :href "/cl/cgp/site-map.html" :target "site-map-page") "Hovedside")))
(td "|")
(td ((div :id "helpPage")
((a :href "/cl/cgp/declaration-edit-help.html"
:target "help-page") "Hjelp")))
(td "|")))
#m(br/)
#m((table :width "100%" :id "tblDeclarationsTable")
(tr ((td :valign "top" :class "clsDeclarationNameLabel") " Deklarasjon")
((td :valign "top" :class "clsDeclarationSetLabel") " Definisjon"))
#L(let ((id 0)) ;; IDs are necessary for Javascript editing
(dolist (declaration declarations)
(incf id)
#m(tr ((td :valign "top" :class "clsDeclarationName")
((div :id #L(format nil "DeclarationName~d" id)
:onclick #L(format nil "EditDeclaration('~d', 'Name')" id))
#L(write-string
(convert-string
(string-downcase declaration)
$encoding :sgml '(#\< "<" #\> ">")) stream)))
((td :valign "top" :class "clsDeclarationSet")
((div :id #L(format nil "DeclarationSet~d" id)
:onclick #L(format nil "EditDeclaration('~d', 'Set')" id))
#L(write-string
(convert-string
(with-output-to-string (str)
(write (gethash declaration (set-declarations *cg*))
:stream str))
$encoding :sgml '(#\< "<" #\> ">")) stream)))))))))))
#+ignore
(error (cond)
#m(p #L(format stream "Error: ~a" cond)))))))
;;;; -------------------------------- Test corpus ------------------------------------
(defmethod test-corpus-form ((request http-request) ent)
(with-http-response (request ent :timeout 6000)
(with-http-body (request ent)
(let ((stream (request-reply-stream request)))
#m(!DOCTYPE "HTML PUBLIC \"-//W3C//DTD HTML 3.2 Final//EN\"")
#m(html
(head (title "Tagging av testkorpuset")
(meta/ :http-equiv "Content-Type" :content "text/html; charset=utf-8"))
(body #L(test-corpus-page-body request stream ent)))))))
(defmethod test-corpus-page-body ((request http-request) stream entity)
(declare (ignore entity))
(bind-query-values
(version (tagging-niveau "morphological-disambiguation") test-corpus tag-corpus) request nil t t t
#m(p ((font :face "Verdana, Arial, Helvetica, Geneva, sans-serif;")
"Velg CG-versjon og prøvekorpus:"))
#+ignore
(when tag-corpus
(show-tagging-result request stream :entity entity :version version :corpus test-corpus))
#m((form :method "post" :action #L(request-uri-string request))
((table :id "tblQueryTable")
(tr
((td :align "right" :class "clsQueryLabel") "Versjon ")
(td #L(html-select stream
:name "version"
:options (cg-name-list)
:default version)
" "
((a :href #L(concat "/cl/cgp/constraint-grammar?version=" version)) "vis"))
(tr
((td :align "right" :class "clsQueryLabel") "Nivå ")
(td #L(html-select stream
:name "tagging-niveau"
:options (list "morphological-disambiguation" "syntactic-disambiguation")
:default tagging-niveau))))
(tr
((td :align "right" :class "clsQueryLabel") "Prøvekorpus ")
(td #L(html-select stream
:name "test-corpus"
:options (mapcar #'pathname-name
(directory (concat "/home/" (user-name request)
"/test-corpus/*.cor")))
;;*test-corpora*
:default test-corpus)))
(tr
(td (input/ :type "submit" :name "tag-corpus" :value "Tagg korpuset"))))
#L(when tag-corpus
(let ((niveau (intern (string-upcase tagging-niveau) :keyword)))
#m(br)
#m(hr)
#m((font :face "Verdana, Arial, Helvetica, Geneva, sans-serif;")
(p
#L(write-string (concat "Resultatet skrives til \"~/test-corpus/"
test-corpus (ecase niveau
(:morphological-disambiguation ".dis\".")
(:syntactic-disambiguation ".syn-dis\".")))
stream))))))
(process-run-function
"show-tagging-result"
#'show-tagging-result request stream
:version version
:tagging-niveau (intern (string-upcase tagging-niveau) :keyword)
:corpus test-corpus)))
;; write results to file in ~/terst-corpus until we find out how to keep the page alive
;; and send progress messages
(defmethod show-tagging-result ((request http-request) stream
&key entity version corpus
(tagging-niveau :morphological-disambiguation))
(when corpus
(labels ((message (text) ;; does not work.
#m(p #L(write-string text stream))))
(let ((file (concat "/home/" (user-name request)
"/test-corpus/" corpus ".cor")))
(when (probe-file file)
(multiple-value-bind (precision recall sentence-count token-count error-count)
(with-open-file (in-stream file)
(with-open-file (out-stream (concat "/home/" (user-name request)
"/test-corpus/" corpus
(ecase tagging-niveau
(:morphological-disambiguation ".dis")
(:syntactic-disambiguation ".syn-dis")))
:direction :output :if-exists :supersede
:if-does-not-exist :create)
(run-test-corpus (gethash version *cg-table*)
:in-stream in-stream
:out-stream out-stream
;;:message-fn #'message
:tagging-niveau tagging-niveau
:print-only-sentences-with-errors-p nil)))
(with-open-file (out-stream (concat "/home/" (user-name request)
"/test-corpus/" corpus
(ecase tagging-niveau
(:morphological-disambiguation ".res")
(:syntactic-disambiguation ".syn-res")))
:direction :output :if-exists :supersede
:if-does-not-exist :create)
(format out-stream
"Antall setninger: ~d~%Antall ord: ~d~%Antall feil: ~d~%Presisjon: ~,2f~%Recall: ~,2f~%"
sentence-count token-count
error-count
(* precision 100.0)
(* recall 100.0))))
#+mcl
#m(tr (td "CPU-tid:")
(td #L(format stream "~:d ms" (- (get-internal-run-time) start-run-time)))))))))
;;(error (cond) (p (format stream "Error: ~a" cond)))))))))
;;;; ------------------------------------------ Rules editing ------------------------------------------------
;; rename!
(defun name-to-cg (name)
(gethash name *cg-table*)
#+ignore
(ecase language
(:nbo *nbo-cg*)
(:nny *nny-cg*)))
(defmethod show-rules ((request http-request) ent
&key version id rule-id domain all-domains
type heuristic-niveau features error (keep-groups-together-p t))
(with-http-response (request ent)
(with-http-body (request ent)
(let ((stream (request-reply-stream request)))
#m(!DOCTYPE "HTML PUBLIC \"-//W3C//DTD HTML 3.2 Final//EN\"")
#m(html
(head (title "Redigeringsside for CG-reglene")
(meta/ :http-equiv "Content-Type" :content "text/html; charset=utf-8"))
(body
#L(handler-case
(let* ((*cg* (name-to-cg version))
(*package* (find-package :cgp))
(rules (when *cg*
(get-rules *cg* :id id
:domain (if all-domains :alle (utf-8-decode domain))
:type type
:heuristic-niveau heuristic-niveau
:features (mapcar (lambda (f) (intern (string-upcase f)))
features)
:keep-groups-together-p keep-groups-together-p))))
(when *cg*
#m((script :language "JavaScript")
#L(js/edit-rule (string-downcase (symbol-name (language *cg*))) stream)))
#m((form :method "post"
:action "/cl/cgp/show-rules.html" #+ignore(request-uri-string request))
(link/ :rel "stylesheet" :type "text/css"
:href "/cl/cgp/styles/edit-rules.css")
#L(when error
#m((font :color "red") (h3 "Det oppstod en feil ved lagring:"))
#m(p #S(format stream "~a" error)))
#L(show-rules-page-body
request stream :version version :rule-id rule-id
:domain (utf-8-decode domain) :all-domains all-domains
:type type :heuristic-niveau heuristic-niveau :features features)
#L(when rules
#m(br/)
#m((table :width "100%" :id "tblEditMenu")
(tr (td "|")
(td ((div :id "newRule"
:onclick #L(format nil "InsertNewRule('~a')"
(language *cg*)))
"Lag ny regel"))
(td "|")
#L(unless (cg-locked-p *cg*)
#m(td ((div :id "saveChanges" :onclick "SaveChanges()")
"Lagre endringer"))
#m(td "|"))
((td :id "testPage")
((a :href #L(concat "/cl/cgp/tagger.html?version="
version
"&tagging-niveau=morphological-disambiguation")
:target "test-rules")
"Test regelsettet")
(td "|")
(td ((div :id "mainPage")
((a :href "/cl/cgp/site-map.html"
:reference "/cl/cgp/site-map.html"
:target "help-page")
"Hovedside")))
(td "|")
(td ((div :id "helpPage")
((a :href "/cl/cgp/rule-edit-help.html"
:target "help-page") "Hjelp")))
(td "|")))
(br/)
((table :width "100%" :id "tblRulesTable")
#L(let ((group-comment nil))
(dolist (rule rules)
(when rule
(let ((new-group-comment
(aref (rule-group-comments *cg*) (rule-id rule))
#+ignore(rule-group-comment rule)))
(cond ((null new-group-comment)
(when group-comment
(setf group-comment nil)
#m(tr ((td :valign "top" :align "right"
:class "clsRuleGroupCommentLabel")
" ")
((td :valign "top" :class "clsRuleGroupComment") " "))))
((eq new-group-comment group-comment)
nil)
(t
(setf group-comment new-group-comment)
#m(tr ((td :valign "top" :align "right"
:class "clsRuleGroupCommentLabel")
"Regelgruppe "
(br/) " ")
((td :valign "top" :class "clsRuleGroupComment")
((font :color "blue"
:face "verdana, arial, helvetica, geneva, sans-serif" :size "3")
((pre :id #L(format nil "ruleGroupComment~a~d"
(language rule) id)
:onclick
#l(format nil "EditRule('ruleGroupComment~a~d', 'group-comment')"
(language rule) id))
#L(write-string
(convert-string (car group-comment)
:mac :sgml '(#\Newline "
"))
stream)))))))))
(when rule (write-rule-edit-html rule stream)))))
((script :language "JavaScript")
#L(js/edit-rule-popup-menu stream))
((div :id "divMenu1" :class "clsMenu"
:onmouseover "Menu_hover()"
:onmouseout "Menu_hover()"
:onclick "Menu_click()")
((div :id "EditRule") "Rediger regelen")
((div :id "EditRuleComment") "Rediger kommentar")
((div :id "DeleteRule") "Slett regelen")
((div :id "CloneRule") "Dupliser regelen")
((div :id "UndoRuleChanges") "Angre siste endring"))))))
(error (cond)
#+debug(error cond)
#m(p #s(format stream "Error: ~a" cond))))))))))
(defmethod write-rule-edit-html ((rule rule) stream)
(let ((*package* (find-package :cgp))
(*print-case* :downcase)
(language (language rule))
(id (rule-id rule)))
#m((tr :id #L(format nil "RuleRow~a~d" language id))
((td :width "80pt" :valign "top" :align "right" :class "clsRuleNameLabel") "Regel ")
((td :valign "top" :id #L(format nil "RuleName~a~d" language id)
:class "clsRuleName" :expitemnum "1")
" "
#L(write (class-name (class-of rule)) :stream stream)
(b #L(format stream " ~a " language))
#L(write (rule-id rule) :stream stream)))
#m((tr :id #L(format nil "RuleDefRow~a~d" language id))
((td :valign "top" :align "right" :class "clsRuleLabel") "Definisjon ")
((td :valign "top" :class "clsRuleBody")
((font :face "courier, verdana, arial, helvetica, geneva, sans-serif" :size "3")
((pre :id #L(format nil "~a~d" language id)
:onclick #L(format nil "EditRule('~a~d', 'body')" language id))
#L(write-rule-body rule stream :language language :html-p t)))))
(when (rule-comment rule)
#m(tr ((td :valign "top" :align "right" :class "clsRuleLabel") " Kommentar ")
((td :valign "top" :class "clsRuleComment")
((font :color "blue" :face "verdana, arial, helvetica, geneva, sans-serif" :size "3")
((pre :id #L(format nil "ruleComment~a~d" language id)
:onclick #L(format nil "EditRule('ruleComment~a~d', 'comment')" language id))
#L(write-string (convert-string (rule-comment rule) :mac :sgml '(#\Newline "
"))
stream))))))))
(defmethod display-rule-edit-help ((request http-request) ent)
(with-http-response (request ent)
(with-http-body (request ent)
(let ((stream (request-reply-stream request)))
#m(!DOCTYPE "HTML PUBLIC \"-//W3C//DTD HTML 3.2 Final//EN\"")
#m(html
(head (title "Hjelpeside")
(meta/ :http-equiv "Content-Type" :content "text/html; charset=utf-8"))
(body
(h3 "Hjelp for regelredigering")
(p "Det finnes tre måter å redigere regelsettet på: Du kan slette en regel, redigere en eksisterende regel, "
"og definere en ny regel. I tillegg kan du redigere kommentarer. "
"Alle redigeringskommandoer er tilgjengelige fra menyen som kommer opp når du klikker "
"på feltet med regeltype og -id. "
"Når du har redigert en regel etc. og er gått ut av regelfeltet, oppdateres statusfeltet i regelen. ")
(h4 "Slette regler")
(p "Som alltid er det enklest å slette. Bruk kommandoen fra menyen.")
(h4 "Redigere regler")
(p "For å redigere en eksisterende regeldefinisjon velger du enten \"Rediger regelen\" fra menyen, "
"eller du klikker på regelteksten.")
(h4 "Lage nye regler")
(p "Du kan lage nye regler ved å duplisere en eksisterende regel og redigere den. (Bruk menyen.) Den nye regelen legges inn "
"umiddelbart foran regelen du tok utgangspunkt i. At regelen kommer på rett plass er særdeles viktig for "
"mapping-regler, og her anbefales det forøvrig at du går ut fra et søk som i det minste viser alle reglene "
"med samme trekk. Hvis regelen du dupliserer har en gruppekommentar, vil den nye regelen få samme kommentar.")
(p "En alternativ måte å lage en ny regel på er å klikke på \"Lag ny regel\". Dette lager en tomt tekstfelt som "
"du kan skrive i. Regelen kommer til å havne aller først blant reglene av samme type og med samme første trekk.")
(h4 "Angre endringer")
(p "Du kan angre endringer du har gjort ved å bruke \"Angre siste endring\" fra menyen. "
"Endringer i kommentarfelt kan ikke angres via menyen. (Dette kommer senere.)")
(h4 "Lagring")
(p "Ved å klikke \"Lagre endringer\" sender du endringene til tjeneren.")))))))
#+cl-http
(defmethod user-access-controls ((user standard-user))
(let ((groups (user-groups user))
(access-controls ()))
(maphash (lambda (name access-control)
(declare (ignore name))
(when (intersection groups (access-control-default-groups access-control))
(pushnew access-control access-controls)))
(realm-url-group-table (user-realm user)))
access-controls))
(defmethod display-cgp-site-map ((request http-request) ent)
(with-http-response (request ent)
(with-http-body (request ent)
(let ((stream (request-reply-stream request))
(user (user-name request)))
(labels ((describe-uris (stream &rest uris)
(mapcar (lambda (uri)
(when-let (entity (cdar (aserve::uri-string-to-entities uri)))
(describe-entity entity :html-table-row stream)))
uris)))
#m(!DOCTYPE "HTML PUBLIC \"-//W3C//DTD HTML 3.2 Final//EN\"")
#m(html
(head (title "Oversiktskart CG-tagger")
(meta/ :http-equiv "Content-Type" :content "text/html; charset=utf-8"))
(body
((font :face "Arial, Helvetica, sans-serif" :size "3")
(h2 "Oversiktskart CG-tagger")
(p "Oversikten lister opp de viktigste sidene for CG-taggeren. "
"De fleste sidene er passordbeskyttet."
#+cl-http
" (Du har rettigheten"
#+cl-http
(when (> (length access-controls) 1) "e")
#+cl-http
(loop for (access-control . rest) on access-controls
do (i (fast-format stream " ~a" (access-control-name access-control))
(if rest "," ")."))))
((table :border "0" :cellpadding "2" :bgcolor "#ffffef")
((tr :bgcolor "#fafae8")
(td " URL")
(td "Tilgang")
(td "Beskrivelse"))
(tr ((td :colspan "3") (b "Testsider")))
#L(describe-uris stream
"/cl/cgp/test.html" #+ignore"/oslo-bergen-tagger.html"
"/cl/cgp/ranked-analyses.html")
(tr ((td :colspan "3") (b "Redigeringssider")))
#L(describe-uris stream
"/cl/cgp/tagger.html"
"/cl/cgp/constraint-grammars.html"
"/cl/cgp/cg-versions.html"
"/cl/cgp/tagger-edit.html"
"/cl/cgp/show-rules.html"
"/cl/cgp/show-set-declarations.html"
"/cl/cgp/run-test-corpus.html"
"/cl/cgp/lexicon-search.xml")))
(br/) (hr/)
#+orig
((font :face "Arial, Helvetica, sans-serif" :size "2")
"Oppdatert " #L(format stream #.(u:now :format :date :language :nbo)) " "
((a :href "mailto:paul.meurer@aksis.uib.no") "Paul Meurer")))))))))
;;; ------------------------------ xml output --------------------------------------
(eval-when (:compile-toplevel :load-toplevel :execute)
(setf lxml::*encoding* :utf-8
lxml::*newline-after-endtag-p* nil))
;; debugging
(defparameter *break-p* nil)
(defparameter *warn-on-end-found-p* nil)
(defparameter *warn-on-token-mismatch-p* nil)
#+debug(defparameter *sl* ())
;; find places where and tags are to be inserted
(defmethod find-sentence-boundary-tokens ((sentence sentence) &key (stack ()) (expand-tokens-p t)
&allow-other-keys)
#+debug(push sentence *sl*)
(with-slots (in-sentence-elements) (tokenizer sentence)
(let ((first nil)
(last nil)
(last-in-sentence nil)
(first-original-token nil)
(last-original-token nil))
#+debug(print (list :in-sentence-elements in-sentence-elements :stack stack))
(when stack
(setf first-original-token (first-token sentence))
;; remove spurious elements
(loop while
(and stack
(first-token sentence)
(eq (token-value (first-token sentence)) :insignificant)
(let ((atts (token-attributes (first-token sentence))))
;;(print (list :a (caddr atts) :b (caddr (car stack))))
(and (eq (car atts) :%end)
(eq (caddr atts) (caddr (car stack))))))
do
(remove-token (first-token sentence))
(setf stack (cdr stack)))
;; reopen closed elements on (remaining) stack
(let ((first-pos (cgp::token-stream-position (first-token sentence))))
(dolist (xml-token stack)
(push-token sentence :insignificant
:attributes (list :%start first-pos (caddr xml-token))
:position first-pos)))
(setf stack ()))
(labels ((walk (token concat-token)
#+debug(print (list token :concat-token concat-token
:next (when token (token-next token))
:expansion (token-expansion token)
:first first :last last :stack stack))
(cond ((null token)
nil)
((and expand-tokens-p (token-expansion token))
(do ((ex-token (car (token-expansion token)) (token-next ex-token)))
((eq ex-token (cdr (token-expansion token)))
(walk ex-token token))
(walk ex-token token))
#+debug(print (list :walk-expanded (token-next token) nil))
(walk (token-next token) nil))
(t
(let ((value (token-value token))
(xml-token (token-attributes token)))
(cond ((eq value :newline)
nil)
((null value)
nil)
((not (eq value :insignificant))
#+debug(print token)
(unless first (setf first token)))
(t ;; xml tag
(destructuring-bind (tag-type xml-file-pos tag . attributes) xml-token
(declare (ignore xml-file-pos attributes))
(case tag-type
(:%string nil)
(:%start
(cond (first
(when *warn-on-end-found-p*
(when last (warn "End already found: ~s." xml-token))
(print (list :>> tag)))
#+debug(print (list token :pushing1 stack))
(push xml-token stack))
((find tag in-sentence-elements)
(setf first token)
(when *warn-on-end-found-p*
(print (list :> tag)))
#+debug(print (list token :pushing2 stack))
(push xml-token stack))
(t
nil)))
(:%end
(cond (last
#+debug(print (list token :popping1 stack))
(pop stack)
nil)
((not first)
nil)
((null stack)
;;(print (cons :last token))
(setf last token))
(t
(when *warn-on-token-mismatch-p*
(unless (eq (caddr (car stack)) tag)
(warn "Token-mismatch: ~s not in ~s." tag stack)))
(when *warn-on-end-found-p*
(print (list :< (car stack))))
#+debug(print (list token :popping2 stack))
(pop stack)))))))))
(unless (or (eq token (last-token sentence)) concat-token)
(walk (token-next token) nil))))))
(walk (first-token sentence) nil)
(when stack
(setf last-original-token (last-token sentence))
#+debug(print (cons :stack stack))
;; give each auxiliary token the pos of the last original token before
#+debug(print (cons :last-token (last-token sentence)))
(let* ((stream-pos (cgp::token-stream-position (last-token sentence)))
(last-pos (if stream-pos
(if (or (eq (token-value (last-token sentence)) :newline)
(insignificant-token-p (last-token sentence)))
stream-pos
(+ stream-pos (length (token-value (last-token sentence)))))
#+ignore
(cadr (token-attributes (last-token sentence))))))
#+debug(print (list :stream-pos stream-pos (last-token sentence) last-pos))
(dolist (xml-token stack)
(add-token sentence :insignificant :attributes (list :%end last-pos (caddr xml-token))))))
(values first last stack first-original-token last-original-token last-in-sentence)))))
#||
;; problems:
ad 1)
Det er et
Det er et