;;;
;;; this file should be `Index.lisp' and reside in the directory containing the
;;; tsdb(1) test suite skeleton databases (typically a subdirectory `skeletons'
;;; in the tsdb(1) database root directory `*tsdb-home*').
;;;
;;; the file should contain a single un-quote()d Common-Lisp list enumerating
;;; the available skeletons, e.g.
;;;
;;;   (((:path . "english") (:content . "English TSNLP test suite"))
;;;    ((:path . "csli") (:content . "CSLI (ERGO) test suite"))
;;;    ((:path . "vm") (:content . "English VerbMobil data")))
;;;
;;; where the individual entries are assoc() lists with at least two elements:
;;;
;;;   - :path --- the (relative) directory name containing the skeleton;
;;;   - :content --- a descriptive comment.
;;;
;;; the order of entries is irrelevant as the `tsdb :skeletons' command sorts
;;; the list lexicographically before output.
;;;

;;;
;;; to make MaxEnt experimentation across data sets (and generally reference) a
;;; little easier, item identifiers need to be globally unique.  to accomodate
;;; existing data sets in this scheme, dan and i came up with a break-down of
;;; identifiers as follows
;;;
;;; [                1 --             1,999] --- various test suites
;;; [            2,000 --             9,999] --- The Cathedral and the Bazaar
;;; [           10,000 --            29,999] --- Conan Doyle Corpus (CDC)
;;; [          102,000 --           116,000] --- Le Petit Prince (LPP)
;;; [        1,000,000 --         1,999,999] --- VerbMobil
;;; [        2,000,000 --         2,999,999] --- ecommerce
;;; [        3,000,000 --         3,999,999] --- LOGON
;;; [        4,000,000 --         4,999,999] --- SciBorg
;;; [        5,000,000 --         5,099,999] --- BioScope
;;; [       10,000,000 --        19,999,999] --- WeScience
;;; [       20,000,000 --        29,999,999] --- PTB WSJ
;;; [       30,000,000 --        30,999,999] --- Hinoki
;;; [       40,000,000 --        49,999,999] --- Brown Corpus
;;; [      120,000,000 --     1,099,999,999] --- GENIA
;;; [  100,000,000,000 --   299,999,999,999] --- WeSearch Data Collection
;;; [1,000,010,000,000 -- 1,128,928,700,510] --- WikiWoods
;;; 
;;; _fix_me_
;;; some of the old skeletons (and existing treebanks, where applicable) still
;;; need to be converted.  this will require a re-numbering script and close
;;; synchronization with dan.                                   (8-nov-08; oe)
;;;
(((:path . "verbmobil") (:content . "Scheduling Dialogues (VerbMobil)"))
 ((:path . "yy") (:content . "ECommerce Email (YY)"))
 ((:path . "logon") (:content . "Tourism Brochures (LOGON)"))
 #+:null
 ((:path . "handon") (:content . "Tourism Web Sites (HandOn)"))
 ((:path . "wescience") (:content . "Scholarly Literature (WeScience)"))
 ((:path . "wdc") (:content . "WeSearch Data Collection"))
 ((:path . "wsj") (:content . "Wall Street Journal"))
 ((:path . "ptb") (:content . "Penn Treebank (WSJ)"))
 ((:path . "semcor") (:content . "SemCor Sampler (via Melbourne)"))
 ((:path . "genia") (:content . "GENIA Treebank"))
 ((:path . "bioscope") (:content . "Bio-Medical Texts (BioScope)"))
 ((:path . "starsem") (:content . "2012 *SEM Shared Task on Negation"))
 ((:path . "english") (:content . "TSNLP English Test Suite"))
 ((:path . "csli") (:content . "CSLI (HP) Test Suite"))
 ((:path . "mrs") (:content . "DELPH-IN MRS Test Suite"))
 ((:path . "esd") (:content . "ERG Semantic Documentation Test Suite"))
 ((:path . "cb") (:content . "The Cathedral and the Bazaar"))
 ((:path . "trec") (:content . "TREC QA Questions (Ninth Conference)"))
 ((:path . "fracas") (:content . "FraCaS Semantics Test Suite"))
 ((:path . "peted") (:content . "Evaluation By Textual Entailment (Development)"))
 ((:path . "petet") (:content . "Evaluation By Textual Entailment (Test)"))
 #+:null
 ((:path . "tanaka") (:content . "Tanaka Corpus (English Translations)"))
 ((:path . "lpp") (:content . "Le Petit Prince"))
 ((:path . "redwoods") (:content . "Core Redwoods (JHPSTG & WeScience)")))