;;; -*- Mode: tdl; Coding: utf-8; -*-

;;;
;;; Copyright (c) 2009 -- 2010 Stephan Oepen (oe@ifi.uio.no); 
;;; see `LICENSE' for conditions.
;;;


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; a second pass at lightweight NEs, now that we have further split up tokens
;;; at hyphens and dashes.
;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;;;
;;; numerals, including some sub-sets (days of the month or years).  in some of
;;; the rules |1| is deliberately excluded because, unlike all other numerals,
;;; it requires singular agreement.  hence, we assume that all usages of |1|
;;; need to be accounted for in the native lexicon, including its use as an
;;; hour or as a day of the month.
;;;

;;
;; numerals that could be either a plain cardinal or a day of the month, i.e.
;; the integers |2| through |31| written without a leading zero: one digit
;; |2| to |9|, two digits |10| to |29|, or one of |30| and |31|.  a bare |1|
;; does not match this pattern.
;;
card_or_dom_ne_tmr := ne_tmt &
[ +INPUT
    < [ +FORM ^([2-9]|[1-2][0-9]|3[01])$ ] >,
  +OUTPUT
    < [ +CLASS card_or_dom_ne ] > ].

;;
;; numerals that could be either a plain cardinal or a year: exactly three
;; digits, optionally preceded by |1| or |2|, e.g. |950|, |1805|, |1957|, or
;; |2005|.  the pattern thus spans |000| to |2999|.
;;
card_or_year_ne_tmr := ne_tmt &
[ +INPUT
    < [ +FORM ^[12]?[0-9]{3}$ ] >,
  +OUTPUT
    < [ +CLASS card_or_year_ne ] > ].

;;
;; integers without leading zeros (but not a bare |1|), with an optional sign
;; (one of |+|, |-|, |±|, or |~|) and an optional trailing decimal point.  to
;; accept measure phrases like |900-pound|, where at this point |900-| is one
;; token, allow an optional trailing hyphen instead of the decimal point.
;;
;; the hyphen has to come first in the sign character class, so that it is a
;; literal rather than a range operator: |[+-±~]| denotes every character from
;; |+| to |±|, which includes the digits and all ASCII letters, and hence used
;; to admit tokens like |x12| or |05| as cardinals.
;;
card_ne_1_tmr := ne_tmt &
[ +INPUT < [ +FORM ^[-+±~]?([2-9]|[1-9][0-9][0-9]*)(\.|-)?$ ] >,
  +OUTPUT < [ +CLASS card_ne ] > ].

;;
;; floating point numbers, with an optional sign (one of |+|, |-|, |±|, or |~|)
;; and at least one decimal; digits before the decimal point are optional, as
;; in |.5|.  for measure phrases like |1.5-inch pianist|, where at this point
;; |1.5-| is one token, allow an optional trailing hyphen.
;;
;; the hyphen has to come first in the sign character class, so that it is a
;; literal rather than a range operator: |[+-±~]| denotes every character from
;; |+| to |±|, which includes the digits and all ASCII letters.
;;
card_ne_2_tmr := ne_tmt &
[ +INPUT < [ +FORM ^[-+±~]?[0-9]*\.[0-9]+-?$ ] >,
  +OUTPUT < [ +CLASS card_ne ] > ].

;;
;; numbers with US-style or German thousands separators: one to three leading
;; digits (no leading zero), one or more groups of a separator (|,| or |.|)
;; plus exactly three digits, and optionally another separator followed by
;; decimals or by a hyphen, e.g. |23,000.-|.  an optional sign (one of |+|,
;; |-|, or |±|) may precede the number.
;;
;; the hyphen has to come first in the sign character class, so that it is a
;; literal rather than a range operator: |[+-±]| denotes every character from
;; |+| to |±|, which includes the digits and all ASCII letters.
;;
card_ne_3_tmr := ne_tmt &
[ +INPUT < [ +FORM ^[-+±]?[1-9][0-9]{0,2}([,.][0-9]{3})+([,.]([0-9]*|-))?$ ] >,
  +OUTPUT < [ +CLASS card_ne ] > ].

;;
;; ordinals, mostly parallel to the numerals.
;;
;; ordinals that could also be a day of the month, i.e. exactly the well-formed
;; ordinals from |1st| to |31st|:
;;
;;   - |1st|, |21st|, and |31st|;
;;   - |2nd| and |22nd|;
;;   - |3rd| and |23rd|;
;;   - |4th| to |20th| (including |11th|, |12th|, and |13th|), |24th| to |29th|,
;;     and |30th|.
;;
;; malformed suffix combinations (|1th|, |22th|, |31th|, et al.) and ordinals
;; beyond the length of a month (|32nd|, |33rd|) are deliberately not matched.
;;
ord_or_dom_ne_tmr := ne_tmt &
[ +INPUT < [ +FORM ^((2?1|31)st|2?2nd|2?3rd|(2?[4-9]|1[0-9]|20|30)th)$ ] >,
  +OUTPUT < [ +CLASS ord_or_dom_ne ] > ].

;;
;; arbitrary ordinals written as digits plus suffix.  |1st|, |2nd|, and |3rd|
;; are accepted either as the complete token or when preceded by a digit other
;; than |1| (so |21st| and |101st|, but not |11st|); |th| is accepted after
;; |11|, |12|, |13|, or any final digit from |0| and |4| to |9| (so |11th|,
;; |112th|, and |20th|, but not |21th|).  the character preceding |1st|, |2nd|,
;; or |3rd| has to be a digit: a negated class |[^1]| would also admit letters
;; and punctuation, e.g. |x1st|.
;;
ord_ne_tmr := ne_tmt &
[ +INPUT < [ +FORM ^[0-9]*((^|[02-9])(1st|2nd|3rd)|(11|12|13|[04-9])th)$ ] >,
  +OUTPUT < [ +CLASS ord_ne ] > ].