;;
;; NSF new document summary
;;
;; As a special feature, any article whose type matches
;; nndoc-nsf-summary-type-reject, or whose subtype matches
;; nndoc-nsf-summary-subtype-reject, will be skipped during undigestifying.
;;

(defun nndoc-nsf-summary-type-p nil
  "Decide whether an article is an NSF new-document summary.
Searches forward from point, case-sensitively, for the MyNSF From:
header, then the weekly-summary Subject: header, then a line starting
with \"The National Science Foundation\".  Returns t when all three are
found in that order, nil otherwise.  Point is moved by the searches."
  (let ((case-fold-search nil))
    (and (re-search-forward "^From: MyNSF" nil t)
         (re-search-forward "^Subject: Weekly new document summary$" nil t)
         (re-search-forward "^The National Science Foundation" nil t)
         ;; Normalize the buffer position returned by the last search to t.
         t)))

(defvar nndoc-nsf-summary-type-reject nil
  "Regexp matching NSF article types that should be skipped.
When nil, no article is rejected because of its type.")

(defvar nndoc-nsf-summary-subtype-reject nil
  "Regexp matching NSF article subtypes that should be skipped.
When nil, no article is rejected because of its subtypes.")

(defun nndoc-nsf-summary-head-begin ()
  "Move point back two lines, to the start of the article's head.
`nndoc-nsf-summary-article-begin-re' ends with two newlines, so after
`nndoc-nsf-summary-article-begin' point sits two lines below the date
line that opens the article."
  (forward-line -2))

(require 'nndoc-generic-functions "nndoc/nndoc-generic-functions.el")

;; Declared with `defvar' (rather than a bare top-level `setq') so the
;; variable is known to the byte compiler and carries documentation.
(defvar nndoc-nsf-summary-article-begin-re
  "\\(Monday\\|Tuesday\\|Wednesday\\|Thursday\\|Friday\\|Saturday\\|Sunday\\),? \\(January\\|February\\|March\\|April\\|May\\|June\\|July\\|August\\|September\\|October\\|November\\|December\\) [0-9]+,? [0-9]+\n\n"
  "Regexp matching the date line that begins each NSF summary article.
Matches a weekday name, a month name, a day number and a year, followed
by an empty line.")

(defun nndoc-nsf-summary-article-begin nil
  "Find the beginning of an NSF summary, rejecting unwanted areas.
Searches forward for `nndoc-nsf-summary-article-begin-re'.  When either
`nndoc-nsf-summary-type-reject' or `nndoc-nsf-summary-subtype-reject' is
set, a candidate is skipped if its Type line matches the former or any
of its Subtype lines matches the latter.  On success point is left at
the beginning of the line following the matched date header and the
function returns t; if no acceptable article remains it returns nil."
  (let ((case-fold-search nil)
        here limit rejected
        (keep-looking t))
    (while (and keep-looking
                (re-search-forward nndoc-nsf-summary-article-begin-re nil t))
      (if (not (or nndoc-nsf-summary-type-reject
                   nndoc-nsf-summary-subtype-reject))
          ;; Nothing can be rejected: the first match is the answer.
          (setq keep-looking nil)
        ;; Determine the extent of this candidate so that the Type and
        ;; Subtype searches cannot run into the next article.
        (setq here (point))
        (nndoc-nsf-summary-body-end)
        (setq limit (point))
        (goto-char here)
        (setq rejected nil)
        (if (and nndoc-nsf-summary-type-reject
                 (re-search-forward "^\t\tType ?: *" limit 'x)
                 (looking-at nndoc-nsf-summary-type-reject))
            (setq rejected t))
        (goto-char here)
        ;; An article may carry several Subtype lines; any match rejects.
        (if nndoc-nsf-summary-subtype-reject
            (while (and (not rejected)
                        (re-search-forward "^\t\tSubtype ?: *" limit 'x))
              (setq rejected
                    (looking-at nndoc-nsf-summary-subtype-reject))))
        (when (not rejected)
          (setq keep-looking nil)
          (goto-char here))))
    (beginning-of-line)
    (not keep-looking)))

(defun nndoc-nsf-summary-body-end ()
  "Move point to the end of the current article's body.  Always returns t.
The body ends just before the next article's date line or the
`nndoc-file-end' marker, whichever comes first, with trailing
whitespace excluded except for one final newline.  If neither is found
the body extends to the end of the buffer."
  (if (re-search-forward (concat nndoc-nsf-summary-article-begin-re
                                 "\\|" nndoc-file-end)
                         nil t)
      (progn
        (goto-char (match-beginning 0))
        (skip-chars-backward " \t\n")
        ;; Keep the newline that terminates the last line of text.
        (if (eq (following-char) ?\n)
            (forward-char 1)))
    ;; No terminator at all: do not leave the article with an empty body.
    (goto-char (point-max)))
  t)

;;
;; Alist of two-character subtype abbreviations.  This list is known to
;; be incomplete.  By default, everything has equal priority.
;;
;; Note that the list elements could be written more compactly as
;; ("a" "b" . c); however, I find the current notation clearer.
;;
(defvar nndoc-nsf-summary-subtype-alist
  '(
    ("Administrative" . ("AD" . 1))
    ("Biology" . ("BI" . 1))
    ("Clerical and Technical" . ("CT" . 1))
    ("Computer/Information Sciences" . ("CS" . 1))
    ("Crosscutting Programs" . ("CC" . 1))
    ("Data Brief" . ("DB" . 1))
    ("Education" . ("ED" . 1))
    ("Engineering" . ("EN" . 1))
    ("Executive" . ("EX" . 1))
    ("Geosciences" . ("GE" . 1))
    ("International" . ("IN" . 1))
    ("Math/Physical Sciences" . ("MP" . 1))
    ("NSF-wide" . ("WD" . 1))
    ("Scientific and Professional" . ("SP" . 1))
    ("Social/Behavioral Sciences" . ("SS" . 1))
    ("Statistical Reports on the Education of Scientists and Engineers"
     . ("SR" . 1))
    ("Statistical Reports on the Science and Engineering Work Force"
     . ("SR" . 1))
    )
  "Alist of article subtypes and the two-letter abbreviations that
should be used to indicate them.  Car of each element is the subtype
string.  Cdr is a cons of the two-letter abbreviation with a priority.
If the article has multiple subtypes, the subtype with the numerically
smallest priority will be used.  Ties are broken in favor of the
first-found subtype.  A cdr that is a bare abbreviation rather than a
cons is given the priority 9999.")

(defun nndoc-generate-nsf-summary-head (article)
  "Generate headers for an NSF digest article.
ARTICLE is the key of the article's entry in `nndoc-dissection-alist'.
The article text is read from `nndoc-current-buffer': its first line
becomes the Date, the next non-blank line the Subject, and the text
after \"Type:\" the From value.  The Subject is prefixed with the
abbreviation of the best-priority subtype found in
`nndoc-nsf-summary-subtype-alist', or with a blank when no subtype is
known.  The From, Subject and Date lines are inserted at point in the
buffer that was current on entry."
  (let ((entry (cdr (assq article nndoc-dissection-alist)))
        best-subtype subtype prefix subject from date here)
    (save-excursion
      (set-buffer nndoc-current-buffer)
      (save-restriction
        ;; Restrict the searches below to this article only.
        (narrow-to-region (car entry) (nth 3 entry))
        (goto-char (point-min))
        (end-of-line)
        (setq date (buffer-substring (point-min) (point)))
        (skip-chars-forward "\n \t")
        (setq here (point))
        (end-of-line)
        (setq subject (buffer-substring here (point)))
        ;; If there is no Type line, point stays at the end of the
        ;; subject line and FROM comes out empty.
        (re-search-forward "Type ?: *" nil t)
        (setq here (point))
        (end-of-line)
        (setq from (buffer-substring here (point)))
        (while (re-search-forward "Subtype ?: *" nil t)
          (setq here (point))
          (end-of-line)
          (setq subtype
                (cdr (assoc (buffer-substring here (point))
                            nndoc-nsf-summary-subtype-alist)))
          (when subtype
            ;; Tolerate alist values that are a bare abbreviation.  The
            ;; default priority must be a number because it is compared
            ;; with `<' below.
            (if (not (consp subtype))
                (setq subtype (cons subtype 9999)))
            ;; Strict `<' keeps the first-found subtype on ties.
            (if (or (not best-subtype)
                    (< (cdr subtype) (cdr best-subtype)))
                (setq best-subtype subtype))))
        (if best-subtype
            (setq prefix (car best-subtype))
          (setq prefix " "))
        (setq subject (concat prefix " " subject))))
    (insert "From: " from "\n"
            "Subject: " subject "\n"
            "Date: " date "\n")))

(defun nndoc-transform-nsf-summary (article)
  "Transform an NSF summary article so that it matches nndoc expectations.
Operates on the current buffer.  The leading date line and the title
line are removed and re-inserted as Date: and Subject: headers, the
text after \"Type:\" becomes the From: header, and leading whitespace
is stripped from every line.  An article without a Type line gets an
empty From: header.  ARTICLE is not used."
  (let (date from subject here)
    (goto-char (point-min))
    (end-of-line)
    (setq date (buffer-substring (point-min) (point)))
    (skip-chars-forward "\n \t")
    (delete-region (point-min) (point))
    (end-of-line)
    (setq subject (buffer-substring (point-min) (point)))
    (skip-chars-forward "\n \t")
    (delete-region (point-min) (point))
    ;; A missing Type line must not abort the transformation.
    (setq from
          (if (re-search-forward "Type ?: *" nil t)
              (progn
                (setq here (point))
                (end-of-line)
                (buffer-substring here (point)))
            ""))
    (goto-char (point-min))
    (while (re-search-forward "^[ \t]+" nil t)
      (replace-match "" nil nil))
    (goto-char (point-min))
    (insert "From: " from "\n"
            "Subject: " subject "\n"
            "Date: " date "\n"
            "\n")))

(nndoc-add-type
 `(nsf-summary
   (article-begin-function . nndoc-nsf-summary-article-begin)
   (head-begin-function . nndoc-nsf-summary-head-begin)
   (body-end-function . nndoc-nsf-summary-body-end)
   (file-end . "^====")
   (generate-head-function . nndoc-generate-nsf-summary-head)
   (article-transform-function . nndoc-transform-nsf-summary)
   ))