Start better parsing of common misspellings list

This commit is contained in:
Gerard Vermeulen 2022-09-18 12:47:54 +02:00
parent a4be679c4b
commit 7ae70fca70

View File

@ -3197,6 +3197,50 @@ Listing [[lst:word-games]] defines the =anagram-p= function that might be used f
better exploitation of [[https://en.wikipedia.org/wiki/Wikipedia:Lists_of_common_misspellings/For_machines][Wikipedia's list of common misspellings for machines]] in
the future.
#+caption[Better parsing of the common misspellings list]:
#+caption: Better parsing of the common misspellings list.
#+name: lst:better-misspellings
#+begin_src emacs-lisp
(with-eval-after-load 'emacs
  (defun browse-common-misspellings ()
    "Open the Wikipedia page of common misspellings for machines in EWW."
    (interactive)
    ;; The URL is split in two pieces only to keep the source lines short.
    (eww (concat "https://en.wikipedia.org/wiki/Wikipedia"
                 ":Lists_of_common_misspellings/For_machines")))

  (defun wordnet-check (word)
    "Check whether WORD occurs in the wordnet lexical database.
Run the `wn' program on WORD with the options -grepn, -grepv, -grepa
and -grepr by means of `shell-command-with-exit-code'.  Return non-nil
when `wn' exits with code 0 and prints non-blank output, nil when it
exits with code 0 and prints nothing but whitespace.  Signal an error
carrying the trimmed output when the exit code is non-zero."
    ;; NOTE(review): this assumes that `wn' exits with 0 on success; the
    ;; wn(1WN) manual describes the exit status as a count of searches or
    ;; senses for some invocations -- TODO confirm for the -grep options.
    (cl-destructuring-bind (exit-code output)
        (shell-command-with-exit-code
         "wn" word "-grepn" "-grepv" "-grepa" "-grepr")
      (if (= 0 exit-code)
          (< 0 (length (string-trim output)))
        (error "%s" (string-trim output)))))

  (defun better-misspellings ()
    "Report the corrections that `wordnet-check' accepts for ambiguous entries.
Search the current buffer from its beginning for the first line
starting with \"abandon\" and for the first following line starting
with \"Retrieved from\".  Split the text in between into lines and each
line at \"->\" into a misspelling and its corrections.  For every line
offering several corrections separated by \", \", echo the line and
echo each correction for which `wordnet-check' returns non-nil.

Signal a `user-error' when either of the two delimiting lines cannot
be found.  Point is left where it was."
    (interactive)
    (let (start end)
      (save-excursion
        ;; Search the whole (accessible) buffer, wherever point happens to be.
        (goto-char (point-min))
        ;; The NOERROR argument makes a failed search return nil instead of
        ;; signaling `search-failed', so that the friendlier `user-error'
        ;; below is actually reachable.
        (unless (re-search-forward (rx bol "abandon") nil t)
          (user-error "Failed to find beginning of misspellings"))
        (setq start (match-beginning 0))
        (unless (re-search-forward (rx bol "Retrieved from") nil t)
          (user-error "Failed to find end of misspellings"))
        (setq end (match-beginning 0)))
      (dolist (line (split-string
                     (buffer-substring-no-properties start end) "\n"))
        ;; A line without "->" leaves RHS nil, which the `stringp' test
        ;; below filters out.
        (pcase-let ((`(,lhs ,rhs . ,_) (split-string line "->")))
          (when (and (stringp rhs)
                     ;; Only entries with more than one correction matter.
                     (string-match-p ", " rhs))
            (message "%s -> %s" lhs rhs)
            ;; STACK accumulates the accepted corrections of this entry; it
            ;; is not consumed yet.
            (let (stack)
              ;; Drop empty candidates and trim stray whitespace so that
              ;; `wn' never gets called with a blank or padded word.
              (dolist (word (split-string rhs ", " t "[ \t\r]+"))
                (when (wordnet-check word)
                  (message "wordnet finds: %s" word)
                  (push word stack))))))))))
#+end_src
#+caption[Word games]:
#+caption: Word games.
#+name: lst:word-games