diff --git a/README.org b/README.org
index d1105fd..adaac75 100644
--- a/README.org
+++ b/README.org
@@ -3197,6 +3197,68 @@ Listing [[lst:word-games]] defines the =anagram-p= function that migth be used f
 better exploitation of [[https://en.wikipedia.org/wiki/Wikipedia:Lists_of_common_misspellings/For_machines][Wikipedia's list of common misspellings for machines]] in
 the future.
 
+#+caption[Better parsing of the common misspellings list]:
+#+caption: Better parsing of the common misspellings list.
+#+name: lst:better-misspellings
+#+begin_src emacs-lisp
+  (with-eval-after-load 'emacs
+    (defun browse-common-misspellings ()
+      "Open the Wikipedia page of common misspellings for machines in EWW."
+      (interactive)
+      (eww (concat "https://en.wikipedia.org/wiki/Wikipedia"
+                   ":Lists_of_common_misspellings/For_machines")))
+
+    (defun wordnet-check (word)
+      "Check whether WORD occurs in the wordnet lexical database.
+  Run the \"wn\" program on WORD with its four grep options.  Return
+  non-nil when it exits with code 0 and prints more than whitespace.
+  Signal an error carrying the trimmed output on any other exit code."
+      (cl-destructuring-bind (exit-code output)
+          (shell-command-with-exit-code "wn" word "-grepn" "-grepv" "-grepa" "-grepr")
+        (if (= 0 exit-code)
+            (< 0 (length (string-trim output)))
+          (error "%s" (string-trim output)))))
+
+    (defun better-misspellings ()
+      "Pick the best from each common misspelling multiple choice of corrections.
+  Search the current buffer from its beginning for the entries between
+  the line starting with \"abandon\" and the line starting with
+  \"Retrieved from\".  For each entry offering several comma-separated
+  corrections, log the entry and every correction that `wordnet-check'
+  accepts.  Return an alist mapping each such misspelling to the list
+  of its accepted corrections.  Signal a `user-error' when either
+  boundary line is missing."
+      (interactive)
+      (let (start end choices)
+        (save-excursion
+          ;; Search the whole buffer, wherever point happens to be.
+          (goto-char (point-min))
+          ;; NOERROR must be non-nil: otherwise a failed search signals
+          ;; `search-failed' and the `user-error' branches never run.
+          (if (re-search-forward (rx bol "abandon") nil t)
+              (setq start (match-beginning 0))
+            (user-error "Failed to find beginning of misspellings"))
+          (if (re-search-forward (rx bol "Retrieved from") nil t)
+              (setq end (match-beginning 0))
+            (user-error "Failed to find end of misspellings")))
+        (dolist (line (split-string
+                       (buffer-substring-no-properties start end) "\n"))
+          ;; The `stringp' test skips lines that have no "->" separator.
+          (pcase-let ((`(,lhs ,rhs . ,_) (split-string line "->")))
+            (when (and (stringp rhs)
+                       (string-match-p ", " rhs))
+              (message "%s -> %s" lhs rhs)
+              (let ((words (split-string rhs ", "))
+                    (stack))
+                (dolist (word words)
+                  (when (wordnet-check word)
+                    (message "wordnet finds: %s" word)
+                    (push word stack)))
+                ;; Keep the accepted corrections instead of dropping them.
+                (push (cons lhs (nreverse stack)) choices)))))
+        (nreverse choices))))
+#+end_src
+
 #+caption[Word games]:
 #+caption: Word games.
 #+name: lst:word-games