emacs/lisp/titlecase.el

158 lines
7.2 KiB
EmacsLisp
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

;;; titlecase.el -*- lexical-binding: t; -*-
;; https://hungyi.net/posts/programmers-way-to-title-case/
(require 'cl-lib)
(require 'subr-x)
;;;###autoload
(defun titlecase-string (str)
"Convert string STR to title case and return the resulting string."
(let* ((case-fold-search nil)
(str-length (length str))
;; A list of markers that indicate start of a new phrase within the
;; title, e.g. "The Lonely Reindeer: A Christmas Story"
;; must be followed by one of word-boundary-chars
(new-phrase-chars '(?: ?. ?? ?\; ?\n ?\r))
;; immediately triggers new phrase behavior without waiting for word
;; boundary
(immediate-new-phrase-chars '(?\n ?\r))
;; A list of characters that indicate "word boundaries"; used to split
;; the title into processable segments
(word-boundary-chars (append '(? ? ?— ?- ? ?/)
immediate-new-phrase-chars))
;; A list of small words that should not be capitalized (in the right
;; conditions)
(small-words '("a" "an" "and" "as" "at" "but" "by" "en" "for" "if"
"in" "of" "on" "or" "the" "to" "v" "v." "vs" "vs."
"via"))
;; Fix if str is ALL CAPS
(str (if (string-match-p "[a-z]" str) str (downcase str)))
;; Reduce over a state machine to do title casing
(final-state
(cl-reduce
(lambda (state char)
(let* ((result (aref state 0))
(last-segment (aref state 1))
(first-word-p (aref state 2))
(was-in-path-p (aref state 3))
(last-char (car last-segment))
(in-path-p (or (and (eq char ?/)
(or (not last-segment)
(member last-char '(?. ?~))))
(and was-in-path-p
(not
(or (eq char ? )
(member
char
immediate-new-phrase-chars))))))
(end-p
;; are we at the end of the input string?
(eq (+ (length result) (length last-segment) 1)
str-length))
(pop-p
;; do we need to pop a segment onto the output result?
(or end-p (and (not in-path-p)
(member char word-boundary-chars))))
(segment
;; add the current char to the current segment
(cons char last-segment))
(segment-string
;; the readable version of the segment
(apply #'string (reverse segment)))
(small-word-p
;; was the last segment a small word?
(member (downcase (substring segment-string 0 -1))
small-words))
(capitalize-p
;; do we need to capitalized this segment or lowercase it?
(or end-p first-word-p (not small-word-p)))
(ignore-segment-p
;; ignore explicitly capitalized segments
(or (string-match-p "[a-zA-Z].*[A-Z]" segment-string)
;; ignore URLs
(string-match-p "^https?:" segment-string)
;; ignore hostnames and namespaces.like.this
(string-match-p "\\w\\.\\w" segment-string)
;; ignore windows filesystem paths
(string-match-p "^[A-Za-z]:\\\\" segment-string)
;; ignore unix filesystem paths
was-in-path-p
;; ignore email addresses and user handles with @ symbol
(member ?@ segment)))
(next-result
(if pop-p
(concat result
(if ignore-segment-p
;; pop segment onto the result without
;; processing
segment-string
;; titlecase the segment before popping onto
;; result
(titlecase--segment
segment-string capitalize-p)))
result))
(next-segment
(unless pop-p segment))
(will-be-first-word-p
(if pop-p
(or (not last-segment)
(member last-char new-phrase-chars)
(member char immediate-new-phrase-chars))
first-word-p)))
(vector
next-result next-segment will-be-first-word-p in-path-p)))
str
:initial-value
(vector nil ; result stack
nil ; current working segment
t ; is it the first word of a phrase?
nil)))) ; are we inside of a filesystem path?
(aref final-state 0)))
(defun titlecase--segment (segment capitalize-p)
"Convert a title's inner SEGMENT to capitalized or lower case
depending on CAPITALIZE-P, then return the result."
(let* ((case-fold-search nil)
(ignore-chars '(?' ?\" ?\( ?\[ ? ?“ ? ?” ?_))
(final-state
(cl-reduce
(lambda (state char)
(let ((result (aref state 0))
(downcase-p (aref state 1)))
(cond
(downcase-p
;; already upcased start of segment, so lowercase the rest
(vector (cons (downcase char) result) t))
((member char ignore-chars)
;; check if start char of segment needs to be ignored
(vector (cons char result) downcase-p))
(t
;; haven't upcased yet, and we can, so do it
(vector (cons (upcase char) result) t)))))
segment
:initial-value (vector nil (not capitalize-p)))))
(thread-last (aref final-state 0)
(reverse)
(apply #'string))))
;;;###autoload
(defun titlecase-region (begin end)
"Convert text in region from BEGIN to END to title case."
(interactive "*r")
(let ((pt (point)))
(insert (titlecase-string (delete-and-extract-region begin end)))
(goto-char pt)))
;;;###autoload
(defun titlecase-dwim ()
"Convert the region or current line to title case.
If Transient Mark Mode is on and there is an active region, convert
the region to title case. Otherwise, work on the current line."
(interactive)
(if (and transient-mark-mode mark-active)
(titlecase-region (region-beginning) (region-end))
(titlecase-region (point-at-bol) (point-at-eol))))
(provide 'titlecase)