158 lines
7.2 KiB
EmacsLisp
158 lines
7.2 KiB
EmacsLisp
;;; titlecase.el -*- lexical-binding: t; -*-
|
||
|
||
;; https://hungyi.net/posts/programmers-way-to-title-case/
|
||
|
||
(require 'cl-lib)
|
||
(require 'subr-x)
|
||
|
||
;;;###autoload
|
||
(defun titlecase-string (str)
|
||
"Convert string STR to title case and return the resulting string."
|
||
(let* ((case-fold-search nil)
|
||
(str-length (length str))
|
||
;; A list of markers that indicate start of a new phrase within the
|
||
;; title, e.g. "The Lonely Reindeer: A Christmas Story"
|
||
;; must be followed by one of word-boundary-chars
|
||
(new-phrase-chars '(?: ?. ?? ?\; ?\n ?\r))
|
||
;; immediately triggers new phrase behavior without waiting for word
|
||
;; boundary
|
||
(immediate-new-phrase-chars '(?\n ?\r))
|
||
;; A list of characters that indicate "word boundaries"; used to split
|
||
;; the title into processable segments
|
||
(word-boundary-chars (append '(? ?– ?— ?- ?‑ ?/)
|
||
immediate-new-phrase-chars))
|
||
;; A list of small words that should not be capitalized (in the right
|
||
;; conditions)
|
||
(small-words '("a" "an" "and" "as" "at" "but" "by" "en" "for" "if"
|
||
"in" "of" "on" "or" "the" "to" "v" "v." "vs" "vs."
|
||
"via"))
|
||
;; Fix if str is ALL CAPS
|
||
(str (if (string-match-p "[a-z]" str) str (downcase str)))
|
||
;; Reduce over a state machine to do title casing
|
||
(final-state
|
||
(cl-reduce
|
||
(lambda (state char)
|
||
(let* ((result (aref state 0))
|
||
(last-segment (aref state 1))
|
||
(first-word-p (aref state 2))
|
||
(was-in-path-p (aref state 3))
|
||
(last-char (car last-segment))
|
||
(in-path-p (or (and (eq char ?/)
|
||
(or (not last-segment)
|
||
(member last-char '(?. ?~))))
|
||
(and was-in-path-p
|
||
(not
|
||
(or (eq char ? )
|
||
(member
|
||
char
|
||
immediate-new-phrase-chars))))))
|
||
(end-p
|
||
;; are we at the end of the input string?
|
||
(eq (+ (length result) (length last-segment) 1)
|
||
str-length))
|
||
(pop-p
|
||
;; do we need to pop a segment onto the output result?
|
||
(or end-p (and (not in-path-p)
|
||
(member char word-boundary-chars))))
|
||
(segment
|
||
;; add the current char to the current segment
|
||
(cons char last-segment))
|
||
(segment-string
|
||
;; the readable version of the segment
|
||
(apply #'string (reverse segment)))
|
||
(small-word-p
|
||
;; was the last segment a small word?
|
||
(member (downcase (substring segment-string 0 -1))
|
||
small-words))
|
||
(capitalize-p
|
||
;; do we need to capitalized this segment or lowercase it?
|
||
(or end-p first-word-p (not small-word-p)))
|
||
(ignore-segment-p
|
||
;; ignore explicitly capitalized segments
|
||
(or (string-match-p "[a-zA-Z].*[A-Z]" segment-string)
|
||
;; ignore URLs
|
||
(string-match-p "^https?:" segment-string)
|
||
;; ignore hostnames and namespaces.like.this
|
||
(string-match-p "\\w\\.\\w" segment-string)
|
||
;; ignore windows filesystem paths
|
||
(string-match-p "^[A-Za-z]:\\\\" segment-string)
|
||
;; ignore unix filesystem paths
|
||
was-in-path-p
|
||
;; ignore email addresses and user handles with @ symbol
|
||
(member ?@ segment)))
|
||
(next-result
|
||
(if pop-p
|
||
(concat result
|
||
(if ignore-segment-p
|
||
;; pop segment onto the result without
|
||
;; processing
|
||
segment-string
|
||
;; titlecase the segment before popping onto
|
||
;; result
|
||
(titlecase--segment
|
||
segment-string capitalize-p)))
|
||
result))
|
||
(next-segment
|
||
(unless pop-p segment))
|
||
(will-be-first-word-p
|
||
(if pop-p
|
||
(or (not last-segment)
|
||
(member last-char new-phrase-chars)
|
||
(member char immediate-new-phrase-chars))
|
||
first-word-p)))
|
||
(vector
|
||
next-result next-segment will-be-first-word-p in-path-p)))
|
||
str
|
||
:initial-value
|
||
(vector nil ; result stack
|
||
nil ; current working segment
|
||
t ; is it the first word of a phrase?
|
||
nil)))) ; are we inside of a filesystem path?
|
||
(aref final-state 0)))
|
||
|
||
(defun titlecase--segment (segment capitalize-p)
|
||
"Convert a title's inner SEGMENT to capitalized or lower case
|
||
depending on CAPITALIZE-P, then return the result."
|
||
(let* ((case-fold-search nil)
|
||
(ignore-chars '(?' ?\" ?\( ?\[ ?‘ ?“ ?’ ?” ?_))
|
||
(final-state
|
||
(cl-reduce
|
||
(lambda (state char)
|
||
(let ((result (aref state 0))
|
||
(downcase-p (aref state 1)))
|
||
(cond
|
||
(downcase-p
|
||
;; already upcased start of segment, so lowercase the rest
|
||
(vector (cons (downcase char) result) t))
|
||
((member char ignore-chars)
|
||
;; check if start char of segment needs to be ignored
|
||
(vector (cons char result) downcase-p))
|
||
(t
|
||
;; haven't upcased yet, and we can, so do it
|
||
(vector (cons (upcase char) result) t)))))
|
||
segment
|
||
:initial-value (vector nil (not capitalize-p)))))
|
||
(thread-last (aref final-state 0)
|
||
(reverse)
|
||
(apply #'string))))
|
||
|
||
;;;###autoload
|
||
(defun titlecase-region (begin end)
|
||
"Convert text in region from BEGIN to END to title case."
|
||
(interactive "*r")
|
||
(let ((pt (point)))
|
||
(insert (titlecase-string (delete-and-extract-region begin end)))
|
||
(goto-char pt)))
|
||
|
||
;;;###autoload
|
||
(defun titlecase-dwim ()
|
||
"Convert the region or current line to title case.
|
||
If Transient Mark Mode is on and there is an active region, convert
|
||
the region to title case. Otherwise, work on the current line."
|
||
(interactive)
|
||
(if (and transient-mark-mode mark-active)
|
||
(titlecase-region (region-beginning) (region-end))
|
||
(titlecase-region (point-at-bol) (point-at-eol))))
|
||
|
||
(provide 'titlecase)
|