Change "event" to "interval", use "ht" for hash-table functions

This commit is contained in:
contrapunctus 2022-02-20 01:09:09 +05:30
parent 73477552cc
commit a911dbf16e
2 changed files with 89 additions and 92 deletions

View File

@ -217,10 +217,10 @@ Return new position of point."
;; [[file:chronometrist.org::*current-task][current-task:1]]
(cl-defun chronometrist-current-task (&optional (backend (chronometrist-active-backend)))
"Return the name of the active task as a string, or nil if not clocked in."
(let ((last-event (chronometrist-latest-record backend)))
(if (plist-member last-event :stop)
(let ((last-interval (chronometrist-latest-record backend)))
(if (plist-member last-interval :stop)
nil
(plist-get last-event :name))))
(plist-get last-interval :name))))
;; current-task:1 ends here
;; [[file:chronometrist.org::*install-directory][install-directory:1]]
@ -305,12 +305,12 @@ Return value is a ts struct (see `ts.el')."
((&plist :start start-2 :stop stop-2) (cl-second split-time))
;; `plist-put' modifies lists in-place. The resulting bugs
;; left me puzzled for a while.
(event-1 (cl-copy-list plist))
(event-2 (cl-copy-list plist)))
(list (-> event-1
(interval-1 (cl-copy-list plist))
(interval-2 (cl-copy-list plist)))
(list (-> interval-1
(plist-put :start start-1)
(plist-put :stop stop-1))
(-> event-2
(-> interval-2
(plist-put :start start-2)
(plist-put :stop stop-2))))))))
;; split-plist:1 ends here
@ -322,8 +322,8 @@ If REPLACE is non-nil, replace the last interval with PLIST."
(let* ((date (->> (plist-get plist :start)
(chronometrist-iso-to-ts )
(ts-format "%F" )))
(events-today (gethash date hash-table)))
(--> (if replace (-drop-last 1 events-today) events-today)
(intervals-today (gethash date hash-table)))
(--> (if replace (-drop-last 1 intervals-today) intervals-today)
(append it (list plist))
(puthash date it hash-table))
hash-table))
@ -331,23 +331,22 @@ If REPLACE is non-nil, replace the last interval with PLIST."
;; [[file:chronometrist.org::*ht-last-date][ht-last-date:1]]
(defun chronometrist-ht-last-date (hash-table)
"Return an ISO-8601 date string for the latest date present in `chronometrist-events'."
"Return an ISO-8601 date string for the latest date present in HASH-TABLE."
(--> (hash-table-keys hash-table)
(last it)
(car it)))
(cl-first it)))
;; ht-last-date:1 ends here
;; [[file:chronometrist.org::*ht-last][ht-last:1]]
(cl-defun chronometrist-ht-last (&optional (backend (chronometrist-active-backend)))
"Return the last plist from `chronometrist-events'."
(let* ((hash-table (chronometrist-backend-hash-table backend))
(last-date (chronometrist-ht-last-date hash-table)))
(--> (gethash last-date hash-table)
(last it)
(car it))))
(cl-defun chronometrist-ht-last (&optional hash-table)
"Return the last plist from HASH-TABLE."
(--> (chronometrist-ht-last-date hash-table)
(gethash it hash-table)
(last it)
(cl-first it)))
;; ht-last:1 ends here
;; [[file:chronometrist.org::#program-data-structures-events-subset][ht-subset:1]]
;; [[file:chronometrist.org::#program-data-structures-ht-subset][ht-subset:1]]
(defun chronometrist-ht-subset (start end hash-table)
"Return a subset of HASH-TABLE.
The subset will contain values between dates START and END (both
@ -1013,8 +1012,10 @@ OLD-PATH and NEW-PATH are the old and new values of
(cl-defgeneric chronometrist-to-hash-table (backend)
"Return data in BACKEND as a hash table in chronological order.
Hash table keys are ISO-8601 date strings. Hash table values are
lists of records, represented by plists. Both hash table keys and
hash table values must be in chronological order.")
lists of intervals (plists), optionally preceded by keyword-value
pairs. Both hash table keys and hash table values must be in
chronological order. Intervals in hash table values must not span
across days.")
;; to-hash-table:1 ends here
;; [[file:chronometrist.org::*to-list][to-list:1]]
@ -1058,7 +1059,8 @@ hash table values must be in chronological order.")
:documentation "Full path to backend file, with extension.")
(hash-table :initform (chronometrist-make-hash-table)
:initarg :hash-table
:accessor chronometrist-backend-hash-table)
:accessor chronometrist-backend-hash-table
:documentation "Hash table as returned by `chronometrist-to-hash-table'.")
(file-watch :initform nil
:initarg :file-watch
:accessor chronometrist-backend-file-watch
@ -1532,7 +1534,7 @@ This is meant to be run in `chronometrist-file' when using an s-expression backe
`chronometrist-plist-backend' file is modified."
(with-slots (hash-table) backend
(-let (((new-plist &as &plist :name new-task) (chronometrist-latest-record backend))
((&plist :name old-task) (chronometrist-ht-last backend)))
((&plist :name old-task) (chronometrist-ht-last hash-table)))
(setf hash-table (chronometrist-ht-update new-plist hash-table t))
(chronometrist-remove-from-task-list old-task backend)
(chronometrist-add-to-task-list new-task backend))))
@ -1543,7 +1545,7 @@ This is meant to be run in `chronometrist-file' when using an s-expression backe
"Function run when the newest plist in a
`chronometrist-plist-backend' file is deleted."
(with-slots (hash-table) backend
(-let (((&plist :name old-task) (chronometrist-ht-last))
(-let (((&plist :name old-task) (chronometrist-ht-last hash-table))
(date (chronometrist-ht-last-date hash-table)))
;; `chronometrist-remove-from-task-list' checks the hash table to determine
;; if `chronometrist-task-list' is to be updated. Thus, the hash table must

View File

@ -81,7 +81,7 @@ One of the earliest 'optimizations' of great importance turned out to simply be
*** Preserve hash table state for some commands
NOTE - this has been replaced with a more general optimization - see next section.
The next one was released in v0.5. Till then, any time the [[* chronometrist-file][=chronometrist-file=]] was modified, we'd clear the =chronometrist-events= hash table and read data into it again. The reading itself is nearly-instant, even with ~2 years' worth of data [fn:2] (it uses Emacs' [[elisp:(describe-function 'read)][=read=]], after all), but the splitting of [[#explanation-midnight-spanning-intervals][midnight-spanning events]] is the real performance killer.
The next one was released in v0.5. Till then, any time the [[* chronometrist-file][=chronometrist-file=]] was modified, we'd clear the hash table and read data into it again. The reading itself is nearly-instant, even with ~2 years' worth of data [fn:2] (it uses Emacs' [[elisp:(describe-function 'read)][=read=]], after all), but the splitting of [[#explanation-midnight-spanning-intervals][midnight-spanning intervals]] is the real performance killer.
After the optimization...
1. Two backend functions (=chronometrist-sexp-new= and =chronometrist-sexp-replace-last=) were modified to set a flag (=chronometrist--inhibit-read-p=) before saving the file.
@ -93,14 +93,14 @@ After the optimization...
There are still some operations which [[* refresh-file][=chronometrist-refresh-file=]] runs unconditionally - which is to say there is scope for further optimization, if or when required.
[fn:2] As indicated by exploratory work in the =parsimonious-reading= branch, where I made a loop to only =read= and collect s-expressions from the file. It was near-instant...until I added event splitting to it.
[fn:2] As indicated by exploratory work in the =parsimonious-reading= branch, where I made a loop to only =read= and collect s-expressions from the file. It was near-instant...until I added interval splitting to it.
*** Determine type of change made to file
Most changes, whether made through user-editing or by Chronometrist commands, happen at the end of the file. We try to detect the kind of change made - whether the last expression was modified, removed, or whether a new expression was added to the end - and make the corresponding change to =chronometrist-events=, instead of doing a full parse again (=chronometrist-events-populate=). The increase in responsiveness has been significant.
Most changes, whether made through user-editing or by Chronometrist commands, happen at the end of the file. We try to detect the kind of change made - whether the last expression was modified, removed, or whether a new expression was added to the end - and make the corresponding change to the hash table, instead of doing a full parse again (=chronometrist-to-hash-table=). The increase in responsiveness has been significant.
When =chronometrist-refresh-file= is run by the file system watcher, it uses =chronometrist-file-hash= to assign indices and a hash to =chronometrist--file-state=. The next time the file changes, =chronometrist-file-change-type= compares this state to the current state of the file to determine the type of change made.
Challenges -
**** Challenges
1. Correctly detecting the type of change
2. Updating =chronometrist-task-list= and the Chronometrist buffer, when a new task is added or the last interval for a task is removed (v0.6.4)
3. Handling changes made to an active interval after midnight
@ -109,7 +109,7 @@ Challenges -
* =:modify= - normally, replace in table; for spanning intervals, split and replace
* =:remove= - normally, remove from table; for spanning intervals, split and remove
Effects on the task list
**** Effects on the task list
1. When a plist is added, the =:name= might be new, in which case we need to add it to the task list.
2. When the last plist is modified, the =:name= may have changed -
1. the =:name= might be new and require addition to the task list.
@ -118,10 +118,10 @@ Effects on the task list
** Midnight-spanning intervals
:PROPERTIES:
:DESCRIPTION: Events starting on one day and ending on another
:DESCRIPTION: Intervals starting on one day and ending on another
:CUSTOM_ID: explanation-midnight-spanning-intervals
:END:
A unique problem in working with Chronometrist, one I had never foreseen, was tasks which start on one day and end on another. For instance, you start working on something at =2021-01-01 23:00= hours and stop on =2021-01-02 01:00=.
A unique problem in working with Chronometrist, one I had never foreseen, was intervals which start on one day and end on another. For instance, you start working on something at =2021-01-01 23:00= hours and stop on =2021-01-02 01:00=.
These mess up data consumption in all sorts of unforeseen ways, especially interval calculations and acquiring intervals for a specific date. In case of two of the most common operations throughout the program -
1. finding the intervals recorded on a given date
@ -129,21 +129,21 @@ These mess up data consumption in all sorts of unforeseen ways, especially inter
There are a few different approaches of dealing with them. (Currently, Chronometrist uses #3.)
*** Check the code of the first event of the day (timeclock format)
*** Check the code of the first interval of the day (timeclock format)
:PROPERTIES:
:DESCRIPTION: When the code of the first event in the day is "o", it's a midnight-spanning event.
:DESCRIPTION: When the code of the first interval in the day is "o", it's a midnight-spanning interval.
:END:
+ Advantage - very simple to detect
+ Disadvantage - "in" and "out" events must be represented separately
+ Disadvantage - "in" and "out" intervals must be represented separately
*** Split them at the file level
+ Advantage - operation is performed only once for each such event + simpler data-consuming code + reduced post-parsing load.
+ What happens when the user changes their day-start-time? The split-up events are now split wrongly, and the second event may get split /again./
+ Advantage - operation is performed only once for each such interval + simpler data-consuming code + reduced post-parsing load.
+ What happens when the user changes their day-start-time? The split-up intervals are now split wrongly, and the second interval may get split /again./
Possible solutions -
1. Add function to check if, for two events A and B, the =:stop= of A is the same as the =:start= of B, and that all their other tags are identical. Then we can re-split them according to the new day-start-time.
1. Add function to check if, for two intervals A and B, the =:stop= of A is the same as the =:start= of B, and that all their other tags are identical. Then we can re-split them according to the new day-start-time.
+ Implemented as [[#program-data-structures-plists-split-p][plist-split-p]]
2. Add a =:split= tag to split events. It can denote that the next event was originally a part of this one.
2. Add a =:split= tag to split intervals. It can denote that the next interval was originally a part of this one.
3. Re-check and update the file when the day-start-time changes.
- Possible with ~add-variable-watcher~ or ~:custom-set~ in Customize (thanks bpalmer)
@ -153,7 +153,7 @@ This strategy is implemented in the [[#program-backend-plist-group][plist-group]
Handled by ~chronometrist-sexp-events-populate~
+ Advantage - simpler data-consuming code.
*** Split them at the data-consumer level (e.g. when calculating time for one day/getting events for one day)
*** Split them at the data-consumer level (e.g. when calculating time for one day/getting intervals for one day)
+ Advantage - reduced repetitive post-parsing load.
** Point restore behaviour
@ -254,18 +254,16 @@ Further details are stored in properties -
:END:
*** ts
ts.el struct
=ts.el= struct
* Used by nearly all internal functions
*** iso-timestamp
="YYYY-MM-DDTHH:MM:SSZ"=
* Used in the s-expression file format
* Read by chronometrist-sexp-events-populate
* Used in the plists in the chronometrist-events hash table values
* Used for intervals
*** iso-date
="YYYY-MM-DD"=
* Used as hash table keys in chronometrist-events - can't use ts structs for keys, you'd have to make a hash table predicate which uses ts=
* Used as hash table keys - can't use =ts= structs for keys, you'd have to make a hash table predicate which uses =ts==
*** seconds
integer seconds as duration
@ -538,7 +536,6 @@ If FIRSTONLY is non-nil, return only the first keybinding found."
#+END_SRC
*** day-start-time :custom:variable:
=chronometrist-events-maybe-split= refers to this, but I'm not sure this has the desired effect at the moment—haven't even tried using it.
#+BEGIN_SRC emacs-lisp
(defcustom chronometrist-day-start-time "00:00:00"
"The time at which a day is considered to start, in \"HH:MM:SS\".
@ -689,10 +686,10 @@ Return new position of point."
#+BEGIN_SRC emacs-lisp
(cl-defun chronometrist-current-task (&optional (backend (chronometrist-active-backend)))
"Return the name of the active task as a string, or nil if not clocked in."
(let ((last-event (chronometrist-latest-record backend)))
(if (plist-member last-event :stop)
(let ((last-interval (chronometrist-latest-record backend)))
(if (plist-member last-interval :stop)
nil
(plist-get last-event :name))))
(plist-get last-interval :name))))
#+END_SRC
*** install-directory :variable:
@ -751,12 +748,6 @@ FORMAT-STRING and ARGS are passed to `format'."
:PROPERTIES:
:CUSTOM_ID: program-data-structures
:END:
Reading directly from the file could be difficult, especially when your most common query is "get all intervals recorded on <date>" [fn:4] - and so, we maintain the hash table =chronometrist-events=, where each key is a date in the ISO-8601 format. The plists in this hash table are free of [[#explanation-midnight-spanning-intervals][midnight-spanning intervals]], making code which consumes it easier to write.
The data from =chronometrist-events= is used by most (all?) interval-consuming functions, but is never written to the user's file itself.
[fn:4] it might be the case that the [[#program-backend][file format]] is not suited to our most frequent operation...
*** reset :command:
#+BEGIN_SRC emacs-lisp
(defun chronometrist-reset ()
@ -802,12 +793,12 @@ Return value is a ts struct (see `ts.el')."
((&plist :start start-2 :stop stop-2) (cl-second split-time))
;; `plist-put' modifies lists in-place. The resulting bugs
;; left me puzzled for a while.
(event-1 (cl-copy-list plist))
(event-2 (cl-copy-list plist)))
(list (-> event-1
(interval-1 (cl-copy-list plist))
(interval-2 (cl-copy-list plist)))
(list (-> interval-1
(plist-put :start start-1)
(plist-put :stop stop-1))
(-> event-2
(-> interval-2
(plist-put :start start-2)
(plist-put :stop stop-2))))))))
#+END_SRC
@ -844,8 +835,8 @@ If REPLACE is non-nil, replace the last interval with PLIST."
(let* ((date (->> (plist-get plist :start)
(chronometrist-iso-to-ts )
(ts-format "%F" )))
(events-today (gethash date hash-table)))
(--> (if replace (-drop-last 1 events-today) events-today)
(intervals-today (gethash date hash-table)))
(--> (if replace (-drop-last 1 intervals-today) intervals-today)
(append it (list plist))
(puthash date it hash-table))
hash-table))
@ -854,21 +845,20 @@ If REPLACE is non-nil, replace the last interval with PLIST."
*** ht-last-date :reader:
#+BEGIN_SRC emacs-lisp
(defun chronometrist-ht-last-date (hash-table)
"Return an ISO-8601 date string for the latest date present in `chronometrist-events'."
"Return an ISO-8601 date string for the latest date present in HASH-TABLE."
(--> (hash-table-keys hash-table)
(last it)
(car it)))
(cl-first it)))
#+END_SRC
*** ht-last :reader:
#+BEGIN_SRC emacs-lisp
(cl-defun chronometrist-ht-last (&optional (backend (chronometrist-active-backend)))
"Return the last plist from `chronometrist-events'."
(let* ((hash-table (chronometrist-backend-hash-table backend))
(last-date (chronometrist-ht-last-date hash-table)))
(--> (gethash last-date hash-table)
(last it)
(car it))))
(cl-defun chronometrist-ht-last (&optional hash-table)
"Return the last plist from HASH-TABLE."
(--> (chronometrist-ht-last-date hash-table)
(gethash it hash-table)
(last it)
(cl-first it)))
#+END_SRC
*** ht-subset :reader:
@ -899,12 +889,12 @@ treated as though their time is 00:00:00."
(cl-defun chronometrist-task-time-one-day (task &optional (date (chronometrist-date-ts)) (backend (chronometrist-active-backend)))
"Return total time spent on TASK today or on DATE, an ISO-8601 date.
The return value is seconds, as an integer."
(let ((task-events (chronometrist-task-records-for-date backend task date)))
(if task-events
(->> (chronometrist-plists-to-durations task-events)
(let ((task-intervals (chronometrist-task-records-for-date backend task date)))
(if task-intervals
(->> (chronometrist-plists-to-durations task-intervals)
(-reduce #'+)
(truncate))
;; no events for this task on DATE, i.e. no time spent
;; no intervals for this task on DATE, i.e. no time spent
0)))
#+END_SRC
@ -921,16 +911,16 @@ Return value is seconds as an integer."
*** count-active-days :function:
#+BEGIN_SRC emacs-lisp
(cl-defun chronometrist-statistics-count-active-days (task table)
(cl-defun chronometrist-count-active-days (task hash-table)
"Return the number of days the user spent any time on TASK.
TABLE must be a hash table - if not supplied, `chronometrist-events' is used.
HASH-TABLE must be a hash table as returned by `chronometrist-to-hash-table'.
This will not return correct results if TABLE contains records
which span midnights."
(cl-loop for events being the hash-values of table
This will not return correct results if HASH-TABLE contains
records which span midnights."
(cl-loop for intervals being the hash-values of hash-table
count (seq-find (lambda (event)
(equal task (plist-get event :name)))
events)))
intervals)))
#+END_SRC
*** task-list :variable:
@ -1039,12 +1029,11 @@ TS should be a ts struct (see `ts.el')."
Optional argument UNIX-TIME should be a time value (see
`current-time') accepted by `format-time-string'."
(format-time-string "%FT%T%z" unix-time))
;; Note - this assumes that an event never crosses >1 day. This seems
;; sufficient for all conceivable cases.
#+END_SRC
*** FIXME split-time :reader:
Note - this assumes that an event never crosses >1 day. This seems sufficient for all conceivable cases.
It does not matter here that the =:stop= dates in the returned plists are different from the =:start=, because =chronometrist-events-populate= uses only the date segment of the =:start= values as hash table keys. (The hash table keys form the rest of the program's notion of "days", and that of which plists belong to which day.)
#+BEGIN_SRC emacs-lisp
@ -1061,13 +1050,13 @@ Return a list in the form
(:start <day start time on second day>
:stop STOP-TIME))"
;; FIXME - time zones are ignored; may cause issues with
;; time-zone-spanning events
;; time-zone-spanning intervals
;; The time on which the first provided day starts (according to `chronometrist-day-start-time')
(let* ((stop-ts (chronometrist-iso-to-ts stop-time))
(first-day-start (chronometrist-apply-time day-start-time start-time))
(next-day-start (ts-adjust 'hour 24 first-day-start)))
;; Does the event stop time exceed the next day start time?
;; Does the interval stop time exceed the next day start time?
(when (ts< next-day-start stop-ts)
(let ((split-time (ts-format "%FT%T%z" next-day-start)))
(list `(:start ,start-time :stop ,split-time)
@ -1117,8 +1106,8 @@ SECONDS must be a positive integer."
*** interval :function:
#+BEGIN_SRC emacs-lisp
(defun chronometrist-interval (plist)
"Return the period of time covered by EVENT as a time value.
EVENT should be a plist (see `chronometrist-file')."
"Return the period of time covered by PLIST as a time value.
PLIST should be a plist (see `chronometrist-file')."
(let* ((start-ts (chronometrist-iso-to-ts (plist-get plist :start)))
(stop-iso (plist-get plist :stop))
;; Add a stop time if it does not exist.
@ -1703,8 +1692,10 @@ OLD-PATH and NEW-PATH are the old and new values of
(cl-defgeneric chronometrist-to-hash-table (backend)
"Return data in BACKEND as a hash table in chronological order.
Hash table keys are ISO-8601 date strings. Hash table values are
lists of records, represented by plists. Both hash table keys and
hash table values must be in chronological order.")
lists of intervals (plists), optionally preceded by keyword-value
pairs. Both hash table keys and hash table values must be in
chronological order. Intervals in hash table values must not span
across days.")
#+END_SRC
***** to-list :generic:function:
@ -1742,6 +1733,9 @@ These can be implemented in terms of the minimal protocol above.
:PROPERTIES:
:CUSTOM_ID: file-backend-mixin
:END:
Reading directly from the file could be difficult, especially when your most common query is "get all intervals recorded on <date>" - and so, each backend maintains a hash table, where each key is a date in the ISO-8601 format. The plists in this hash table are free of [[#explanation-midnight-spanning-intervals][midnight-spanning intervals]], making code which consumes it easier to write.
The data from the backend hash table is used by most (all?) interval-consuming functions, but is never written to the user's file itself.
#+BEGIN_SRC emacs-lisp
(defclass chronometrist-file-backend-mixin ()
@ -1764,7 +1758,8 @@ These can be implemented in terms of the minimal protocol above.
:documentation "Full path to backend file, with extension.")
(hash-table :initform (chronometrist-make-hash-table)
:initarg :hash-table
:accessor chronometrist-backend-hash-table)
:accessor chronometrist-backend-hash-table
:documentation "Hash table as returned by `chronometrist-to-hash-table'.")
(file-watch :initform nil
:initarg :file-watch
:accessor chronometrist-backend-file-watch
@ -2469,7 +2464,7 @@ This is meant to be run in `chronometrist-file' when using an s-expression backe
`chronometrist-plist-backend' file is modified."
(with-slots (hash-table) backend
(-let (((new-plist &as &plist :name new-task) (chronometrist-latest-record backend))
((&plist :name old-task) (chronometrist-ht-last backend)))
((&plist :name old-task) (chronometrist-ht-last hash-table)))
(setf hash-table (chronometrist-ht-update new-plist hash-table t))
(chronometrist-remove-from-task-list old-task backend)
(chronometrist-add-to-task-list new-task backend))))
@ -2481,7 +2476,7 @@ This is meant to be run in `chronometrist-file' when using an s-expression backe
"Function run when the newest plist in a
`chronometrist-plist-backend' file is deleted."
(with-slots (hash-table) backend
(-let (((&plist :name old-task) (chronometrist-ht-last))
(-let (((&plist :name old-task) (chronometrist-ht-last hash-table))
(date (chronometrist-ht-last-date hash-table)))
;; `chronometrist-remove-from-task-list' checks the hash table to determine
;; if `chronometrist-task-list' is to be updated. Thus, the hash table must
@ -4136,7 +4131,7 @@ It simply operates on the entire hash table TABLE (see
reduced to the desired range using
`chronometrist-ht-subset'."
(cl-loop for task in (chronometrist-task-list) collect
(let* ((active-days (chronometrist-statistics-count-active-days task table))
(let* ((active-days (chronometrist-count-active-days task table))
(active-percent (cl-case (plist-get chronometrist-statistics--ui-state :mode)
('week (* 100 (/ active-days 7.0)))))
(active-percent (if (zerop active-days)