Skip to content

Commit f5ca6a0

Browse files
committed
Drastically optimize queries by using a local index
1 parent de94f1d commit f5ca6a0

File tree

7 files changed

+104
-44
lines changed

7 files changed

+104
-44
lines changed

src/clojurians_log/application.clj

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
[system.components.endpoint :refer [new-endpoint]]
55
[clojurians-log.components.server-info :refer [server-info]]
66
[clojurians-log.components.datomic-schema :refer [new-datomic-schema]]
7+
[clojurians-log.components.indexer :refer [new-indexer]]
78
[system.components.handler :refer [new-handler]]
89
[system.components.middleware :refer [new-middleware]]
910
[system.components.http-kit :refer [new-web-server]]
@@ -38,7 +39,9 @@
3839
:server-info (server-info (:port http))
3940
:datomic (new-datomic-db (:uri datomic))
4041
:datomic-schema (-> (new-datomic-schema)
41-
(component/using [:datomic]))))
42+
(component/using [:datomic]))
43+
:indexer (-> (new-indexer)
44+
(component/using [:datomic]))))
4245

4346
(defn -main [& [config-file]]
4447
(let [conf (if (and config-file (.exists (io/file config-file)))
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
(ns clojurians-log.components.indexer
2+
(:require [com.stuartsierra.component :as component]
3+
[datomic.api :as d]
4+
[clojurians-log.db.queries :as queries]))
5+
6+
(def ^:private default-reindex-interval-ms
  "Default pause between two index rebuilds: one hour, in milliseconds."
  (* 60 60 1000))

(defrecord Indexer [datomic]
  component/Lifecycle
  ;; Starts a background thread that rebuilds the in-memory query indexes from
  ;; the current database value, sleeps, and repeats until interrupted.
  ;; The pause defaults to one hour (`Thread/sleep` takes milliseconds) and can
  ;; be overridden with an `:interval-ms` key on the component.
  (start [this]
    (if (:thread this)
      ;; Already started: do not spawn a second indexing thread.
      this
      (let [interval-ms (or (:interval-ms this) default-reindex-interval-ms)
            thread
            (Thread.
             (fn []
               (try
                 (loop []
                   (try
                     (queries/build-indexes! (d/db (:conn datomic)))
                     (catch InterruptedException e
                       ;; Interruption means "shut down"; let the outer
                       ;; handler end the thread.
                       (throw e))
                     (catch Exception e
                       ;; A failed rebuild must not end the indexer for good;
                       ;; report it and try again after the usual pause.
                       (binding [*out* *err*]
                         (println "Indexer: rebuilding indexes failed:"
                                  (.getMessage e)))))
                   (Thread/sleep (long interval-ms))
                   (recur))
                 (catch InterruptedException _
                   ;; `stop` interrupts this thread; exit quietly.
                   nil))))]
        (.start thread)
        (assoc this :thread thread))))

  ;; Interrupts the indexing thread (if any) and forgets it.
  (stop [this]
    (when-let [thread (:thread this)]
      (.interrupt thread))
    (dissoc this :thread)))
22+
23+
(defn new-indexer
  "Returns a fresh, not yet started `Indexer` record with no fields set."
  []
  (map->Indexer {}))

src/clojurians_log/components/server_info.clj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,5 +8,6 @@
88
component)
99
(stop [component]
1010
component))
11+
1112
(defn server-info
  "Builds a `ServerInfoPrinter` record from `http-port`."
  [http-port]
  (->ServerInfoPrinter http-port))

src/clojurians_log/db/queries.clj

Lines changed: 44 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -2,23 +2,46 @@
22
(:require [datomic.api :as d]
33
[clojurians-log.time-util :as time-util]))
44

5+
;; Atom holding the in-memory lookup indexes; starts out as an empty map and is
;; replaced wholesale by `build-indexes!`. `defonce` keeps the current value
;; when this namespace is reloaded.
(defonce !indexes (atom {}))
6+
7+
(defn channels-dates-msgcounts
  "Queries `db` for the number of messages per channel per day.

  Returns the query result: tuples of
  `[channel-slack-id channel-name day message-count]`."
  [db]
  (let [query '[:find ?slack-id ?chan-name ?day (count ?msg)
                :in $
                :where
                [?chan :channel/slack-id ?slack-id]
                [?chan :channel/name ?chan-name]
                [?msg :message/channel ?chan]
                [?msg :message/day ?day]]]
    (d/q query db)))
16+
17+
(defn build-indexes
  "Builds the lookup indexes from the rows of `channels-dates-msgcounts`.

  Returns a map with these keys (an empty map when there are no rows):
    :chan-day-cnt   slack-id  -> day      -> message count
    :day-chan-cnt   day       -> slack-id -> message count
    :chan-id->name  slack-id  -> channel name
    :chan-name->id  channel name -> slack-id"
  [db]
  (letfn [(index-row [indexes [slack-id chan-name day msgcount]]
            ;; Each row contributes one entry to every index.
            (-> indexes
                (assoc-in [:chan-id->name slack-id] chan-name)
                (assoc-in [:chan-name->id chan-name] slack-id)
                (assoc-in [:chan-day-cnt slack-id day] msgcount)
                (assoc-in [:day-chan-cnt day slack-id] msgcount)))]
    (reduce index-row {} (channels-dates-msgcounts db))))
28+
29+
(defn build-indexes!
  "Rebuilds the indexes from `db` and stores them in `!indexes`, replacing the
  previous value. Returns the newly built indexes."
  [db]
  (let [fresh-indexes (build-indexes db)]
    (reset! !indexes fresh-indexes)))
31+
532
(defn channel-list
  "Lists channels from the in-memory indexes in `!indexes`; `db` is accepted
  but not consulted.

  With only `db`: every indexed channel as a map of `:channel/slack-id` and
  `:channel/name`, sorted by name.
  With `db` and `day`: the channels that have messages on `day`, each with an
  additional `:channel/message-count`."
  ([db]
   (let [id->name (:chan-id->name @!indexes)
         ->channel (fn [[slack-id chan-name]]
                     {:channel/slack-id slack-id
                      :channel/name chan-name})]
     (sort-by :channel/name (map ->channel id->name))))
  ([db day]
   (let [{:keys [day-chan-cnt chan-id->name]} @!indexes]
     (map (fn [[ch-id cnt]]
            {:channel/slack-id ch-id
             :channel/name (chan-id->name ch-id)
             :channel/message-count cnt})
          (get day-chan-cnt day)))))
2245

2346
(defn- assoc-inst
  "Returns `message` with `:message/inst` set to the result of passing its
  `:message/ts` through `time-util/ts->inst`."
  [message]
  (let [ts (:message/ts message)]
    (assoc message :message/inst (time-util/ts->inst ts))))
@@ -75,15 +98,12 @@
7598
(compare y x))
7699

77100
(defn channel-days
  "Returns the days on which the channel named `chan-name` has messages,
  ordered with `reverse-compare`, read from the in-memory indexes in
  `!indexes`; `db` is accepted but not consulted.

  Returns nil when `chan-name` is nil, when the channel is not in the index, or
  when the indexes have not been built yet."
  [db chan-name]
  (let [{:keys [chan-day-cnt chan-name->id]} @!indexes]
    ;; Look the maps up with `get` rather than calling them as functions:
    ;; before the first index build both are nil, and `some->>` only guards
    ;; against nil *values*, so invoking a nil map would throw a
    ;; NullPointerException.
    (some->> chan-name
             (get chan-name->id)
             (get chan-day-cnt)
             keys
             (sort reverse-compare))))
87107

88108
(defn channel [db name]
89109
(d/q '[:find (pull ?chan [*]) .
@@ -94,22 +114,14 @@
94114
name))
95115

96116
(defn user-names
  "Looks up user names for the given collection of Slack user `ids`.

  Returns the query result: `[slack-id username]` tuples for the ids that
  match a user having both `:user/slack-id` and `:user/name`."
  [db ids]
  (let [query '[:find ?id ?username
                :in $ [?id ...]
                :where
                [?user :user/slack-id ?id]
                [?user :user/name ?username]]]
    (d/q query db ids)))
113125

114126
(defn thread-messages
115127
"Retrieve all child messages for the given parent threads"

src/clojurians_log/db/schema.clj

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,8 @@
3030
#:db{:ident :message/thread-ts
3131
:valueType :db.type/string
3232
:cardinality :db.cardinality/one
33-
:doc "Thread parent message timestamp (seconds since epoch up to 6 decimals). Stored as string because it is used by slack as a kind of identifier. Unique per channel."}
33+
:doc "Thread parent message timestamp (seconds since epoch up to 6 decimals). Stored as string because it is used by slack as a kind of identifier. Unique per channel."
34+
:index true}
3435
#:db{:ident :message/thread-inst
3536
:valueType :db.type/instant
3637
:cardinality :db.cardinality/one

src/clojurians_log/repl.clj

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -173,22 +173,45 @@
173173
(use 'clojurians-log.repl)
174174
(load-slack-data!)
175175
(def result (load-files! (log-files)))
176-
@(second result)
177176
result
178177

178+
(def result (load-files! (drop 1508 (log-files))))
179+
180+
(while (not (realized? (second result)))
181+
(println (java.util.Date.) "\t" @(first result))
182+
(Thread/sleep 5000))
183+
179184
;; old way (slower)
180185
(run! load-log-file! (log-files))
181186

182187
;; incremental
183-
(load-from "2016-08-04")
188+
(load-from "2019-08-23")
184189

185190

186191

187192
(load-demo-data! "/home/arne/github/clojurians-log-demo-data")
188-
193+
(build-indexes! (d/db (conn)))
189194

190195
(do
191196
(write-edn "users.edn" (slack/users))
192197
(write-edn "channels.edn" (slack/channels)))
193198

194-
)
199+
(time
200+
(do
201+
(time (clojurians-log.db.queries/channel-day-messages db "clojurescript" "2018-02-04"))
202+
(time (clojurians-log.db.queries/thread-messages db '("1517722327.000023" "1517722363.000043" "1517722613.000012" "1517724278.000043" "1517724340.000044" "1517724770.000024" "1517724836.000023" "1517725105.000054")))
203+
(time (clojurians-log.db.queries/channel db "clojurescript"))
204+
(time (clojurians-log.db.queries/channel-list db "2018-02-04"))
205+
(time (clojurians-log.db.queries/user-names db #{"U2TUBBPNU"}))
206+
(time (clojurians-log.db.queries/channel-days db "clojurescript"))
207+
208+
nil)
209+
210+
"Elapsed time: 18.166254 msecs"
211+
"Elapsed time: 631.458841 msecs"
212+
"Elapsed time: 1.568807 msecs"
213+
"Elapsed time: 16.425878 msecs"
214+
"Elapsed time: 1.126005 msecs"
215+
"Elapsed time: 1535.355001 msecs"
216+
"Elapsed time: 2205.20762 msecs"
217+
))

src/clojurians_log/views.clj

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -95,15 +95,11 @@
9595
`offset` positions away. Returns nil if applying the offset goes out of
9696
bounds."
9797
[channel-days today offset]
98-
9998
(as-> channel-days $
    (map vector (range) $)
    ;; Find the position of `today`. The index must only be returned for the
    ;; matching date; returning it unconditionally always yields 0 (0 is
    ;; truthy), which would apply the offset from the first entry instead.
    (some (fn [[index a-date]]
            (when (= a-date today)
              index))
          $)
    ;; `today` may be absent from `channel-days`; stay nil instead of adding
    ;; the offset to nil.
    (when $ (+ $ offset))
    (when $ (nth channel-days $ nil))))
107103

108104
(defn- log-page-header [{:data/keys [channel date channel-days]}]
109105
[:div.header

0 commit comments

Comments
 (0)