-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtdb.cl
176 lines (153 loc) · 5.99 KB
/
tdb.cl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
;;;a collection of some code using sqlite ;use: mike.bobak@gmail
;;;==> db.cl <==
;get a few base fncs into utils, eg. count-items/count-a
;==============> di2.cl <==
;[email protected] Given a doc-term matrix stored as (docid, term, count) rows and two docids, return their overlap count.
;; Load-time setup: pull in the sqlite library and helper utilities, then open
;; the database.
;; NOTE(review): `ql` and `lu` are not defined in this file -- presumably
;; init-file shortcuts (quickload / load utilities); confirm before loading.
(ql 'sqlite)
(lu)
;(in-package :sqlite)
;; Shared connection to reuters.db; di-rows queries through it and count-qry
;; uses it as its default DB argument.
(defvar *c* (sqlite:connect "reuters.db"))
(defun ft2alst (t3)
  "Convert one frequency-table row T3, a list (docid term count), into the
alist entry (term . count): the second element paired with the third."
  (cons (cadr t3) (caddr t3)))
;should set up for any doc
;(defvar *l1* (sqlite:execute-to-list *c* "select * from frequency where docid='17035_txt_earn'"))
;(defvar *l2* (sqlite:execute-to-list *c* "select * from frequency where docid='10080_txt_crude'"))
(defun di-rows (di)
  "Return, as a list of rows, every record of the frequency table whose docid
equals DI.  The query runs on the global connection *c*.

DI is passed as a bound parameter rather than spliced into the SQL text, so a
docid containing a quote character cannot break or alter the statement."
  (sqlite:execute-to-list *c*
                          "select * from frequency where docid = ?"
                          di))
(defun di_rows (di)
  "Return the rows for docid DI as an alist: each row fetched by di-rows is
converted with ft2alst into a (term . count) entry, in row order."
  (loop :for row :in (di-rows di)
        :collect (ft2alst row)))
(defun int2docs (&optional (d1 "17035_txt_earn") (d2 "10080_txt_crude"))
  "Overlap score of documents D1 and D2: for every term present in both, the
count in D1 times the count in D2, summed over those terms.  Returns 0 when
the documents share no term.

This is the plain dot product over shared terms, not a normalised cosine.
D1 and D2 are docid strings handed to di_rows."
  (let ((a1 (di_rows d1))
        (a2 (di_rows d2)))
    ;; Walk D1's own entries and look each term up in D2.  Using INTERSECTION
    ;; here is not portable: the standard leaves unspecified which list a
    ;; matching element is taken from, so the D1 count could silently be
    ;; replaced by the D2 count.
    (loop :for (term . cnt1) :in a1
          :for hit = (assoc term a2 :test #'equal)
          :when hit
            :sum (* cnt1 (cdr hit)))))
;==============> count-qry.cl <==
;http://compgroups.net/comp.lang.lisp/increment-hash-value-2/703023
(defun count-items (lst)
  "Tally how many times each element occurs in LST and print one
\"item: count\" line per distinct element to standard output.  Returns NIL.

Elements are compared with EQUAL so that equal strings and lists are counted
together; an EQL table would treat every freshly read string as distinct.
The order of the printed lines follows hash-table traversal and is
unspecified."
  (let ((tally (make-hash-table :test #'equal)))
    (dolist (item lst)
      (incf (gethash item tally 0)))
    (maphash (lambda (k v) (format t "~A: ~A~%" k v)) tally)))
(defun count-alst (lst)
  "Group the alist LST by key, sum the values of each key, and print one
\"key: total\" line per distinct key to standard output.  Returns NIL.

LST is a list of (key . number) conses.  Keys are compared with EQUAL so that
equal strings (for example docids read from the database) accumulate into one
total; with an EQL table each string object would get its own entry.
The order of the printed lines follows hash-table traversal and is
unspecified."
  (let ((totals (make-hash-table :test #'equal)))
    (dolist (pair lst)
      (incf (gethash (car pair) totals 0) (cdr pair)))
    (maphash (lambda (k v) (format t "~A: ~A~%" k v)) totals)))
;; TODO (from the original notes): also track the maximum total while looping.
;(ql 'sqlite)
;;(in-package :sqlite)
;(defvar *c* (sqlite:connect "reuters.db"))
;; Default query text for count-qry: rows of `v` for the terms washington,
;; taxes and treasury, combined with UNION.
;; NOTE(review): the trailing "group by docid" looks like it binds only to the
;; last SELECT of the union, not to the combined result -- confirm intent.
(defvar *qs* "select * from v where term='washington' union select * from v where term='taxes' union select * from v where term='treasury' group by docid")
(defun f-dc2alst (t3)
  "Convert one frequency-table row T3, a list (docid term count), into the
alist entry (docid . count): the first element paired with the third."
  (cons (car t3) (caddr t3)))
;; Sample data built at load time: runs *qs* on *c* and keeps each row as a
;; (first-column . third-column) pair via f-dc2alst.
(defvar *l1* (mapcar #'f-dc2alst (sqlite:execute-to-list *c* *qs*)))
(defun count-qry (&optional (f2alst #'f-dc2alst) (qs *qs*) (db *c*))
  "Run the SQL text QS against connection DB, convert every result row to an
alist entry with F2ALST, and pass the resulting alist to count-alst, which
prints the summed value for each key."
  (let* ((rows (sqlite:execute-to-list db qs))
         (pairs (mapcar f2alst rows)))
    (count-alst pairs)))
;==============> cnt-mx.cl <==
;[email protected] doing a group-by sum of values
;(defun count-items (lst)
; (let ((ht (make-hash-table)))
; (loop :for item :in lst :do
; (incf (gethash item ht 0))
; :finally
; (maphash #'(lambda (k v) (format t "~A: ~A~%" k v)) ht))))
;rename this, or pass in fnc or ..
;; Disabled variant of count-alst: the #+ignore reader conditional makes the
;; reader skip the whole defun unless :ignore is present in *features*.
;; It sums the cdr of every entry of LST under its car key, then walks the
;; table printing an entry each time its total exceeds the running maximum mx.
;; NOTE(review): that prints every new running maximum met during traversal,
;; so the lines shown depend on hash-table order, not just the final maximum.
#+ignore ;already above now
(defun count-alst (lst)
(let ((ht (make-hash-table)))
(loop :for item :in lst :do
;(let ((iv
(incf (gethash (car item) ht 0) (cdr item))
; ))
; maximizing iv into mx
; )
:finally
(let ((mx 0))
(maphash #'(lambda (k v)
(when (> v mx)
(setf mx v)
(format t "~A: ~A~%" k v))
)
ht)
)
;mx
)))
;get a loop max in there too
;(ql 'sqlite)
;;(in-package :sqlite)
;(defvar *c* (sqlite:connect "reuters.db"))
;(defvar *qs* "select * from v where term='washington' union select * from v where term='taxes' union select * from v where term='treasury' group by docid")
;;use one below if another view named v hasn't been constructed
;;(defvar *qs* "select * from frequency where term='washington' union select * from frequency where term='taxes' union select * from frequency where term='treasury' group by docid")
;(defun f-dc2alst (t3)
; "freq table, docid term count -> term,count alist"
; (cons (first t3) (third t3)))
;
;(defvar *l1* (mapcar #'f-dc2alst (sqlite:execute-to-list *c* *qs*)))
;
;(defun count-qry (&optional (f2alst #'f-dc2alst) (qs *qs*) (db *c*))
; "qry db convert2alst and count values"
; (count-alst (mapcar f2alst (sqlite:execute-to-list db qs))))
;;;==> cnt-mx.cl <==
;[email protected] doing a group-by sum of values
;; Disabled duplicate of count-items: the #+ignore reader conditional makes
;; the reader skip the whole defun unless :ignore is present in *features*.
;; It counts occurrences of each element of LST in a hash table and prints
;; one "item: count" line per key.
#+ignore ;already above now
(defun count-items (lst)
(let ((ht (make-hash-table)))
(loop :for item :in lst :do
(incf (gethash item ht 0))
:finally
(maphash #'(lambda (k v) (format t "~A: ~A~%" k v)) ht))))
;; Disabled variant of count-alst: the #+ignore reader conditional makes the
;; reader skip the whole defun unless :ignore is present in *features*.
;; It sums the cdr of every entry of LST under its car key, then walks the
;; table printing an entry each time its total exceeds the running maximum mx.
;; NOTE(review): that prints every new running maximum met during traversal,
;; so the lines shown depend on hash-table order, not just the final maximum.
#+ignore ;already above now
(defun count-alst (lst)
(let ((ht (make-hash-table)))
(loop :for item :in lst :do
;(let ((iv
(incf (gethash (car item) ht 0) (cdr item))
; ))
; maximizing iv into mx
; )
:finally
(let ((mx 0))
(maphash #'(lambda (k v)
(when (> v mx)
(setf mx v)
(format t "~A: ~A~%" k v))
)
ht)
)
;mx
)))
;get a loop max in there too
;(ql 'sqlite)
;;(in-package :sqlite)
;(defvar *c* (sqlite:connect "reuters.db"))
;(defvar *qs* "select * from v where term='washington' union select * from v where term='taxes' union select * from v where term='treasury' group by docid")
;;use one below if another view named v hasn't been constructed
;;(defvar *qs* "select * from frequency where term='washington' union select * from frequency where term='taxes' union select * from frequency where term='treasury' group by docid")
(defun f-dc2alst (t3)
  "Convert one frequency-table row T3, a list (docid term count), into the
alist entry (docid . count): the first element paired with the third."
  (cons (nth 0 t3) (nth 2 t3)))
;; Repeat of the earlier *l1* definition.  DEFVAR leaves an already-bound
;; variable alone, so this form only runs the query when *l1* is still unbound.
(defvar *l1* (mapcar #'f-dc2alst (sqlite:execute-to-list *c* *qs*)))
(defun count-qry (&optional (f2alst #'f-dc2alst) (qs *qs*) (db *c*))
  "Execute query string QS on connection DB, map F2ALST over the returned
rows to build an alist, and give that alist to count-alst, which prints the
summed value for each key."
  (let ((result-rows (sqlite:execute-to-list db qs)))
    (count-alst (loop :for row :in result-rows
                      :collect (funcall f2alst row)))))
;;====
;; Entry point: calls int2docs with its default pair of docids and returns
;; whatever int2docs returns.
(defun tdb () (int2docs))