Skip to content

Commit 0e6b5de

Browse files
committed
refactor: sexp uses string streams internally
This approach performs the best overall for larger sexps, and it yields the favourable return type `(simple-array character (*))`.
1 parent db49f5e commit 0e6b5de

3 files changed

Lines changed: 305 additions & 7 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
#### Added
66

7-
- Transducer: `sexp` for interpretting a streams of chars or strings as separate
7+
- Transducer: `sexp` for interpretting streams of chars or strings as separate
88
SEXP strings.
99
- Transducer: `safe` for handling conditions within individual transducers.
1010
- Reducer: `quantities` for counting unique occurrences of streamed items.

transducers/benchmarks.lisp

Lines changed: 298 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,298 @@
1+
(in-package :transducers)
2+
3+
#+sbcl
4+
(require :sb-sprof)
5+
6+
;; --- Function Composition --- ;;
7+
8+
(defun comp-orig (function &rest functions)
9+
"Function composition.
10+
11+
(funcall (comp #'1+ #'length) \"foo\") == (1+ (length \"foo\"))"
12+
(reduce (lambda (f g)
13+
(let ((f (ensure-function f))
14+
(g (ensure-function g)))
15+
(lambda (&rest arguments)
16+
(funcall f (apply g arguments)))))
17+
functions
18+
:initial-value function))
19+
20+
#+nil
21+
(progn
22+
(format t "--- ORIG ---~%")
23+
(time (dotimes (n 100000)
24+
(transduce
25+
(comp-orig (drop 1)
26+
(filter #'evenp)
27+
(map #'1+)
28+
(once 37)
29+
(intersperse 0)
30+
(take 100))
31+
#'+ (ints 0))))
32+
(format t "--- MACRO ---~%")
33+
(time (dotimes (n 100000)
34+
(transduce
35+
(comp (drop 1)
36+
(filter #'evenp)
37+
(map #'1+)
38+
(once 37)
39+
(intersperse 0)
40+
(take 100))
41+
#'+ (ints 0)))))
42+
43+
;; --- FILE READING --- ;;
44+
45+
;; Hmm, this one is dominated memory-wise by reading the strings from the file.
46+
;; 99% of the allocation is "not me".
47+
48+
(defun file-reading ()
49+
(transduce
50+
(comp (step 2) (map #'length) (filter #'evenp))
51+
#'+
52+
#p"README.org"))
53+
54+
#+nil
55+
(file-reading)
56+
57+
#+nil
58+
(time (dotimes (n 10000)
59+
(file-reading)))
60+
61+
#+nil
62+
(sb-sprof:with-profiling (:max-samples 100000 :sample-interval 0.00001 :report :graph :mode :alloc)
63+
(dotimes (n 10000)
64+
(file-reading)))
65+
66+
;; (0) Base: 2.1b, 2.0s
67+
68+
;; --- Summation --- ;;
69+
70+
;; Fast and allocates nothing if the result doesn't overflow fixnum.
71+
72+
(defun summation ()
73+
(transduce (comp (filter #'oddp)
74+
(take 1000000)
75+
(map (lambda (n) (* n n))))
76+
#'+
77+
(ints 1)))
78+
79+
#+nil
80+
(time (summation))
81+
82+
#+nil
83+
(sb-sprof:with-profiling (:max-samples 100000 :sample-interval 0.00001 :report :graph)
84+
(dotimes (n 10)
85+
(summation)))
86+
87+
;; (0) Base: 271m, 1.19s
88+
89+
;; --- STRINGS --- ;;
90+
91+
(defun string-cons (&optional (acc nil a?) (input #\z i?))
92+
"Reducer: Collect a stream of characters into to a single string."
93+
(cond ((and a? i?) (cl:cons input acc))
94+
((and a? (not i?)) (cl:concatenate 'cl:string (nreverse acc)))
95+
(t '())))
96+
97+
(defun string-push (&optional (acc nil a?) (input #\z i?))
98+
"Reducer: Collect a stream of characters into to a single string."
99+
(cond ((and a? i?) (vector-push-extend input acc) acc)
100+
((and a? (not i?)) acc)
101+
(t (make-array 16 :element-type 'character :adjustable t :fill-pointer 0))))
102+
103+
(defun string-with-stream (&optional (acc nil a?) (input #\z i?))
104+
"Reducer: Collect a stream of characters into to a single string."
105+
(cond ((and a? i?)
106+
(write-char input acc)
107+
acc)
108+
((and a? (not i?)) (get-output-stream-string acc))
109+
(t (make-string-output-stream :element-type 'character))))
110+
111+
#+nil
112+
(progn
113+
(format t "--- CONS ---~%")
114+
(time (dotimes (n 100)
115+
(transduce (comp (take 10000) (map #'char-upcase)) #'string-cons (repeat #\a))))
116+
(format t "--- PUSH ---~%")
117+
(time (dotimes (n 100)
118+
(transduce (comp (take 10000) (map #'char-upcase)) #'string-push (repeat #\a))))
119+
(format t "--- STREAM ---~%")
120+
(time (dotimes (n 100)
121+
(transduce (comp (take 10000) (map #'char-upcase)) #'string-with-stream (repeat #\a)))))
122+
123+
;; OBSERVATIONS
124+
;;
125+
;; - SBCL: stream is fastest and uses the least memory beyond strings of length 1000 or so.
126+
;; - ECL: About the same.
127+
;; - Allegro: Tons of consing for CONS. PUSH slightly less memory than STREAM?
128+
129+
;; CONCLUSIONS
130+
;;
131+
;; I will adopt the stream approach overall as it is more efficient for large
132+
;; strings and yields a better string type.
133+
134+
#+nil
135+
(transduce (comp (take 10000) (map #'char-upcase)) #'string-cons (repeat #\a))
136+
137+
#+nil
138+
(transduce (comp (take 10000) (map #'char-upcase)) #'string-with-stream (repeat #\a))
139+
140+
#+nil
141+
(transduce (comp (take 10000) (map #'char-upcase)) #'string-push (repeat #\a))
142+
143+
#+nil
144+
(progn
145+
(format t "--- string with base-char ---~%")
146+
(time (dotimes (n 1000)
147+
(transduce (comp (take 10000) (map #'char-upcase)) #'string (repeat #\a))))
148+
(format t "--- base-string with base-char ---~%")
149+
(time (dotimes (n 1000)
150+
(transduce (comp (take 10000) (map #'char-upcase)) #'base-string (repeat #\a))))
151+
(format t "--- string with unicode ---~%")
152+
(time (dotimes (n 1000)
153+
(transduce (comp (take 10000) (map #'char-upcase)) #'string (repeat #\天)))))
154+
155+
#+nil
156+
(let ((s (transduce (take 100) #'string (repeat #\a))))
157+
(format t "--- string-transduce ---~%")
158+
(time (dotimes (n 10000)
159+
(string-transduce #'pass #'string s)))
160+
(format t "--- simple-string-transduce ---~%")
161+
(time (dotimes (n 10000)
162+
(simple-string-transduce #'pass #'string s))))
163+
164+
;; CONCLUSIONS
165+
;;
166+
;; Even for strings of length 100, the simple-string variant is faster.
167+
168+
;; --- SEXP --- ;;
169+
170+
(defun sexp-push (reducer)
171+
"Transducer: Interpret the data stream as S-expressions, yielding one at a time.
172+
The stream can consist of either individual characters or whole strings. The
173+
former would occur when transducing over a string directly. The latter would
174+
occur when transducing over a stream/file line-by-line."
175+
(let ((acc (short-string))
176+
(parens 0))
177+
(lambda (result &optional (input nil i?))
178+
(declare (type fixnum parens))
179+
(labels ((one-char (res c)
180+
(case c
181+
(#\(
182+
(incf parens)
183+
(vector-push-extend c acc)
184+
res)
185+
(#\)
186+
(decf parens)
187+
(vector-push-extend c acc)
188+
(cond ((zerop parens)
189+
(let ((curr acc))
190+
(setf acc (short-string))
191+
(funcall reducer res curr)))
192+
((< parens 0) (error 'unmatched-closing-paren))
193+
(t res)))
194+
(t (cond ((zerop parens) res)
195+
(t (vector-push-extend c acc)
196+
res)))))
197+
(a-string (res i)
198+
(declare (type fixnum i))
199+
(cond ((= i (length input)) res)
200+
(t (let ((res (one-char res (char input i))))
201+
(cond ((reduced? res) res)
202+
(t (a-string res (1+ i)))))))))
203+
(cond (i? (etypecase input
204+
(character (one-char result input))
205+
(cl:string (a-string result 0))))
206+
(t (funcall reducer result)))))))
207+
208+
(defun sexp-push-convert (reducer)
209+
"Transducer: Interpret the data stream as S-expressions, yielding one at a time.
210+
The stream can consist of either individual characters or whole strings. The
211+
former would occur when transducing over a string directly. The latter would
212+
occur when transducing over a stream/file line-by-line."
213+
(let ((acc (short-string))
214+
(parens 0))
215+
(lambda (result &optional (input nil i?))
216+
(declare (type fixnum parens))
217+
(labels ((one-char (res c)
218+
(case c
219+
(#\(
220+
(incf parens)
221+
(vector-push-extend c acc)
222+
res)
223+
(#\)
224+
(decf parens)
225+
(vector-push-extend c acc)
226+
(cond ((zerop parens)
227+
(let ((curr (coerce acc '(simple-array character (*)))))
228+
(setf acc (short-string))
229+
(funcall reducer res curr)))
230+
((< parens 0) (error 'unmatched-closing-paren))
231+
(t res)))
232+
(t (cond ((zerop parens) res)
233+
(t (vector-push-extend c acc)
234+
res)))))
235+
(a-string (res i)
236+
(declare (type fixnum i))
237+
(cond ((= i (length input)) res)
238+
(t (let ((res (one-char res (char input i))))
239+
(cond ((reduced? res) res)
240+
(t (a-string res (1+ i)))))))))
241+
(cond (i? (etypecase input
242+
(character (one-char result input))
243+
(cl:string (a-string result 0))))
244+
(t (funcall reducer result)))))))
245+
246+
#+nil
247+
(let ((s (uiop:read-file-string #p"tests/sexp.txt")))
248+
(format t "--- PUSH ---~%")
249+
(time (dotimes (n 1000)
250+
(transduce #'sexp-push #'cons s)))
251+
#+nil
252+
(time (dotimes (n 100000)
253+
(transduce #'sexp-push #'cons "(+ 1 1) (+ 2 2) (+ 3 (* 4 5)) (+ 1 1) (+ 1 1) (+ 1 2 3 4)")))
254+
#+nil
255+
(time (dotimes (n 100000)
256+
(transduce #'sexp-push #'cons "(+ 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1) (+ 1 1)")))
257+
(format t "--- PUSH CONVERT ---~%")
258+
(time (dotimes (n 1000)
259+
(transduce #'sexp-push-convert #'cons s)))
260+
#+nil
261+
(time (dotimes (n 100000)
262+
(transduce #'sexp-push-convert #'cons "(+ 1 1) (+ 2 2) (+ 3 (* 4 5)) (+ 1 1) (+ 1 1) (+ 1 2 3 4)")))
263+
#+nil
264+
(time (dotimes (n 100000)
265+
(transduce #'sexp-push-convert #'cons "(+ 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1) (+ 1 1)")))
266+
(format t "--- STREAM ---~%")
267+
(time (dotimes (n 1000)
268+
(transduce #'sexp #'cons s)))
269+
#+nil
270+
(time (dotimes (n 100000)
271+
(transduce #'sexp #'cons "(+ 1 1) (+ 2 2) (+ 3 (* 4 5)) (+ 1 1) (+ 1 1) (+ 1 2 3 4)")))
272+
#+nil
273+
(time (dotimes (n 100000)
274+
(transduce #'sexp #'cons "(+ 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1) (+ 1 1)"))))
275+
276+
;; OBSERVATIONS
277+
;;
278+
;; SBCL: For only a few sexps, PUSH uses less memory, but STREAM is a bit
279+
;; faster. Adding more sexps actually makes STREAM. use more memory. If the
280+
;; sexps are few but long, STREAM is faster yet, although PUSH is still using
281+
;; the least memory. STREAM the best for the "large file" scenario.
282+
;;
283+
;; ECL: For tiny sexps, PUSH is faster and uses far less memory.
284+
;;
285+
;; Allegro: STREAM is significantly slower and uses several times more memory.
286+
;; PUSH CONVERT a happy medium? But for a file with multiple large function
287+
;; definitions, STREAM is the winner again.
288+
;;
289+
;; CONCLUSION
290+
;;
291+
;; Memory usage depends on how big the sexps are and how many of them there are.
292+
;; Overall, rapidly allocating many streams seems to take more memory. However,
293+
;; for large files of large sexps, the STREAM approach basically always wins.
294+
;; I will go with STREAM because:
295+
;;
296+
;; - For SBCL it performs the best.
297+
;; - It is competitive with other compilers.
298+
;; - It yields a favourable return type without extra conversions.

transducers/transducers.lisp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -460,27 +460,27 @@ applications of a given function F.
460460
The stream can consist of either individual characters or whole strings. The
461461
former would occur when transducing over a string directly. The latter would
462462
occur when transducing over a stream/file line-by-line."
463-
(let ((acc (short-string))
463+
(let ((acc (make-string-output-stream :element-type 'character))
464464
(parens 0))
465465
(lambda (result &optional (input nil i?))
466466
(declare (type fixnum parens))
467467
(labels ((one-char (res c)
468468
(case c
469469
(#\(
470470
(incf parens)
471-
(vector-push-extend c acc)
471+
(write-char c acc)
472472
res)
473473
(#\)
474474
(decf parens)
475-
(vector-push-extend c acc)
475+
(write-char c acc)
476476
(cond ((zerop parens)
477-
(let ((curr acc))
478-
(setf acc (short-string))
477+
(let ((curr (get-output-stream-string acc)))
478+
(setf acc (make-string-output-stream :element-type 'character))
479479
(funcall reducer res curr)))
480480
((< parens 0) (error 'unmatched-closing-paren))
481481
(t res)))
482482
(t (cond ((zerop parens) res)
483-
(t (vector-push-extend c acc)
483+
(t (write-char c acc)
484484
res)))))
485485
(a-string (res i)
486486
(declare (type fixnum i))

0 commit comments

Comments
 (0)