aboutsummaryrefslogtreecommitdiff
path: root/module/vcomponent
diff options
context:
space:
mode:
authorHugo Hörnquist <hugo@lysator.liu.se>2021-08-12 12:45:06 +0200
committerHugo Hörnquist <hugo@lysator.liu.se>2021-08-15 19:44:59 +0200
commit036ffa651fb847c92c9e243a6d34744e92a167b8 (patch)
treec6fb373b2c90fd4be71e9b36025796232c26b783 /module/vcomponent
parentAdd TODO about bad string-append performance. (diff)
downloadcalp-036ffa651fb847c92c9e243a6d34744e92a167b8.tar.gz
calp-036ffa651fb847c92c9e243a6d34744e92a167b8.tar.xz
Refactor read-file to use string ports.
Like the diff mentions; appending strings leads to a huge number of allocations and deallocations, killing performance in the GC for large strings. String ports on the other hand say nothing about how the work internally, allowing Guile to have efficient C code which completely bypasses the GC (this was briefly checked in Guile's source code, commit 118ee0c50ba90ea7ad7ff1fd2a212bbbb7a66d99 (3.0.7 + a few commits). The approximate speed increase is at least two orders of magnitude, if not three.
Diffstat (limited to 'module/vcomponent')
-rw-r--r--module/vcomponent/ical/parse.scm66
1 files changed, 40 insertions, 26 deletions
diff --git a/module/vcomponent/ical/parse.scm b/module/vcomponent/ical/parse.scm
index 207c9ae8..b67ae593 100644
--- a/module/vcomponent/ical/parse.scm
+++ b/module/vcomponent/ical/parse.scm
@@ -35,32 +35,46 @@
(define (read-file port)
(define fname (port-filename port))
(let loop ((line-number 1) (done '()))
- (let ((line (read-line port)))
- (if (eof-object? line)
- (reverse! done)
- (let ((line (string-trim-right line)))
- (loop
- (1+ line-number)
- (if (char=? #\space (string-ref line 0))
- ;; Line Wrapping
- ;; TODO if the line is split inside a unicode character
- ;; then this produces multiple broken unicode characters.
- ;; It could be solved by checking the start of the new line,
- ;; and the tail of the old line for broken char
- ;;
- ;; TODO This gets really slow when concatenating huge
- ;; numbers of strings (31862 lines gave ~100s runtime).
- ;; Almost all which was spent in repaing pipes in the GC.
- ;; Possible solution would be to instead write to output-strings.
- ;; Those should be faster since Guile then controlls the memory
- ;; of the buffer manually, allowing efficient C memory operations.
- (cons (make-line (string-append (get-string (car done))
- (string-drop line 1))
- fname
- (get-line (car done)))
- (cdr done))
- (cons (make-line line fname line-number)
- done))))))))
+ (let ((ostr (open-output-string)))
+ (define ret
+ (let loop ((line (read-line port)))
+ (if (eof-object? line)
+ 'eof
+ (let ((line (string-trim-right line #\return)))
+ (let ((next (peek-char port)))
+ (display line ostr)
+ (cond ((eof-object? next)
+ 'final-line)
+ ;; Line Wrapping
+ ;; If the first character on a line is space (whitespace?)
+ ;; then it's a continuation line, and should be merged
+ ;; with the one preceeding it.
+ ;; TODO if the line is split inside a unicode character
+ ;; then this produces multiple broken unicode characters.
+ ;; It could be solved by checking the start of the new line,
+ ;; and the tail of the old line for broken char
+ ;; TODO what about other leading whitespace?
+ ((char=? next #\space)
+ (read-char port) ; discard leading whitespace
+ (loop (read-line port)))
+ (else
+ ;; (unread-char next)
+ 'line)))))))
+ (case ret
+ ((line)
+ (let ((str (get-output-string ostr)))
+ (close-port ostr)
+ (loop (1+ line-number)
+ (cons (make-line str fname line-number)
+ done))))
+ ((eof)
+ (close-port ostr)
+ (reverse! done))
+ ((final-line)
+ (let ((str (get-output-string ostr)))
+ (close-port ostr)
+ (reverse! (cons (make-line str fname line-number)
+ done))))))))
(define-immutable-record-type <tokens>
(make-tokens metadata data)