From 634502e7246f8850ad6c649b79ae9f072f45baf4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20H=C3=B6rnquist?= Date: Mon, 11 Sep 2023 17:54:35 +0200 Subject: Introduce stream-split-by. This procedure isn't currently used, but as noted is really useful for grouping a character stream into a word stream, which a later commit will use for justifying possibly infinite streams of text. --- doc/ref/guile/srfi-41.texi | 20 ++++++++++++++++++++ module/srfi/srfi-41/util.scm | 15 ++++++++++++++- tests/test/srfi-41-util.scm | 20 ++++++++++++++++++++ 3 files changed, 54 insertions(+), 1 deletion(-) diff --git a/doc/ref/guile/srfi-41.texi b/doc/ref/guile/srfi-41.texi index 310751ec..d8020ecc 100644 --- a/doc/ref/guile/srfi-41.texi +++ b/doc/ref/guile/srfi-41.texi @@ -73,6 +73,26 @@ times. stream cons, but eval arguments beforehand. @end defun +@defun stream-split-by pred strm +Chunks the content of @var{strm} into lists, breaking on @var{pred}. +If the end of the stream is reached, the remaining objects +are put into a final chunk. + +Can for example be used to split a stream of characters into a stream +of words. + +@lisp +(stream-split-by (lambda (c) (char=? c #\space)) + (-> "This is a short test" + string->list list->stream)) +⇒ #<stream (#\T #\h #\i #\s #\space) + (#\i #\s #\space) + (#\a #\space) + (#\s #\h #\o #\r #\t #\space) + (#\t #\e #\s #\t)> +@end lisp +@end defun + @defun stream-timeslice-limit stream timeslice Wrap a stream in time limits. Each element has at most @var{timeslice} seconds to produce a value, otherwise the stream ends.
Useful for finding the diff --git a/module/srfi/srfi-41/util.scm b/module/srfi/srfi-41/util.scm index cecbb3b3..1571cc4c 100644 --- a/module/srfi/srfi-41/util.scm +++ b/module/srfi/srfi-41/util.scm @@ -18,7 +18,8 @@ stream-partition stream-split stream-paginate - stream-timeslice-limit)) + stream-timeslice-limit + stream-split-by)) (define (stream-car+cdr stream) (values (stream-car stream) @@ -146,3 +147,15 @@ (stream-timeslice-limit (stream-cdr strm) timeslice))) (lambda _ stream-null))) + +(define-stream (stream-split-by pred strm) + (let loop ((accumulated '()) + (strm strm)) + (stream-match strm + (() (if (null? accumulated) + stream-null + (stream (reverse accumulated)))) + ((x . xs) (pred x) + (stream-cons (reverse (cons x accumulated)) (loop '() xs))) + ((x . xs) + (loop (cons x accumulated) xs))))) diff --git a/tests/test/srfi-41-util.scm b/tests/test/srfi-41-util.scm index ff0e3cce..9a753b03 100644 --- a/tests/test/srfi-41-util.scm +++ b/tests/test/srfi-41-util.scm @@ -8,6 +8,7 @@ :use-module (srfi srfi-88) :use-module (srfi srfi-41 util) :use-module (srfi srfi-41) + :use-module ((srfi srfi-1) :select (circular-list)) :use-module ((ice-9 sandbox) :select (call-with-time-limit))) (test-equal "Finite stream" @@ -86,3 +87,22 @@ (test-equal "time limited stream" '(1 2 3) (stream->list strm)))) + + +(test-group "stream-split-by" + (let ((hello-chars-stream (stream-unfold + car + (const #t) + cdr + (apply circular-list + (string->list "Hello "))))) + (test-equal "Check that test list looks as expected" + (string->list "Hello Hell") + (stream->list 10 hello-chars-stream)) + (test-equal "Check that it splits correctly" + '("Hello " "Hello " "Hello ") + (stream->list + 3 + (stream-map list->string + (stream-split-by (lambda (c) (char=? c #\space)) + hello-chars-stream)))))) -- cgit v1.2.3