Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 16 additions & 8 deletions packages/buffered/_test.pony
Original file line number Diff line number Diff line change
Expand Up @@ -161,12 +161,14 @@ class iso _TestReader is UnitTest
h.assert_eq[U128](b.u128_be()?, 0xDEADBEEFFEEDFACEDEADBEEFFEEDFACE)
h.assert_eq[U128](b.u128_le()?, 0xDEADBEEFFEEDFACEDEADBEEFFEEDFACE)

h.assert_eq[String](b.line()?, "hi")
(var line: String val, _) = b.line()?
h.assert_eq[String]("hi", line)
try
b.read_until(0)?
h.fail("should fail reading until 0")
end
h.assert_eq[String](b.line()?, "there")
(line, _) = b.line()?
h.assert_eq[String]("there", line)

b.append(['h'; 'i'])

Expand All @@ -179,7 +181,8 @@ class iso _TestReader is UnitTest
h.assert_eq[U8](b.u8()?, 'i')

b.append(['!'; '\n'])
h.assert_eq[String](b.line()?, "!")
(line, _) = b.line()?
h.assert_eq[String](line, "!")

b.append(['s'; 't'; 'r'; '1'])
try
Expand Down Expand Up @@ -221,9 +224,10 @@ class iso _TestWriter is UnitTest
.> u128_le(0xDEADBEEFFEEDFACEDEADBEEFFEEDFACE)

wb.write(['h'; 'i'])
wb.writev([
let chars: Array[ByteSeq] val = [
['\n'; 't'; 'h'; 'e']
['r'; 'e'; '\r'; '\n']])
['r'; 'e'; '\r'; '\n']]
wb.writev(chars)

for bs in wb.done().values() do
b.append(bs)
Expand Down Expand Up @@ -254,8 +258,10 @@ class iso _TestWriter is UnitTest
h.assert_eq[U128](b.u128_be()?, 0xDEADBEEFFEEDFACEDEADBEEFFEEDFACE)
h.assert_eq[U128](b.u128_le()?, 0xDEADBEEFFEEDFACEDEADBEEFFEEDFACE)

h.assert_eq[String](b.line()?, "hi")
h.assert_eq[String](b.line()?, "there")
(var line: String val, _) = b.line()?
h.assert_eq[String](line, "hi")
(line, _) = b.line()?
h.assert_eq[String](line, "there")

b.append(['h'; 'i'])

Expand All @@ -265,4 +271,6 @@ class iso _TestWriter is UnitTest
end

b.append(['!'; '\n'])
h.assert_eq[String](b.line()?, "hi!")

(line, _) = b.line()?
h.assert_eq[String](line, "hi!")
75 changes: 60 additions & 15 deletions packages/buffered/reader.pony
Original file line number Diff line number Diff line change
Expand Up @@ -81,14 +81,8 @@ class Reader
"""
Add a chunk of data.
"""
let data_array =
match data
| let data': Array[U8] val => data'
| let data': String => data'.array()
end

_available = _available + data_array.size()
_chunks.push((data_array, 0))
_available = _available + data.size()
_chunks.push((data, 0))

fun ref skip(n: USize) ? =>
"""
Expand Down Expand Up @@ -167,16 +161,64 @@ class Reader
u8()?
b

fun ref line(keep_line_breaks: Bool = false): String iso^ ? =>
fun ref codepoint[D: StringDecoder = UTF8StringDecoder](): (U32, U8) ? =>
  """
  Return a pair containing a unicode codepoint, and the number of bytes
  consumed to produce the codepoint. Depending on how bytes are decoded into
  characters, the number of bytes consumed may be greater than one. If the
  bytes cannot be converted to a codepoint, codepoint 0xFFFD is returned, and
  1 byte is consumed.

  Raises an error if the buffer holds no bytes at all, or if the bytes the
  decoder reports as used cannot be consumed from the buffer.
  """
  let decoder_bytes = StringDecoderBytes.create()

  // Peek (without consuming) at most 4 bytes; stop early when the buffer
  // runs out so that a short tail can still be decoded.
  while decoder_bytes.bytes_loaded() < 4 do
    try
      decoder_bytes.pushByte(peek_u8(decoder_bytes.bytes_loaded().usize())?)
    else
      break
    end
  end

  // Nothing could be peeked: there is no codepoint to return.
  if decoder_bytes.bytes_loaded() == 0 then
    error
  end

  (let c, let sz) = D.decode(decoder_bytes.decode_bytes())

  // The bytes returned by `block` are ignored; the call is only there to
  // mark the bytes that were decoded into a character as consumed. A failure
  // here is propagated instead of being masked by a (0, 0) result, which
  // would hand the caller a codepoint that consumed no input.
  block(sz.usize())?
  (c, sz)

fun ref string[D: StringDecoder = UTF8StringDecoder](len: USize): (String iso^, USize) ? =>
  """
  Read `len` characters from the buffer and return them as a string, paired
  with the number of bytes that were consumed to produce it. Because a
  character may be decoded from more than one byte, the byte count can be
  larger than `len`. Invalid byte sequences may show up in the string as
  0xFFFD codepoints.
  """
  let text: String iso = recover String(len) end
  var consumed: USize = 0
  var remaining = len

  // Decode one codepoint per iteration until `len` characters are collected.
  while remaining > 0 do
    (let cp, let width) = codepoint[D]()?
    text.push(cp)
    consumed = consumed + width.usize()
    remaining = remaining - 1
  end

  (consume text, consumed)

fun ref line[D: StringDecoder = UTF8StringDecoder](keep_line_breaks: Bool = false): (String iso^, USize) ? =>
"""
Return a pair containing a \n or \r\n terminated line as a string, and the number
of bytes consumed to produce the string. By default the newline is not
included in the returned string, but it is removed from the buffer.
Set `keep_line_breaks` to `true` to keep the line breaks in the returned line.
"""
let len = _search_length()?

_available = _available - len
var out = recover String(len) end
var outb = recover Array[U8](len) end
var i = USize(0)

while i < len do
Expand All @@ -187,7 +229,7 @@ class Reader
let need = len - i
let copy_len = need.min(avail)

out.append(data, offset, copy_len)
outb.append(data, offset, copy_len)

if avail > need then
node()? = (data, offset + need)
Expand All @@ -201,14 +243,16 @@ class Reader
let trunc_len: USize =
if keep_line_breaks then
0
elseif (len >= 2) and (out.at_offset(-2)? == '\r') then
elseif (len >= 2) and (outb.apply(outb.size()-2)? == '\r') then
2
else
1
end
out.truncate(len - trunc_len)
outb.truncate(len - trunc_len)

var out = recover String.from_iso_array[D](consume outb) end

consume out
(consume out, len)

fun ref u8(): U8 ? =>
"""
Expand Down Expand Up @@ -758,6 +802,7 @@ class Reader

error

// TODO: Fix to handle multi-byte sequences
fun ref _distance_of(byte: U8): USize ? =>
"""
Get the distance to the first occurrence of the given byte
Expand Down
38 changes: 27 additions & 11 deletions packages/buffered/writer.pony
Original file line number Diff line number Diff line change
Expand Up @@ -251,35 +251,51 @@ class Writer
"""
u128_be(data.u128())

fun ref write(data: ByteSeq) =>
fun ref write[E: StringEncoder val = UTF8StringEncoder](data: (String | ByteSeq)) =>
"""
Write a ByteSeq to the buffer.
Write a String or a ByteSeq to the buffer. String characters will be converted to bytes using
the specified encoding (UTF-8 by default).
"""

// if `data` is 1 cacheline or less in size
// copy it into the existing `_current` array
// to coalesce multiple tiny arrays
// into a single bigger array
if data.size() <= 64 then
match data
| let d: String =>
let a = d.array()
let a = d.array[E]()
_current.copy_from(a, 0, _current.size(), a.size())
| let d: Array[U8] val =>
_size = _size + a.size()
| let d: ByteSeq =>
_current.copy_from(d, 0, _current.size(), d.size())
_size = _size + data.size()
end
_size = _size + data.size()
else
_append_current()
_chunks.push(data)
_size = _size + data.size()
match data
| let s: String =>
_chunks.push(s.array[E]())
_size = _size + s.byte_size()
| let d: ByteSeq =>
_chunks.push(d)
_size = _size + d.size()
end
end

fun ref writev(data: ByteSeqIter) =>
fun ref writev[E: StringEncoder val = UTF8StringEncoder](data: (StringIter | ByteSeqIter)) =>
"""
Write ByteSeqs to the buffer.
Write Strings or ByteSeqs to the buffer.
"""
for chunk in data.values() do
write(chunk)
match data
| let si: StringIter =>
for chunk in si.values() do
write[E](chunk)
end
| let bsi: ByteSeqIter =>
for chunk in bsi.values() do
write(chunk)
end
end

fun ref done(): Array[ByteSeq] iso^ =>
Expand Down
7 changes: 6 additions & 1 deletion packages/builtin/array.pony
Original file line number Diff line number Diff line change
Expand Up @@ -380,7 +380,12 @@ class Array[A] is Seq[A]
Truncate an array to the given length, discarding excess elements. If the
array is already smaller than len, do nothing.
"""
_size = _size.min(len)
if len >= _alloc then
_size = len.min(_alloc)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When `len >= _alloc`, `len.min(_alloc)` is just `_alloc`.
Alternatively, since both branches assign `_size = len.min(_alloc)`, that line could be pulled out of the `if`.

reserve(_alloc)
else
_size = len.min(_alloc)
end

fun ref trim_in_place(from: USize = 0, to: USize = -1) =>
"""
Expand Down
18 changes: 18 additions & 0 deletions packages/builtin/ascii_string_encoder.pony
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
primitive ASCIIStringEncoder is StringEncoder
  """
  Encodes codepoints as single ASCII bytes.
  """

  fun encode(value: U32): (USize, U32) =>
    """
    Return a pair of the byte count (always 1) and the encoded byte.
    Codepoints below 0x80 are passed through unchanged; anything else is
    replaced by 0x3F ('?').
    """
    let byte: U32 = if value < 0x80 then value else 0x3F end
    (1, byte)

primitive ASCIIStringDecoder is StringDecoder
  """
  Decodes single ASCII bytes into codepoints.
  """

  fun decode(b: U32): (U32, U8) =>
    """
    Decode the byte held in the most significant 8 bits of `b`. Return a pair
    of the codepoint and the number of bytes used (always 1). Bytes of 0x80
    and above are not ASCII and produce the codepoint 0xFFFD.
    """
    // Only the top byte of `b` is inspected.
    let lead: U32 = b >> 24
    let codepoint: U32 = if lead < 0x80 then lead else 0xFFFD end
    (codepoint, 1)
13 changes: 13 additions & 0 deletions packages/builtin/iso-8859-1_string_encoder.pony
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
primitive ISO88591StringEncoder is StringEncoder
  """
  Encodes codepoints as single ISO-8859-1 bytes.
  """

  fun encode(value: U32): (USize, U32) =>
    """
    Return a pair of the byte count (always 1) and the encoded byte.
    Codepoints below 0x100 are passed through unchanged; anything else is
    replaced by 0x3F ('?').
    """
    let byte: U32 = if value < 0x100 then value else 0x3F end
    (1, byte)

primitive ISO88591StringDecoder is StringDecoder
  """
  Decodes single ISO-8859-1 bytes into codepoints.
  """

  fun decode(b: U32): (U32, U8) =>
    """
    Decode the byte held in the most significant 8 bits of `b`. Return a pair
    of that byte's value as the codepoint and the number of bytes used
    (always 1).
    """
    // Only the top byte of `b` is inspected.
    let codepoint: U32 = b >> 24
    (codepoint, 1)
Loading