Skip to content

Commit b2b2e5d

Browse files
committed
Add a BufferedIO type since peek/read are slow on IOStreams and possibly other IO types
1 parent 7d87847 commit b2b2e5d

File tree

2 files changed

+86
-0
lines changed

2 files changed

+86
-0
lines changed

src/Parsers.jl

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,76 @@ function fastseek!(io::IOBuffer, n::Integer)
5353
return
5454
end
5555

56+
"""
    BufferedIO{T} <: IO

Wrapper around an underlying stream `io::T` that keeps a chunk of its bytes in an
in-memory `buffer`, so single-byte peeks and reads can be served from the buffer
instead of going to the wrapped stream each time.
"""
mutable struct BufferedIO{T} <: IO
    # the wrapped stream that bytes are pulled from
    io::T
    # position reported by the wrapped stream right after the most recent buffer fill
    iopos::Int
    # storage for the bytes most recently pulled from `io`
    buffer::Vector{UInt8}
    # number of buffered bytes already consumed (0-based cursor into `buffer`)
    pos::Int
    # number of valid bytes placed in `buffer` by the most recent fill
    nbytes::Int
end
64+
65+
"""
    BufferedIO(io::IO; bufsize::Integer=8192)

Wrap `io` in a `BufferedIO`, immediately filling the internal buffer with up to
`bufsize` bytes read from `io`.

# Keywords
- `bufsize`: length in bytes of the internal buffer (default `8192`); must be positive.

# Throws
- `ArgumentError` if `bufsize` is not positive.
"""
function BufferedIO(io::IO; bufsize::Integer=8192)
    # A zero-length buffer could never hold a byte, so every fill would report
    # 0 bytes read and the wrapper would look exhausted even when `io` has data.
    bufsize > 0 || throw(ArgumentError("bufsize must be positive, got $bufsize"))
    buffer = zeros(UInt8, bufsize)
    nbytes = readbytes!(io, buffer)
    # `position(io)` is taken after the fill, so it marks the end of the buffered bytes.
    return BufferedIO(io, position(io), buffer, 0, nbytes)
end
70+
71+
# A `BufferedIO` reports end-of-file when its read cursor (`pos`) has caught up
# with the number of bytes held in the buffer (`nbytes`).
Base.eof(io::BufferedIO) = io.nbytes == io.pos
74+
75+
# Return the next buffered byte without consuming it (the cursor is left untouched).
# The buffer access is not bounds-checked and no end-of-file check is made here.
function peekbyte(io::BufferedIO)
    next = io.pos + 1
    return @inbounds io.buffer[next]
end
79+
80+
"""
    readbyte(io::BufferedIO)

Consume and return the next byte from `io`.

When the byte returned is the last one currently buffered, the buffer is refilled
from the wrapped stream right away, so that `eof(io)` afterwards reflects whether
more data is available.

# Throws
- `EOFError` if no unread bytes remain.
"""
function readbyte(io::BufferedIO)
    # Without this guard a read at end-of-file would hand back a stale byte and push
    # `pos` past `nbytes`, after which `eof` and `position` report wrong values.
    io.pos >= io.nbytes && throw(EOFError())
    io.pos += 1
    @inbounds b = io.buffer[io.pos]
    if io.pos == io.nbytes
        # buffer exhausted: pull the next chunk from the wrapped stream
        io.nbytes = readbytes!(io.io, io.buffer)
        io.iopos = position(io.io)
        io.pos = 0
    end
    return b
end
90+
91+
# Logical position of the `BufferedIO`: the wrapped stream's position after the last
# fill (`iopos`), moved back by the number of buffered bytes not yet consumed.
Base.position(io::BufferedIO) = io.iopos - (io.nbytes - io.pos)
94+
95+
"""
    fastseek!(io::BufferedIO, pos::Integer)

Move the read position of `io` to `pos`, expressed in the same units as
`position(io)`.

If the target falls inside the bytes currently buffered, only the buffer cursor is
moved. Otherwise the wrapped stream is sought to `pos` and the buffer is refilled
from there. Returns `nothing`.
"""
function fastseek!(io::BufferedIO, pos::Integer)
    target = Int(pos)
    delta = target - position(io)
    # already there: nothing to do
    delta == 0 && return
    # where the buffer cursor would land if the target is covered by the buffer
    newpos = io.pos + delta
    if 0 <= newpos < io.nbytes
        # target is within the buffered bytes: just move the cursor
        io.pos = newpos
    else
        # target is outside the buffered range: reposition the wrapped stream and refill
        seek(io.io, target)
        io.nbytes = readbytes!(io.io, io.buffer)
        io.iopos = position(io.io)
        io.pos = 0
    end
    return
end
125+
56126
"""
57127
Each `Parsers.Result` has a `r.code` field which has type `Parsers.ReturnCode` and is a set of bit flags for various parsing states.
58128
The top bit is used to indicate "SUCCESS" (0) and "INVALID" (1), so all failed parsing attempts will have a code < 0, while successful parsings will be > 0.

test/runtests.jl

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -724,4 +724,20 @@ r = Parsers.parse(Parsers.Delimited(Parsers.Quoted(Parsers.Sentinel(["-"]))), IO
724724

725725
end # @testset
726726

727+
@testset "BufferedIO" begin

    io = IOBuffer("hey there sally")
    b = Parsers.BufferedIO(io)
    @test !eof(b)
    @test Parsers.peekbyte(b) === UInt8('h')
    @test position(b) == 0
    @test Parsers.readbyte(b) === UInt8('h')
    @test position(b) == 1

    # forward and backward seeks that stay inside the buffered bytes
    Parsers.fastseek!(b, 4)
    @test Parsers.readbyte(b) === UInt8('t')
    Parsers.fastseek!(b, 2)
    @test Parsers.readbyte(b) === UInt8('y')
    @test position(b) == 3

    # consuming the final byte triggers a refill that finds nothing more to read
    Parsers.fastseek!(b, 14)
    @test !eof(b)
    @test Parsers.readbyte(b) === UInt8('y')
    @test eof(b)
    @test position(b) == 15

    # seeking back from end-of-file has to go through the wrapped stream
    Parsers.fastseek!(b, 0)
    @test !eof(b)
    @test position(b) == 0
    @test Parsers.readbyte(b) === UInt8('h')

end
742+
727743
end # @testset

0 commit comments

Comments
 (0)