Skip to content

Commit 89beded

Browse files
committed
fix incorrect header size assumption
1 parent 48ce39f commit 89beded

File tree

3 files changed

+510
-15
lines changed

3 files changed

+510
-15
lines changed

copied_std.go

Lines changed: 371 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,371 @@
1+
// Copyright 2009 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
package tarfs
6+
7+
import (
8+
"archive/tar"
9+
"bytes"
10+
"io"
11+
"strconv"
12+
"strings"
13+
)
14+
15+
// start -- common.go
16+
17+
// Keywords for GNU sparse files in a PAX extended header.
// Every key is the shared "GNU.sparse." prefix followed by a field name.
const (
	paxGNUSparse          = "GNU.sparse."
	paxGNUSparseNumBlocks = paxGNUSparse + "numblocks"
	paxGNUSparseOffset    = paxGNUSparse + "offset"
	paxGNUSparseNumBytes  = paxGNUSparse + "numbytes"
	paxGNUSparseMap       = paxGNUSparse + "map"
	paxGNUSparseName      = paxGNUSparse + "name"
	paxGNUSparseMajor     = paxGNUSparse + "major"
	paxGNUSparseMinor     = paxGNUSparse + "minor"
	paxGNUSparseSize      = paxGNUSparse + "size"
	paxGNUSparseRealSize  = paxGNUSparse + "realsize"
)
30+
31+
// sparseEntry describes one fragment of a sparse file: Length bytes
// starting at byte position Offset.
type sparseEntry struct {
	Offset int64
	Length int64
}

// endOffset returns the position one past the last byte of the fragment.
func (s sparseEntry) endOffset() int64 {
	return s.Offset + s.Length
}

// A sparse file can be represented as either a sparseDatas or a sparseHoles.
// As long as the total size is known, they are equivalent and one can be
// converted to the other form and back. The various tar formats with sparse
// file support represent sparse files in the sparseDatas form. That is, they
// specify the fragments in the file that have data, and treat everything else
// as having zero bytes. As such, the encoding and decoding logic in this
// package deals with sparseDatas.
//
// However, the external API uses sparseHoles instead of sparseDatas because the
// zero value of sparseHoles logically represents a normal file (i.e., there are
// no holes in it). On the other hand, the zero value of sparseDatas implies
// that the file has no data in it, which is rather odd.
//
// As an example, if the underlying raw file contains the 8-byte data:
//
//	var compactFile = "abcdefgh"
//
// And the sparse map has the following entries:
//
//	var spd sparseDatas = []sparseEntry{
//		{Offset: 2,  Length: 5},  // Data fragment for 2..6
//		{Offset: 18, Length: 3},  // Data fragment for 18..20
//	}
//	var sph sparseHoles = []sparseEntry{
//		{Offset: 0,  Length: 2},  // Hole fragment for 0..1
//		{Offset: 7,  Length: 11}, // Hole fragment for 7..17
//		{Offset: 21, Length: 4},  // Hole fragment for 21..24
//	}
//
// Then the content of the resulting sparse file with a Header.Size of 25 is:
//
//	var sparseFile = "\x00"*2 + "abcde" + "\x00"*11 + "fgh" + "\x00"*4
type sparseDatas []sparseEntry

// sparseHoles lists the hole (all-zero) fragments of a sparse file; see the
// discussion on sparseDatas for how the two forms relate.
type sparseHoles []sparseEntry

// invertSparseEntries converts a sparse map from one form to the other: the
// returned fragments are exactly the gaps between the non-empty fragments of
// src, within a file of the given total size.
//
// The result is built in src's own backing array, so src is overwritten and
// must not be used by the caller afterwards. Zero-length input fragments are
// ignored, zero-length gaps are dropped, and a final fragment running from the
// end of the last input fragment up to size is always appended; it is the
// only fragment in the result that may be empty.
//
// No validation is done here: src is expected to be sorted, non-overlapping
// and to fit within size.
func invertSparseEntries(src []sparseEntry, size int64) []sparseEntry {
	inverted := src[:0] // Reuse src's storage; writes never pass the read position
	var gap sparseEntry // Gap currently being measured; starts at offset 0
	for i := range src {
		frag := src[i]
		if frag.Length == 0 {
			continue // Empty fragments do not split a gap
		}
		// The gap runs from the end of the previous fragment to the start
		// of this one.
		if gap.Length = frag.Offset - gap.Offset; gap.Length > 0 {
			inverted = append(inverted, gap)
		}
		gap.Offset = frag.endOffset()
	}
	// Whatever remains after the last fragment, possibly nothing.
	gap.Length = size - gap.Offset
	return append(inverted, gap)
}
88+
89+
// end -- common.go
90+
91+
// start -- format.go
92+
93+
// Size constants from various tar specifications.
const (
	blockSize  = 512 // Size of each block in a tar stream
	nameSize   = 100 // Max length of the name field in USTAR format
	prefixSize = 155 // Max length of the prefix field in USTAR format
)

// block is one raw record of a tar stream, blockSize bytes long.
type block [blockSize]byte

// headerV7 is a block-sized byte array with accessors for the fields that sit
// at fixed offsets in a V7 header.
type headerV7 [blockSize]byte

// The conversions below reinterpret the same memory under another type; no
// bytes are copied, so writes through the result are visible in the block.

// toV7 views the block as a headerV7.
func (b *block) toV7() *headerV7 {
	return (*headerV7)(b)
}

// toGNU views the block as a headerGNU.
func (b *block) toGNU() *headerGNU {
	return (*headerGNU)(b)
}

// func (b *block) toSTAR() *headerSTAR   { return (*headerSTAR)(b) }
// func (b *block) toUSTAR() *headerUSTAR { return (*headerUSTAR)(b) }

// toSparse views the entire block as a sparseArray.
func (b *block) toSparse() sparseArray {
	return sparseArray(b[:])
}

// func (h *headerV7) name() []byte     { return h[000:][:100] }
// func (h *headerV7) mode() []byte     { return h[100:][:8] }
// func (h *headerV7) uid() []byte      { return h[108:][:8] }
// func (h *headerV7) gid() []byte      { return h[116:][:8] }

// size returns the 12-byte size field found at offset 124.
func (h *headerV7) size() []byte {
	return h[124:136]
}

// func (h *headerV7) modTime() []byte  { return h[136:][:12] }
// func (h *headerV7) chksum() []byte   { return h[148:][:8] }

// typeFlag returns the 1-byte type flag field found at offset 156.
func (h *headerV7) typeFlag() []byte {
	return h[156:157]
}

// func (h *headerV7) linkName() []byte { return h[157:][:100] }

// headerGNU is a block-sized byte array with accessors for the fields that sit
// at fixed offsets in a GNU header.
type headerGNU [blockSize]byte

// func (h *headerGNU) v7() *headerV7       { return (*headerV7)(h) }
// func (h *headerGNU) magic() []byte       { return h[257:][:6] }
// func (h *headerGNU) version() []byte     { return h[263:][:2] }
// func (h *headerGNU) userName() []byte    { return h[265:][:32] }
// func (h *headerGNU) groupName() []byte   { return h[297:][:32] }
// func (h *headerGNU) devMajor() []byte    { return h[329:][:8] }
// func (h *headerGNU) devMinor() []byte    { return h[337:][:8] }
// func (h *headerGNU) accessTime() []byte  { return h[345:][:12] }
// func (h *headerGNU) changeTime() []byte  { return h[357:][:12] }

// sparse returns the sparse array embedded at offset 386: room for four
// 24-byte entries followed by the single isExtended byte.
func (h *headerGNU) sparse() sparseArray {
	return sparseArray(h[386 : 386+24*4+1])
}

// func (h *headerGNU) realSize() []byte    { return h[483:][:12] }

// type headerSTAR [blockSize]byte

// func (h *headerSTAR) v7() *headerV7      { return (*headerV7)(h) }
// func (h *headerSTAR) magic() []byte      { return h[257:][:6] }
// func (h *headerSTAR) version() []byte    { return h[263:][:2] }
// func (h *headerSTAR) userName() []byte   { return h[265:][:32] }
// func (h *headerSTAR) groupName() []byte  { return h[297:][:32] }
// func (h *headerSTAR) devMajor() []byte   { return h[329:][:8] }
// func (h *headerSTAR) devMinor() []byte   { return h[337:][:8] }
// func (h *headerSTAR) prefix() []byte     { return h[345:][:131] }
// func (h *headerSTAR) accessTime() []byte { return h[476:][:12] }
// func (h *headerSTAR) changeTime() []byte { return h[488:][:12] }
// func (h *headerSTAR) trailer() []byte    { return h[508:][:4] }

// type headerUSTAR [blockSize]byte

// func (h *headerUSTAR) v7() *headerV7     { return (*headerV7)(h) }
// func (h *headerUSTAR) magic() []byte     { return h[257:][:6] }
// func (h *headerUSTAR) version() []byte   { return h[263:][:2] }
// func (h *headerUSTAR) userName() []byte  { return h[265:][:32] }
// func (h *headerUSTAR) groupName() []byte { return h[297:][:32] }
// func (h *headerUSTAR) devMajor() []byte  { return h[329:][:8] }
// func (h *headerUSTAR) devMinor() []byte  { return h[337:][:8] }
// func (h *headerUSTAR) prefix() []byte    { return h[345:][:155] }

// sparseArray is a run of 24-byte sparse entries followed by one trailing
// isExtended byte.
type sparseArray []byte

// entry returns the bytes starting at the i-th 24-byte entry. The result is
// not truncated to a single entry; sparseElem's accessors pick out the fields.
func (s sparseArray) entry(i int) sparseElem {
	return sparseElem(s[i*24:])
}

// isExtended returns the single byte that follows the last whole entry.
func (s sparseArray) isExtended() []byte {
	tail := s[24*s.maxEntries():]
	return tail[:1]
}

// maxEntries returns how many whole 24-byte entries fit in the array.
func (s sparseArray) maxEntries() int {
	return len(s) / 24
}

// sparseElem is one sparse entry: a 12-byte offset field followed by a
// 12-byte length field.
type sparseElem []byte

// offset returns the first 12 bytes of the entry.
func (s sparseElem) offset() []byte {
	return s[:12]
}

// length returns the 12 bytes that follow the offset field.
func (s sparseElem) length() []byte {
	rest := s[12:]
	return rest[:12]
}
175+
// end -- format.go
176+
177+
// start -- reader.go
178+
179+
func mustReadFull(r io.Reader, b []byte) (int, error) {
180+
n, err := tryReadFull(r, b)
181+
if err == io.EOF {
182+
err = io.ErrUnexpectedEOF
183+
}
184+
return n, err
185+
}
186+
187+
// tryReadFull reads from r until b is full or a read returns an error.
// It returns the number of bytes copied into b. If the input ends before b
// is full the error is io.EOF, even when some bytes were read; an io.EOF
// that arrives together with the final bytes of a completely filled buffer
// is suppressed.
func tryReadFull(r io.Reader, b []byte) (n int, err error) {
	for n < len(b) {
		var got int
		got, err = r.Read(b[n:])
		n += got
		if err != nil {
			break
		}
	}
	if err == io.EOF && n == len(b) {
		err = nil // The buffer was filled; hitting EOF at the same time is fine
	}
	return n, err
}
198+
199+
func readGNUSparseMap1x0(r io.Reader) (sparseDatas, error) {
200+
var (
201+
cntNewline int64
202+
buf bytes.Buffer
203+
blk block
204+
)
205+
206+
// feedTokens copies data in blocks from r into buf until there are
207+
// at least cnt newlines in buf. It will not read more blocks than needed.
208+
feedTokens := func(n int64) error {
209+
for cntNewline < n {
210+
if _, err := mustReadFull(r, blk[:]); err != nil {
211+
return err
212+
}
213+
buf.Write(blk[:])
214+
for _, c := range blk {
215+
if c == '\n' {
216+
cntNewline++
217+
}
218+
}
219+
}
220+
return nil
221+
}
222+
223+
// nextToken gets the next token delimited by a newline. This assumes that
224+
// at least one newline exists in the buffer.
225+
nextToken := func() string {
226+
cntNewline--
227+
tok, _ := buf.ReadString('\n')
228+
return strings.TrimRight(tok, "\n")
229+
}
230+
231+
// Parse for the number of entries.
232+
// Use integer overflow resistant math to check this.
233+
if err := feedTokens(1); err != nil {
234+
return nil, err
235+
}
236+
numEntries, err := strconv.ParseInt(nextToken(), 10, 0) // Intentionally parse as native int
237+
if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) {
238+
return nil, tar.ErrHeader
239+
}
240+
241+
// Parse for all member entries.
242+
// numEntries is trusted after this since a potential attacker must have
243+
// committed resources proportional to what this library used.
244+
if err := feedTokens(2 * numEntries); err != nil {
245+
return nil, err
246+
}
247+
spd := make(sparseDatas, 0, numEntries)
248+
for i := int64(0); i < numEntries; i++ {
249+
offset, err1 := strconv.ParseInt(nextToken(), 10, 64)
250+
length, err2 := strconv.ParseInt(nextToken(), 10, 64)
251+
if err1 != nil || err2 != nil {
252+
return nil, tar.ErrHeader
253+
}
254+
spd = append(spd, sparseEntry{Offset: offset, Length: length})
255+
}
256+
return spd, nil
257+
}
258+
259+
func readGNUSparseMap0x1(paxHdrs map[string]string) (sparseDatas, error) {
260+
// Get number of entries.
261+
// Use integer overflow resistant math to check this.
262+
numEntriesStr := paxHdrs[paxGNUSparseNumBlocks]
263+
numEntries, err := strconv.ParseInt(numEntriesStr, 10, 0) // Intentionally parse as native int
264+
if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) {
265+
return nil, tar.ErrHeader
266+
}
267+
268+
// There should be two numbers in sparseMap for each entry.
269+
sparseMap := strings.Split(paxHdrs[paxGNUSparseMap], ",")
270+
if len(sparseMap) == 1 && sparseMap[0] == "" {
271+
sparseMap = sparseMap[:0]
272+
}
273+
if int64(len(sparseMap)) != 2*numEntries {
274+
return nil, tar.ErrHeader
275+
}
276+
277+
// Loop through the entries in the sparse map.
278+
// numEntries is trusted now.
279+
spd := make(sparseDatas, 0, numEntries)
280+
for len(sparseMap) >= 2 {
281+
offset, err1 := strconv.ParseInt(sparseMap[0], 10, 64)
282+
length, err2 := strconv.ParseInt(sparseMap[1], 10, 64)
283+
if err1 != nil || err2 != nil {
284+
return nil, tar.ErrHeader
285+
}
286+
spd = append(spd, sparseEntry{Offset: offset, Length: length})
287+
sparseMap = sparseMap[2:]
288+
}
289+
return spd, nil
290+
}
291+
292+
// end -- reader.go
293+
294+
// start -- strconv.go
295+
296+
// parser decodes header fields. Rather than returning an error from every
// call, a failed parse is recorded in err for the caller to inspect later.
type parser struct {
	err error // Last error seen
}

// parseString parses bytes as a NUL-terminated C-style string.
// If a NUL byte is not found then the whole slice is returned as a string.
func (*parser) parseString(b []byte) string {
	end := bytes.IndexByte(b, 0)
	if end < 0 {
		end = len(b)
	}
	return string(b[:end])
}

// parseNumeric parses the input as being encoded in either base-256 or octal.
// This function may return negative numbers.
// If parsing fails or an integer overflow occurs, err will be set.
func (p *parser) parseNumeric(b []byte) int64 {
	// Only a set high bit in the first byte selects base-256 (binary);
	// everything else, including empty input, is base-8 (octal).
	if len(b) == 0 || b[0]&0x80 == 0 {
		return p.parseOctal(b)
	}

	// The bytes after the marker bit form a two's complement number in
	// big-endian byte order. Negative numbers are handled via the identity
	//	-a-1 == ^a
	// by inverting every data byte, accumulating the result as an unsigned
	// value, and inverting once more at the end.
	negative := b[0]&0x40 != 0
	var mask byte // 0x00 if positive or zero, 0xff if negative
	if negative {
		mask = 0xff
	}

	var acc uint64
	for idx := range b {
		digit := b[idx] ^ mask // Inverted only when mask is 0xff
		if idx == 0 {
			digit &= 0x7f // Drop the base-256 marker bit
		}
		if acc>>56 != 0 {
			p.err = tar.ErrHeader // Shifting in another byte would overflow
			return 0
		}
		acc = acc<<8 | uint64(digit)
	}
	if acc>>63 != 0 {
		p.err = tar.ErrHeader // Does not fit in an int64
		return 0
	}
	if negative {
		return ^int64(acc)
	}
	return int64(acc)
}

// parseOctal parses the input as an octal number. Leading and trailing NULs
// and spaces are stripped first, because unused fields are filled with NULs
// and fields may be padded with either. An input that is empty after
// stripping yields 0. On a parse failure err is set.
func (p *parser) parseOctal(b []byte) int64 {
	digits := bytes.Trim(b, " \x00")
	if len(digits) == 0 {
		return 0
	}
	val, parseErr := strconv.ParseUint(p.parseString(digits), 8, 64)
	if parseErr != nil {
		p.err = tar.ErrHeader
	}
	return int64(val)
}
370+
371+
// end -- strconv.go

0 commit comments

Comments
 (0)