|
| 1 | +// Copyright 2009 The Go Authors. All rights reserved. |
| 2 | +// Use of this source code is governed by a BSD-style |
| 3 | +// license that can be found in the LICENSE file. |
| 4 | + |
| 5 | +package tarfs |
| 6 | + |
| 7 | +import ( |
| 8 | + "archive/tar" |
| 9 | + "bytes" |
| 10 | + "io" |
| 11 | + "strconv" |
| 12 | + "strings" |
| 13 | +) |
| 14 | + |
| 15 | +// start -- common.go |
| 16 | + |
// Keywords for GNU sparse files in a PAX extended header.
// Every key is the shared paxGNUSparse prefix followed by a field name.
const (
	paxGNUSparse = "GNU.sparse."

	paxGNUSparseNumBlocks = paxGNUSparse + "numblocks"
	paxGNUSparseOffset    = paxGNUSparse + "offset"
	paxGNUSparseNumBytes  = paxGNUSparse + "numbytes"
	paxGNUSparseMap       = paxGNUSparse + "map"
	paxGNUSparseName      = paxGNUSparse + "name"
	paxGNUSparseMajor     = paxGNUSparse + "major"
	paxGNUSparseMinor     = paxGNUSparse + "minor"
	paxGNUSparseSize      = paxGNUSparse + "size"
	paxGNUSparseRealSize  = paxGNUSparse + "realsize"
)
| 30 | + |
// sparseEntry describes one contiguous fragment of a file by the position
// where it starts and the number of bytes it covers.
type sparseEntry struct {
	Offset int64
	Length int64
}

// endOffset reports the position immediately after the fragment,
// that is Offset+Length.
func (s sparseEntry) endOffset() int64 {
	return s.Offset + s.Length
}
| 34 | + |
| 35 | +// A sparse file can be represented as either a sparseDatas or a sparseHoles. |
| 36 | +// As long as the total size is known, they are equivalent and one can be |
| 37 | +// converted to the other form and back. The various tar formats with sparse |
| 38 | +// file support represent sparse files in the sparseDatas form. That is, they |
| 39 | +// specify the fragments in the file that has data, and treat everything else as |
| 40 | +// having zero bytes. As such, the encoding and decoding logic in this package |
| 41 | +// deals with sparseDatas. |
| 42 | +// |
| 43 | +// However, the external API uses sparseHoles instead of sparseDatas because the |
| 44 | +// zero value of sparseHoles logically represents a normal file (i.e., there are |
| 45 | +// no holes in it). On the other hand, the zero value of sparseDatas implies |
| 46 | +// that the file has no data in it, which is rather odd. |
| 47 | +// |
| 48 | +// As an example, if the underlying raw file contains the 10-byte data: |
| 49 | +// |
| 50 | +// var compactFile = "abcdefgh" |
| 51 | +// |
| 52 | +// And the sparse map has the following entries: |
| 53 | +// |
| 54 | +// var spd sparseDatas = []sparseEntry{ |
| 55 | +// {Offset: 2, Length: 5}, // Data fragment for 2..6 |
| 56 | +// {Offset: 18, Length: 3}, // Data fragment for 18..20 |
| 57 | +// } |
| 58 | +// var sph sparseHoles = []sparseEntry{ |
| 59 | +// {Offset: 0, Length: 2}, // Hole fragment for 0..1 |
| 60 | +// {Offset: 7, Length: 11}, // Hole fragment for 7..17 |
| 61 | +// {Offset: 21, Length: 4}, // Hole fragment for 21..24 |
| 62 | +// } |
| 63 | +// |
| 64 | +// Then the content of the resulting sparse file with a Header.Size of 25 is: |
| 65 | +// |
| 66 | +// var sparseFile = "\x00"*2 + "abcde" + "\x00"*11 + "fgh" + "\x00"*4 |
| 67 | +type ( |
| 68 | + sparseDatas []sparseEntry |
| 69 | + sparseHoles []sparseEntry |
| 70 | +) |
| 71 | + |
| 72 | +func invertSparseEntries(src []sparseEntry, size int64) []sparseEntry { |
| 73 | + dst := src[:0] |
| 74 | + var pre sparseEntry |
| 75 | + for _, cur := range src { |
| 76 | + if cur.Length == 0 { |
| 77 | + continue // Skip empty fragments |
| 78 | + } |
| 79 | + pre.Length = cur.Offset - pre.Offset |
| 80 | + if pre.Length > 0 { |
| 81 | + dst = append(dst, pre) // Only add non-empty fragments |
| 82 | + } |
| 83 | + pre.Offset = cur.endOffset() |
| 84 | + } |
| 85 | + pre.Length = size - pre.Offset // Possibly the only empty fragment |
| 86 | + return append(dst, pre) |
| 87 | +} |
| 88 | + |
| 89 | +// end -- common.go |
| 90 | + |
| 91 | +// start -- format.go |
| 92 | + |
// Size constants from various tar specifications.
const (
	// blockSize is the size of each block in a tar stream.
	blockSize = 512

	// nameSize is the max length of the name field in USTAR format.
	nameSize = 100

	// prefixSize is the max length of the prefix field in USTAR format.
	prefixSize = 155
)
| 99 | + |
| 100 | +type block [blockSize]byte |
| 101 | + |
| 102 | +type headerV7 [blockSize]byte |
| 103 | + |
| 104 | +// Convert block to any number of formats. |
| 105 | +func (b *block) toV7() *headerV7 { return (*headerV7)(b) } |
| 106 | +func (b *block) toGNU() *headerGNU { return (*headerGNU)(b) } |
| 107 | + |
| 108 | +// func (b *block) toSTAR() *headerSTAR { return (*headerSTAR)(b) } |
| 109 | +// func (b *block) toUSTAR() *headerUSTAR { return (*headerUSTAR)(b) } |
| 110 | +func (b *block) toSparse() sparseArray { return sparseArray(b[:]) } |
| 111 | + |
| 112 | +// func (h *headerV7) name() []byte { return h[000:][:100] } |
| 113 | +// func (h *headerV7) mode() []byte { return h[100:][:8] } |
| 114 | +// func (h *headerV7) uid() []byte { return h[108:][:8] } |
| 115 | +// func (h *headerV7) gid() []byte { return h[116:][:8] } |
| 116 | +func (h *headerV7) size() []byte { return h[124:][:12] } |
| 117 | + |
| 118 | +// func (h *headerV7) modTime() []byte { return h[136:][:12] } |
| 119 | +// func (h *headerV7) chksum() []byte { return h[148:][:8] } |
| 120 | +func (h *headerV7) typeFlag() []byte { return h[156:][:1] } |
| 121 | + |
| 122 | +// func (h *headerV7) linkName() []byte { return h[157:][:100] } |
| 123 | + |
| 124 | +type headerGNU [blockSize]byte |
| 125 | + |
| 126 | +// func (h *headerGNU) v7() *headerV7 { return (*headerV7)(h) } |
| 127 | +// func (h *headerGNU) magic() []byte { return h[257:][:6] } |
| 128 | +// func (h *headerGNU) version() []byte { return h[263:][:2] } |
| 129 | +// func (h *headerGNU) userName() []byte { return h[265:][:32] } |
| 130 | +// func (h *headerGNU) groupName() []byte { return h[297:][:32] } |
| 131 | +// func (h *headerGNU) devMajor() []byte { return h[329:][:8] } |
| 132 | +// func (h *headerGNU) devMinor() []byte { return h[337:][:8] } |
| 133 | +// func (h *headerGNU) accessTime() []byte { return h[345:][:12] } |
| 134 | +// func (h *headerGNU) changeTime() []byte { return h[357:][:12] } |
| 135 | +func (h *headerGNU) sparse() sparseArray { return sparseArray(h[386:][:24*4+1]) } |
| 136 | + |
| 137 | +// func (h *headerGNU) realSize() []byte { return h[483:][:12] } |
| 138 | + |
| 139 | +// type headerSTAR [blockSize]byte |
| 140 | + |
| 141 | +// func (h *headerSTAR) v7() *headerV7 { return (*headerV7)(h) } |
| 142 | +// func (h *headerSTAR) magic() []byte { return h[257:][:6] } |
| 143 | +// func (h *headerSTAR) version() []byte { return h[263:][:2] } |
| 144 | +// func (h *headerSTAR) userName() []byte { return h[265:][:32] } |
| 145 | +// func (h *headerSTAR) groupName() []byte { return h[297:][:32] } |
| 146 | +// func (h *headerSTAR) devMajor() []byte { return h[329:][:8] } |
| 147 | +// func (h *headerSTAR) devMinor() []byte { return h[337:][:8] } |
| 148 | +// func (h *headerSTAR) prefix() []byte { return h[345:][:131] } |
| 149 | +// func (h *headerSTAR) accessTime() []byte { return h[476:][:12] } |
| 150 | +// func (h *headerSTAR) changeTime() []byte { return h[488:][:12] } |
| 151 | +// func (h *headerSTAR) trailer() []byte { return h[508:][:4] } |
| 152 | + |
| 153 | +// type headerUSTAR [blockSize]byte |
| 154 | + |
| 155 | +// func (h *headerUSTAR) v7() *headerV7 { return (*headerV7)(h) } |
| 156 | +// func (h *headerUSTAR) magic() []byte { return h[257:][:6] } |
| 157 | +// func (h *headerUSTAR) version() []byte { return h[263:][:2] } |
| 158 | +// func (h *headerUSTAR) userName() []byte { return h[265:][:32] } |
| 159 | +// func (h *headerUSTAR) groupName() []byte { return h[297:][:32] } |
| 160 | +// func (h *headerUSTAR) devMajor() []byte { return h[329:][:8] } |
| 161 | +// func (h *headerUSTAR) devMinor() []byte { return h[337:][:8] } |
| 162 | +// func (h *headerUSTAR) prefix() []byte { return h[345:][:155] } |
| 163 | + |
// sparseArray is a byte region holding consecutive 24-byte sparse entries,
// followed by a one-byte "is extended" flag.
type sparseArray []byte

// entry returns the i'th element. The returned slice starts at byte i*24 and
// runs to the end of s; it is not trimmed to 24 bytes.
func (s sparseArray) entry(i int) sparseElem {
	return sparseElem(s[i*24:])
}

// isExtended returns the single byte that follows the last whole entry.
func (s sparseArray) isExtended() []byte {
	flagPos := 24 * s.maxEntries()
	return s[flagPos : flagPos+1]
}

// maxEntries reports how many whole 24-byte entries fit in s.
func (s sparseArray) maxEntries() int {
	return len(s) / 24
}

// sparseElem is one sparse entry: a 12-byte offset field followed by a
// 12-byte length field.
type sparseElem []byte

// offset returns the first 12 bytes of the element.
func (s sparseElem) offset() []byte {
	return s[:12]
}

// length returns the 12 bytes that follow the offset field.
func (s sparseElem) length() []byte {
	return s[12:][:12]
}
| 174 | + |
| 175 | +// end -- format.go |
| 176 | + |
| 177 | +// start -- reader.go |
| 178 | + |
| 179 | +func mustReadFull(r io.Reader, b []byte) (int, error) { |
| 180 | + n, err := tryReadFull(r, b) |
| 181 | + if err == io.EOF { |
| 182 | + err = io.ErrUnexpectedEOF |
| 183 | + } |
| 184 | + return n, err |
| 185 | +} |
| 186 | + |
// tryReadFull reads from r until b is full or a Read call returns an error.
// It returns the number of bytes stored in b. If the input ends before b is
// full the error is io.EOF (even after a partial read); an io.EOF that
// arrives together with the final bytes of a complete fill is suppressed.
func tryReadFull(r io.Reader, b []byte) (n int, err error) {
	for n < len(b) {
		var got int
		got, err = r.Read(b[n:])
		n += got
		if err != nil {
			break
		}
	}
	if err == io.EOF && n == len(b) {
		// The buffer was filled completely; hitting the end is not a failure.
		err = nil
	}
	return n, err
}
| 198 | + |
| 199 | +func readGNUSparseMap1x0(r io.Reader) (sparseDatas, error) { |
| 200 | + var ( |
| 201 | + cntNewline int64 |
| 202 | + buf bytes.Buffer |
| 203 | + blk block |
| 204 | + ) |
| 205 | + |
| 206 | + // feedTokens copies data in blocks from r into buf until there are |
| 207 | + // at least cnt newlines in buf. It will not read more blocks than needed. |
| 208 | + feedTokens := func(n int64) error { |
| 209 | + for cntNewline < n { |
| 210 | + if _, err := mustReadFull(r, blk[:]); err != nil { |
| 211 | + return err |
| 212 | + } |
| 213 | + buf.Write(blk[:]) |
| 214 | + for _, c := range blk { |
| 215 | + if c == '\n' { |
| 216 | + cntNewline++ |
| 217 | + } |
| 218 | + } |
| 219 | + } |
| 220 | + return nil |
| 221 | + } |
| 222 | + |
| 223 | + // nextToken gets the next token delimited by a newline. This assumes that |
| 224 | + // at least one newline exists in the buffer. |
| 225 | + nextToken := func() string { |
| 226 | + cntNewline-- |
| 227 | + tok, _ := buf.ReadString('\n') |
| 228 | + return strings.TrimRight(tok, "\n") |
| 229 | + } |
| 230 | + |
| 231 | + // Parse for the number of entries. |
| 232 | + // Use integer overflow resistant math to check this. |
| 233 | + if err := feedTokens(1); err != nil { |
| 234 | + return nil, err |
| 235 | + } |
| 236 | + numEntries, err := strconv.ParseInt(nextToken(), 10, 0) // Intentionally parse as native int |
| 237 | + if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) { |
| 238 | + return nil, tar.ErrHeader |
| 239 | + } |
| 240 | + |
| 241 | + // Parse for all member entries. |
| 242 | + // numEntries is trusted after this since a potential attacker must have |
| 243 | + // committed resources proportional to what this library used. |
| 244 | + if err := feedTokens(2 * numEntries); err != nil { |
| 245 | + return nil, err |
| 246 | + } |
| 247 | + spd := make(sparseDatas, 0, numEntries) |
| 248 | + for i := int64(0); i < numEntries; i++ { |
| 249 | + offset, err1 := strconv.ParseInt(nextToken(), 10, 64) |
| 250 | + length, err2 := strconv.ParseInt(nextToken(), 10, 64) |
| 251 | + if err1 != nil || err2 != nil { |
| 252 | + return nil, tar.ErrHeader |
| 253 | + } |
| 254 | + spd = append(spd, sparseEntry{Offset: offset, Length: length}) |
| 255 | + } |
| 256 | + return spd, nil |
| 257 | +} |
| 258 | + |
| 259 | +func readGNUSparseMap0x1(paxHdrs map[string]string) (sparseDatas, error) { |
| 260 | + // Get number of entries. |
| 261 | + // Use integer overflow resistant math to check this. |
| 262 | + numEntriesStr := paxHdrs[paxGNUSparseNumBlocks] |
| 263 | + numEntries, err := strconv.ParseInt(numEntriesStr, 10, 0) // Intentionally parse as native int |
| 264 | + if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) { |
| 265 | + return nil, tar.ErrHeader |
| 266 | + } |
| 267 | + |
| 268 | + // There should be two numbers in sparseMap for each entry. |
| 269 | + sparseMap := strings.Split(paxHdrs[paxGNUSparseMap], ",") |
| 270 | + if len(sparseMap) == 1 && sparseMap[0] == "" { |
| 271 | + sparseMap = sparseMap[:0] |
| 272 | + } |
| 273 | + if int64(len(sparseMap)) != 2*numEntries { |
| 274 | + return nil, tar.ErrHeader |
| 275 | + } |
| 276 | + |
| 277 | + // Loop through the entries in the sparse map. |
| 278 | + // numEntries is trusted now. |
| 279 | + spd := make(sparseDatas, 0, numEntries) |
| 280 | + for len(sparseMap) >= 2 { |
| 281 | + offset, err1 := strconv.ParseInt(sparseMap[0], 10, 64) |
| 282 | + length, err2 := strconv.ParseInt(sparseMap[1], 10, 64) |
| 283 | + if err1 != nil || err2 != nil { |
| 284 | + return nil, tar.ErrHeader |
| 285 | + } |
| 286 | + spd = append(spd, sparseEntry{Offset: offset, Length: length}) |
| 287 | + sparseMap = sparseMap[2:] |
| 288 | + } |
| 289 | + return spd, nil |
| 290 | +} |
| 291 | + |
| 292 | +// end -- reader.go |
| 293 | + |
| 294 | +// start -- strconv.go |
| 295 | + |
// parser carries the most recent parse error so that several fields can be
// parsed in a row and checked once afterwards.
type parser struct {
	err error // Last error seen
}

// parseString interprets b as a C-style string: everything before the first
// NUL byte. When b holds no NUL byte the whole slice is used.
func (*parser) parseString(b []byte) string {
	end := bytes.IndexByte(b, 0)
	if end < 0 {
		return string(b)
	}
	return string(b[:end])
}
| 308 | + |
| 309 | +// parseNumeric parses the input as being encoded in either base-256 or octal. |
| 310 | +// This function may return negative numbers. |
| 311 | +// If parsing fails or an integer overflow occurs, err will be set. |
| 312 | +func (p *parser) parseNumeric(b []byte) int64 { |
| 313 | + // Check for base-256 (binary) format first. |
| 314 | + // If the first bit is set, then all following bits constitute a two's |
| 315 | + // complement encoded number in big-endian byte order. |
| 316 | + if len(b) > 0 && b[0]&0x80 != 0 { |
| 317 | + // Handling negative numbers relies on the following identity: |
| 318 | + // -a-1 == ^a |
| 319 | + // |
| 320 | + // If the number is negative, we use an inversion mask to invert the |
| 321 | + // data bytes and treat the value as an unsigned number. |
| 322 | + var inv byte // 0x00 if positive or zero, 0xff if negative |
| 323 | + if b[0]&0x40 != 0 { |
| 324 | + inv = 0xff |
| 325 | + } |
| 326 | + |
| 327 | + var x uint64 |
| 328 | + for i, c := range b { |
| 329 | + c ^= inv // Inverts c only if inv is 0xff, otherwise does nothing |
| 330 | + if i == 0 { |
| 331 | + c &= 0x7f // Ignore signal bit in first byte |
| 332 | + } |
| 333 | + if (x >> 56) > 0 { |
| 334 | + p.err = tar.ErrHeader // Integer overflow |
| 335 | + return 0 |
| 336 | + } |
| 337 | + x = x<<8 | uint64(c) |
| 338 | + } |
| 339 | + if (x >> 63) > 0 { |
| 340 | + p.err = tar.ErrHeader // Integer overflow |
| 341 | + return 0 |
| 342 | + } |
| 343 | + if inv == 0xff { |
| 344 | + return ^int64(x) |
| 345 | + } |
| 346 | + return int64(x) |
| 347 | + } |
| 348 | + |
| 349 | + // Normal case is base-8 (octal) format. |
| 350 | + return p.parseOctal(b) |
| 351 | +} |
| 352 | + |
| 353 | +func (p *parser) parseOctal(b []byte) int64 { |
| 354 | + // Because unused fields are filled with NULs, we need |
| 355 | + // to skip leading NULs. Fields may also be padded with |
| 356 | + // spaces or NULs. |
| 357 | + // So we remove leading and trailing NULs and spaces to |
| 358 | + // be sure. |
| 359 | + b = bytes.Trim(b, " \x00") |
| 360 | + |
| 361 | + if len(b) == 0 { |
| 362 | + return 0 |
| 363 | + } |
| 364 | + x, perr := strconv.ParseUint(p.parseString(b), 8, 64) |
| 365 | + if perr != nil { |
| 366 | + p.err = tar.ErrHeader |
| 367 | + } |
| 368 | + return int64(x) |
| 369 | +} |
| 370 | + |
| 371 | +// end -- strconv.go |
0 commit comments