diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index 1fdf989..4a87ae8 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -67,31 +67,44 @@ jobs:
             build: TARGET_CC=clang yarn build --target x86_64-unknown-linux-gnu --use-napi-cross
           - host: ubuntu-latest
             target: x86_64-unknown-linux-musl
-            build: yarn build --target x86_64-unknown-linux-musl -x
+            build: |
+              yarn build --target x86_64-unknown-linux-musl -x
+              rm tar.linux-x64-gnu.node
           - host: macos-latest
             target: aarch64-apple-darwin
             build: yarn build --target aarch64-apple-darwin
           - host: ubuntu-latest
             target: aarch64-unknown-linux-gnu
-            build: TARGET_CC=clang yarn build --target aarch64-unknown-linux-gnu --use-napi-cross
+            build: |
+              TARGET_CC=clang yarn build --target aarch64-unknown-linux-gnu --use-napi-cross
+              rm tar.linux-x64-gnu.node
           - host: ubuntu-latest
             target: armv7-unknown-linux-gnueabihf
-            build: TARGET_CC=clang yarn build --target armv7-unknown-linux-gnueabihf --use-napi-cross
+            build: |
+              TARGET_CC=clang yarn build --target armv7-unknown-linux-gnueabihf --use-napi-cross
+              rm tar.linux-x64-gnu.node
           - host: ubuntu-latest
             target: aarch64-linux-android
-            build: yarn build --target aarch64-linux-android
+            build: |
+              yarn build --target aarch64-linux-android
+              rm tar.linux-x64-gnu.node
           - host: ubuntu-latest
             target: armv7-linux-androideabi
-            build: yarn build --target armv7-linux-androideabi
+            build: |
+              yarn build --target armv7-linux-androideabi
+              rm tar.linux-x64-gnu.node
           - host: ubuntu-latest
             target: aarch64-unknown-linux-musl
-            build: yarn build --target aarch64-unknown-linux-musl -x
+            build: |
+              yarn build --target aarch64-unknown-linux-musl -x
+              rm tar.linux-x64-gnu.node
           - host: ubuntu-latest
             target: powerpc64le-unknown-linux-gnu
             build: |
               export CC=clang
               export TARGET_CC=clang
               yarn build --target powerpc64le-unknown-linux-gnu --use-napi-cross
+              rm tar.linux-x64-gnu.node
           - host: ubuntu-latest
             target: s390x-unknown-linux-gnu
             build: |
@@ -99,6 +112,7 @@ jobs:
               export TARGET_CC=clang
               export CFLAGS="-fuse-ld=lld"
               yarn build --target s390x-unknown-linux-gnu --use-napi-cross
+              rm tar.linux-x64-gnu.node
           - host: windows-latest
             target: aarch64-pc-windows-msvc
             build: yarn build --target aarch64-pc-windows-msvc
@@ -109,6 +123,7 @@ jobs:
           - host: ubuntu-latest
             target: wasm32-wasip1-threads
             build: |
               wget https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-27/wasi-sdk-27.0-x86_64-linux.tar.gz
               tar -xvf wasi-sdk-27.0-x86_64-linux.tar.gz
               export WASI_SDK_PATH="$(pwd)/wasi-sdk-27.0-x86_64-linux"
               yarn build --target wasm32-wasip1-threads
+              rm tar.linux-x64-gnu.node
     name: stable - ${{ matrix.settings.target }} - node@22
     runs-on: ${{ matrix.settings.host }}
     steps:
diff --git a/README.md b/README.md
index 07902c0..1ba2ffc 100644
--- a/README.md
+++ b/README.md
@@ -14,6 +14,7 @@ export class Entries {
 }
 export class Entry {
   path(): string | null
+  asBytes(): Buffer
 }
 export class Archive {
   /** Create a new archive with the underlying path. */
@@ -34,6 +35,47 @@ export class Archive {
 }
 ```
 
+## Extract Single File
+
+You can extract a specific file from a tar archive without extracting the entire archive. This is useful for inspecting Docker OCI images or extracting specific configuration files:
+
+```ts
+import { Archive } from '@napi-rs/tar'
+
+// Extract a single file (similar to: tar -x -O -f archive.tar filename)
+function extractFile(archivePath: string, targetPath: string): Buffer | null {
+  const archive = new Archive(archivePath)
+  for (const entry of archive.entries()) {
+    if (entry.path() === targetPath) {
+      return entry.asBytes()
+    }
+  }
+  return null
+}
+
+// Usage example
+const indexContent = extractFile('./docker-image.tar', 'index.json')
+if (indexContent) {
+  const manifest = JSON.parse(indexContent.toString('utf-8'))
+  console.log(manifest)
+}
+```
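+
+Since `Archive` also accepts a `Buffer`, the same lookup works for an archive that is already in memory (a minimal sketch, mirroring the buffer-based test in `__test__/index.spec.ts`):
+
+```ts
+import { readFile } from 'node:fs/promises'
+import { Archive } from '@napi-rs/tar'
+
+// Load the tarball into memory first, then look up a single entry
+const archive = new Archive(await readFile('./docker-image.tar'))
+for (const entry of archive.entries()) {
+  if (entry.path() === 'index.json') {
+    console.log(JSON.parse(entry.asBytes().toString('utf-8')))
+  }
+}
+```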
+
 ## Install this test package
 
 ```
diff --git a/__test__/index.spec.ts b/__test__/index.spec.ts
index d62dd0c..3fae5fc 100644
--- a/__test__/index.spec.ts
+++ b/__test__/index.spec.ts
@@ -1,10 +1,10 @@
 import { readFile } from 'node:fs/promises'
 import { join } from 'node:path'
+import { fileURLToPath } from 'node:url'
 
 import test from 'ava'
 
 import { Archive } from '../index'
-import { fileURLToPath } from 'node:url'
 
 const __dirname = join(fileURLToPath(import.meta.url), '..')
 
@@ -16,6 +16,10 @@ test('should be able to read archive', (t) => {
 })
 
 test('should be able to unpack archive', (t) => {
+  if (process.env.NAPI_RS_FORCE_WASI) {
+    t.pass('Skipping unpack test on WASI')
+    return
+  }
   const archive = new Archive(join(__dirname, 'src.tar'))
   archive.unpack(__dirname)
   t.pass()
@@ -49,3 +53,120 @@ test('should be able to handle tar.xz', (t) => {
     t.is(typeof entry.path(), 'string')
   }
 })
+
+test('should be able to extract single file with asBytes', (t) => {
+  const archive = new Archive(join(__dirname, 'src.tar'))
+  for (const entry of archive.entries()) {
+    const path = entry.path()
+    if (path === 'src/lib.rs') {
+      const content = entry.asBytes()
+      t.true(content instanceof Buffer, 'asBytes should return a Buffer')
+      t.true(content.length > 0, 'Content should not be empty')
+
+      // The content should be valid Rust code, so let's check for some expected content
+      const contentStr = content.toString('utf-8')
+      t.true(contentStr.includes('use'), 'Should contain Rust use statements')
+      t.true(contentStr.includes('napi'), 'Should contain napi imports')
+      return
+    }
+  }
+  t.fail('Could not find src/lib.rs in the archive')
+})
+
+test('should be able to extract multiple files with asBytes', (t) => {
+  const archive = new Archive(join(__dirname, 'src.tar'))
+  const extractedFiles = new Map<string, Buffer>()
+
+  for (const entry of archive.entries()) {
+    const path = entry.path()
+    if (path && path.endsWith('.rs')) {
+      const content = entry.asBytes()
+      extractedFiles.set(path, content)
+    }
+  }
+
+  t.true(extractedFiles.size >= 2, 'Should extract at least 2 .rs files')
+  t.true(extractedFiles.has('src/lib.rs'), 'Should have extracted src/lib.rs')
+  t.true(extractedFiles.has('src/entry.rs'), 'Should have extracted src/entry.rs')
+
+  // Verify all extracted content is non-empty and valid
+  for (const [path, content] of extractedFiles) {
+    t.true(content instanceof Buffer, `Content of ${path} should be a Buffer`)
+    t.true(content.length > 0, `Content of ${path} should not be empty`)
+    t.true(content.toString('utf-8').includes('use'), `${path} should contain Rust use statements`)
+  }
+})
+
+test('should work with asBytes on compressed archives', async (t) => {
+  const formats = ['src.tar.gz', 'src.tar.bz2', 'src.tar.xz']
+
+  for (const format of formats) {
+    const archive = new Archive(join(__dirname, format))
+    let foundFile = false
+
+    for (const entry of archive.entries()) {
+      const path = entry.path()
+      if (path === 'src/lib.rs') {
+        const content = entry.asBytes()
+        t.true(content instanceof Buffer, `asBytes should return Buffer for ${format}`)
+        t.true(content.length > 0, `Content should not be empty for ${format}`)
+        foundFile = true
+        break
+      }
+    }
+
+    t.true(foundFile, `Should find src/lib.rs in ${format}`)
+  }
+})
+
+test('should work with asBytes from buffer-based archive', async (t) => {
+  const archiveBuffer = await readFile(join(__dirname, 'src.tar'))
+  const archive = new Archive(archiveBuffer)
+
+  for (const entry of archive.entries()) {
+    const path = entry.path()
+    if (path === 'src/lib.rs') {
+      const content = entry.asBytes()
+      t.true(content instanceof Buffer, 'asBytes should return a Buffer')
+      t.true(content.length > 0, 'Content should not be empty')
+
+      const contentStr = content.toString('utf-8')
+      t.true(contentStr.includes('napi'), 'Should contain napi imports')
+      return
+    }
+  }
+  t.fail('Could not find src/lib.rs in buffer-based archive')
+})
+
+test('Docker OCI use case - extract specific file like index.json', (t) => {
+  // This test demonstrates the exact use case mentioned in issue #58
+  // where you want to extract a specific file from a tarball (like Docker OCI images)
+
+  // Function to extract a specific file by name, similar to: tar -x -O -f something.tar index.json
+  function extractFile(archivePath: string, targetPath: string): Buffer | null {
+    const archive = new Archive(archivePath)
+    for (const entry of archive.entries()) {
+      const path = entry.path()
+      if (path === targetPath) {
+        return entry.asBytes()
+      }
+    }
+    return null
+  }
+
+  const archivePath = join(__dirname, 'src.tar')
+
+  // Extract src/lib.rs (simulating extracting index.json from a Docker image)
+  const libRsContent = extractFile(archivePath, 'src/lib.rs')
+  t.not(libRsContent, null, 'Should be able to extract src/lib.rs')
+  t.true(libRsContent instanceof Buffer, 'Extracted content should be a Buffer')
+  t.true(libRsContent!.length > 0, 'Extracted content should not be empty')
+
+  // Verify the content is correct
+  const contentStr = libRsContent!.toString('utf-8')
+  t.true(contentStr.includes('#![deny(clippy::all)]'), 'Should contain expected Rust code')
+
+  // Try to extract a non-existent file
+  const nonExistentContent = extractFile(archivePath, 'non-existent.json')
+  t.is(nonExistentContent, null, 'Should return null for non-existent files')
+})
diff --git a/index.d.ts b/index.d.ts
index 67dd217..ed7f6c0 100644
--- a/index.d.ts
+++ b/index.d.ts
@@ -102,6 +112,28 @@ export declare class Entry {
    */
   path(): string | null
   header(): ReadonlyHeader
+  /**
+   * Read the entirety of this entry into a byte vector.
+   *
+   * This is equivalent to the functionality provided by `tar -x -O -f archive.tar filename`,
+   * which extracts a single file and outputs its contents to stdout.
+   *
+   * This method will read the entire contents of this entry into memory.
+   * For large files, consider using streaming methods if memory usage is a concern.
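+   *
+   * A minimal usage sketch (the archive path and entry name are illustrative):
+   *
+   * @example
+   * ```ts
+   * const archive = new Archive('archive.tar')
+   * for (const entry of archive.entries()) {
+   *   if (entry.path() === 'index.json') {
+   *     console.log(entry.asBytes().toString('utf-8'))
+   *   }
+   * }
+   * ```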
+   */
+  asBytes(): Buffer
 }
 
 export declare class Header {
diff --git a/package.json b/package.json
index 49d0939..5542437 100644
--- a/package.json
+++ b/package.json
@@ -64,6 +64,7 @@
   "devDependencies": {
     "@napi-rs/cli": "^3.1.3",
     "@napi-rs/lzma": "^1.4.4",
+    "@napi-rs/wasm-runtime": "^1.0.3",
     "@oxc-node/core": "^0.0.32",
     "@taplo/cli": "^0.7.0",
     "@types/node": "^24.2.1",
diff --git a/src/entry.rs b/src/entry.rs
index d1f4a2f..d75a471 100644
--- a/src/entry.rs
+++ b/src/entry.rs
@@ -1,3 +1,5 @@
+use std::io::Read;
+
 use napi::{
   bindgen_prelude::{Env, Reference, SharedReference},
   iterator::Generator,
@@ -58,4 +60,18 @@ impl Entry {
       this.share_with(env, |e| Ok(e.inner.header()))?,
     ))
   }
+
+  #[napi]
+  /// Read the entirety of this entry into a byte vector.
+  ///
+  /// This is equivalent to the functionality provided by `tar -x -O -f archive.tar filename`,
+  /// which extracts a single file and outputs its contents to stdout.
+  ///
+  /// This method will read the entire contents of this entry into memory.
+  /// For large files, consider using streaming methods if memory usage is a concern.
+  pub fn as_bytes(&mut self) -> napi::Result<napi::bindgen_prelude::Buffer> {
+    let mut data = Vec::new();
+    self.inner.read_to_end(&mut data)?;
+    Ok(data.into())
+  }
 }
diff --git a/yarn.lock b/yarn.lock
index 7cf2068..96d9f57 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -560,6 +560,7 @@ __metadata:
   dependencies:
     "@napi-rs/cli": "npm:^3.1.3"
     "@napi-rs/lzma": "npm:^1.4.4"
+    "@napi-rs/wasm-runtime": "npm:^1.0.3"
     "@oxc-node/core": "npm:^0.0.32"
     "@taplo/cli": "npm:^0.7.0"
     "@types/node": "npm:^24.2.1"