Skip to content

Commit ac26599

Browse files
authored
#404: avoid unsafe tar file extraction (#410)
1 parent 3e0ed4e commit ac26599

File tree

2 files changed

+37
-6
lines changed

2 files changed

+37
-6
lines changed

recirq/documentation_utils.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
# limitations under the License.
1414

1515
import inspect
16+
import io
1617
import os
1718
import re
1819
import tarfile
@@ -67,6 +68,14 @@ def fetch_guide_data_collection_data(base_dir=None):
6768

6869
print("Downloading guide/data_collection data.")
6970
stream = urllib.request.urlopen("https://ndownloader.figshare.com/files/25541666")
70-
71-
with tarfile.open(fileobj=stream, mode='r|xz') as tf:
72-
tf.extractall(path=base_dir)
71+
# Read into a BytesIO object to make it seekable
72+
stream_bytes = io.BytesIO(stream.read())
73+
74+
with tarfile.open(fileobj=stream_bytes, mode='r|xz') as tf:
75+
for member in tf.getmembers():
76+
# Ensure the path being extracted is safe.
77+
if not os.path.abspath(os.path.join(base_dir, member.name)).startswith(
78+
os.path.abspath(base_dir)
79+
):
80+
raise ValueError(f"Encountered untrusted path {member.name}")
81+
tf.extract(member, path=base_dir)

recirq/documentation_utils_test.py

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,11 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414
import os
15+
import io
16+
import tarfile
17+
from unittest import mock
18+
19+
import pytest
1520

1621
import recirq
1722
from recirq.readout_scan.tasks import ReadoutScanTask
@@ -35,6 +40,23 @@ def test_display_markdown_docstring():
3540
"""
3641

3742

38-
def test_fetch_guide_data_collection_data(tmpdir):
39-
recirq.fetch_guide_data_collection_data(base_dir=tmpdir)
40-
assert os.path.exists(f'{tmpdir}/2020-02-tutorial')
43+
@mock.patch('urllib.request.urlopen')
44+
def test_fetch_guide_data_collection_data_traversal(mock_urlopen, tmpdir):
45+
# Create a malicious tarball in memory.
46+
malicious_tar_stream = io.BytesIO()
47+
with tarfile.open(fileobj=malicious_tar_stream, mode='w:xz') as tf:
48+
# Add a file that tries to write outside the target directory
49+
malicious_info = tarfile.TarInfo(name="../../tmp/pwned")
50+
tf.addfile(malicious_info, io.BytesIO(b"pwned"))
51+
malicious_tar_stream.seek(0)
52+
53+
# Read the stream into a BytesIO object so that the mock should return a
54+
# response object whose read() method returns the tarball content.
55+
mock_response = mock.Mock()
56+
mock_response.read.return_value = malicious_tar_stream.getvalue()
57+
mock_urlopen.return_value = mock_response
58+
59+
with pytest.raises(ValueError, match="Encountered untrusted path"):
60+
recirq.fetch_guide_data_collection_data(base_dir=tmpdir)
61+
62+
assert not os.path.exists('/tmp/pwned')

0 commit comments

Comments
 (0)