Merge pull request #396 from Serene-Arc/bug_fix_385

Add path limit check
This commit is contained in:
Ali Parlakçı 2021-06-06 13:24:22 +03:00 committed by GitHub
commit 6839c65bd6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 53 additions and 22 deletions

View File

@ -4,6 +4,7 @@ import datetime
import logging
import platform
import re
import subprocess
from pathlib import Path
from typing import Optional
@ -104,32 +105,46 @@ class FileNameFormatter:
) -> Path:
subfolder = Path(
destination_directory,
*[self._format_name(resource.source_submission, part) for part in self.directory_format_string]
*[self._format_name(resource.source_submission, part) for part in self.directory_format_string],
)
index = f'_{str(index)}' if index else ''
if not resource.extension:
raise BulkDownloaderException(f'Resource from {resource.url} has no extension')
ending = index + resource.extension
file_name = str(self._format_name(resource.source_submission, self.file_format_string))
file_name = self._limit_file_name_length(file_name, ending)
try:
file_path = Path(subfolder, file_name)
file_path = self._limit_file_name_length(file_name, ending, subfolder)
except TypeError:
raise BulkDownloaderException(f'Could not determine path name: {subfolder}, {index}, {resource.extension}')
return file_path
@staticmethod
def _limit_file_name_length(filename: str, ending: str) -> str:
def _limit_file_name_length(filename: str, ending: str, root: Path) -> Path:
root = root.resolve().expanduser()
possible_id = re.search(r'((?:_\w{6})?$)', filename)
if possible_id:
ending = possible_id.group(1) + ending
filename = filename[:possible_id.start()]
max_path = FileNameFormatter.find_max_path_length()
max_length_chars = 255 - len(ending)
max_length_bytes = 255 - len(ending.encode('utf-8'))
while len(filename) > max_length_chars or len(filename.encode('utf-8')) > max_length_bytes:
max_path_length = max_path - len(ending) - len(str(root)) - 1
while len(filename) > max_length_chars or \
len(filename.encode('utf-8')) > max_length_bytes or \
len(filename) > max_path_length:
filename = filename[:-1]
return filename + ending
return Path(root, filename + ending)
@staticmethod
def find_max_path_length() -> int:
try:
return int(subprocess.check_output(['getconf', 'PATH_MAX', '/']))
except (ValueError, subprocess.CalledProcessError, OSError):
if platform.system() == 'Windows':
return 260
else:
return 4096
def format_resource_paths(
self,

View File

@ -1,11 +1,12 @@
#!/usr/bin/env python3
# coding=utf-8
import platform
import unittest.mock
from datetime import datetime
from pathlib import Path
from typing import Optional
from unittest.mock import MagicMock
import platform
import praw.models
import pytest
@ -28,10 +29,10 @@ def submission() -> MagicMock:
return test
def do_test_string_equality(result: str, expected: str) -> bool:
def do_test_string_equality(result: [Path, str], expected: str) -> bool:
if platform.system() == 'Windows':
expected = FileNameFormatter._format_for_windows(expected)
return expected == result
return str(result).endswith(expected)
def do_test_path_equality(result: Path, expected: str) -> bool:
@ -41,7 +42,7 @@ def do_test_path_equality(result: Path, expected: str) -> bool:
expected = Path(*expected)
else:
expected = Path(expected)
return result == expected
return str(result).endswith(str(expected))
@pytest.fixture(scope='session')
@ -172,8 +173,9 @@ def test_format_multiple_resources():
mocks.append(new_mock)
test_formatter = FileNameFormatter('{TITLE}', '', 'ISO')
results = test_formatter.format_resource_paths(mocks, Path('.'))
results = set([str(res[0]) for res in results])
assert results == {'test_1.png', 'test_2.png', 'test_3.png', 'test_4.png'}
results = set([str(res[0].name) for res in results])
expected = {'test_1.png', 'test_2.png', 'test_3.png', 'test_4.png'}
assert results == expected
@pytest.mark.parametrize(('test_filename', 'test_ending'), (
@ -183,10 +185,11 @@ def test_format_multiple_resources():
('😍💕✨' * 100, '_1.png'),
))
def test_limit_filename_length(test_filename: str, test_ending: str):
result = FileNameFormatter._limit_file_name_length(test_filename, test_ending)
assert len(result) <= 255
assert len(result.encode('utf-8')) <= 255
assert isinstance(result, str)
result = FileNameFormatter._limit_file_name_length(test_filename, test_ending, Path('.'))
assert len(result.name) <= 255
assert len(result.name.encode('utf-8')) <= 255
assert len(str(result)) <= FileNameFormatter.find_max_path_length()
assert isinstance(result, Path)
@pytest.mark.parametrize(('test_filename', 'test_ending', 'expected_end'), (
@ -201,11 +204,11 @@ def test_limit_filename_length(test_filename: str, test_ending: str):
('😍💕✨' * 100 + '_aaa1aa', '_1.png', '_aaa1aa_1.png'),
))
def test_preserve_id_append_when_shortening(test_filename: str, test_ending: str, expected_end: str):
result = FileNameFormatter._limit_file_name_length(test_filename, test_ending)
assert len(result) <= 255
assert len(result.encode('utf-8')) <= 255
assert isinstance(result, str)
assert result.endswith(expected_end)
result = FileNameFormatter._limit_file_name_length(test_filename, test_ending, Path('.'))
assert len(result.name) <= 255
assert len(result.name.encode('utf-8')) <= 255
assert result.name.endswith(expected_end)
assert len(str(result)) <= FileNameFormatter.find_max_path_length()
def test_shorten_filenames(submission: MagicMock, tmp_path: Path):
@ -295,7 +298,7 @@ def test_format_archive_entry_comment(
test_formatter = FileNameFormatter(test_file_scheme, test_folder_scheme, 'ISO')
test_entry = Resource(test_comment, '', '.json')
result = test_formatter.format_path(test_entry, tmp_path)
assert do_test_string_equality(result.name, expected_name)
assert do_test_string_equality(result, expected_name)
@pytest.mark.parametrize(('test_folder_scheme', 'expected'), (
@ -364,3 +367,16 @@ def test_time_string_formats(test_time_format: str, expected: str):
test_formatter = FileNameFormatter('{TITLE}', '', test_time_format)
result = test_formatter._convert_timestamp(test_time.timestamp())
assert result == expected
def test_get_max_path_length():
result = FileNameFormatter.find_max_path_length()
assert result in (4096, 260, 1024)
def test_windows_max_path(tmp_path: Path):
with unittest.mock.patch('platform.system', return_value='Windows'):
with unittest.mock.patch('bdfr.file_name_formatter.FileNameFormatter.find_max_path_length', return_value=260):
result = FileNameFormatter._limit_file_name_length('test' * 100, '_1.png', tmp_path)
assert len(str(result)) <= 260
assert len(result.name) <= (260 - len(str(tmp_path)))