Merge pull request #477 from Serene-Arc/bug_fix_472

This commit is contained in:
Serene 2021-06-30 12:03:46 +10:00 committed by GitHub
commit fdda8f95e6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 64 additions and 49 deletions

View File

@ -5,6 +5,7 @@ import re
from typing import Optional
import bs4
import requests
from praw.models import Submission
from bdfr.exceptions import SiteDownloaderError
@ -20,21 +21,21 @@ class Gallery(BaseDownloader):
super().__init__(post)
def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:
image_urls = self._get_links(self.post.url)
image_urls = self._get_links(self.post.gallery_data['items'])
if not image_urls:
raise SiteDownloaderError('No images found in Reddit gallery')
return [Resource(self.post, url) for url in image_urls]
@staticmethod
def _get_links(url: str) -> list[str]:
    """Collect the preview image links found on a Reddit gallery page.

    The page at ``url`` is fetched through ``Gallery.retrieve_url`` with browser-like request
    headers and parsed with BeautifulSoup's ``html.parser``.

    Args:
        url: Address of the gallery page to scan.

    Returns:
        The ``href`` of every ``<a>`` tag that has ``target="_blank"`` and whose ``href`` matches
        the ``https://preview.redd.it`` pattern, in the order BeautifulSoup yields them.
    """
    preview_href = re.compile(r'https://preview\.redd\.it.*')
    resource_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)'
                      ' Chrome/67.0.3396.87 Safari/537.36 OPR/54.0.2952.64',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
    }
    page = Gallery.retrieve_url(url, headers=resource_headers)
    soup = bs4.BeautifulSoup(page.text, 'html.parser')
    anchors = soup.findAll('a', attrs={'target': '_blank', 'href': preview_href})
    return [anchor.get('href') for anchor in anchors]
@staticmethod
def _get_links(id_dict: list[dict]) -> list[str]:
    """Resolve gallery items to direct ``i.redd.it`` image URLs.

    Only the ``'media_id'`` of each item is read; the file extension is discovered by probing.
    For every item each candidate extension is tried in turn with a HEAD request, and the first
    URL that answers with HTTP 200 is kept.

    Args:
        id_dict: Gallery items; each entry must provide a ``'media_id'`` key.

    Returns:
        One URL per item for which a candidate extension answered 200, in input order. Items for
        which no candidate answered 200 are left out of the result.

    Raises:
        KeyError: If an item has no ``'media_id'`` key.
        requests.exceptions.RequestException: If a probe fails or exceeds the timeout.
    """
    # Candidates are tried in this order; the first one answering 200 wins.
    possible_extensions = ('.jpg', '.png', '.gif', '.gifv', '.jpeg')
    # requests applies no timeout by default, so a stalled connection would block forever.
    probe_timeout = 10  # seconds
    out = []
    for item in id_dict:
        image_id = item['media_id']
        for extension in possible_extensions:
            test_url = f'https://i.redd.it/{image_id}{extension}'
            response = requests.head(test_url, timeout=probe_timeout)
            if response.status_code == 200:
                out.append(test_url)
                break
    return out

View File

@ -37,9 +37,10 @@ class Imgur(BaseDownloader):
@staticmethod
def _get_data(link: str) -> dict:
if re.match(r'.*\.gifv$', link):
link = link.rstrip('?')
if re.match(r'(?i).*\.gifv$', link):
link = link.replace('i.imgur', 'imgur')
link = re.sub('\\.gifv$', '', link)
link = re.sub('(?i)\\.gifv$', '', link)
res = Imgur.retrieve_url(link, cookies={'over18': '1', 'postpagebeta': '0'})

View File

@ -8,30 +8,32 @@ from bdfr.site_downloaders.gallery import Gallery
@pytest.mark.online
@pytest.mark.parametrize(('test_url', 'expected'), (
('https://www.reddit.com/gallery/m6lvrh', {
'https://preview.redd.it/18nzv9ch0hn61.jpg?width=4160&'
'format=pjpg&auto=webp&s=470a825b9c364e0eace0036882dcff926f821de8',
'https://preview.redd.it/jqkizcch0hn61.jpg?width=4160&'
'format=pjpg&auto=webp&s=ae4f552a18066bb6727676b14f2451c5feecf805',
'https://preview.redd.it/k0fnqzbh0hn61.jpg?width=4160&'
'format=pjpg&auto=webp&s=c6a10fececdc33983487c16ad02219fd3fc6cd76',
'https://preview.redd.it/m3gamzbh0hn61.jpg?width=4160&'
'format=pjpg&auto=webp&s=0dd90f324711851953e24873290b7f29ec73c444'
@pytest.mark.parametrize(('test_ids', 'expected'), (
([
{'media_id': '18nzv9ch0hn61'},
{'media_id': 'jqkizcch0hn61'},
{'media_id': 'k0fnqzbh0hn61'},
{'media_id': 'm3gamzbh0hn61'},
], {
'https://i.redd.it/18nzv9ch0hn61.jpg',
'https://i.redd.it/jqkizcch0hn61.jpg',
'https://i.redd.it/k0fnqzbh0hn61.jpg',
'https://i.redd.it/m3gamzbh0hn61.jpg'
}),
('https://www.reddit.com/gallery/ljyy27', {
'https://preview.redd.it/04vxj25uqih61.png?width=92&'
'format=png&auto=webp&s=6513f3a5c5128ee7680d402cab5ea4fb2bbeead4',
'https://preview.redd.it/0fnx83kpqih61.png?width=241&'
'format=png&auto=webp&s=655e9deb6f499c9ba1476eaff56787a697e6255a',
'https://preview.redd.it/7zkmr1wqqih61.png?width=237&'
'format=png&auto=webp&s=19de214e634cbcad9959f19570c616e29be0c0b0',
'https://preview.redd.it/u37k5gxrqih61.png?width=443&'
'format=png&auto=webp&s=e74dae31841fe4a2545ffd794d3b25b9ff0eb862'
([
{'media_id': '04vxj25uqih61'},
{'media_id': '0fnx83kpqih61'},
{'media_id': '7zkmr1wqqih61'},
{'media_id': 'u37k5gxrqih61'},
], {
'https://i.redd.it/04vxj25uqih61.png',
'https://i.redd.it/0fnx83kpqih61.png',
'https://i.redd.it/7zkmr1wqqih61.png',
'https://i.redd.it/u37k5gxrqih61.png'
}),
))
def test_gallery_get_links(test_url: str, expected: set[str]):
results = Gallery._get_links(test_url)
def test_gallery_get_links(test_ids: list[dict], expected: set[str]):
results = Gallery._get_links(test_ids)
assert set(results) == expected
@ -39,16 +41,20 @@ def test_gallery_get_links(test_url: str, expected: set[str]):
@pytest.mark.reddit
@pytest.mark.parametrize(('test_submission_id', 'expected_hashes'), (
('m6lvrh', {
'6c8a892ae8066cbe119218bcaac731e1',
'93ce177f8cb7994906795f4615114d13',
'9a293adf19354f14582608cf22124574',
'b73e2c3daee02f99404644ea02f1ae65'
'5c42b8341dd56eebef792e86f3981c6a',
'8f38d76da46f4057bf2773a778e725ca',
'f5776f8f90491c8b770b8e0a6bfa49b3',
'fa1a43c94da30026ad19a9813a0ed2c2',
}),
('ljyy27', {
'1bc38bed88f9c4770e22a37122d5c941',
'2539a92b78f3968a069df2dffe2279f9',
'37dea50281c219b905e46edeefc1a18d',
'ec4924cf40549728dcf53dd40bc7a73c'
'359c203ec81d0bc00e675f1023673238',
'79262fd46bce5bfa550d878a3b898be4',
'808c35267f44acb523ce03bfa5687404',
'ec8b65bdb7f1279c4b3af0ea2bbb30c3',
}),
('nxyahw', {
'b89a3f41feb73ec1136ec4ffa7353eb1',
'cabb76fd6fd11ae6e115a2039eb09f04',
}),
))
def test_gallery_download(test_submission_id: str, expected_hashes: set[str], reddit_instance: praw.Reddit):

View File

@ -132,10 +132,16 @@ def test_imgur_extension_validation_bad(test_extension: str):
),
(
'https://i.imgur.com/lFJai6i.gifv',
(
'01a6e79a30bec0e644e5da12365d5071',
),
)
('01a6e79a30bec0e644e5da12365d5071',),
),
(
'https://i.imgur.com/ywSyILa.gifv?',
('56d4afc32d2966017c38d98568709b45',),
),
(
'https://imgur.com/ubYwpbk.GIFV',
('d4a774aac1667783f9ed3a1bd02fac0c',),
),
))
def test_find_resources(test_url: str, expected_hashes: list[str]):
mock_download = Mock()

View File

@ -31,6 +31,7 @@ def test_get_link(test_url: str, expected: str):
('https://redgifs.com/watch/springgreendecisivetaruca', '8dac487ac49a1f18cc1b4dabe23f0869'),
('https://www.gifdeliverynetwork.com/maturenexthippopotamus', '9bec0a9e4163a43781368ed5d70471df'),
('https://www.gifdeliverynetwork.com/regalshoddyhorsechestnutleafminer', '8afb4e2c090a87140230f2352bf8beba'),
('https://redgifs.com/watch/leafysaltydungbeetle', '076792c660b9c024c0471ef4759af8bd'),
))
def test_download_resource(test_url: str, expected_hash: str):
mock_submission = Mock()