Merge pull request #477 from Serene-Arc/bug_fix_472
This commit is contained in:
commit
fdda8f95e6
@ -5,6 +5,7 @@ import re
|
||||
from typing import Optional
|
||||
|
||||
import bs4
|
||||
import requests
|
||||
from praw.models import Submission
|
||||
|
||||
from bdfr.exceptions import SiteDownloaderError
|
||||
@ -20,21 +21,21 @@ class Gallery(BaseDownloader):
|
||||
super().__init__(post)
|
||||
|
||||
def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:
|
||||
image_urls = self._get_links(self.post.url)
|
||||
image_urls = self._get_links(self.post.gallery_data['items'])
|
||||
if not image_urls:
|
||||
raise SiteDownloaderError('No images found in Reddit gallery')
|
||||
return [Resource(self.post, url) for url in image_urls]
|
||||
|
||||
@staticmethod
|
||||
def _get_links(url: str) -> list[str]:
|
||||
resource_headers = {
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)'
|
||||
' Chrome/67.0.3396.87 Safari/537.36 OPR/54.0.2952.64',
|
||||
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
|
||||
}
|
||||
page = Gallery.retrieve_url(url, headers=resource_headers)
|
||||
soup = bs4.BeautifulSoup(page.text, 'html.parser')
|
||||
|
||||
links = soup.findAll('a', attrs={'target': '_blank', 'href': re.compile(r'https://preview\.redd\.it.*')})
|
||||
links = [link.get('href') for link in links]
|
||||
return links
|
||||
@ staticmethod
|
||||
def _get_links(id_dict: list[dict]) -> list[str]:
|
||||
out = []
|
||||
for item in id_dict:
|
||||
image_id = item['media_id']
|
||||
possible_extensions = ('.jpg', '.png', '.gif', '.gifv', '.jpeg')
|
||||
for extension in possible_extensions:
|
||||
test_url = f'https://i.redd.it/{image_id}{extension}'
|
||||
response = requests.head(test_url)
|
||||
if response.status_code == 200:
|
||||
out.append(test_url)
|
||||
break
|
||||
return out
|
||||
|
@ -37,9 +37,10 @@ class Imgur(BaseDownloader):
|
||||
|
||||
@staticmethod
|
||||
def _get_data(link: str) -> dict:
|
||||
if re.match(r'.*\.gifv$', link):
|
||||
link = link.rstrip('?')
|
||||
if re.match(r'(?i).*\.gifv$', link):
|
||||
link = link.replace('i.imgur', 'imgur')
|
||||
link = re.sub('\\.gifv$', '', link)
|
||||
link = re.sub('(?i)\\.gifv$', '', link)
|
||||
|
||||
res = Imgur.retrieve_url(link, cookies={'over18': '1', 'postpagebeta': '0'})
|
||||
|
||||
|
@ -8,30 +8,32 @@ from bdfr.site_downloaders.gallery import Gallery
|
||||
|
||||
|
||||
@pytest.mark.online
|
||||
@pytest.mark.parametrize(('test_url', 'expected'), (
|
||||
('https://www.reddit.com/gallery/m6lvrh', {
|
||||
'https://preview.redd.it/18nzv9ch0hn61.jpg?width=4160&'
|
||||
'format=pjpg&auto=webp&s=470a825b9c364e0eace0036882dcff926f821de8',
|
||||
'https://preview.redd.it/jqkizcch0hn61.jpg?width=4160&'
|
||||
'format=pjpg&auto=webp&s=ae4f552a18066bb6727676b14f2451c5feecf805',
|
||||
'https://preview.redd.it/k0fnqzbh0hn61.jpg?width=4160&'
|
||||
'format=pjpg&auto=webp&s=c6a10fececdc33983487c16ad02219fd3fc6cd76',
|
||||
'https://preview.redd.it/m3gamzbh0hn61.jpg?width=4160&'
|
||||
'format=pjpg&auto=webp&s=0dd90f324711851953e24873290b7f29ec73c444'
|
||||
@pytest.mark.parametrize(('test_ids', 'expected'), (
|
||||
([
|
||||
{'media_id': '18nzv9ch0hn61'},
|
||||
{'media_id': 'jqkizcch0hn61'},
|
||||
{'media_id': 'k0fnqzbh0hn61'},
|
||||
{'media_id': 'm3gamzbh0hn61'},
|
||||
], {
|
||||
'https://i.redd.it/18nzv9ch0hn61.jpg',
|
||||
'https://i.redd.it/jqkizcch0hn61.jpg',
|
||||
'https://i.redd.it/k0fnqzbh0hn61.jpg',
|
||||
'https://i.redd.it/m3gamzbh0hn61.jpg'
|
||||
}),
|
||||
('https://www.reddit.com/gallery/ljyy27', {
|
||||
'https://preview.redd.it/04vxj25uqih61.png?width=92&'
|
||||
'format=png&auto=webp&s=6513f3a5c5128ee7680d402cab5ea4fb2bbeead4',
|
||||
'https://preview.redd.it/0fnx83kpqih61.png?width=241&'
|
||||
'format=png&auto=webp&s=655e9deb6f499c9ba1476eaff56787a697e6255a',
|
||||
'https://preview.redd.it/7zkmr1wqqih61.png?width=237&'
|
||||
'format=png&auto=webp&s=19de214e634cbcad9959f19570c616e29be0c0b0',
|
||||
'https://preview.redd.it/u37k5gxrqih61.png?width=443&'
|
||||
'format=png&auto=webp&s=e74dae31841fe4a2545ffd794d3b25b9ff0eb862'
|
||||
([
|
||||
{'media_id': '04vxj25uqih61'},
|
||||
{'media_id': '0fnx83kpqih61'},
|
||||
{'media_id': '7zkmr1wqqih61'},
|
||||
{'media_id': 'u37k5gxrqih61'},
|
||||
], {
|
||||
'https://i.redd.it/04vxj25uqih61.png',
|
||||
'https://i.redd.it/0fnx83kpqih61.png',
|
||||
'https://i.redd.it/7zkmr1wqqih61.png',
|
||||
'https://i.redd.it/u37k5gxrqih61.png'
|
||||
}),
|
||||
))
|
||||
def test_gallery_get_links(test_url: str, expected: set[str]):
|
||||
results = Gallery._get_links(test_url)
|
||||
def test_gallery_get_links(test_ids: list[dict], expected: set[str]):
|
||||
results = Gallery._get_links(test_ids)
|
||||
assert set(results) == expected
|
||||
|
||||
|
||||
@ -39,16 +41,20 @@ def test_gallery_get_links(test_url: str, expected: set[str]):
|
||||
@pytest.mark.reddit
|
||||
@pytest.mark.parametrize(('test_submission_id', 'expected_hashes'), (
|
||||
('m6lvrh', {
|
||||
'6c8a892ae8066cbe119218bcaac731e1',
|
||||
'93ce177f8cb7994906795f4615114d13',
|
||||
'9a293adf19354f14582608cf22124574',
|
||||
'b73e2c3daee02f99404644ea02f1ae65'
|
||||
'5c42b8341dd56eebef792e86f3981c6a',
|
||||
'8f38d76da46f4057bf2773a778e725ca',
|
||||
'f5776f8f90491c8b770b8e0a6bfa49b3',
|
||||
'fa1a43c94da30026ad19a9813a0ed2c2',
|
||||
}),
|
||||
('ljyy27', {
|
||||
'1bc38bed88f9c4770e22a37122d5c941',
|
||||
'2539a92b78f3968a069df2dffe2279f9',
|
||||
'37dea50281c219b905e46edeefc1a18d',
|
||||
'ec4924cf40549728dcf53dd40bc7a73c'
|
||||
'359c203ec81d0bc00e675f1023673238',
|
||||
'79262fd46bce5bfa550d878a3b898be4',
|
||||
'808c35267f44acb523ce03bfa5687404',
|
||||
'ec8b65bdb7f1279c4b3af0ea2bbb30c3',
|
||||
}),
|
||||
('nxyahw', {
|
||||
'b89a3f41feb73ec1136ec4ffa7353eb1',
|
||||
'cabb76fd6fd11ae6e115a2039eb09f04',
|
||||
}),
|
||||
))
|
||||
def test_gallery_download(test_submission_id: str, expected_hashes: set[str], reddit_instance: praw.Reddit):
|
||||
|
@ -132,10 +132,16 @@ def test_imgur_extension_validation_bad(test_extension: str):
|
||||
),
|
||||
(
|
||||
'https://i.imgur.com/lFJai6i.gifv',
|
||||
(
|
||||
'01a6e79a30bec0e644e5da12365d5071',
|
||||
),
|
||||
)
|
||||
('01a6e79a30bec0e644e5da12365d5071',),
|
||||
),
|
||||
(
|
||||
'https://i.imgur.com/ywSyILa.gifv?',
|
||||
('56d4afc32d2966017c38d98568709b45',),
|
||||
),
|
||||
(
|
||||
'https://imgur.com/ubYwpbk.GIFV',
|
||||
('d4a774aac1667783f9ed3a1bd02fac0c',),
|
||||
),
|
||||
))
|
||||
def test_find_resources(test_url: str, expected_hashes: list[str]):
|
||||
mock_download = Mock()
|
||||
|
@ -31,6 +31,7 @@ def test_get_link(test_url: str, expected: str):
|
||||
('https://redgifs.com/watch/springgreendecisivetaruca', '8dac487ac49a1f18cc1b4dabe23f0869'),
|
||||
('https://www.gifdeliverynetwork.com/maturenexthippopotamus', '9bec0a9e4163a43781368ed5d70471df'),
|
||||
('https://www.gifdeliverynetwork.com/regalshoddyhorsechestnutleafminer', '8afb4e2c090a87140230f2352bf8beba'),
|
||||
('https://redgifs.com/watch/leafysaltydungbeetle', '076792c660b9c024c0471ef4759af8bd'),
|
||||
))
|
||||
def test_download_resource(test_url: str, expected_hash: str):
|
||||
mock_submission = Mock()
|
||||
|
Loading…
Reference in New Issue
Block a user