9849c0439b
Minimalize README
663 lines
22 KiB
Python
663 lines
22 KiB
Python
#!/usr/bin/env python
|
|
|
|
"""
|
|
This program downloads imgur, gfycat and direct image and video links of
|
|
saved posts from a reddit account. It is written in Python 3.
|
|
"""
|
|
|
|
import argparse
|
|
import logging
|
|
import os
|
|
import sys
|
|
import time
|
|
from io import StringIO
|
|
from pathlib import Path, PurePath
|
|
|
|
from src.downloader import Direct, Gfycat, Imgur, Self
|
|
from src.errors import *
|
|
from src.parser import LinkDesigner
|
|
from src.searcher import getPosts
|
|
from src.tools import (GLOBAL, createLogFile, jsonFile, nameCorrector,
|
|
printToFile)
|
|
|
|
__author__ = "Ali Parlakci"
|
|
__license__ = "GPL"
|
|
__version__ = "1.0.1"
|
|
__maintainer__ = "Ali Parlakci"
|
|
__email__ = "parlakciali@gmail.com"
|
|
|
|
def getConfig(configFileName):
    """Return the credentials stored in the given JSON config file.

    When the file already exists, an empty "reddit_refresh_token" entry is
    deleted from it and every required key that is missing or empty is
    requested on standard input and added to the file. When the file does
    not exist, all required keys are requested and added in one go.
    The value returned is whatever the file object's ``read()`` yields
    after those updates.
    """

    requiredKeys = ('imgur_client_id', 'imgur_client_secret')

    # The existence check must happen before jsonFile() is constructed.
    if not os.path.exists(configFileName):
        freshFile = jsonFile(configFileName)
        answers = {}
        for key in requiredKeys:
            answers[key] = input(key + ": ")
        freshFile.add(answers)
        return freshFile.read()

    storedFile = jsonFile(configFileName)
    stored = storedFile.read()

    if "reddit_refresh_token" in stored \
       and stored["reddit_refresh_token"] == "":
        storedFile.delete("reddit_refresh_token")

    for key in requiredKeys:
        if key not in stored or stored[key] == "":
            print(key,": ")
            storedFile.add({key:input()})

    # Re-open the file so the returned content includes the additions.
    return jsonFile(configFileName).read()
|
|
|
|
def parseArguments(arguments=None):
    """Initialize argparse, add arguments and parse them.

    Parameters:
        arguments: optional list of argument strings to parse. When it is
            omitted, None or empty, the process command line
            (``sys.argv[1:]``) is parsed instead.

    Returns:
        The ``argparse.Namespace`` produced by the parser.

    ``--user`` is mandatory whenever ``--multireddit`` or ``--submitted``
    appears in the arguments that are actually being parsed; argparse
    exits with an error message when it is missing.
    """

    # Resolve the argument list once so that the "--user is required"
    # check looks at the same arguments that will be parsed.
    argv = list(arguments) if arguments else sys.argv[1:]
    userRequired = "--multireddit" in argv or "--submitted" in argv

    parser = argparse.ArgumentParser(allow_abbrev=False,
                                     description="This program downloads "
                                                 "media from reddit "
                                                 "posts")
    parser.add_argument("--directory",
                        help="Specifies the directory where posts will be "
                             "downloaded to",
                        metavar="DIRECTORY")

    parser.add_argument("--link","-l",
                        help="Get posts from link",
                        metavar="link")

    parser.add_argument("--saved",
                        action="store_true",
                        help="Triggers saved mode")

    parser.add_argument("--submitted",
                        action="store_true",
                        help="Gets posts of --user")

    parser.add_argument("--upvoted",
                        action="store_true",
                        help="Gets upvoted posts of --user")

    parser.add_argument("--log",
                        help="Takes a log file which created by itself "
                             "(json files), reads posts and tries downloadin"
                             "g them again.",
                        metavar="LOG FILE")

    parser.add_argument("--subreddit",
                        nargs="+",
                        help="Triggers subreddit mode and takes subreddit's "
                             "name without r/. use \"frontpage\" for frontpage",
                        metavar="SUBREDDIT",
                        type=str)

    parser.add_argument("--multireddit",
                        help="Triggers multireddit mode and takes "
                             "multireddit's name without m/",
                        metavar="MULTIREDDIT",
                        type=str)

    parser.add_argument("--user",
                        help="reddit username if needed. use \"me\" for "
                             "current user",
                        required=userRequired,
                        metavar="redditor",
                        type=str)

    parser.add_argument("--search",
                        help="Searches for given query in given subreddits",
                        metavar="query",
                        type=str)

    parser.add_argument("--sort",
                        help="Either hot, top, new, controversial, rising "
                             "or relevance default: hot",
                        choices=[
                            "hot","top","new","controversial","rising",
                            "relevance"
                        ],
                        metavar="SORT TYPE",
                        type=str)

    parser.add_argument("--limit",
                        help="default: unlimited",
                        metavar="Limit",
                        type=int)

    parser.add_argument("--time",
                        help="Either hour, day, week, month, year or all."
                             " default: all",
                        choices=["all","hour","day","week","month","year"],
                        metavar="TIME_LIMIT",
                        type=str)

    parser.add_argument("--NoDownload",
                        help="Just gets the posts and store them in a file"
                             " for downloading later",
                        action="store_true",
                        default=False)

    return parser.parse_args(argv)
|
|
|
|
def checkConflicts():
    """Validate the combination of command-line arguments.

    Raises:
        ProgramModeError: unless exactly one of the program modes
            (saved, subreddit, submitted, search, log, link, upvoted)
            is selected.
        SearchModeError: when search is combined with saved, submitted
            or upvoted.
        RedditorNameError: when submitted or upvoted is selected without
            a redditor name.
    """

    args = GLOBAL.arguments
    userGiven = args.user is not None

    modeNames = (
        "saved","subreddit","submitted","search","log","link","upvoted"
    )

    # 1 for every mode that was actually requested, 0 otherwise.
    active = {}
    for mode in modeNames:
        setting = getattr(args, mode)
        active[mode] = 0 if (setting is None or setting is False) else 1

    if sum(active.values()) != 1:
        raise ProgramModeError("Invalid program mode")

    # NOTE(review): exactly one mode is active past this point, so the
    # pairwise search checks below look unreachable -- confirm intent.
    searchConflicts = (
        ("saved", "You cannot search in your saved posts"),
        ("submitted", "You cannot search in submitted posts"),
        ("upvoted", "You cannot search in upvoted posts"),
    )
    for mode, message in searchConflicts:
        if active["search"] + active[mode] == 2:
            raise SearchModeError(message)

    if active["upvoted"] + active["submitted"] == 1 and not userGiven:
        raise RedditorNameError("No redditor name given")
|
|
|
|
class PromptUser:
    """Interactive replacement for the command-line mode arguments.

    Instantiating the class runs a dialogue on standard input and stores
    the answers on ``GLOBAL.arguments`` (search, subreddit, user, sort,
    time, submitted, upvoted, saved, log and limit, depending on the
    chosen mode).
    """

    @staticmethod
    def chooseFrom(choices):
        """Print a numbered menu and return the entry the user picks.

        Parameters:
            choices: list of lower-case option names.

        The user may answer with an option name (case-insensitive) or
        with its 1-based number; "0" quits the program. The prompt is
        repeated until the answer is valid.

        Returns:
            The chosen element of ``choices``.
        """
        print()
        choicesByIndex = [str(x) for x in range(len(choices)+1)]
        for order, mode in enumerate(choices, start=1):
            print("{indent}[{order}] {mode}".format(
                indent=" "*4,order=order,mode=mode
            ))
        print(" "*4+"[0] exit\n")

        validAnswers = list(choices) + choicesByIndex

        # Normalise once so that the value that was validated is also the
        # value that is returned, and re-read into the same variable so an
        # invalid answer can actually be corrected.
        choice = input("> ").lower()
        while choice not in validAnswers:
            print("Invalid input\n")
            choice = input("> ").lower()

        if choice == "0":
            quit()
        elif choice in choicesByIndex:
            return choices[int(choice)-1]
        else:
            return choice

    def _askSortType(self, sortTypes):
        """Ask for a sort type, store it in GLOBAL.arguments.sort, return it."""
        print("\nselect sort type:")
        sortType = self.chooseFrom(sortTypes)
        GLOBAL.arguments.sort = sortType
        return sortType

    def _askTimeFilter(self):
        """Ask for a time filter and store it in GLOBAL.arguments.time."""
        print("\nselect time filter:")
        timeFilters = [
            "hour","day","week","month","year","all"
        ]
        GLOBAL.arguments.time = self.chooseFrom(timeFilters)

    def _askSortAndTime(self, sortTypes, timedSortTypes):
        """Ask for a sort type; ask for a time filter only when the chosen
        sort type is in timedSortTypes, otherwise set the time to "all".
        """
        if self._askSortType(sortTypes) in timedSortTypes:
            self._askTimeFilter()
        else:
            GLOBAL.arguments.time = "all"

    def __init__(self):
        print("select program mode:")
        programModes = [
            "search","subreddit","multireddit",
            "submitted","upvoted","saved","log"
        ]
        programMode = self.chooseFrom(programModes)

        if programMode == "search":
            GLOBAL.arguments.search = input("\nquery: ")
            GLOBAL.arguments.subreddit = input("\nsubreddit: ")

            self._askSortType(["relevance","top","new"])
            self._askTimeFilter()

        elif programMode == "subreddit":
            GLOBAL.arguments.subreddit = input("\nsubreddit: ")
            # Several subreddits separated by blanks are joined with "+".
            if " " in GLOBAL.arguments.subreddit:
                GLOBAL.arguments.subreddit = "+".join(
                    GLOBAL.arguments.subreddit.split()
                )

            self._askSortAndTime(
                ["hot","top","new","rising","controversial"],
                ["top","controversial"]
            )

        elif programMode == "multireddit":
            GLOBAL.arguments.user = input("\nredditor: ")
            # The multireddit name is stored in the subreddit argument.
            GLOBAL.arguments.subreddit = input("\nmultireddit: ")

            self._askSortAndTime(
                ["hot","top","new","rising","controversial"],
                ["top","controversial"]
            )

        elif programMode == "submitted":
            GLOBAL.arguments.submitted = True
            GLOBAL.arguments.user = input("\nredditor: ")

            self._askSortAndTime(
                ["hot","top","new","controversial"],
                ["top"]
            )

        elif programMode == "upvoted":
            GLOBAL.arguments.upvoted = True
            GLOBAL.arguments.user = input("\nredditor: ")

        elif programMode == "saved":
            GLOBAL.arguments.saved = True

        elif programMode == "log":
            # Keep asking until the answer names an existing file.
            while True:
                GLOBAL.arguments.log = input("\nlog file directory:")
                if Path(GLOBAL.arguments.log ).is_file():
                    break

        # Keep asking until the answer parses as an integer.
        while True:
            try:
                GLOBAL.arguments.limit = int(input("\nlimit: "))
                break
            except ValueError:
                pass
|
|
|
|
def prepareAttributes():
    """Translate GLOBAL.arguments into the attribute dictionary that is
    handed to getPosts.

    Returns:
        For link mode, the object returned by LinkDesigner with search,
        sort and time overridden by any explicitly given arguments;
        otherwise a dict holding sort, time and the key of the selected
        mode (subreddit, saved, upvoted or submitted), plus user and
        search when given. "limit" is always set.

    Raises:
        InvalidRedditLink: propagated from LinkDesigner.
        InvalidSortingType: when submitted mode is combined with the
            "rising" sort type.
    """
    ATTRIBUTES = {}

    if GLOBAL.arguments.user is not None:
        ATTRIBUTES["user"] = GLOBAL.arguments.user

    if GLOBAL.arguments.search is not None:
        ATTRIBUTES["search"] = GLOBAL.arguments.search
        # These sort types are replaced by "relevance" for a search.
        if GLOBAL.arguments.sort in ("hot", "controversial", "rising"):
            GLOBAL.arguments.sort = "relevance"

    if GLOBAL.arguments.sort is not None:
        ATTRIBUTES["sort"] = GLOBAL.arguments.sort
    elif GLOBAL.arguments.submitted:
        ATTRIBUTES["sort"] = "new"
    else:
        ATTRIBUTES["sort"] = "hot"

    if GLOBAL.arguments.time is not None:
        ATTRIBUTES["time"] = GLOBAL.arguments.time
    else:
        ATTRIBUTES["time"] = "all"

    if GLOBAL.arguments.link is not None:

        GLOBAL.arguments.link = GLOBAL.arguments.link.strip("\"")

        # InvalidRedditLink is left to propagate unchanged so the caller
        # receives the original exception instance.
        ATTRIBUTES = LinkDesigner(GLOBAL.arguments.link)

        if GLOBAL.arguments.search is not None:
            ATTRIBUTES["search"] = GLOBAL.arguments.search

        if GLOBAL.arguments.sort is not None:
            ATTRIBUTES["sort"] = GLOBAL.arguments.sort

        if GLOBAL.arguments.time is not None:
            ATTRIBUTES["time"] = GLOBAL.arguments.time

    elif GLOBAL.arguments.subreddit is not None:
        if isinstance(GLOBAL.arguments.subreddit, list):
            GLOBAL.arguments.subreddit = "+".join(GLOBAL.arguments.subreddit)

        ATTRIBUTES["subreddit"] = GLOBAL.arguments.subreddit

    elif GLOBAL.arguments.saved is True:
        ATTRIBUTES["saved"] = True

    elif GLOBAL.arguments.upvoted is True:
        ATTRIBUTES["upvoted"] = True

    # --submitted is a store_true flag: it is False, not None, when the
    # option is absent, so it has to be tested for truth.
    elif GLOBAL.arguments.submitted is True:
        ATTRIBUTES["submitted"] = True

        if GLOBAL.arguments.sort == "rising":
            raise InvalidSortingType

    ATTRIBUTES["limit"] = GLOBAL.arguments.limit

    return ATTRIBUTES
|
|
|
|
def postFromLog(fileName):
    """Read a log file and collect the submissions stored in it.

    The "HEADER" entry is dropped when present. For every remaining entry
    the last element of its list is taken, and it is kept only when its
    'postType' is not None. Prints a message and quits when fileName is
    not an existing file.

    Returns a list of submission dictionaries.
    """
    if not Path(fileName).is_file():
        print("File not found")
        quit()

    content = jsonFile(fileName).read()

    # Remove the header entry if there is one.
    content.pop("HEADER", None)

    latestEntries = (content[key][-1] for key in content)
    return [
        entry for entry in latestEntries
        if entry['postType'] is not None
    ]
|
|
|
|
def postExists(POST):
    """Tell whether a file for the given post is already on disk.

    Looks inside GLOBAL.directory/<postSubreddit> for a file named
    "<corrected title>_<postId>" and then for one named "<postId>", each
    with any of the known extensions. Returns True on the first hit and
    False when none of the candidates exists.
    """

    correctedTitle = nameCorrector(POST['postTitle'])
    candidateNames = (correctedTitle + "_" + POST['postId'], POST['postId'])
    folder = GLOBAL.directory / POST["postSubreddit"]
    knownExtensions = (".jpg",".png",".mp4",".gif",".webm",".md")

    for baseName in candidateNames:
        if any((folder / (baseName+suffix)).exists()
               for suffix in knownExtensions):
            return True

    return False
|
|
|
|
def download(submissions):
    """Analyze list of submissions and call the right function
    to download each one, catch errors, update the log files

    Parameters:
        submissions: list of submission dictionaries; the keys read here
            are 'postType', 'postSubreddit' and 'postId'.

    Posts that already exist are counted as duplicates. Every failure is
    recorded in the "FAILED" log file under the 1-based position of the
    submission. A summary is printed at the end. Nothing is returned.
    """

    subsLenght = len(submissions)
    lastRequestTime = 0
    downloadedCount = subsLenght
    duplicates = 0

    FAILED_FILE = createLogFile("FAILED")

    for i, submission in enumerate(submissions):
        print("\n({}/{})".format(i+1,subsLenght))
        print(
            "https://reddit.com/r/{subreddit}/comments/{id}".format(
                subreddit=submission['postSubreddit'],
                id=submission['postId']
            )
        )

        if postExists(submission):
            print(submission['postType'].upper())
            print("It already exists")
            duplicates += 1
            downloadedCount -= 1
            continue

        directory = GLOBAL.directory / submission['postSubreddit']
        postType = submission['postType']

        if postType == 'imgur':
            print("IMGUR",end="")

            # Space out Imgur requests without spinning the CPU.
            _waitSince(lastRequestTime)
            credit = Imgur.get_credits()

            IMGUR_RESET_TIME = credit['UserReset']-time.time()
            USER_RESET = ("after "
                          + str(int(IMGUR_RESET_TIME/60))
                          + " Minutes "
                          + str(int(IMGUR_RESET_TIME%60))
                          + " Seconds")
            print(
                " => Client: {} - User: {} - Reset {}".format(
                    credit['ClientRemaining'],
                    credit['UserRemaining'],
                    USER_RESET
                )
            )

            if not (credit['UserRemaining'] == 0 or
                    credit['ClientRemaining'] == 0):

                # The original code waits a second time here, right
                # before the download request; that is preserved.
                _waitSince(lastRequestTime)
                lastRequestTime = time.time()

                try:
                    Imgur(directory,submission)

                except FileAlreadyExistsError:
                    print("It already exists")
                    duplicates += 1
                    downloadedCount -= 1

                except ImgurLoginError:
                    print(
                        "Imgur login failed. Quitting the program "
                        "as unexpected errors might occur."
                    )
                    quit()

                except Exception as exception:
                    print(exception)
                    FAILED_FILE.add({int(i+1):[str(exception),submission]})
                    downloadedCount -= 1

            else:
                # At least one of the two counters is zero in this branch.
                if credit['UserRemaining'] == 0:
                    KEYWORD = "user"
                else:
                    KEYWORD = "client"

                limitMessage = '{} LIMIT EXCEEDED\n'.format(KEYWORD.upper())
                print(limitMessage)
                FAILED_FILE.add({int(i+1):[limitMessage,submission]})
                downloadedCount -= 1

        elif postType == 'gfycat':
            print("GFYCAT")
            try:
                Gfycat(directory,submission)

            except FileAlreadyExistsError:
                print("It already exists")
                duplicates += 1
                downloadedCount -= 1

            except NotADownloadableLinkError as exception:
                print("Could not read the page source")
                FAILED_FILE.add({int(i+1):[str(exception),submission]})
                downloadedCount -= 1

            except Exception as exception:
                print(exception)
                FAILED_FILE.add({int(i+1):[str(exception),submission]})
                downloadedCount -= 1

        elif postType == 'direct':
            print("DIRECT")
            try:
                Direct(directory,submission)

            except FileAlreadyExistsError:
                print("It already exists")
                downloadedCount -= 1
                duplicates += 1

            except Exception as exception:
                print(exception)
                FAILED_FILE.add({int(i+1):[str(exception),submission]})
                downloadedCount -= 1

        elif postType == 'self':
            print("SELF")
            try:
                Self(directory,submission)

            except FileAlreadyExistsError:
                print("It already exists")
                downloadedCount -= 1
                duplicates += 1

            except Exception as exception:
                print(exception)
                FAILED_FILE.add({int(i+1):[str(exception),submission]})
                downloadedCount -= 1

        else:
            print("No match found, skipping...")
            downloadedCount -= 1

    if duplicates:
        print("\n There was {} duplicates".format(duplicates))

    if downloadedCount == 0:
        print(" Nothing downloaded :(")

    else:
        print(" Total of {} links downloaded!".format(downloadedCount))


def _waitSince(lastRequestTime, minimumGap=3):
    """Sleep until at least minimumGap seconds have passed since
    lastRequestTime (a time.time() value); return at once if they have.
    """
    remaining = minimumGap - (time.time() - lastRequestTime)
    if remaining > 0:
        time.sleep(remaining)
|
|
|
|
def main():
    """Run the program: parse the arguments, settle the download
    directory and program mode, load the config, then either retry the
    posts of a log file or fetch posts and download them.
    """
    GLOBAL.arguments = parseArguments()

    # Fall back to asking for the directory when it was not given.
    chosenDirectory = GLOBAL.arguments.directory
    if chosenDirectory is None:
        chosenDirectory = input("download directory: ")
    GLOBAL.directory = Path(chosenDirectory)

    print("\n"," ".join(sys.argv),"\n")

    try:
        checkConflicts()
    except ProgramModeError:
        # No usable mode on the command line: ask interactively.
        PromptUser()
    except Exception as problem:
        print(problem)
        quit()

    GLOBAL.config = getConfig("config.json")

    logFile = GLOBAL.arguments.log
    if logFile is not None:
        download(postFromLog(Path(logFile)))
        quit()

    failure = None
    try:
        POSTS = getPosts(prepareAttributes())
    except InsufficientPermission:
        failure = "You do not have permission to do that"
    except NoMatchingSubmissionFound:
        failure = "No matching submission was found"
    except NoRedditSupoort:
        failure = "Reddit does not support that"
    except NoPrawSupport:
        failure = "PRAW does not support that"
    except MultiredditNotFound:
        failure = "Multireddit not found"
    except InvalidSortingType:
        failure = "Invalid sorting type has given"
    except InvalidRedditLink:
        failure = "Invalid reddit link"

    if failure is not None:
        print(failure)
        quit()

    if POSTS is None:
        print("I could not find any posts in that URL")
        quit()

    if not GLOBAL.arguments.NoDownload:
        download(POSTS)
    else:
        quit()
|
|
|
|
# Script entry point: run main() with logging captured in memory and keep
# the console open until the user presses enter.
if __name__ == "__main__":

    # Log records are collected in a string buffer so they can be printed
    # in one piece if the run fails.
    log_stream = StringIO()
    logging.basicConfig(stream=log_stream, level=logging.INFO)

    try:
        # Keep a reference to the built-in print, then rebind the
        # module-level name so every print call in this module goes
        # through printToFile.
        VanillaPrint = print
        print = printToFile
        GLOBAL.RUN_TIME = time.time()
        main()
    except KeyboardInterrupt:
        # Give GLOBAL.directory a value if the interrupt arrived before
        # one was set (presumably printToFile needs it -- TODO confirm).
        if GLOBAL.directory is None:
            GLOBAL.directory = Path(".\\")
        print("\nQUITTING...")
        quit()
    except Exception as exception:
        # full_exc_info is not defined in this file; presumably it comes
        # from the `from src.errors import *` import -- TODO confirm.
        logging.error("Runtime error!", exc_info=full_exc_info(sys.exc_info()))
        print(log_stream.getvalue())

    input("Press enter to quit\n")
|