diff --git a/main.py b/main.py index 132d417..d52c38b 100644 --- a/main.py +++ b/main.py @@ -6,19 +6,25 @@ import sys import galdPl -import requests +import urllib.request +import urllib.parse import re import os def get_json_files_from_folder(folder): base_url = "https://git.gald.site/gald/galdistream/src/branch/main/resources/" url = base_url + folder - r = requests.get(url, timeout=10) - r.raise_for_status() + + # Použijeme urllib místo requests + req = urllib.request.Request(url) + req.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36') + + with urllib.request.urlopen(req, timeout=10) as response: + html_content = response.read().decode('utf-8') # Hledáme JSON soubory pomocí regex json_pattern = r'href="(/gald/galdistream/src/branch/main/resources/[^"]*\.json)"' - matches = re.findall(json_pattern, r.text) + matches = re.findall(json_pattern, html_content) files = [] for match in matches: @@ -41,11 +47,15 @@ def update_json_db(): url = base_url_raw + file local_path = "resources/" + file try: - r = requests.get(url, timeout=10) - r.raise_for_status() + req = urllib.request.Request(url) + req.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36') + + with urllib.request.urlopen(req, timeout=10) as response: + content = response.read() + os.makedirs(os.path.dirname(local_path), exist_ok=True) with open(local_path, "wb") as f: - f.write(r.content) + f.write(content) except Exception as e: print(f"Chyba při stahování {file}: {e}") diff --git a/test_regex.py b/test_regex.py deleted file mode 100644 index b73929a..0000000 --- a/test_regex.py +++ /dev/null @@ -1,49 +0,0 @@ -import requests -import re -import os - -def get_json_files_from_folder(folder): - base_url = "https://git.gald.site/gald/galdistream/src/branch/main/resources/" - url = base_url + folder - r = requests.get(url, timeout=10) - r.raise_for_status() - - # Hledáme JSON soubory pomocí regex - json_pattern = r'href="(/gald/galdistream/src/branch/main/resources/[^"]*\.json)"' - matches = re.findall(json_pattern, r.text) - - files = [] - for match in matches: - # Extrahujeme pouze název souboru - file_name = match.split("/")[-1] - files.append(file_name) - return files - -def update_json_db(): - base_url_raw = "https://git.gald.site/gald/galdistream/raw/branch/main/resources/" - folders = ["movies", "series"] - all_files = [] - for folder in folders: - try: - files = get_json_files_from_folder(folder) - print(f"Nalezené soubory v {folder}: {files}") - all_files += [f"{folder}/{file}" for file in files] - except Exception as e: - print(f"Chyba při získávání souborů ze složky {folder}: {e}") - - print(f"Celkem souborů ke stažení: {len(all_files)}") - for file in all_files: - url = base_url_raw + file - local_path = "resources/" + file - try: - r = requests.get(url, timeout=10) - r.raise_for_status() - os.makedirs(os.path.dirname(local_path), exist_ok=True) - with open(local_path, "wb") as f: - f.write(r.content) - print(f"Staženo: {file}") - except Exception as e: - print(f"Chyba při stahování {file}: {e}") - -if __name__ == '__main__': - update_json_db() \ No newline at end of file