Replace requests with urllib for the HTTP requests that retrieve JSON files, improving compatibility, and remove the test_regex.py file, which is no longer needed.
This commit is contained in:
24
main.py
24
main.py
@@ -6,19 +6,25 @@
|
||||
|
||||
import sys
|
||||
import galdPl
|
||||
import requests
|
||||
import urllib.request
|
||||
import urllib.parse
|
||||
import re
|
||||
import os
|
||||
|
||||
def get_json_files_from_folder(folder):
|
||||
base_url = "https://git.gald.site/gald/galdistream/src/branch/main/resources/"
|
||||
url = base_url + folder
|
||||
r = requests.get(url, timeout=10)
|
||||
r.raise_for_status()
|
||||
|
||||
# Použijeme urllib místo requests
|
||||
req = urllib.request.Request(url)
|
||||
req.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36')
|
||||
|
||||
with urllib.request.urlopen(req, timeout=10) as response:
|
||||
html_content = response.read().decode('utf-8')
|
||||
|
||||
# Hledáme JSON soubory pomocí regex
|
||||
json_pattern = r'href="(/gald/galdistream/src/branch/main/resources/[^"]*\.json)"'
|
||||
matches = re.findall(json_pattern, r.text)
|
||||
matches = re.findall(json_pattern, html_content)
|
||||
|
||||
files = []
|
||||
for match in matches:
|
||||
@@ -41,11 +47,15 @@ def update_json_db():
|
||||
url = base_url_raw + file
|
||||
local_path = "resources/" + file
|
||||
try:
|
||||
r = requests.get(url, timeout=10)
|
||||
r.raise_for_status()
|
||||
req = urllib.request.Request(url)
|
||||
req.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36')
|
||||
|
||||
with urllib.request.urlopen(req, timeout=10) as response:
|
||||
content = response.read()
|
||||
|
||||
os.makedirs(os.path.dirname(local_path), exist_ok=True)
|
||||
with open(local_path, "wb") as f:
|
||||
f.write(r.content)
|
||||
f.write(content)
|
||||
except Exception as e:
|
||||
print(f"Chyba při stahování {file}: {e}")
|
||||
|
||||
|
||||
@@ -1,49 +0,0 @@
|
||||
import requests
|
||||
import re
|
||||
import os
|
||||
|
||||
def get_json_files_from_folder(folder):
    """Return the names of the JSON files linked from a resources folder page.

    Fetches the repository web listing for ``folder`` under ``resources/``
    and collects the last path component of every ``.json`` link found in
    the returned HTML.

    Any exception raised by ``requests.get`` (10 second timeout) or by
    ``raise_for_status`` on an HTTP error status propagates to the caller.
    """
    listing_url = (
        "https://git.gald.site/gald/galdistream/src/branch/main/resources/"
        + folder
    )
    response = requests.get(listing_url, timeout=10)
    response.raise_for_status()

    # Find the JSON files with a regex: each match is the href path of one file.
    link_re = r'href="(/gald/galdistream/src/branch/main/resources/[^"]*\.json)"'

    # Keep only the file name, i.e. whatever follows the final slash.
    return [
        href.rsplit("/", 1)[-1]
        for href in re.findall(link_re, response.text)
    ]
|
||||
|
||||
def update_json_db():
    """Download the JSON files of the remote movies and series folders.

    First asks ``get_json_files_from_folder`` for the file names in each
    folder, then fetches every file from the raw-content URL and writes it
    under the local ``resources/`` directory, creating sub-directories as
    needed. Failures are reported on stdout and do not stop the run: a
    folder that cannot be listed is skipped, and a file that cannot be
    downloaded or written is skipped.
    """
    raw_root = "https://git.gald.site/gald/galdistream/raw/branch/main/resources/"
    pending = []

    # Phase 1: gather "<folder>/<file name>" entries for both folders.
    for folder in ("movies", "series"):
        try:
            names = get_json_files_from_folder(folder)
            print(f"Nalezené soubory v {folder}: {names}")
            pending.extend(f"{folder}/{name}" for name in names)
        except Exception as e:
            print(f"Chyba při získávání souborů ze složky {folder}: {e}")

    print(f"Celkem souborů ke stažení: {len(pending)}")

    # Phase 2: download each entry to the same relative path locally.
    for file in pending:
        target = "resources/" + file
        try:
            reply = requests.get(raw_root + file, timeout=10)
            reply.raise_for_status()
            os.makedirs(os.path.dirname(target), exist_ok=True)
            with open(target, "wb") as out:
                out.write(reply.content)
            print(f"Staženo: {file}")
        except Exception as e:
            print(f"Chyba při stahování {file}: {e}")
|
||||
|
||||
# Script entry point: refresh the local JSON files when run directly.
if __name__ == "__main__":
    update_json_db()
|
||||
Reference in New Issue
Block a user