Replace requests with urllib for HTTP requests in JSON file retrieval to improve compatibility, and remove the test_regex.py file, which is no longer needed.

2025-07-29 19:18:09 +02:00
parent da4b380b1e
commit c92f583609
2 changed files with 17 additions and 56 deletions
--- a/main.py
+++ b/main.py
@@ -6,19 +6,25 @@

 import sys
 import galdPl
-import requests
+import urllib.request
+import urllib.parse
 import re
 import os

 def get_json_files_from_folder(folder):
    base_url = "https://git.gald.site/gald/galdistream/src/branch/main/resources/"
    url = base_url + folder
-    r = requests.get(url, timeout=10)
-    r.raise_for_status()
+    
+    # Použijeme urllib místo requests
+    req = urllib.request.Request(url)
+    req.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36')
+    
+    with urllib.request.urlopen(req, timeout=10) as response:
+        html_content = response.read().decode('utf-8')
    
    # Hledáme JSON soubory pomocí regex
    json_pattern = r'href="(/gald/galdistream/src/branch/main/resources/[^"]*\.json)"'
-    matches = re.findall(json_pattern, r.text)
+    matches = re.findall(json_pattern, html_content)
    
    files = []
    for match in matches:
@@ -41,11 +47,15 @@ def update_json_db():
        url = base_url_raw + file
        local_path = "resources/" + file
        try:
-            r = requests.get(url, timeout=10)
-            r.raise_for_status()
+            req = urllib.request.Request(url)
+            req.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36')
+            
+            with urllib.request.urlopen(req, timeout=10) as response:
+                content = response.read()
+            
            os.makedirs(os.path.dirname(local_path), exist_ok=True)
            with open(local_path, "wb") as f:
-                f.write(r.content)
+                f.write(content)
        except Exception as e:
            print(f"Chyba při stahování {file}: {e}")

--- a/test_regex.py
+++ b/test_regex.py
@@ -1,49 +0,0 @@
-import requests
-import re
-import os
-
-def get_json_files_from_folder(folder):
-    base_url = "https://git.gald.site/gald/galdistream/src/branch/main/resources/"
-    url = base_url + folder
-    r = requests.get(url, timeout=10)
-    r.raise_for_status()
-    
-    # Hledáme JSON soubory pomocí regex
-    json_pattern = r'href="(/gald/galdistream/src/branch/main/resources/[^"]*\.json)"'
-    matches = re.findall(json_pattern, r.text)
-    
-    files = []
-    for match in matches:
-        # Extrahujeme pouze název souboru
-        file_name = match.split("/")[-1]
-        files.append(file_name)
-    return files
-
-def update_json_db():
-    base_url_raw = "https://git.gald.site/gald/galdistream/raw/branch/main/resources/"
-    folders = ["movies", "series"]
-    all_files = []
-    for folder in folders:
-        try:
-            files = get_json_files_from_folder(folder)
-            print(f"Nalezené soubory v {folder}: {files}")
-            all_files += [f"{folder}/{file}" for file in files]
-        except Exception as e:
-            print(f"Chyba při získávání souborů ze složky {folder}: {e}")
-    
-    print(f"Celkem souborů ke stažení: {len(all_files)}")
-    for file in all_files:
-        url = base_url_raw + file
-        local_path = "resources/" + file
-        try:
-            r = requests.get(url, timeout=10)
-            r.raise_for_status()
-            os.makedirs(os.path.dirname(local_path), exist_ok=True)
-            with open(local_path, "wb") as f:
-                f.write(r.content)
-            print(f"Staženo: {file}")
-        except Exception as e:
-            print(f"Chyba při stahování {file}: {e}")
-
-if __name__ == '__main__':
-    update_json_db()