Refactor JSON file retrieval: select links by an href ending in .json instead of by CSS class, keep only those under the repository's resources path, and return just the file name.

2025-07-29 16:56:19 +02:00
parent 1801ad52b0
commit 7a614fd824
2 changed files with 8 additions and 5 deletions
--- a/main.py
+++ b/main.py
@@ -17,11 +17,13 @@ def get_json_files_from_folder(folder):
    r.raise_for_status()
    soup = BeautifulSoup(r.text, "html.parser")
    files = []
-    for a in soup.find_all("a", class_="ui basic label"):
+    # Hledáme odkazy s .json v href
+    for a in soup.find_all("a", href=lambda x: x and x.endswith('.json')):
        href = a.get("href", "")
-        if href.endswith(".json"):
-            file_path = href.split("/resources/")[-1]
-            files.append(file_path)
+        if href.startswith("/gald/galdistream/src/branch/main/resources/"):
+            # Extrahujeme pouze název souboru
+            file_name = href.split("/")[-1]
+            files.append(file_name)
    return files

 def update_json_db():
@@ -30,7 +32,8 @@ def update_json_db():
    all_files = []
    for folder in folders:
        try:
-            all_files += [f"{folder}/{file}" for file in get_json_files_from_folder(folder)]
+            files = get_json_files_from_folder(folder)
+            all_files += [f"{folder}/{file}" for file in files]
        except Exception as e:
            print(f"Chyba při získávání souborů ze složky {folder}: {e}")
    for file in all_files: