commit d8165cd91c619d3be97183ea904de91aaa2eee74 Author: Robert Koch Date: Tue May 20 14:05:28 2025 +0200 bookReaderStitcher added diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5ff6309 --- /dev/null +++ b/.gitignore @@ -0,0 +1,38 @@ +target/ +!.mvn/wrapper/maven-wrapper.jar +!**/src/main/**/target/ +!**/src/test/**/target/ + +### IntelliJ IDEA ### +.idea/modules.xml +.idea/jarRepositories.xml +.idea/compiler.xml +.idea/libraries/ +*.iws +*.iml +*.ipr + +### Eclipse ### +.apt_generated +.classpath +.factorypath +.project +.settings +.springBeans +.sts4-cache + +### NetBeans ### +/nbproject/private/ +/nbbuild/ +/dist/ +/nbdist/ +/.nb-gradle/ +build/ +!**/src/main/**/build/ +!**/src/test/**/build/ + +### VS Code ### +.vscode/ + +### Mac OS ### +.DS_Store \ No newline at end of file diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..13566b8 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,8 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Editor-based HTTP Client requests +/httpRequests/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml diff --git a/.idea/dataSources.xml b/.idea/dataSources.xml new file mode 100644 index 0000000..4b5bd44 --- /dev/null +++ b/.idea/dataSources.xml @@ -0,0 +1,15 @@ + + + + + sqlite.xerial + true + org.sqlite.JDBC + jdbc:sqlite:$PROJECT_DIR$/archion.db + + + + $ProjectFileDir$ + + + \ No newline at end of file diff --git a/.idea/encodings.xml b/.idea/encodings.xml new file mode 100644 index 0000000..aa00ffa --- /dev/null +++ b/.idea/encodings.xml @@ -0,0 +1,7 @@ + + + + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..e122dea --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,14 @@ + + + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..94a25f7 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/archion.db b/archion.db new file mode 100644 index 0000000..2fa40a1 Binary files /dev/null and b/archion.db differ diff --git a/archive.csv b/archive.csv new file mode 100644 index 0000000..31df30c --- /dev/null +++ b/archive.csv @@ -0,0 +1,26 @@ +Baden-Württemberg;Landeskirchliches Archiv Karlsruhe;https://www.archion.de/de/alle-archive/baden-wuerttemberg/landeskirchliches-archiv-karlsruhe +Baden-Württemberg;Landeskirchliches Archiv Stuttgart;https://www.archion.de/de/alle-archive/baden-wuerttemberg/landeskirchliches-archiv-stuttgart +Bayern;Landeskirchliches Archiv der Evangelisch-Lutherischen Kirche in Bayern;https://www.archion.de/de/alle-archive/bayern/landeskirchliches-archiv-der-evangelisch-lutherischen-kirche-in-bayern +Berlin / Brandenburg;Evangelisches Zentralarchiv in Berlin;https://www.archion.de/de/alle-archive/berlin-/-brandenburg/evangelisches-zentralarchiv-in-berlin +Berlin / Brandenburg;Landeskirchliches Archiv der Evangelischen Kirche Berlin-Brandenburg-schlesische Oberlausitz;https://www.archion.de/de/alle-archive/berlin-/-brandenburg/landeskirchliches-archiv-der-evangelischen-kirche-berlin-brandenburg-schlesische-oberlausitz +Hessen;Landeskirchliches Archiv der Evangelischen Kirche von Kurhessen-Waldeck;https://www.archion.de/de/alle-archive/hessen/landeskirchliches-archiv-der-evangelischen-kirche-von-kurhessen-waldeck +Hessen;Zentralarchiv der Evangelischen Kirche in Hessen und Nassau;https://www.archion.de/de/alle-archive/hessen/zentralarchiv-der-evangelischen-kirche-in-hessen-und-nassau +Niedersachsen;Archiv der Evangelisch-Lutherischen Landeskirche Oldenburg;https://www.archion.de/de/alle-archive/niedersachsen/archiv-der-evangelisch-lutherischen-landeskirche-oldenburg +Niedersachsen;Bistumsarchiv Hildesheim;https://www.archion.de/de/alle-archive/niedersachsen/bistumsarchiv-hildesheim +Niedersachsen;Evangelisch-reformierte Kirchengemeinde Hannover;https://www.archion.de/de/alle-archive/niedersachsen/evangelisch-reformierte-kirchengemeinde-hannover +Niedersachsen;Landeskirchliches Archiv der Evangelisch-Lutherischen Landeskirche in Braunschweig;https://www.archion.de/de/alle-archive/niedersachsen/landeskirchliches-archiv-der-evangelisch-lutherischen-landeskirche-in-braunschweig +Niedersachsen;Landeskirchliches Archiv der Evangelisch-lutherischen Landeskirche Hannovers;https://www.archion.de/de/alle-archive/niedersachsen/landeskirchliches-archiv-der-evangelisch-lutherischen-landeskirche-hannovers +Niedersachsen;Niedersächsisches Landesarchiv;https://www.archion.de/de/alle-archive/niedersachsen/niedersaechsisches-landesarchiv +Nordrhein-Westfalen;Archiv der Evangelischen Kirche im Rheinland;https://www.archion.de/de/alle-archive/nordrhein-westfalen/archiv-der-evangelischen-kirche-im-rheinland +Nordrhein-Westfalen;Archiv der Lippischen Landeskirche;https://www.archion.de/de/alle-archive/nordrhein-westfalen/archiv-der-lippischen-landeskirche +Nordrhein-Westfalen;Landeskirchliches Archiv der Evangelischen Kirche von Westfalen;https://www.archion.de/de/alle-archive/nordrhein-westfalen/landeskirchliches-archiv-der-evangelischen-kirche-von-westfalen +Rheinland-Pfalz;Archiv der Mennonitischen Forschungsstelle;https://www.archion.de/de/alle-archive/rheinland-pfalz/archiv-der-mennonitischen-forschungsstelle +Rheinland-Pfalz;Bistumsarchiv Speyer;https://www.archion.de/de/alle-archive/rheinland-pfalz/bistumsarchiv-speyer +Rheinland-Pfalz;Landesarchiv Speyer;https://www.archion.de/de/alle-archive/rheinland-pfalz/landesarchiv-speyer +Rheinland-Pfalz;Landeshauptarchiv Koblenz;https://www.archion.de/de/alle-archive/rheinland-pfalz/landeshauptarchiv-koblenz +Rheinland-Pfalz;Zentralarchiv der Evangelischen Kirche der Pfalz;https://www.archion.de/de/alle-archive/rheinland-pfalz/zentralarchiv-der-evangelischen-kirche-der-pfalz +Sachsen;Landeskirchliches Archiv der Evangelisch-Lutherischen Landeskirche Sachsens;https://www.archion.de/de/alle-archive/sachsen/landeskirchliches-archiv-der-evangelisch-lutherischen-landeskirche-sachsens +Sachsen-Anhalt;Archiv der Evangelischen Landeskirche Anhalts;https://www.archion.de/de/alle-archive/sachsen-anhalt/archiv-der-evangelischen-landeskirche-anhalts +Sachsen-Anhalt;Landeskirchenarchiv der Evangelischen Kirche Mitteldeutschland/Magdeburg;https://www.archion.de/de/alle-archive/sachsen-anhalt/landeskirchenarchiv-der-evangelischen-kirche-mitteldeutschland/magdeburg +Schleswig-Holstein;Landeskirchliches Archiv der Evangelisch-Lutherischen Kirche in Norddeutschland;https://www.archion.de/de/alle-archive/schleswig-holstein/landeskirchliches-archiv-der-evangelisch-lutherischen-kirche-in-norddeutschland +Thüringen;Landeskirchenarchiv der Evangelischen Kirche Mitteldeutschland/Eisenach;https://www.archion.de/de/alle-archive/thueringen/landeskirchenarchiv-der-evangelischen-kirche-mitteldeutschland/eisenach diff --git a/https:/www.archion.de/de/viewer/churchRegister/287339?cHash=ca3bf31106a5081448b44947b5d5bd95/seite-001.png b/https:/www.archion.de/de/viewer/churchRegister/287339?cHash=ca3bf31106a5081448b44947b5d5bd95/seite-001.png new file mode 100644 index 0000000..cbadc73 Binary files /dev/null and b/https:/www.archion.de/de/viewer/churchRegister/287339?cHash=ca3bf31106a5081448b44947b5d5bd95/seite-001.png differ diff --git a/pom.xml b/pom.xml new file mode 100644 index 0000000..14cfb80 --- /dev/null +++ b/pom.xml @@ -0,0 +1,46 @@ + + + 4.0.0 + + de.roko.genalogy.downloader + arch + 1.0-SNAPSHOT + + + 23 + 23 + UTF-8 + + + + + org.seleniumhq.selenium + selenium-java + 4.21.0 + + + io.github.bonigarcia + webdrivermanager + 5.8.0 + + + org.jsoup + jsoup + 1.17.2 + + + org.xerial + sqlite-jdbc + 3.45.1.0 + + + org.junit.jupiter + junit-jupiter + 5.10.0 + test + + + + \ No newline at end of file diff --git a/src/main/java/de/roko/genalogy/downloader/archion/ArchionArchiveToCSV.java b/src/main/java/de/roko/genalogy/downloader/archion/ArchionArchiveToCSV.java new file mode 100644 index 0000000..1ab6054 --- /dev/null +++ b/src/main/java/de/roko/genalogy/downloader/archion/ArchionArchiveToCSV.java @@ -0,0 +1,93 @@ +package de.roko.genalogy.downloader.archion; + +import de.roko.genalogy.downloader.archiv.Archiv; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.*; + +public class ArchionArchiveToCSV { + + + private static final String CSV_FILE = "archive.csv"; + + + public static void writeArchionArchiveToCSV(Elements stateDivs) { + Map archiveMap = readCsvAsMap(CSV_FILE); + + int updated = 0; + int added = 0; + + for (Element stateDiv : stateDivs) { + String bundesland = stateDiv.selectFirst("a.h6.text-muted").text().trim(); + Elements archiveLinks = stateDiv.select("ul > li > a"); + + for (Element link : archiveLinks) { + String archivname = link.text().trim(); + String href = "https://www.archion.de" + link.attr("href").trim(); + + String key = bundesland + ";" + archivname; + if (archiveMap.containsKey(key)) { + Archiv existing = archiveMap.get(key); + if (!existing.link.equals(href)) { + existing.link = href; // Link aktualisieren + updated++; + } + } else { + archiveMap.put(key, new Archiv(bundesland, archivname, href)); + added++; + } + } + } + + // Sortieren nach Bundesland > Archiv + List sorted = new ArrayList<>(archiveMap.values()); + sorted.sort(Comparator.comparing((Archiv e) -> e.bundesland) + .thenComparing(e -> e.archivname)); + + // Schreiben + try (BufferedWriter writer = Files.newBufferedWriter(Paths.get(CSV_FILE))) { + for (Archiv entry : sorted) { + writer.write(entry.toCsvLine()); + writer.newLine(); + } + }catch (IOException e) { + e.printStackTrace(); + } + + System.out.printf("✅ Fertig. %d neue, %d aktualisierte Einträge. Gesamt: %d\n", added, updated, sorted.size()); + } + + + + + + private static Map readCsvAsMap(String filePath) { + Map map = new HashMap<>(); + Path path = Paths.get(filePath); + + if (!Files.exists(path)) return map; + + try (BufferedReader reader = Files.newBufferedReader(path)) { + String line; + while ((line = reader.readLine()) != null) { + String[] parts = line.trim().split(";", 3); + if (parts.length == 3) { + String key = parts[0] + ";" + parts[1]; + map.put(key, new Archiv(parts[0], parts[1], parts[2])); + } + } + } catch (IOException e) { + System.err.println("⚠️ Fehler beim Lesen von " + filePath + ": " + e.getMessage()); + } + + return map; + } + +} diff --git a/src/main/java/de/roko/genalogy/downloader/archion/ArchionLoginHelper.java b/src/main/java/de/roko/genalogy/downloader/archion/ArchionLoginHelper.java new file mode 100644 index 0000000..7f8af18 --- /dev/null +++ b/src/main/java/de/roko/genalogy/downloader/archion/ArchionLoginHelper.java @@ -0,0 +1,83 @@ +package de.roko.genalogy.downloader.archion; + +import org.openqa.selenium.By; +import org.openqa.selenium.TimeoutException; +import org.openqa.selenium.WebDriver; +import org.openqa.selenium.WebElement; +import org.openqa.selenium.support.ui.ExpectedConditions; +import org.openqa.selenium.support.ui.WebDriverWait; + +import java.time.Duration; + +public class ArchionLoginHelper { + + + private final WebDriver driver; + private final WebDriverWait wait; + private boolean loggedIn; + + public ArchionLoginHelper(WebDriver driver) { + this.driver = driver; + this.wait = new WebDriverWait(driver, Duration.ofSeconds(10)); + } + + public void login(String username, String password) { + + // Seite aufrufen + driver.get("https://www.archion.de/de/"); + + + // "Anmelden"-Link per Attribut (title="Anmelden") finden + WebElement loginLink = wait.until(ExpectedConditions.visibilityOfElementLocated(By.cssSelector("a[title='Anmelden']"))); + loginLink.click(); + + // Eingabefelder finden und ausfüllen + WebElement emailField = wait.until(ExpectedConditions.visibilityOfElementLocated(By.name("user"))); + WebElement passwordField = wait.until(ExpectedConditions.visibilityOfElementLocated(By.name("pass"))); + + emailField.sendKeys(username); + passwordField.sendKeys(password); + + // Login-Button klicken + WebElement submitButton = wait.until( + ExpectedConditions.elementToBeClickable(By.name("submit")) + ); + // Klick auf den Button "Anmelden" + submitButton.click(); + + System.out.println("Login abgeschlossen. Aktuelle URL: " + driver.getCurrentUrl()); + + } + + public boolean isLoggedIn() { + try { + // Eventuell Dropdown öffnen, wenn nötig: + WebElement kontoDropdownToggle = wait.until(ExpectedConditions.elementToBeClickable( + By.cssSelector("a.nav-link.dropdown-toggle[href='#'], a.nav-link.dropdown-toggle.show"))); + kontoDropdownToggle.click(); + + // Warte auf Eintrag "Konto-Übersicht" + WebElement kontoUebersicht = wait.until(ExpectedConditions.visibilityOfElementLocated( + By.cssSelector("a[href*='/konto-uebersicht']"))); + + System.out.println("✅ Menüeintrag 'Konto-Übersicht' gefunden."); + loggedIn = kontoUebersicht.isDisplayed(); + return kontoUebersicht.isDisplayed(); + } catch (TimeoutException e) { + System.out.println("❌ Menüeintrag 'Konto-Übersicht' nicht sichtbar."); + loggedIn = false; + return false; + } + } + + public void openAlleArchive() { + WebElement alleArchiveLink = wait.until( + ExpectedConditions.elementToBeClickable(By.linkText("Alle Archive")) + ); + alleArchiveLink.click(); + + wait.until(ExpectedConditions.urlContains("/de/alle-archive")); + System.out.println("✅ Seite 'Alle Archive' geöffnet."); + } + +} diff --git a/src/main/java/de/roko/genalogy/downloader/archiv/Archiv.java b/src/main/java/de/roko/genalogy/downloader/archiv/Archiv.java new file mode 100644 index 0000000..df6fc85 --- /dev/null +++ b/src/main/java/de/roko/genalogy/downloader/archiv/Archiv.java @@ -0,0 +1,20 @@ +package de.roko.genalogy.downloader.archiv; + +public class Archiv { + + public String bundesland; + public String archivname; + public String link; + + public Archiv(String bundesland, String archivname, String link) { + this.bundesland = bundesland; + this.archivname = archivname; + this.link = link; + } + + + public String toCsvLine() { + return String.join(";", bundesland, archivname, link); + } + +} diff --git a/src/main/java/de/roko/genalogy/downloader/archiv/Kirchenkreis.java b/src/main/java/de/roko/genalogy/downloader/archiv/Kirchenkreis.java new file mode 100644 index 0000000..5358322 --- /dev/null +++ b/src/main/java/de/roko/genalogy/downloader/archiv/Kirchenkreis.java @@ -0,0 +1,4 @@ +package de.roko.genalogy.downloader.archiv; + +public class Kirchenkreis { +} diff --git a/src/main/java/de/roko/genalogy/downloader/archiv/Ort.java b/src/main/java/de/roko/genalogy/downloader/archiv/Ort.java new file mode 100644 index 0000000..4ec8c75 --- /dev/null +++ b/src/main/java/de/roko/genalogy/downloader/archiv/Ort.java @@ -0,0 +1,16 @@ +package de.roko.genalogy.downloader.archiv; + +import de.roko.genalogy.downloader.archiv.buch.Buch; + +import java.util.List; + +public class Ort { + private String ortsname; + private List buecher; + + public Ort(String ortsname, List buecher) { + this.ortsname = ortsname; + this.buecher = buecher; + } + +} diff --git a/src/main/java/de/roko/genalogy/downloader/archiv/buch/Bild.java b/src/main/java/de/roko/genalogy/downloader/archiv/buch/Bild.java new file mode 100644 index 0000000..6885494 --- /dev/null +++ b/src/main/java/de/roko/genalogy/downloader/archiv/buch/Bild.java @@ -0,0 +1,16 @@ +package de.roko.genalogy.downloader.archiv.buch; + +public class Bild { + + private String urllink; + private String pathlocal; + + public void Bild(String urllink) { + this.urllink = urllink; + } + + private void downloadPic() { + + } + +} diff --git a/src/main/java/de/roko/genalogy/downloader/archiv/buch/Buch.java b/src/main/java/de/roko/genalogy/downloader/archiv/buch/Buch.java new file mode 100644 index 0000000..6f320e5 --- /dev/null +++ b/src/main/java/de/roko/genalogy/downloader/archiv/buch/Buch.java @@ -0,0 +1,9 @@ +package de.roko.genalogy.downloader.archiv.buch; + +import java.util.List; + +public class Buch { + + private List seiten; + +} diff --git a/src/main/java/de/roko/genalogy/downloader/archiv/buch/Seite.java b/src/main/java/de/roko/genalogy/downloader/archiv/buch/Seite.java new file mode 100644 index 0000000..3385f52 --- /dev/null +++ b/src/main/java/de/roko/genalogy/downloader/archiv/buch/Seite.java @@ -0,0 +1,11 @@ +package de.roko.genalogy.downloader.archiv.buch; + +import java.util.List; + +public class Seite { + + public int seitennummer; + private List bilder; + private Bild bildgesamt; + +} diff --git a/src/main/java/de/roko/genalogy/downloader/database/ArchionDatabaseSetup.java b/src/main/java/de/roko/genalogy/downloader/database/ArchionDatabaseSetup.java new file mode 100644 index 0000000..8f1500e --- /dev/null +++ b/src/main/java/de/roko/genalogy/downloader/database/ArchionDatabaseSetup.java @@ -0,0 +1,83 @@ +package de.roko.genalogy.downloader.database; + +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.Statement; + +public class ArchionDatabaseSetup { + + private static final String DB_FILE = "archion.db"; + + public static void main(String[] args) { + try (Connection conn = DriverManager.getConnection("jdbc:sqlite:" + DB_FILE); + Statement stmt = conn.createStatement()) { + + stmt.execute(""" + CREATE TABLE IF NOT EXISTS archive ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + bundesland TEXT NOT NULL, + name TEXT NOT NULL, + link TEXT NOT NULL, + UNIQUE(bundesland, name) + ); + """); + + stmt.execute(""" + CREATE TABLE IF NOT EXISTS kreis ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + archiv_id INTEGER NOT NULL, + name TEXT NOT NULL, + link TEXT NOT NULL, + UNIQUE(archiv_id, name), + FOREIGN KEY (archiv_id) REFERENCES archive(id) + ); + """); + + stmt.execute(""" + CREATE TABLE IF NOT EXISTS ort ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + kreis_id INTEGER, -- NULL, wenn Ort direkt unter Archiv hängt + archiv_id INTEGER NOT NULL, + name TEXT NOT NULL, + link TEXT NOT NULL, + UNIQUE(kreis_id, archiv_id, name) -- ← Diese Kombination muss zum ON CONFLICT passen + ); + """); + + stmt.execute(""" + CREATE TABLE IF NOT EXISTS buch ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + ort_id INTEGER NOT NULL, + titel TEXT NOT NULL, + zeitraum TEXT, + FOREIGN KEY (ort_id) REFERENCES ort(id) + ); + """); + + stmt.execute(""" + CREATE TABLE IF NOT EXISTS seite ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + buch_id INTEGER NOT NULL, + seiten_nr TEXT NOT NULL, + FOREIGN KEY (buch_id) REFERENCES buch(id) + ); + """); + + stmt.execute(""" + CREATE TABLE IF NOT EXISTS bild ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + seite_id INTEGER NOT NULL, + url TEXT NOT NULL, + ist_gesamtansicht BOOLEAN DEFAULT FALSE, + FOREIGN KEY (seite_id) REFERENCES seite(id) + ); + """); + + System.out.println("✅ Tabellen erfolgreich erstellt in: " + DB_FILE); + + } catch (Exception e) { + System.err.println("❌ Fehler beim Erstellen der Tabellen: " + e.getMessage()); + e.printStackTrace(); + } + } +} diff --git a/src/main/java/de/roko/genalogy/downloader/database/ArchivStrukturParser.java b/src/main/java/de/roko/genalogy/downloader/database/ArchivStrukturParser.java new file mode 100644 index 0000000..1dce677 --- /dev/null +++ b/src/main/java/de/roko/genalogy/downloader/database/ArchivStrukturParser.java @@ -0,0 +1,103 @@ +package de.roko.genalogy.downloader.database; + +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; + +import java.sql.*; +import java.util.ArrayList; +import java.util.List; + +public class ArchivStrukturParser { + + private static final String DB = "archion.db"; + + public void parse() throws Exception { + try (Connection conn = DriverManager.getConnection("jdbc:sqlite:" + DB)) { + + String sql = "SELECT id, name, link FROM archive ORDER BY id"; + try (PreparedStatement stmt = conn.prepareStatement(sql); + ResultSet rs = stmt.executeQuery()) { + + while (rs.next()) { + int archivId = rs.getInt("id"); + String archivName = rs.getString("name"); + String archivLink = rs.getString("link"); + + System.out.println("\n🔍 Verarbeite Archiv: " + archivName); + processArchiv(conn, archivId, archivLink); + } + } + } + } + + private static void processArchiv(Connection conn, int archivId, String link) { + try { + Document doc = Jsoup.connect(link).get(); + Elements items = doc.select("#archive-nav li.item a"); + + List kreise = new ArrayList<>(); + List orte = new ArrayList<>(); + + for (Element a : items) { + String name = a.text().trim(); + String href = a.absUrl("href").trim(); + + if (name.toLowerCase().contains("kirchenkreis") || name.toLowerCase().contains("dekanat")) { + kreise.add(new Kirchenkreis(archivId, name, href)); + } else { + orte.add(new Ort(null, archivId, name, href)); + } + } + + saveKreise(kreise, conn); + saveOrteDirekt(orte, conn); + System.out.println("→ " + kreise.size() + " Kirchenkreise und " + orte.size() + " direkte Orte gespeichert."); + + } catch (Exception e) { + System.err.println("⚠️ Fehler bei Archiv-Link " + link + ": " + e.getMessage()); + } + } + + private static void saveKreise(List list, Connection conn) throws SQLException { + String sql = """ + INSERT INTO kreis (archiv_id, name, link) + VALUES (?, ?, ?) + ON CONFLICT(archiv_id, name) + DO UPDATE SET link = excluded.link; + """; + + try (PreparedStatement stmt = conn.prepareStatement(sql)) { + for (Kirchenkreis k : list) { + stmt.setInt(1, k.archivId()); + stmt.setString(2, k.name()); + stmt.setString(3, k.link()); + stmt.addBatch(); + } + stmt.executeBatch(); + } + } + + private static void saveOrteDirekt(List list, Connection conn) throws SQLException { + String sql = """ + INSERT INTO ort (kreis_id, archiv_id, name, link) + VALUES (NULL, ?, ?, ?) + ON CONFLICT(archiv_id, name) + DO UPDATE SET link = excluded.link; + """; + + try (PreparedStatement stmt = conn.prepareStatement(sql)) { + for (Ort o : list) { + stmt.setInt(1, o.archivId()); + stmt.setString(2, o.name()); + stmt.setString(3, o.link()); + stmt.addBatch(); + } + stmt.executeBatch(); + } + } + + public record Kirchenkreis(int archivId, String name, String link) {} + public record Ort(Integer kreisId, Integer archivId, String name, String link) {} +} diff --git a/src/main/java/de/roko/genalogy/downloader/database/ArchiveInserter.java b/src/main/java/de/roko/genalogy/downloader/database/ArchiveInserter.java new file mode 100644 index 0000000..57ad0b9 --- /dev/null +++ b/src/main/java/de/roko/genalogy/downloader/database/ArchiveInserter.java @@ -0,0 +1,102 @@ +package de.roko.genalogy.downloader.database; + +import io.github.bonigarcia.wdm.WebDriverManager; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; +import org.openqa.selenium.WebDriver; +import org.openqa.selenium.chrome.ChromeDriver; + +import java.sql.*; +import java.util.Objects; + +public class ArchiveInserter { + + private static final String DB_FILE = "archion.db"; + + public static void main(String[] args) throws Exception { + new ArchiveInserter().run(); + } + + public void run() throws Exception { + WebDriverManager.chromedriver().setup(); + WebDriver driver = new ChromeDriver(); + run(driver); + } + + public void run(WebDriver driver) throws Exception { + + driver.get("https://www.archion.de/de/alle-archive"); + + String html = driver.getPageSource(); + driver.quit(); + + Document doc = Jsoup.parse(html); + Elements stateDivs = doc.select("div[id^=state]"); + + try (Connection conn = DriverManager.getConnection("jdbc:sqlite:" + DB_FILE)) { + int added = 0, updated = 0; + + for (Element stateDiv : stateDivs) { + String bundesland = stateDiv.selectFirst("a.h6.text-muted").text().trim(); + Elements archiveLinks = stateDiv.select("ul > li > a"); + + for (Element link : archiveLinks) { + String archivname = link.text().trim(); + String url = "https://www.archion.de" + link.attr("href").trim(); + + Integer existingId = findArchiveId(conn, bundesland, archivname); + if (existingId == null) { + insertArchive(conn, bundesland, archivname, url); + added++; + } else { + if (updateArchiveIfLinkChanged(conn, existingId, url)) { + updated++; + } + } + } + } + + System.out.printf("✅ Archive verarbeitet. Neu: %d, aktualisiert: %d%n", added, updated); + } + } + + private Integer findArchiveId(Connection conn, String bundesland, String name) throws SQLException { + String sql = "SELECT id FROM archive WHERE bundesland = ? AND name = ?"; + try (PreparedStatement stmt = conn.prepareStatement(sql)) { + stmt.setString(1, bundesland); + stmt.setString(2, name); + ResultSet rs = stmt.executeQuery(); + return rs.next() ? rs.getInt("id") : null; + } + } + + private void insertArchive(Connection conn, String bundesland, String name, String link) throws SQLException { + String sql = "INSERT INTO archive (bundesland, name, link) VALUES (?, ?, ?)"; + try (PreparedStatement stmt = conn.prepareStatement(sql)) { + stmt.setString(1, bundesland); + stmt.setString(2, name); + stmt.setString(3, link); + stmt.executeUpdate(); + } + } + + private boolean updateArchiveIfLinkChanged(Connection conn, int id, String newLink) throws SQLException { + String sql = "SELECT link FROM archive WHERE id = ?"; + try (PreparedStatement stmt = conn.prepareStatement(sql)) { + stmt.setInt(1, id); + ResultSet rs = stmt.executeQuery(); + if (rs.next() && !Objects.equals(rs.getString("link"), newLink)) { + try (PreparedStatement updateStmt = conn.prepareStatement("UPDATE archive SET link = ? WHERE id = ?")) { + updateStmt.setString(1, newLink); + updateStmt.setInt(2, id); + updateStmt.executeUpdate(); + return true; + } + } + } + return false; + } +} + diff --git a/src/main/java/de/roko/genalogy/downloader/database/ArchiveReader.java b/src/main/java/de/roko/genalogy/downloader/database/ArchiveReader.java new file mode 100644 index 0000000..98d75b4 --- /dev/null +++ b/src/main/java/de/roko/genalogy/downloader/database/ArchiveReader.java @@ -0,0 +1,29 @@ +package de.roko.genalogy.downloader.database; + +import java.sql.*; + +public class ArchiveReader { + + private static final String DB_FILE = "archion.db"; + + public static void printAllArchives() { + String sql = "SELECT id, bundesland, name, link FROM archive ORDER BY bundesland, name"; + + try (Connection conn = DriverManager.getConnection("jdbc:sqlite:" + DB_FILE); + Statement stmt = conn.createStatement(); + ResultSet rs = stmt.executeQuery(sql)) { + + while (rs.next()) { + int id = rs.getInt("id"); + String bundesland = rs.getString("bundesland"); + String name = rs.getString("name"); + String link = rs.getString("link"); + + System.out.printf("ID: %-3d | %-20s | %-60s | %s%n", id, bundesland, name, link); + } + + } catch (SQLException e) { + System.err.println("❌ Fehler beim Lesen aus der Datenbank: " + e.getMessage()); + } + } +} diff --git a/src/main/java/de/roko/genalogy/downloader/database/KirchenkreisExtractor.java b/src/main/java/de/roko/genalogy/downloader/database/KirchenkreisExtractor.java new file mode 100644 index 0000000..3966cca --- /dev/null +++ b/src/main/java/de/roko/genalogy/downloader/database/KirchenkreisExtractor.java @@ -0,0 +1,77 @@ +package de.roko.genalogy.downloader.database; + +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; + +import java.sql.*; +import java.util.ArrayList; +import java.util.List; + +public class KirchenkreisExtractor { + + private static final String DB = "archion.db"; + + public void readNwrite() throws Exception { + try (Connection conn = DriverManager.getConnection("jdbc:sqlite:" + DB)) { + + String sql = "SELECT id, name, link FROM archive ORDER BY id"; + try (PreparedStatement stmt = conn.prepareStatement(sql); + ResultSet rs = stmt.executeQuery()) { + + while (rs.next()) { + int id = rs.getInt("id"); + String name = rs.getString("name"); + String link = rs.getString("link"); + System.out.println("\n🔍 Lade Kirchenkreise f\u00fcr Archiv: " + name); + + List kreise = extractKirchenkreise(id, link); + saveKirchenkreise(kreise, conn); + System.out.println("→ " + kreise.size() + " Kirchenkreise gespeichert."); + } + } + } + } + + public static List extractKirchenkreise(int archivId, String url) { + List list = new ArrayList<>(); + try { + Document doc = Jsoup.connect(url).get(); + Elements items = doc.select("#archive-nav li.item a"); + + for (Element link : items) { + String name = link.text().trim(); + String href = link.absUrl("href").trim(); + if (!name.isEmpty() && !href.isEmpty()) { + list.add(new Kirchenkreis(archivId, name, href)); + } + } + } catch (Exception e) { + System.err.println("⚠️ Fehler bei URL " + url + ": " + e.getMessage()); + } + return list; + } + + public static void saveKirchenkreise(List list, Connection conn) throws SQLException { + String sql = """ + INSERT INTO kreis (archiv_id, name, link) + VALUES (?, ?, ?) + ON CONFLICT(archiv_id, name) + DO UPDATE SET link = excluded.link; + """; + + try (PreparedStatement stmt = conn.prepareStatement(sql)) { + for (Kirchenkreis k : list) { + stmt.setInt(1, k.archivId()); + stmt.setString(2, k.name()); + stmt.setString(3, k.link()); + stmt.addBatch(); + } + stmt.executeBatch(); + } + } + + public record Kirchenkreis(int archivId, String name, String link) {} +} + diff --git a/src/main/java/de/roko/genalogy/downloader/database/KreisOrtExtractor.java b/src/main/java/de/roko/genalogy/downloader/database/KreisOrtExtractor.java new file mode 100644 index 0000000..e574e8c --- /dev/null +++ b/src/main/java/de/roko/genalogy/downloader/database/KreisOrtExtractor.java @@ -0,0 +1,77 @@ +package de.roko.genalogy.downloader.database; + +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; + +import java.sql.*; +import java.util.ArrayList; +import java.util.List; + +public class KreisOrtExtractor { + + private static final String DB = "archion.db"; + + public void readNwrite() throws Exception { + try (Connection conn = DriverManager.getConnection("jdbc:sqlite:" + DB)) { + String sql = "SELECT id, archiv_id, name, link FROM kreis ORDER BY id"; + try (PreparedStatement stmt = conn.prepareStatement(sql); + ResultSet rs = stmt.executeQuery()) { + + while (rs.next()) { + int kreisId = rs.getInt("id"); + int archivId = rs.getInt("archiv_id"); + String name = rs.getString("name"); + String link = rs.getString("link"); + + System.out.println("\n🔍 Lade Orte für Kirchenkreis/Dekanat: " + name); + List orte = extractOrte(kreisId, archivId, link); + saveOrte(orte, conn); + System.out.println("→ " + orte.size() + " Orte gespeichert."); + } + } + } + } + + private static List extractOrte(int kreisId, int archivId, String url) { + List list = new ArrayList<>(); + try { + Document doc = Jsoup.connect(url).get(); + Elements items = doc.select(".list li a"); + for (Element link : items) { + String name = link.text().trim(); + String href = link.absUrl("href").trim(); + if (!name.isEmpty() && !href.isEmpty()) { + list.add(new Ort(kreisId, archivId, name, href)); + } + } + } catch (Exception e) { + System.err.println("⚠️ Fehler bei URL " + url + ": " + e.getMessage()); + } + return list; + } + + private static void saveOrte(List list, Connection conn) throws SQLException { + String sql = """ + INSERT INTO ort (kreis_id, archiv_id, name, link) + VALUES (?, ?, ?, ?) + ON CONFLICT(kreis_id, archiv_id, name) + DO UPDATE SET link = excluded.link; + """; + + try (PreparedStatement stmt = conn.prepareStatement(sql)) { + for (Ort o : list) { + stmt.setInt(1, o.kreisId()); + stmt.setInt(2, o.archivId()); + stmt.setString(3, o.name()); + stmt.setString(4, o.link()); + stmt.addBatch(); + } + stmt.executeBatch(); + } + } + + + public record Ort(int kreisId, int archivId, String name, String link) {} +} diff --git a/src/main/java/de/roko/genalogy/downloader/database/OrtExtractor.java b/src/main/java/de/roko/genalogy/downloader/database/OrtExtractor.java new file mode 100644 index 0000000..253bc7d --- /dev/null +++ b/src/main/java/de/roko/genalogy/downloader/database/OrtExtractor.java @@ -0,0 +1,76 @@ +package de.roko.genalogy.downloader.database; + +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; + +import java.sql.*; +import java.util.ArrayList; +import java.util.List; + +public class OrtExtractor { + + private static final String DB = "archion.db"; + + public void read() throws Exception { + try (Connection conn = DriverManager.getConnection("jdbc:sqlite:" + DB)) { + + String sql = "SELECT id, name, link FROM kreis WHERE status = 'verfügbar' ORDER BY id"; + try (PreparedStatement stmt = conn.prepareStatement(sql); + ResultSet rs = stmt.executeQuery()) { + + while (rs.next()) { + int id = rs.getInt("id"); + String name = rs.getString("name"); + String link = rs.getString("link"); + System.out.println("\n🔍 Lade Orte für Kreis/Dekanat: " + name); + + List orte = extractOrte(id, link); + saveOrte(orte, conn); + System.out.println("→ " + orte.size() + " Orte gespeichert."); + } + } + } + } + + public static List extractOrte(int kreisId, String url) { + List list = new ArrayList<>(); + try { + Document doc = Jsoup.connect(url).get(); + Elements items = doc.select(".list li a"); + + for (Element link : items) { + String name = link.text().trim(); + String href = link.absUrl("href").trim(); + if (!name.isEmpty() && !href.isEmpty()) { + list.add(new Ort(kreisId, name, href)); + } + } + } catch (Exception e) { + System.err.println("⚠️ Fehler bei URL " + url + ": " + e.getMessage()); + } + return list; + } + + public static void saveOrte(List list, Connection conn) throws SQLException { + String sql = """ + INSERT INTO ort (kreis_id, name, link) + VALUES (?, ?, ?) + ON CONFLICT(kreis_id, name) + DO UPDATE SET link = excluded.link; + """; + + try (PreparedStatement stmt = conn.prepareStatement(sql)) { + for (Ort o : list) { + stmt.setInt(1, o.kreisId()); + stmt.setString(2, o.name()); + stmt.setString(3, o.link()); + stmt.addBatch(); + } + stmt.executeBatch(); + } + } + + public record Ort(int kreisId, String name, String link) {} +} diff --git a/src/main/java/de/roko/genalogy/downloader/tools/ArchionBatchDownloader.java b/src/main/java/de/roko/genalogy/downloader/tools/ArchionBatchDownloader.java new file mode 100644 index 0000000..67b4e3a --- /dev/null +++ b/src/main/java/de/roko/genalogy/downloader/tools/ArchionBatchDownloader.java @@ -0,0 +1,127 @@ + +package de.roko.genalogy.downloader.tools; + +import org.openqa.selenium.*; +import org.openqa.selenium.support.ui.WebDriverWait; + +import java.io.File; +import java.nio.file.Files; +import java.nio.file.Path; +import java.time.Duration; +import java.util.List; + +public class ArchionBatchDownloader { + + private final WebDriver driver; + private final WebDriverWait wait; + private final BookTileDownloaderLite downloader; + + public ArchionBatchDownloader(WebDriver driver) { + this.driver = driver; + this.wait = new WebDriverWait(driver, Duration.ofSeconds(10)); + this.downloader = new BookTileDownloaderLite(driver); + } + + public void downloadAndStitchPages(String startUrl, String zielVerzeichnis) throws InterruptedException { + int seitenanzahl = ermittleSeitenAnzahl(startUrl); + downloadAndStitchPages(startUrl, zielVerzeichnis, seitenanzahl); + } + + public void downloadAndStitchPages(String startUrl, String zielVerzeichnis, int seitenAnzahl) { + try { + driver.get(startUrl); + Thread.sleep(2000); // initiale Ladezeit + + for (int seite = 1; seite <= seitenAnzahl; seite++) { + File tileFolder = new File(zielVerzeichnis, String.format("seite-%03d-tiles", seite)); + File outputFile = new File(zielVerzeichnis, String.format("seite-%03d.png", seite)); + + if (outputFile.exists()) { + System.out.printf("⏭️ Seite %d bereits vorhanden (PNG) – wird übersprungen%n", seite); + } else { + System.out.printf("📄 Verarbeite Seite %d...%n", seite); + + // Download + downloader.downloadVisibleTiles(null, zielVerzeichnis, seite); + + // Stitch + TileStitcherFixedGrid.stitch(tileFolder, outputFile); + + // Tile-Ordner löschen + if (tileFolder.exists()) { + deleteDirectoryRecursive(tileFolder.toPath()); + System.out.printf("🗑️ Tile-Ordner gelöscht: %s%n", tileFolder.getName()); + } + } + + // letzte Seite erreicht? + if (seite == seitenAnzahl) { + System.out.println("✅ Alle gewünschten Seiten verarbeitet."); + break; + } + + // Weiterblättern + List nextBtns = driver.findElements(By.cssSelector("a.next-page")); + if (nextBtns.isEmpty()) { + System.out.println("🛑 Kein 'Nächste Seite'-Button gefunden – Abbruch."); + break; + } + + WebElement dropdown = driver.findElement(By.cssSelector("select.page-select")); + String currentValue = dropdown.getAttribute("value"); + + nextBtns.get(0).click(); + + new WebDriverWait(driver, Duration.ofSeconds(10)).until(d -> + !dropdown.getAttribute("value").equals(currentValue) + ); + Thread.sleep(500); + } + + System.out.println("✅ Batch-Abschluss: Alle Seiten wurden verarbeitet."); + + } catch (Exception e) { + System.err.println("❌ Fehler im Batch-Prozess: " + e.getMessage()); + e.printStackTrace(); + } + } + + private void deleteDirectoryRecursive(Path path) { + try { + Files.walk(path) + .sorted((a, b) -> b.compareTo(a)) // erst Dateien, dann Verzeichnisse + .forEach(p -> { + try { + Files.deleteIfExists(p); + } catch (Exception e) { + System.err.printf("⚠️ Fehler beim Löschen von %s: %s%n", p, e.getMessage()); + } + }); + } catch (Exception e) { + System.err.printf("⚠️ Fehler beim Löschen des Verzeichnisses %s: %s%n", path, e.getMessage()); + } + } + + public int ermittleSeitenAnzahl(String startUrl) throws InterruptedException { + driver.get(startUrl); + Thread.sleep(2000); // initiale Ladezeit + return ermittleSeitenAnzahl(); + + } + + /** + * Ermittelt die Seitenanzahl aus dem Dropdown-Menü im Archion-Viewer. + */ + public int ermittleSeitenAnzahl() { + try { + WebElement dropdown = driver.findElement(By.cssSelector("select.page-select")); + List options = dropdown.findElements(By.tagName("option")); + int anzahl = options.size(); + System.out.printf("📄 Seitenanzahl erkannt: %d Seiten%n", anzahl); + return anzahl; + } catch (Exception e) { + System.err.println("❌ Fehler beim Ermitteln der Seitenanzahl: " + e.getMessage()); + return 0; + } + } +} diff --git a/src/main/java/de/roko/genalogy/downloader/tools/BookTileDownloader.java b/src/main/java/de/roko/genalogy/downloader/tools/BookTileDownloader.java new file mode 100644 index 0000000..7b8a6b3 --- /dev/null +++ b/src/main/java/de/roko/genalogy/downloader/tools/BookTileDownloader.java @@ -0,0 +1,166 @@ +package de.roko.genalogy.downloader.tools; + +import org.openqa.selenium.*; +import org.openqa.selenium.interactions.Actions; +import org.openqa.selenium.support.ui.ExpectedConditions; +import org.openqa.selenium.support.ui.WebDriverWait; + +import javax.imageio.ImageIO; +import java.awt.image.BufferedImage; +import java.io.File; +import java.io.InputStream; +import java.net.URL; +import java.nio.file.Files; +import java.time.Duration; +import java.util.*; + +public class BookTileDownloader { + + private final WebDriver driver; + private final JavascriptExecutor js; + private final WebDriverWait wait; + private final Actions actions; + + public BookTileDownloader(WebDriver driver) { + this.driver = driver; + this.js = (JavascriptExecutor) driver; + this.wait = new WebDriverWait(driver, Duration.ofSeconds(10)); + this.actions = new Actions(driver); + } + + private int extractPixel(String style, String property) { + try { + for (String part : style.split(";")) { + part = part.trim(); + if (part.startsWith(property)) { + return Integer.parseInt(part.replaceAll("[^0-9]", "")); + } + } + } catch (Exception e) { + System.err.printf("⚠️ Fehler beim Extrahieren von '%s' aus Style: %s%n", property, style); + } + return 0; + } + + public void downloadTilesForPage(String viewerUrl, String zielVerzeichnis, int seitenIndex) { + try { + System.out.printf("🌐 Lade Viewer-Seite: %s%n", viewerUrl); + driver.get(viewerUrl); + wait.until(ExpectedConditions.presenceOfElementLocated(By.cssSelector(".zoom-holder"))); + + // Zoom maximieren + for (int z = 0; z < 20; z++) { + try { + WebElement zoomState = driver.findElement(By.cssSelector("a.zoom-state .current")); + String style = zoomState.getAttribute("style"); + if (style != null && style.contains("left: 120px")) break; + WebElement zoomInButton = driver.findElement(By.cssSelector("a.zoom-in")); + if (zoomInButton.isDisplayed() && zoomInButton.isEnabled()) { + zoomInButton.click(); + Thread.sleep(400); + } else break; + } catch (Exception e) { + System.err.println("⚠️ Fehler beim Zoom: " + e.getMessage()); + } + } + + WebElement zoomHolder = driver.findElement(By.cssSelector(".zoom-holder")); + File tileFolder = new File(zielVerzeichnis, String.format("seite-%03d-tiles", seitenIndex)); + if (!tileFolder.exists()) Files.createDirectories(tileFolder.toPath()); + + int scrollWidth = 8192; + int scrollHeight = 8192; + try { + scrollWidth = ((Long) js.executeScript("return arguments[0].scrollWidth;", zoomHolder)).intValue(); + scrollHeight = ((Long) js.executeScript("return arguments[0].scrollHeight;", zoomHolder)).intValue(); + System.out.printf("📐 Scrollbereich erkannt: %d x %d px%n", scrollWidth, scrollHeight); + } catch (Exception e) { + System.err.println("⚠️ Fehler beim Auslesen der Scrollgröße: " + e.getMessage()); + } + + Set seenSrcs = new HashSet<>(); + int step = 256; + int tilesSaved = 0; + int tilesSkipped = 0; + int errors = 0; + long startTime = System.currentTimeMillis(); + + for (int y = 0; y <= scrollHeight; y += step) { + for (int x = 0; x <= scrollWidth; x += step) { + try { + System.out.printf("🔄 Scrolle zu Position x=%d, y=%d...%n", x, y); + js.executeScript("arguments[0].scrollTo(arguments[1], arguments[2]);", zoomHolder, x, y); + actions.moveToElement(zoomHolder, x % 200 + 20, y % 200 + 20).perform(); + Thread.sleep(800); + } catch (Exception e) { + System.err.printf("⚠️ Fehler beim Scrollen zu (%d,%d): %s%n", x, y, e.getMessage()); + errors++; + continue; + } + + List tiles; + try { + tiles = driver.findElements(By.cssSelector(".zoom-tiles img")); + } catch (Exception e) { + System.err.println("⚠️ Fehler beim Finden der Tiles: " + e.getMessage()); + errors++; + continue; + } + + System.out.printf("🔍 %d Tiles gefunden an Position (%d,%d)%n", tiles.size(), x, y); + + for (WebElement img : tiles) { + try { + String src = img.getAttribute("_src"); + if (src == null) { + tilesSkipped++; + continue; + } + if (src.startsWith("/")) { + src = "https://www.archion.de" + src; + } + if (seenSrcs.contains(src)) { + tilesSkipped++; + continue; + } + + String style = img.getAttribute("style"); + int left = extractPixel(style, "left"); + int top = extractPixel(style, "top"); + String filename = String.format("tile_%d_%d.png", left, top); + File tileFile = new File(tileFolder, filename); + + try (InputStream in = new URL(src).openStream()) { + BufferedImage tile = ImageIO.read(in); + if (tile != null) { + ImageIO.write(tile, "png", tileFile); + tilesSaved++; + System.out.printf("💾 Gespeichert: %s%n", filename); + } + } catch (Exception e) { + System.err.printf("❌ Fehler beim Laden oder Schreiben von: %s%n", src); + errors++; + } + + seenSrcs.add(src); + } catch (Exception e) { + System.err.println("⚠️ Fehler beim Verarbeiten eines Tiles: " + e.getMessage()); + errors++; + } + } + + long elapsed = (System.currentTimeMillis() - startTime) / 1000; + System.out.printf("📊 Gesamt: %d gespeichert, %d übersprungen, %d Fehler, %ds seit Start%n%n", + tilesSaved, tilesSkipped, errors, elapsed); + } + } + + System.out.printf("✅ Alle Tiles für Seite %d gespeichert (%d Tiles) in %s%n", + seitenIndex, tilesSaved, tileFolder.getAbsolutePath()); + + } catch (Exception e) { + System.err.println("❌ Schwerer Fehler: " + e.getMessage()); + e.printStackTrace(); + } + } +} diff --git a/src/main/java/de/roko/genalogy/downloader/tools/BookTileDownloaderLite.java b/src/main/java/de/roko/genalogy/downloader/tools/BookTileDownloaderLite.java new file mode 100644 index 0000000..6a0af19 --- /dev/null +++ b/src/main/java/de/roko/genalogy/downloader/tools/BookTileDownloaderLite.java @@ -0,0 +1,122 @@ +package de.roko.genalogy.downloader.tools; + +import org.openqa.selenium.*; +import org.openqa.selenium.interactions.Actions; +import org.openqa.selenium.support.ui.WebDriverWait; +import org.openqa.selenium.support.ui.ExpectedConditions; + +import javax.imageio.ImageIO; +import java.awt.image.BufferedImage; +import java.io.File; +import java.io.InputStream; +import java.net.URL; +import java.nio.file.Files; +import java.time.Duration; +import java.util.*; + +public class BookTileDownloaderLite { + + private final WebDriver driver; + private final JavascriptExecutor js; + private final WebDriverWait wait; + private final Actions actions; + + public BookTileDownloaderLite(WebDriver driver) { + this.driver = driver; + this.js = (JavascriptExecutor) driver; + this.wait = new WebDriverWait(driver, Duration.ofSeconds(10)); + this.actions = new Actions(driver); + } + + private int extractPixel(String style, String property) { + try { + for (String part : style.split(";")) { + part = part.trim(); + if (part.startsWith(property)) { + return Integer.parseInt(part.replaceAll("[^0-9]", "")); + } + } + } catch (Exception e) { + System.err.printf("⚠️ Fehler beim Extrahieren von '%s' aus Style: %s%n", property, style); + } + return 0; + } + + public void downloadVisibleTiles(String viewerUrlOrNull, String zielVerzeichnis, int seitenIndex) { + try { + if (viewerUrlOrNull != null) { + driver.get(viewerUrlOrNull); + wait.until(ExpectedConditions.presenceOfElementLocated(By.cssSelector(".zoom-holder"))); + Thread.sleep(1000); + } + +// Zoom unabhängig vom Seitenaufruf + try { + for (int z = 0; z < 20; z++) { + WebElement zoomState = driver.findElement(By.cssSelector("a.zoom-state .current")); + String style = zoomState.getAttribute("style"); + if (style != null && style.contains("left: 120px")) break; + WebElement zoomInButton = driver.findElement(By.cssSelector("a.zoom-in")); + if (zoomInButton.isDisplayed() && zoomInButton.isEnabled()) { + zoomInButton.click(); + Thread.sleep(400); + } else break; + } + } catch (Exception e) { + System.err.println("⚠️ Fehler beim Zoom: " + e.getMessage()); + } + + WebElement zoomHolder = driver.findElement(By.cssSelector(".zoom-holder")); + js.executeScript("arguments[0].scrollTo(0, 0);", zoomHolder); + actions.moveToElement(zoomHolder, 100, 100).perform(); + Thread.sleep(800); // Lazy loading + + File tileFolder = new File(zielVerzeichnis, String.format("seite-%03d-tiles", seitenIndex)); + if (!tileFolder.exists()) Files.createDirectories(tileFolder.toPath()); + + Set seenSrcs = new HashSet<>(); + int tilesSaved = 0; + int errors = 0; + + List tiles = driver.findElements(By.cssSelector(".zoom-tiles img")); + System.out.printf("🔍 Seite %d: %d sichtbare Tiles gefunden%n", seitenIndex, tiles.size()); + + for (WebElement img : tiles) { + try { + String src = img.getAttribute("_src"); + if (src == null) continue; + if (src.startsWith("/")) src = "https://www.archion.de" + src; + if (!seenSrcs.add(src)) continue; + + String style = img.getAttribute("style"); + int left = extractPixel(style, "left"); + int top = extractPixel(style, "top"); + String filename = String.format("tile_%d_%d.png", left, top); + File tileFile = new File(tileFolder, filename); + + try (InputStream in = new URL(src).openStream()) { + BufferedImage tile = ImageIO.read(in); + if (tile != null) { + ImageIO.write(tile, "png", tileFile); + tilesSaved++; + System.out.printf("💾 %s gespeichert%n", filename); + } + } catch (Exception e) { + System.err.printf("❌ Fehler beim Speichern von: %s%n", src); + errors++; + } + + } catch (Exception e) { + System.err.println("⚠️ Fehler beim Tile-Handling: " + e.getMessage()); + errors++; + } + } + + System.out.printf("✅ Seite %d: %d Tiles gespeichert, %d Fehler%n", seitenIndex, tilesSaved, errors); + + } catch (Exception e) { + System.err.println("❌ Schwerer Fehler beim Download der Seite: " + e.getMessage()); + e.printStackTrace(); + } + } +} diff --git a/src/main/java/de/roko/genalogy/downloader/tools/TileStitcherFixedGrid.java b/src/main/java/de/roko/genalogy/downloader/tools/TileStitcherFixedGrid.java new file mode 100644 index 0000000..401f3a8 --- /dev/null +++ b/src/main/java/de/roko/genalogy/downloader/tools/TileStitcherFixedGrid.java @@ -0,0 +1,124 @@ +package de.roko.genalogy.downloader.tools; + +import javax.imageio.ImageIO; +import java.awt.*; +import java.awt.image.BufferedImage; +import java.io.File; +import java.util.*; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +public class TileStitcherFixedGrid { + + private static final Pattern TILE_PATTERN = Pattern.compile("tile_(\\d+)_(\\d+)\\.png"); + + public static void stitch(File inputFolder, File outputFile) { + File[] files = inputFolder.listFiles(); + if (files == null || files.length == 0) { + System.err.println("❌ Keine Tiles im Ordner: " + inputFolder.getAbsolutePath()); + return; + } + + List tiles = new ArrayList(); + TreeSet colSet = new TreeSet(); + TreeSet rowSet = new TreeSet(); + + for (File file : files) { + Matcher matcher = TILE_PATTERN.matcher(file.getName()); + if (!matcher.matches()) continue; + + int x = Integer.parseInt(matcher.group(1)); + int y = Integer.parseInt(matcher.group(2)); + + tiles.add(new Tile(file, x, y)); + colSet.add(x); + rowSet.add(y); + } + + if (tiles.isEmpty()) { + System.err.println("❌ Keine gültigen Tiles gefunden."); + return; + } + + List colList = new ArrayList(colSet); + List rowList = new ArrayList(rowSet); + + Map colIndexMap = new HashMap(); + Map rowIndexMap = new HashMap(); + for (int i = 0; i < colList.size(); i++) { + colIndexMap.put(colList.get(i), i); + } + for (int i = 0; i < rowList.size(); i++) { + rowIndexMap.put(rowList.get(i), i); + } + + // Beispiel-Kachel lesen + BufferedImage sampleTile = null; + for (Tile tile : tiles) { + try { + sampleTile = ImageIO.read(tile.file); + if (sampleTile != null) break; + } catch (Exception ignored) {} + } + + if (sampleTile == null) { + System.err.println("❌ Keine lesbare Kachel gefunden."); + return; + } + + int tileWidth = sampleTile.getWidth(); + int tileHeight = sampleTile.getHeight(); + int cols = colList.size(); + int rows = rowList.size(); + int fullWidth = cols * tileWidth; + int fullHeight = rows * tileHeight; + + System.out.printf("📐 %d×%d Tiles à %dx%d → Bildgröße: %dx%d px%n", + cols, rows, tileWidth, tileHeight, fullWidth, fullHeight); + + BufferedImage result = new BufferedImage(fullWidth, fullHeight, BufferedImage.TYPE_INT_RGB); + Graphics2D g = result.createGraphics(); + + int count = 0; + for (Tile tile : tiles) { + try { + BufferedImage img = ImageIO.read(tile.file); + if (img == null) continue; + + int colIndex = colIndexMap.get(tile.x); + int rowIndex = rowIndexMap.get(tile.y); + int dx = colIndex * tileWidth; + int dy = rowIndex * tileHeight; + + g.drawImage(img, dx, dy, null); + count++; + } catch (Exception e) { + System.err.printf("⚠️ Fehler bei %s: %s%n", tile.file.getName(), e.getMessage()); + } + } + + g.dispose(); + + try { + ImageIO.write(result, "png", outputFile); + System.out.printf("✅ Gesamtbild gespeichert: %s (%d Tiles verwendet)%n", + outputFile.getAbsolutePath(), count); + } catch (Exception e) { + System.err.println("❌ Fehler beim Speichern: " + e.getMessage()); + } + } + + // Hilfsklasse (Java 8-kompatibel, kein record) + private static class Tile { + final File file; + final int x; + final int y; + + Tile(File file, int x, int y) { + this.file = file; + this.x = x; + this.y = y; + } + } +} diff --git a/src/main/resources/archive.csv b/src/main/resources/archive.csv new file mode 100644 index 0000000..e69de29 diff --git a/src/test/java/de/roko/genalogy/downloader/util/ArchionBatchDownloaderTest.java b/src/test/java/de/roko/genalogy/downloader/util/ArchionBatchDownloaderTest.java new file mode 100644 index 0000000..4a1219c --- /dev/null +++ b/src/test/java/de/roko/genalogy/downloader/util/ArchionBatchDownloaderTest.java @@ -0,0 +1,59 @@ +package de.roko.genalogy.downloader.util; + +import de.roko.genalogy.downloader.archion.ArchionLoginHelper; +import de.roko.genalogy.downloader.tools.ArchionBatchDownloader; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.openqa.selenium.WebDriver; +import org.openqa.selenium.chrome.ChromeDriver; +import org.openqa.selenium.support.ui.WebDriverWait; + +import java.time.Duration; + +class ArchionBatchDownloaderTest { + + public static WebDriver driver; + + @BeforeAll + static void setup() { + String username = "robatkoch"; + String password = "PaLiNa2016$$"; + driver = new ChromeDriver(); + + ArchionLoginHelper archionLoginHelper = new ArchionLoginHelper(driver); + + archionLoginHelper.login(username, password); + } + + @Test + void downloadAndStitchPages() { + + WebDriverWait wait = new WebDriverWait(driver, Duration.ofSeconds(10)); + + String url = "https://www.archion.de/de/viewer/churchRegister/287339?cHash=ca3bf31106a5081448b44947b5d5bd95"; + String zielPfad = System.getProperty("user.home") + "/Dokumente/archion"; + + ArchionBatchDownloader batch = new ArchionBatchDownloader(driver); + batch.downloadAndStitchPages( + url, // Startseite im Viewer + zielPfad, // Zielverzeichnis + 8 // Anzahl Seiten + ); + + } + @Test + void ermittleSeitenAnzahl() throws InterruptedException { + + String startUrl = "https://www.archion.de/de/viewer/churchRegister/287339?cHash=ca3bf31106a5081448b44947b5d5bd95"; + ArchionBatchDownloader batch = new ArchionBatchDownloader(driver); + System.out.println("Seitenanzahl: " + batch.ermittleSeitenAnzahl(startUrl)); + + + } + + @AfterEach + void tearDown() { + driver.quit(); + } +} \ No newline at end of file diff --git a/src/test/java/de/roko/genalogy/downloader/util/BookTileDownloaderLiteTest.java b/src/test/java/de/roko/genalogy/downloader/util/BookTileDownloaderLiteTest.java new file mode 100644 index 0000000..ec67d63 --- /dev/null +++ b/src/test/java/de/roko/genalogy/downloader/util/BookTileDownloaderLiteTest.java @@ -0,0 +1,34 @@ +package de.roko.genalogy.downloader.util; + +import de.roko.genalogy.downloader.archion.ArchionLoginHelper; +import de.roko.genalogy.downloader.tools.BookTileDownloaderLite; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.openqa.selenium.WebDriver; +import org.openqa.selenium.chrome.ChromeDriver; + +class BookTileDownloaderLiteTest { + public static WebDriver driver; + + @BeforeAll + static void setup() { + String username = "robatkoch"; + String password = "PaLiNa2016$$"; + driver = new ChromeDriver(); + + ArchionLoginHelper archionLoginHelper = new ArchionLoginHelper(driver); + + archionLoginHelper.login(username, password); + } + @Test + void downloadVisibleTiles() { + + BookTileDownloaderLite downloader = new BookTileDownloaderLite(driver); + + String url = "https://www.archion.de/de/viewer/churchRegister/287339?cHash=ca3bf31106a5081448b44947b5d5bd95"; + String kachelOrdner = System.getProperty("user.home") + "/Dokumente/archion/seite-001"; + String zielPfad = System.getProperty("user.home") + "/Dokumente/archion"; + + + } +} \ No newline at end of file diff --git a/src/test/java/de/roko/genalogy/downloader/util/TileStitcherFixedGridTest.java b/src/test/java/de/roko/genalogy/downloader/util/TileStitcherFixedGridTest.java new file mode 100644 index 0000000..3c210c1 --- /dev/null +++ b/src/test/java/de/roko/genalogy/downloader/util/TileStitcherFixedGridTest.java @@ -0,0 +1,22 @@ +package de.roko.genalogy.downloader.util; + +import de.roko.genalogy.downloader.tools.TileStitcherFixedGrid; +import org.junit.jupiter.api.Test; + +import java.io.File; + +class TileStitcherFixedGridTest { + + @Test + void stitch() { + + String kachelOrdner = System.getProperty("user.home") + "/Dokumente/archion/seite-001-tiles"; + String zielPfad = System.getProperty("user.home") + "/Dokumente/archion"; + + + File inputFolder = new File(kachelOrdner); + File outputImage = new File(System.getProperty("user.home") + "/Dokumente/archion/seite-001.png"); + + TileStitcherFixedGrid.stitch(inputFolder, outputImage); + } +} \ No newline at end of file