bookReaderStitcher added
This commit is contained in:
commit
d8165cd91c
38
.gitignore
vendored
Normal file
38
.gitignore
vendored
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
target/
|
||||||
|
!.mvn/wrapper/maven-wrapper.jar
|
||||||
|
!**/src/main/**/target/
|
||||||
|
!**/src/test/**/target/
|
||||||
|
|
||||||
|
### IntelliJ IDEA ###
|
||||||
|
.idea/modules.xml
|
||||||
|
.idea/jarRepositories.xml
|
||||||
|
.idea/compiler.xml
|
||||||
|
.idea/libraries/
|
||||||
|
*.iws
|
||||||
|
*.iml
|
||||||
|
*.ipr
|
||||||
|
|
||||||
|
### Eclipse ###
|
||||||
|
.apt_generated
|
||||||
|
.classpath
|
||||||
|
.factorypath
|
||||||
|
.project
|
||||||
|
.settings
|
||||||
|
.springBeans
|
||||||
|
.sts4-cache
|
||||||
|
|
||||||
|
### NetBeans ###
|
||||||
|
/nbproject/private/
|
||||||
|
/nbbuild/
|
||||||
|
/dist/
|
||||||
|
/nbdist/
|
||||||
|
/.nb-gradle/
|
||||||
|
build/
|
||||||
|
!**/src/main/**/build/
|
||||||
|
!**/src/test/**/build/
|
||||||
|
|
||||||
|
### VS Code ###
|
||||||
|
.vscode/
|
||||||
|
|
||||||
|
### Mac OS ###
|
||||||
|
.DS_Store
|
||||||
8
.idea/.gitignore
generated
vendored
Normal file
8
.idea/.gitignore
generated
vendored
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
# Default ignored files
|
||||||
|
/shelf/
|
||||||
|
/workspace.xml
|
||||||
|
# Editor-based HTTP Client requests
|
||||||
|
/httpRequests/
|
||||||
|
# Datasource local storage ignored files
|
||||||
|
/dataSources/
|
||||||
|
/dataSources.local.xml
|
||||||
15
.idea/dataSources.xml
generated
Normal file
15
.idea/dataSources.xml
generated
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="DataSourceManagerImpl" format="xml" multifile-model="true">
|
||||||
|
<data-source source="LOCAL" name="archion" uuid="07bf1574-7cd6-469e-9612-e80560756921">
|
||||||
|
<driver-ref>sqlite.xerial</driver-ref>
|
||||||
|
<synchronize>true</synchronize>
|
||||||
|
<jdbc-driver>org.sqlite.JDBC</jdbc-driver>
|
||||||
|
<jdbc-url>jdbc:sqlite:$PROJECT_DIR$/archion.db</jdbc-url>
|
||||||
|
<jdbc-additional-properties>
|
||||||
|
<property name="com.intellij.clouds.kubernetes.db.enabled" value="false" />
|
||||||
|
</jdbc-additional-properties>
|
||||||
|
<working-dir>$ProjectFileDir$</working-dir>
|
||||||
|
</data-source>
|
||||||
|
</component>
|
||||||
|
</project>
|
||||||
7
.idea/encodings.xml
generated
Normal file
7
.idea/encodings.xml
generated
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="Encoding">
|
||||||
|
<file url="file://$PROJECT_DIR$/src/main/java" charset="UTF-8" />
|
||||||
|
<file url="file://$PROJECT_DIR$/src/main/resources" charset="UTF-8" />
|
||||||
|
</component>
|
||||||
|
</project>
|
||||||
14
.idea/misc.xml
generated
Normal file
14
.idea/misc.xml
generated
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="ExternalStorageConfigurationManager" enabled="true" />
|
||||||
|
<component name="MavenProjectsManager">
|
||||||
|
<option name="originalFiles">
|
||||||
|
<list>
|
||||||
|
<option value="$PROJECT_DIR$/pom.xml" />
|
||||||
|
</list>
|
||||||
|
</option>
|
||||||
|
</component>
|
||||||
|
<component name="ProjectRootManager" version="2" languageLevel="JDK_23" default="true" project-jdk-name="openjdk-23" project-jdk-type="JavaSDK">
|
||||||
|
<output url="file://$PROJECT_DIR$/out" />
|
||||||
|
</component>
|
||||||
|
</project>
|
||||||
6
.idea/vcs.xml
generated
Normal file
6
.idea/vcs.xml
generated
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="VcsDirectoryMappings">
|
||||||
|
<mapping directory="$PROJECT_DIR$" vcs="Git" />
|
||||||
|
</component>
|
||||||
|
</project>
|
||||||
BIN
archion.db
Normal file
BIN
archion.db
Normal file
Binary file not shown.
26
archive.csv
Normal file
26
archive.csv
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
Baden-Württemberg;Landeskirchliches Archiv Karlsruhe;https://www.archion.de/de/alle-archive/baden-wuerttemberg/landeskirchliches-archiv-karlsruhe
|
||||||
|
Baden-Württemberg;Landeskirchliches Archiv Stuttgart;https://www.archion.de/de/alle-archive/baden-wuerttemberg/landeskirchliches-archiv-stuttgart
|
||||||
|
Bayern;Landeskirchliches Archiv der Evangelisch-Lutherischen Kirche in Bayern;https://www.archion.de/de/alle-archive/bayern/landeskirchliches-archiv-der-evangelisch-lutherischen-kirche-in-bayern
|
||||||
|
Berlin / Brandenburg;Evangelisches Zentralarchiv in Berlin;https://www.archion.de/de/alle-archive/berlin-/-brandenburg/evangelisches-zentralarchiv-in-berlin
|
||||||
|
Berlin / Brandenburg;Landeskirchliches Archiv der Evangelischen Kirche Berlin-Brandenburg-schlesische Oberlausitz;https://www.archion.de/de/alle-archive/berlin-/-brandenburg/landeskirchliches-archiv-der-evangelischen-kirche-berlin-brandenburg-schlesische-oberlausitz
|
||||||
|
Hessen;Landeskirchliches Archiv der Evangelischen Kirche von Kurhessen-Waldeck;https://www.archion.de/de/alle-archive/hessen/landeskirchliches-archiv-der-evangelischen-kirche-von-kurhessen-waldeck
|
||||||
|
Hessen;Zentralarchiv der Evangelischen Kirche in Hessen und Nassau;https://www.archion.de/de/alle-archive/hessen/zentralarchiv-der-evangelischen-kirche-in-hessen-und-nassau
|
||||||
|
Niedersachsen;Archiv der Evangelisch-Lutherischen Landeskirche Oldenburg;https://www.archion.de/de/alle-archive/niedersachsen/archiv-der-evangelisch-lutherischen-landeskirche-oldenburg
|
||||||
|
Niedersachsen;Bistumsarchiv Hildesheim;https://www.archion.de/de/alle-archive/niedersachsen/bistumsarchiv-hildesheim
|
||||||
|
Niedersachsen;Evangelisch-reformierte Kirchengemeinde Hannover;https://www.archion.de/de/alle-archive/niedersachsen/evangelisch-reformierte-kirchengemeinde-hannover
|
||||||
|
Niedersachsen;Landeskirchliches Archiv der Evangelisch-Lutherischen Landeskirche in Braunschweig;https://www.archion.de/de/alle-archive/niedersachsen/landeskirchliches-archiv-der-evangelisch-lutherischen-landeskirche-in-braunschweig
|
||||||
|
Niedersachsen;Landeskirchliches Archiv der Evangelisch-lutherischen Landeskirche Hannovers;https://www.archion.de/de/alle-archive/niedersachsen/landeskirchliches-archiv-der-evangelisch-lutherischen-landeskirche-hannovers
|
||||||
|
Niedersachsen;Niedersächsisches Landesarchiv;https://www.archion.de/de/alle-archive/niedersachsen/niedersaechsisches-landesarchiv
|
||||||
|
Nordrhein-Westfalen;Archiv der Evangelischen Kirche im Rheinland;https://www.archion.de/de/alle-archive/nordrhein-westfalen/archiv-der-evangelischen-kirche-im-rheinland
|
||||||
|
Nordrhein-Westfalen;Archiv der Lippischen Landeskirche;https://www.archion.de/de/alle-archive/nordrhein-westfalen/archiv-der-lippischen-landeskirche
|
||||||
|
Nordrhein-Westfalen;Landeskirchliches Archiv der Evangelischen Kirche von Westfalen;https://www.archion.de/de/alle-archive/nordrhein-westfalen/landeskirchliches-archiv-der-evangelischen-kirche-von-westfalen
|
||||||
|
Rheinland-Pfalz;Archiv der Mennonitischen Forschungsstelle;https://www.archion.de/de/alle-archive/rheinland-pfalz/archiv-der-mennonitischen-forschungsstelle
|
||||||
|
Rheinland-Pfalz;Bistumsarchiv Speyer;https://www.archion.de/de/alle-archive/rheinland-pfalz/bistumsarchiv-speyer
|
||||||
|
Rheinland-Pfalz;Landesarchiv Speyer;https://www.archion.de/de/alle-archive/rheinland-pfalz/landesarchiv-speyer
|
||||||
|
Rheinland-Pfalz;Landeshauptarchiv Koblenz;https://www.archion.de/de/alle-archive/rheinland-pfalz/landeshauptarchiv-koblenz
|
||||||
|
Rheinland-Pfalz;Zentralarchiv der Evangelischen Kirche der Pfalz;https://www.archion.de/de/alle-archive/rheinland-pfalz/zentralarchiv-der-evangelischen-kirche-der-pfalz
|
||||||
|
Sachsen;Landeskirchliches Archiv der Evangelisch-Lutherischen Landeskirche Sachsens;https://www.archion.de/de/alle-archive/sachsen/landeskirchliches-archiv-der-evangelisch-lutherischen-landeskirche-sachsens
|
||||||
|
Sachsen-Anhalt;Archiv der Evangelischen Landeskirche Anhalts;https://www.archion.de/de/alle-archive/sachsen-anhalt/archiv-der-evangelischen-landeskirche-anhalts
|
||||||
|
Sachsen-Anhalt;Landeskirchenarchiv der Evangelischen Kirche Mitteldeutschland/Magdeburg;https://www.archion.de/de/alle-archive/sachsen-anhalt/landeskirchenarchiv-der-evangelischen-kirche-mitteldeutschland/magdeburg
|
||||||
|
Schleswig-Holstein;Landeskirchliches Archiv der Evangelisch-Lutherischen Kirche in Norddeutschland;https://www.archion.de/de/alle-archive/schleswig-holstein/landeskirchliches-archiv-der-evangelisch-lutherischen-kirche-in-norddeutschland
|
||||||
|
Thüringen;Landeskirchenarchiv der Evangelischen Kirche Mitteldeutschland/Eisenach;https://www.archion.de/de/alle-archive/thueringen/landeskirchenarchiv-der-evangelischen-kirche-mitteldeutschland/eisenach
|
||||||
|
Binary file not shown.
|
After Width: | Height: | Size: 429 KiB |
46
pom.xml
Normal file
46
pom.xml
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||||
|
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||||
|
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||||
|
<modelVersion>4.0.0</modelVersion>
|
||||||
|
|
||||||
|
<groupId>de.roko.genalogy.downloader</groupId>
|
||||||
|
<artifactId>arch</artifactId>
|
||||||
|
<version>1.0-SNAPSHOT</version>
|
||||||
|
|
||||||
|
<properties>
|
||||||
|
<maven.compiler.source>23</maven.compiler.source>
|
||||||
|
<maven.compiler.target>23</maven.compiler.target>
|
||||||
|
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||||
|
</properties>
|
||||||
|
|
||||||
|
<dependencies>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.seleniumhq.selenium</groupId>
|
||||||
|
<artifactId>selenium-java</artifactId>
|
||||||
|
<version>4.21.0</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>io.github.bonigarcia</groupId>
|
||||||
|
<artifactId>webdrivermanager</artifactId>
|
||||||
|
<version>5.8.0</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.jsoup</groupId>
|
||||||
|
<artifactId>jsoup</artifactId>
|
||||||
|
<version>1.17.2</version> <!-- aktuelle stabile Version (Stand Mai 2025) -->
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.xerial</groupId>
|
||||||
|
<artifactId>sqlite-jdbc</artifactId>
|
||||||
|
<version>3.45.1.0</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.junit.jupiter</groupId>
|
||||||
|
<artifactId>junit-jupiter</artifactId>
|
||||||
|
<version>5.10.0</version>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
|
</dependencies>
|
||||||
|
|
||||||
|
</project>
|
||||||
@ -0,0 +1,93 @@
|
|||||||
|
package de.roko.genalogy.downloader.archion;
|
||||||
|
|
||||||
|
import de.roko.genalogy.downloader.archiv.Archiv;
|
||||||
|
import org.jsoup.nodes.Element;
|
||||||
|
import org.jsoup.select.Elements;
|
||||||
|
|
||||||
|
import java.io.BufferedReader;
|
||||||
|
import java.io.BufferedWriter;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.nio.file.Files;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
import java.nio.file.Paths;
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
public class ArchionArchiveToCSV {
|
||||||
|
|
||||||
|
|
||||||
|
private static final String CSV_FILE = "archive.csv";
|
||||||
|
|
||||||
|
|
||||||
|
public static void writeArchionArchiveToCSV(Elements stateDivs) {
|
||||||
|
Map<String, Archiv> archiveMap = readCsvAsMap(CSV_FILE);
|
||||||
|
|
||||||
|
int updated = 0;
|
||||||
|
int added = 0;
|
||||||
|
|
||||||
|
for (Element stateDiv : stateDivs) {
|
||||||
|
String bundesland = stateDiv.selectFirst("a.h6.text-muted").text().trim();
|
||||||
|
Elements archiveLinks = stateDiv.select("ul > li > a");
|
||||||
|
|
||||||
|
for (Element link : archiveLinks) {
|
||||||
|
String archivname = link.text().trim();
|
||||||
|
String href = "https://www.archion.de" + link.attr("href").trim();
|
||||||
|
|
||||||
|
String key = bundesland + ";" + archivname;
|
||||||
|
if (archiveMap.containsKey(key)) {
|
||||||
|
Archiv existing = archiveMap.get(key);
|
||||||
|
if (!existing.link.equals(href)) {
|
||||||
|
existing.link = href; // Link aktualisieren
|
||||||
|
updated++;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
archiveMap.put(key, new Archiv(bundesland, archivname, href));
|
||||||
|
added++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sortieren nach Bundesland > Archiv
|
||||||
|
List<Archiv> sorted = new ArrayList<>(archiveMap.values());
|
||||||
|
sorted.sort(Comparator.comparing((Archiv e) -> e.bundesland)
|
||||||
|
.thenComparing(e -> e.archivname));
|
||||||
|
|
||||||
|
// Schreiben
|
||||||
|
try (BufferedWriter writer = Files.newBufferedWriter(Paths.get(CSV_FILE))) {
|
||||||
|
for (Archiv entry : sorted) {
|
||||||
|
writer.write(entry.toCsvLine());
|
||||||
|
writer.newLine();
|
||||||
|
}
|
||||||
|
}catch (IOException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
|
||||||
|
System.out.printf("✅ Fertig. %d neue, %d aktualisierte Einträge. Gesamt: %d\n", added, updated, sorted.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
private static Map<String, Archiv> readCsvAsMap(String filePath) {
|
||||||
|
Map<String, Archiv> map = new HashMap<>();
|
||||||
|
Path path = Paths.get(filePath);
|
||||||
|
|
||||||
|
if (!Files.exists(path)) return map;
|
||||||
|
|
||||||
|
try (BufferedReader reader = Files.newBufferedReader(path)) {
|
||||||
|
String line;
|
||||||
|
while ((line = reader.readLine()) != null) {
|
||||||
|
String[] parts = line.trim().split(";", 3);
|
||||||
|
if (parts.length == 3) {
|
||||||
|
String key = parts[0] + ";" + parts[1];
|
||||||
|
map.put(key, new Archiv(parts[0], parts[1], parts[2]));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (IOException e) {
|
||||||
|
System.err.println("⚠️ Fehler beim Lesen von " + filePath + ": " + e.getMessage());
|
||||||
|
}
|
||||||
|
|
||||||
|
return map;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
@ -0,0 +1,83 @@
|
|||||||
|
package de.roko.genalogy.downloader.archion;
|
||||||
|
|
||||||
|
import org.openqa.selenium.By;
|
||||||
|
import org.openqa.selenium.TimeoutException;
|
||||||
|
import org.openqa.selenium.WebDriver;
|
||||||
|
import org.openqa.selenium.WebElement;
|
||||||
|
import org.openqa.selenium.support.ui.ExpectedConditions;
|
||||||
|
import org.openqa.selenium.support.ui.WebDriverWait;
|
||||||
|
|
||||||
|
import java.time.Duration;
|
||||||
|
|
||||||
|
public class ArchionLoginHelper {
|
||||||
|
|
||||||
|
|
||||||
|
private final WebDriver driver;
|
||||||
|
private final WebDriverWait wait;
|
||||||
|
private boolean loggedIn;
|
||||||
|
|
||||||
|
public ArchionLoginHelper(WebDriver driver) {
|
||||||
|
this.driver = driver;
|
||||||
|
this.wait = new WebDriverWait(driver, Duration.ofSeconds(10));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void login(String username, String password) {
|
||||||
|
|
||||||
|
// Seite aufrufen
|
||||||
|
driver.get("https://www.archion.de/de/");
|
||||||
|
|
||||||
|
|
||||||
|
// "Anmelden"-Link per Attribut (title="Anmelden") finden
|
||||||
|
WebElement loginLink = wait.until(ExpectedConditions.visibilityOfElementLocated(By.cssSelector("a[title='Anmelden']")));
|
||||||
|
loginLink.click();
|
||||||
|
|
||||||
|
// Eingabefelder finden und ausfüllen
|
||||||
|
WebElement emailField = wait.until(ExpectedConditions.visibilityOfElementLocated(By.name("user")));
|
||||||
|
WebElement passwordField = wait.until(ExpectedConditions.visibilityOfElementLocated(By.name("pass")));
|
||||||
|
|
||||||
|
emailField.sendKeys(username);
|
||||||
|
passwordField.sendKeys(password);
|
||||||
|
|
||||||
|
// Login-Button klicken
|
||||||
|
WebElement submitButton = wait.until(
|
||||||
|
ExpectedConditions.elementToBeClickable(By.name("submit"))
|
||||||
|
);
|
||||||
|
// Klick auf den Button "Anmelden"
|
||||||
|
submitButton.click();
|
||||||
|
|
||||||
|
System.out.println("Login abgeschlossen. Aktuelle URL: " + driver.getCurrentUrl());
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isLoggedIn() {
|
||||||
|
try {
|
||||||
|
// Eventuell Dropdown öffnen, wenn nötig:
|
||||||
|
WebElement kontoDropdownToggle = wait.until(ExpectedConditions.elementToBeClickable(
|
||||||
|
By.cssSelector("a.nav-link.dropdown-toggle[href='#'], a.nav-link.dropdown-toggle.show")));
|
||||||
|
kontoDropdownToggle.click();
|
||||||
|
|
||||||
|
// Warte auf Eintrag "Konto-Übersicht"
|
||||||
|
WebElement kontoUebersicht = wait.until(ExpectedConditions.visibilityOfElementLocated(
|
||||||
|
By.cssSelector("a[href*='/konto-uebersicht']")));
|
||||||
|
|
||||||
|
System.out.println("✅ Menüeintrag 'Konto-Übersicht' gefunden.");
|
||||||
|
loggedIn = kontoUebersicht.isDisplayed();
|
||||||
|
return kontoUebersicht.isDisplayed();
|
||||||
|
} catch (TimeoutException e) {
|
||||||
|
System.out.println("❌ Menüeintrag 'Konto-Übersicht' nicht sichtbar.");
|
||||||
|
loggedIn = false;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void openAlleArchive() {
|
||||||
|
WebElement alleArchiveLink = wait.until(
|
||||||
|
ExpectedConditions.elementToBeClickable(By.linkText("Alle Archive"))
|
||||||
|
);
|
||||||
|
alleArchiveLink.click();
|
||||||
|
|
||||||
|
wait.until(ExpectedConditions.urlContains("/de/alle-archive"));
|
||||||
|
System.out.println("✅ Seite 'Alle Archive' geöffnet.");
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
20
src/main/java/de/roko/genalogy/downloader/archiv/Archiv.java
Normal file
20
src/main/java/de/roko/genalogy/downloader/archiv/Archiv.java
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
package de.roko.genalogy.downloader.archiv;
|
||||||
|
|
||||||
|
public class Archiv {
|
||||||
|
|
||||||
|
public String bundesland;
|
||||||
|
public String archivname;
|
||||||
|
public String link;
|
||||||
|
|
||||||
|
public Archiv(String bundesland, String archivname, String link) {
|
||||||
|
this.bundesland = bundesland;
|
||||||
|
this.archivname = archivname;
|
||||||
|
this.link = link;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public String toCsvLine() {
|
||||||
|
return String.join(";", bundesland, archivname, link);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
@ -0,0 +1,4 @@
|
|||||||
|
package de.roko.genalogy.downloader.archiv;
|
||||||
|
|
||||||
|
/**
 * Placeholder type for a Kirchenkreis; it declares no state or behaviour yet.
 */
public class Kirchenkreis {
}
|
||||||
16
src/main/java/de/roko/genalogy/downloader/archiv/Ort.java
Normal file
16
src/main/java/de/roko/genalogy/downloader/archiv/Ort.java
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
package de.roko.genalogy.downloader.archiv;
|
||||||
|
|
||||||
|
import de.roko.genalogy.downloader.archiv.buch.Buch;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public class Ort {
|
||||||
|
private String ortsname;
|
||||||
|
private List<Buch> buecher;
|
||||||
|
|
||||||
|
public Ort(String ortsname, List<Buch> buecher) {
|
||||||
|
this.ortsname = ortsname;
|
||||||
|
this.buecher = buecher;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
@ -0,0 +1,16 @@
|
|||||||
|
package de.roko.genalogy.downloader.archiv.buch;
|
||||||
|
|
||||||
|
/**
 * An image, described by its remote URL and a local path.
 */
public class Bild {

    private String urllink;
    private String pathlocal;

    /**
     * Creates an image without a URL. Kept so that existing {@code new Bild()} call sites continue
     * to compile.
     */
    public Bild() {
    }

    /**
     * Creates an image for the given URL.
     *
     * @param urllink remote URL of the image
     */
    public Bild(String urllink) {
        this.urllink = urllink;
    }

    /**
     * Sets the URL of this image.
     *
     * <p>This was originally written with a {@code void} return type, which makes it an ordinary
     * method rather than a constructor. It is retained for backward compatibility.
     *
     * @param urllink remote URL of the image
     * @deprecated use the {@link #Bild(String)} constructor instead
     */
    @Deprecated
    public void Bild(String urllink) {
        this.urllink = urllink;
    }

    /** Not implemented yet; currently does nothing. */
    private void downloadPic() {

    }

}
|
||||||
@ -0,0 +1,9 @@
|
|||||||
|
package de.roko.genalogy.downloader.archiv.buch;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public class Buch {
|
||||||
|
|
||||||
|
private List<Seite> seiten;
|
||||||
|
|
||||||
|
}
|
||||||
@ -0,0 +1,11 @@
|
|||||||
|
package de.roko.genalogy.downloader.archiv.buch;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public class Seite {
|
||||||
|
|
||||||
|
public int seitennummer;
|
||||||
|
private List<Bild> bilder;
|
||||||
|
private Bild bildgesamt;
|
||||||
|
|
||||||
|
}
|
||||||
@ -0,0 +1,83 @@
|
|||||||
|
package de.roko.genalogy.downloader.database;
|
||||||
|
|
||||||
|
import java.sql.Connection;
|
||||||
|
import java.sql.DriverManager;
|
||||||
|
import java.sql.Statement;
|
||||||
|
|
||||||
|
/**
 * Command-line entry point that creates the SQLite tables {@code archive}, {@code kreis},
 * {@code ort}, {@code buch}, {@code seite} and {@code bild} in {@code archion.db} if they do not
 * exist yet.
 */
public class ArchionDatabaseSetup {

    private static final String DB_FILE = "archion.db";

    /** DDL statements, executed in this order. */
    private static final String[] SCHEMA = {
        """
        CREATE TABLE IF NOT EXISTS archive (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        bundesland TEXT NOT NULL,
        name TEXT NOT NULL,
        link TEXT NOT NULL,
        UNIQUE(bundesland, name)
        );
        """,
        """
        CREATE TABLE IF NOT EXISTS kreis (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        archiv_id INTEGER NOT NULL,
        name TEXT NOT NULL,
        link TEXT NOT NULL,
        UNIQUE(archiv_id, name),
        FOREIGN KEY (archiv_id) REFERENCES archive(id)
        );
        """,
        """
        CREATE TABLE IF NOT EXISTS ort (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        kreis_id INTEGER, -- NULL, wenn Ort direkt unter Archiv hängt
        archiv_id INTEGER NOT NULL,
        name TEXT NOT NULL,
        link TEXT NOT NULL,
        UNIQUE(kreis_id, archiv_id, name) -- ← Diese Kombination muss zum ON CONFLICT passen
        );
        """,
        """
        CREATE TABLE IF NOT EXISTS buch (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        ort_id INTEGER NOT NULL,
        titel TEXT NOT NULL,
        zeitraum TEXT,
        FOREIGN KEY (ort_id) REFERENCES ort(id)
        );
        """,
        """
        CREATE TABLE IF NOT EXISTS seite (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        buch_id INTEGER NOT NULL,
        seiten_nr TEXT NOT NULL,
        FOREIGN KEY (buch_id) REFERENCES buch(id)
        );
        """,
        """
        CREATE TABLE IF NOT EXISTS bild (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        seite_id INTEGER NOT NULL,
        url TEXT NOT NULL,
        ist_gesamtansicht BOOLEAN DEFAULT FALSE,
        FOREIGN KEY (seite_id) REFERENCES seite(id)
        );
        """
    };

    /**
     * Opens the database file and runs every schema statement. Any failure is reported on
     * {@code System.err} together with its stack trace.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try (Connection connection = DriverManager.getConnection("jdbc:sqlite:" + DB_FILE);
             Statement ddl = connection.createStatement()) {

            for (String createTable : SCHEMA) {
                ddl.execute(createTable);
            }

            System.out.println("✅ Tabellen erfolgreich erstellt in: " + DB_FILE);

        } catch (Exception e) {
            System.err.println("❌ Fehler beim Erstellen der Tabellen: " + e.getMessage());
            e.printStackTrace();
        }
    }
}
|
||||||
@ -0,0 +1,103 @@
|
|||||||
|
package de.roko.genalogy.downloader.database;
|
||||||
|
|
||||||
|
import org.jsoup.Jsoup;
|
||||||
|
import org.jsoup.nodes.Document;
|
||||||
|
import org.jsoup.nodes.Element;
|
||||||
|
import org.jsoup.select.Elements;
|
||||||
|
|
||||||
|
import java.sql.*;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public class ArchivStrukturParser {
|
||||||
|
|
||||||
|
private static final String DB = "archion.db";
|
||||||
|
|
||||||
|
public void parse() throws Exception {
|
||||||
|
try (Connection conn = DriverManager.getConnection("jdbc:sqlite:" + DB)) {
|
||||||
|
|
||||||
|
String sql = "SELECT id, name, link FROM archive ORDER BY id";
|
||||||
|
try (PreparedStatement stmt = conn.prepareStatement(sql);
|
||||||
|
ResultSet rs = stmt.executeQuery()) {
|
||||||
|
|
||||||
|
while (rs.next()) {
|
||||||
|
int archivId = rs.getInt("id");
|
||||||
|
String archivName = rs.getString("name");
|
||||||
|
String archivLink = rs.getString("link");
|
||||||
|
|
||||||
|
System.out.println("\n🔍 Verarbeite Archiv: " + archivName);
|
||||||
|
processArchiv(conn, archivId, archivLink);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void processArchiv(Connection conn, int archivId, String link) {
|
||||||
|
try {
|
||||||
|
Document doc = Jsoup.connect(link).get();
|
||||||
|
Elements items = doc.select("#archive-nav li.item a");
|
||||||
|
|
||||||
|
List<Kirchenkreis> kreise = new ArrayList<>();
|
||||||
|
List<Ort> orte = new ArrayList<>();
|
||||||
|
|
||||||
|
for (Element a : items) {
|
||||||
|
String name = a.text().trim();
|
||||||
|
String href = a.absUrl("href").trim();
|
||||||
|
|
||||||
|
if (name.toLowerCase().contains("kirchenkreis") || name.toLowerCase().contains("dekanat")) {
|
||||||
|
kreise.add(new Kirchenkreis(archivId, name, href));
|
||||||
|
} else {
|
||||||
|
orte.add(new Ort(null, archivId, name, href));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
saveKreise(kreise, conn);
|
||||||
|
saveOrteDirekt(orte, conn);
|
||||||
|
System.out.println("→ " + kreise.size() + " Kirchenkreise und " + orte.size() + " direkte Orte gespeichert.");
|
||||||
|
|
||||||
|
} catch (Exception e) {
|
||||||
|
System.err.println("⚠️ Fehler bei Archiv-Link " + link + ": " + e.getMessage());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void saveKreise(List<Kirchenkreis> list, Connection conn) throws SQLException {
|
||||||
|
String sql = """
|
||||||
|
INSERT INTO kreis (archiv_id, name, link)
|
||||||
|
VALUES (?, ?, ?)
|
||||||
|
ON CONFLICT(archiv_id, name)
|
||||||
|
DO UPDATE SET link = excluded.link;
|
||||||
|
""";
|
||||||
|
|
||||||
|
try (PreparedStatement stmt = conn.prepareStatement(sql)) {
|
||||||
|
for (Kirchenkreis k : list) {
|
||||||
|
stmt.setInt(1, k.archivId());
|
||||||
|
stmt.setString(2, k.name());
|
||||||
|
stmt.setString(3, k.link());
|
||||||
|
stmt.addBatch();
|
||||||
|
}
|
||||||
|
stmt.executeBatch();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void saveOrteDirekt(List<Ort> list, Connection conn) throws SQLException {
|
||||||
|
String sql = """
|
||||||
|
INSERT INTO ort (kreis_id, archiv_id, name, link)
|
||||||
|
VALUES (NULL, ?, ?, ?)
|
||||||
|
ON CONFLICT(archiv_id, name)
|
||||||
|
DO UPDATE SET link = excluded.link;
|
||||||
|
""";
|
||||||
|
|
||||||
|
try (PreparedStatement stmt = conn.prepareStatement(sql)) {
|
||||||
|
for (Ort o : list) {
|
||||||
|
stmt.setInt(1, o.archivId());
|
||||||
|
stmt.setString(2, o.name());
|
||||||
|
stmt.setString(3, o.link());
|
||||||
|
stmt.addBatch();
|
||||||
|
}
|
||||||
|
stmt.executeBatch();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public record Kirchenkreis(int archivId, String name, String link) {}
|
||||||
|
public record Ort(Integer kreisId, Integer archivId, String name, String link) {}
|
||||||
|
}
|
||||||
@ -0,0 +1,102 @@
|
|||||||
|
package de.roko.genalogy.downloader.database;
|
||||||
|
|
||||||
|
import io.github.bonigarcia.wdm.WebDriverManager;
|
||||||
|
import org.jsoup.Jsoup;
|
||||||
|
import org.jsoup.nodes.Document;
|
||||||
|
import org.jsoup.nodes.Element;
|
||||||
|
import org.jsoup.select.Elements;
|
||||||
|
import org.openqa.selenium.WebDriver;
|
||||||
|
import org.openqa.selenium.chrome.ChromeDriver;
|
||||||
|
|
||||||
|
import java.sql.*;
|
||||||
|
import java.util.Objects;
|
||||||
|
|
||||||
|
public class ArchiveInserter {
|
||||||
|
|
||||||
|
private static final String DB_FILE = "archion.db";
|
||||||
|
|
||||||
|
public static void main(String[] args) throws Exception {
|
||||||
|
new ArchiveInserter().run();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void run() throws Exception {
|
||||||
|
WebDriverManager.chromedriver().setup();
|
||||||
|
WebDriver driver = new ChromeDriver();
|
||||||
|
run(driver);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void run(WebDriver driver) throws Exception {
|
||||||
|
|
||||||
|
driver.get("https://www.archion.de/de/alle-archive");
|
||||||
|
|
||||||
|
String html = driver.getPageSource();
|
||||||
|
driver.quit();
|
||||||
|
|
||||||
|
Document doc = Jsoup.parse(html);
|
||||||
|
Elements stateDivs = doc.select("div[id^=state]");
|
||||||
|
|
||||||
|
try (Connection conn = DriverManager.getConnection("jdbc:sqlite:" + DB_FILE)) {
|
||||||
|
int added = 0, updated = 0;
|
||||||
|
|
||||||
|
for (Element stateDiv : stateDivs) {
|
||||||
|
String bundesland = stateDiv.selectFirst("a.h6.text-muted").text().trim();
|
||||||
|
Elements archiveLinks = stateDiv.select("ul > li > a");
|
||||||
|
|
||||||
|
for (Element link : archiveLinks) {
|
||||||
|
String archivname = link.text().trim();
|
||||||
|
String url = "https://www.archion.de" + link.attr("href").trim();
|
||||||
|
|
||||||
|
Integer existingId = findArchiveId(conn, bundesland, archivname);
|
||||||
|
if (existingId == null) {
|
||||||
|
insertArchive(conn, bundesland, archivname, url);
|
||||||
|
added++;
|
||||||
|
} else {
|
||||||
|
if (updateArchiveIfLinkChanged(conn, existingId, url)) {
|
||||||
|
updated++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
System.out.printf("✅ Archive verarbeitet. Neu: %d, aktualisiert: %d%n", added, updated);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private Integer findArchiveId(Connection conn, String bundesland, String name) throws SQLException {
|
||||||
|
String sql = "SELECT id FROM archive WHERE bundesland = ? AND name = ?";
|
||||||
|
try (PreparedStatement stmt = conn.prepareStatement(sql)) {
|
||||||
|
stmt.setString(1, bundesland);
|
||||||
|
stmt.setString(2, name);
|
||||||
|
ResultSet rs = stmt.executeQuery();
|
||||||
|
return rs.next() ? rs.getInt("id") : null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void insertArchive(Connection conn, String bundesland, String name, String link) throws SQLException {
|
||||||
|
String sql = "INSERT INTO archive (bundesland, name, link) VALUES (?, ?, ?)";
|
||||||
|
try (PreparedStatement stmt = conn.prepareStatement(sql)) {
|
||||||
|
stmt.setString(1, bundesland);
|
||||||
|
stmt.setString(2, name);
|
||||||
|
stmt.setString(3, link);
|
||||||
|
stmt.executeUpdate();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean updateArchiveIfLinkChanged(Connection conn, int id, String newLink) throws SQLException {
|
||||||
|
String sql = "SELECT link FROM archive WHERE id = ?";
|
||||||
|
try (PreparedStatement stmt = conn.prepareStatement(sql)) {
|
||||||
|
stmt.setInt(1, id);
|
||||||
|
ResultSet rs = stmt.executeQuery();
|
||||||
|
if (rs.next() && !Objects.equals(rs.getString("link"), newLink)) {
|
||||||
|
try (PreparedStatement updateStmt = conn.prepareStatement("UPDATE archive SET link = ? WHERE id = ?")) {
|
||||||
|
updateStmt.setString(1, newLink);
|
||||||
|
updateStmt.setInt(2, id);
|
||||||
|
updateStmt.executeUpdate();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@ -0,0 +1,29 @@
|
|||||||
|
package de.roko.genalogy.downloader.database;
|
||||||
|
|
||||||
|
import java.sql.*;
|
||||||
|
|
||||||
|
/**
 * Prints the contents of the {@code archive} table of {@code archion.db} to standard output.
 */
public class ArchiveReader {

    private static final String DB_FILE = "archion.db";

    /**
     * Writes one formatted line per archive, ordered by Bundesland and then by name. A database
     * error is reported on {@code System.err}; nothing is thrown.
     */
    public static void printAllArchives() {
        final String query = "SELECT id, bundesland, name, link FROM archive ORDER BY bundesland, name";
        final String jdbcUrl = "jdbc:sqlite:" + DB_FILE;

        try (Connection connection = DriverManager.getConnection(jdbcUrl);
             Statement statement = connection.createStatement();
             ResultSet rows = statement.executeQuery(query)) {

            while (rows.next()) {
                int id = rows.getInt("id");
                String bundesland = rows.getString("bundesland");
                String name = rows.getString("name");
                String link = rows.getString("link");

                System.out.printf("ID: %-3d | %-20s | %-60s | %s%n", id, bundesland, name, link);
            }

        } catch (SQLException dbError) {
            System.err.println("❌ Fehler beim Lesen aus der Datenbank: " + dbError.getMessage());
        }
    }
}
|
||||||
@ -0,0 +1,77 @@
|
|||||||
|
package de.roko.genalogy.downloader.database;
|
||||||
|
|
||||||
|
import org.jsoup.Jsoup;
|
||||||
|
import org.jsoup.nodes.Document;
|
||||||
|
import org.jsoup.nodes.Element;
|
||||||
|
import org.jsoup.select.Elements;
|
||||||
|
|
||||||
|
import java.sql.*;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public class KirchenkreisExtractor {
|
||||||
|
|
||||||
|
private static final String DB = "archion.db";
|
||||||
|
|
||||||
|
public void readNwrite() throws Exception {
|
||||||
|
try (Connection conn = DriverManager.getConnection("jdbc:sqlite:" + DB)) {
|
||||||
|
|
||||||
|
String sql = "SELECT id, name, link FROM archive ORDER BY id";
|
||||||
|
try (PreparedStatement stmt = conn.prepareStatement(sql);
|
||||||
|
ResultSet rs = stmt.executeQuery()) {
|
||||||
|
|
||||||
|
while (rs.next()) {
|
||||||
|
int id = rs.getInt("id");
|
||||||
|
String name = rs.getString("name");
|
||||||
|
String link = rs.getString("link");
|
||||||
|
System.out.println("\n🔍 Lade Kirchenkreise f\u00fcr Archiv: " + name);
|
||||||
|
|
||||||
|
List<Kirchenkreis> kreise = extractKirchenkreise(id, link);
|
||||||
|
saveKirchenkreise(kreise, conn);
|
||||||
|
System.out.println("→ " + kreise.size() + " Kirchenkreise gespeichert.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static List<Kirchenkreis> extractKirchenkreise(int archivId, String url) {
|
||||||
|
List<Kirchenkreis> list = new ArrayList<>();
|
||||||
|
try {
|
||||||
|
Document doc = Jsoup.connect(url).get();
|
||||||
|
Elements items = doc.select("#archive-nav li.item a");
|
||||||
|
|
||||||
|
for (Element link : items) {
|
||||||
|
String name = link.text().trim();
|
||||||
|
String href = link.absUrl("href").trim();
|
||||||
|
if (!name.isEmpty() && !href.isEmpty()) {
|
||||||
|
list.add(new Kirchenkreis(archivId, name, href));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
System.err.println("⚠️ Fehler bei URL " + url + ": " + e.getMessage());
|
||||||
|
}
|
||||||
|
return list;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void saveKirchenkreise(List<Kirchenkreis> list, Connection conn) throws SQLException {
|
||||||
|
String sql = """
|
||||||
|
INSERT INTO kreis (archiv_id, name, link)
|
||||||
|
VALUES (?, ?, ?)
|
||||||
|
ON CONFLICT(archiv_id, name)
|
||||||
|
DO UPDATE SET link = excluded.link;
|
||||||
|
""";
|
||||||
|
|
||||||
|
try (PreparedStatement stmt = conn.prepareStatement(sql)) {
|
||||||
|
for (Kirchenkreis k : list) {
|
||||||
|
stmt.setInt(1, k.archivId());
|
||||||
|
stmt.setString(2, k.name());
|
||||||
|
stmt.setString(3, k.link());
|
||||||
|
stmt.addBatch();
|
||||||
|
}
|
||||||
|
stmt.executeBatch();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public record Kirchenkreis(int archivId, String name, String link) {}
|
||||||
|
}
|
||||||
|
|
||||||
@ -0,0 +1,77 @@
|
|||||||
|
package de.roko.genalogy.downloader.database;
|
||||||
|
|
||||||
|
import org.jsoup.Jsoup;
|
||||||
|
import org.jsoup.nodes.Document;
|
||||||
|
import org.jsoup.nodes.Element;
|
||||||
|
import org.jsoup.select.Elements;
|
||||||
|
|
||||||
|
import java.sql.*;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public class KreisOrtExtractor {
|
||||||
|
|
||||||
|
private static final String DB = "archion.db";
|
||||||
|
|
||||||
|
public void readNwrite() throws Exception {
|
||||||
|
try (Connection conn = DriverManager.getConnection("jdbc:sqlite:" + DB)) {
|
||||||
|
String sql = "SELECT id, archiv_id, name, link FROM kreis ORDER BY id";
|
||||||
|
try (PreparedStatement stmt = conn.prepareStatement(sql);
|
||||||
|
ResultSet rs = stmt.executeQuery()) {
|
||||||
|
|
||||||
|
while (rs.next()) {
|
||||||
|
int kreisId = rs.getInt("id");
|
||||||
|
int archivId = rs.getInt("archiv_id");
|
||||||
|
String name = rs.getString("name");
|
||||||
|
String link = rs.getString("link");
|
||||||
|
|
||||||
|
System.out.println("\n🔍 Lade Orte für Kirchenkreis/Dekanat: " + name);
|
||||||
|
List<Ort> orte = extractOrte(kreisId, archivId, link);
|
||||||
|
saveOrte(orte, conn);
|
||||||
|
System.out.println("→ " + orte.size() + " Orte gespeichert.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static List<Ort> extractOrte(int kreisId, int archivId, String url) {
|
||||||
|
List<Ort> list = new ArrayList<>();
|
||||||
|
try {
|
||||||
|
Document doc = Jsoup.connect(url).get();
|
||||||
|
Elements items = doc.select(".list li a");
|
||||||
|
for (Element link : items) {
|
||||||
|
String name = link.text().trim();
|
||||||
|
String href = link.absUrl("href").trim();
|
||||||
|
if (!name.isEmpty() && !href.isEmpty()) {
|
||||||
|
list.add(new Ort(kreisId, archivId, name, href));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
System.err.println("⚠️ Fehler bei URL " + url + ": " + e.getMessage());
|
||||||
|
}
|
||||||
|
return list;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void saveOrte(List<Ort> list, Connection conn) throws SQLException {
|
||||||
|
String sql = """
|
||||||
|
INSERT INTO ort (kreis_id, archiv_id, name, link)
|
||||||
|
VALUES (?, ?, ?, ?)
|
||||||
|
ON CONFLICT(kreis_id, archiv_id, name)
|
||||||
|
DO UPDATE SET link = excluded.link;
|
||||||
|
""";
|
||||||
|
|
||||||
|
try (PreparedStatement stmt = conn.prepareStatement(sql)) {
|
||||||
|
for (Ort o : list) {
|
||||||
|
stmt.setInt(1, o.kreisId());
|
||||||
|
stmt.setInt(2, o.archivId());
|
||||||
|
stmt.setString(3, o.name());
|
||||||
|
stmt.setString(4, o.link());
|
||||||
|
stmt.addBatch();
|
||||||
|
}
|
||||||
|
stmt.executeBatch();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public record Ort(int kreisId, int archivId, String name, String link) {}
|
||||||
|
}
|
||||||
@ -0,0 +1,76 @@
|
|||||||
|
package de.roko.genalogy.downloader.database;
|
||||||
|
|
||||||
|
import org.jsoup.Jsoup;
|
||||||
|
import org.jsoup.nodes.Document;
|
||||||
|
import org.jsoup.nodes.Element;
|
||||||
|
import org.jsoup.select.Elements;
|
||||||
|
|
||||||
|
import java.sql.*;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public class OrtExtractor {
|
||||||
|
|
||||||
|
private static final String DB = "archion.db";
|
||||||
|
|
||||||
|
public void read() throws Exception {
|
||||||
|
try (Connection conn = DriverManager.getConnection("jdbc:sqlite:" + DB)) {
|
||||||
|
|
||||||
|
String sql = "SELECT id, name, link FROM kreis WHERE status = 'verfügbar' ORDER BY id";
|
||||||
|
try (PreparedStatement stmt = conn.prepareStatement(sql);
|
||||||
|
ResultSet rs = stmt.executeQuery()) {
|
||||||
|
|
||||||
|
while (rs.next()) {
|
||||||
|
int id = rs.getInt("id");
|
||||||
|
String name = rs.getString("name");
|
||||||
|
String link = rs.getString("link");
|
||||||
|
System.out.println("\n🔍 Lade Orte für Kreis/Dekanat: " + name);
|
||||||
|
|
||||||
|
List<Ort> orte = extractOrte(id, link);
|
||||||
|
saveOrte(orte, conn);
|
||||||
|
System.out.println("→ " + orte.size() + " Orte gespeichert.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static List<Ort> extractOrte(int kreisId, String url) {
|
||||||
|
List<Ort> list = new ArrayList<>();
|
||||||
|
try {
|
||||||
|
Document doc = Jsoup.connect(url).get();
|
||||||
|
Elements items = doc.select(".list li a");
|
||||||
|
|
||||||
|
for (Element link : items) {
|
||||||
|
String name = link.text().trim();
|
||||||
|
String href = link.absUrl("href").trim();
|
||||||
|
if (!name.isEmpty() && !href.isEmpty()) {
|
||||||
|
list.add(new Ort(kreisId, name, href));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
System.err.println("⚠️ Fehler bei URL " + url + ": " + e.getMessage());
|
||||||
|
}
|
||||||
|
return list;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void saveOrte(List<Ort> list, Connection conn) throws SQLException {
|
||||||
|
String sql = """
|
||||||
|
INSERT INTO ort (kreis_id, name, link)
|
||||||
|
VALUES (?, ?, ?)
|
||||||
|
ON CONFLICT(kreis_id, name)
|
||||||
|
DO UPDATE SET link = excluded.link;
|
||||||
|
""";
|
||||||
|
|
||||||
|
try (PreparedStatement stmt = conn.prepareStatement(sql)) {
|
||||||
|
for (Ort o : list) {
|
||||||
|
stmt.setInt(1, o.kreisId());
|
||||||
|
stmt.setString(2, o.name());
|
||||||
|
stmt.setString(3, o.link());
|
||||||
|
stmt.addBatch();
|
||||||
|
}
|
||||||
|
stmt.executeBatch();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public record Ort(int kreisId, String name, String link) {}
|
||||||
|
}
|
||||||
@ -0,0 +1,127 @@
|
|||||||
|
|
||||||
|
package de.roko.genalogy.downloader.tools;
|
||||||
|
|
||||||
|
import org.openqa.selenium.*;
|
||||||
|
import org.openqa.selenium.support.ui.WebDriverWait;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.nio.file.Files;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
import java.time.Duration;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public class ArchionBatchDownloader {
|
||||||
|
|
||||||
|
private final WebDriver driver;
|
||||||
|
private final WebDriverWait wait;
|
||||||
|
private final BookTileDownloaderLite downloader;
|
||||||
|
|
||||||
|
public ArchionBatchDownloader(WebDriver driver) {
|
||||||
|
this.driver = driver;
|
||||||
|
this.wait = new WebDriverWait(driver, Duration.ofSeconds(10));
|
||||||
|
this.downloader = new BookTileDownloaderLite(driver);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void downloadAndStitchPages(String startUrl, String zielVerzeichnis) throws InterruptedException {
|
||||||
|
int seitenanzahl = ermittleSeitenAnzahl(startUrl);
|
||||||
|
downloadAndStitchPages(startUrl, zielVerzeichnis, seitenanzahl);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void downloadAndStitchPages(String startUrl, String zielVerzeichnis, int seitenAnzahl) {
|
||||||
|
try {
|
||||||
|
driver.get(startUrl);
|
||||||
|
Thread.sleep(2000); // initiale Ladezeit
|
||||||
|
|
||||||
|
for (int seite = 1; seite <= seitenAnzahl; seite++) {
|
||||||
|
File tileFolder = new File(zielVerzeichnis, String.format("seite-%03d-tiles", seite));
|
||||||
|
File outputFile = new File(zielVerzeichnis, String.format("seite-%03d.png", seite));
|
||||||
|
|
||||||
|
if (outputFile.exists()) {
|
||||||
|
System.out.printf("⏭️ Seite %d bereits vorhanden (PNG) – wird übersprungen%n", seite);
|
||||||
|
} else {
|
||||||
|
System.out.printf("📄 Verarbeite Seite %d...%n", seite);
|
||||||
|
|
||||||
|
// Download
|
||||||
|
downloader.downloadVisibleTiles(null, zielVerzeichnis, seite);
|
||||||
|
|
||||||
|
// Stitch
|
||||||
|
TileStitcherFixedGrid.stitch(tileFolder, outputFile);
|
||||||
|
|
||||||
|
// Tile-Ordner löschen
|
||||||
|
if (tileFolder.exists()) {
|
||||||
|
deleteDirectoryRecursive(tileFolder.toPath());
|
||||||
|
System.out.printf("🗑️ Tile-Ordner gelöscht: %s%n", tileFolder.getName());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// letzte Seite erreicht?
|
||||||
|
if (seite == seitenAnzahl) {
|
||||||
|
System.out.println("✅ Alle gewünschten Seiten verarbeitet.");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Weiterblättern
|
||||||
|
List<WebElement> nextBtns = driver.findElements(By.cssSelector("a.next-page"));
|
||||||
|
if (nextBtns.isEmpty()) {
|
||||||
|
System.out.println("🛑 Kein 'Nächste Seite'-Button gefunden – Abbruch.");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
WebElement dropdown = driver.findElement(By.cssSelector("select.page-select"));
|
||||||
|
String currentValue = dropdown.getAttribute("value");
|
||||||
|
|
||||||
|
nextBtns.get(0).click();
|
||||||
|
|
||||||
|
new WebDriverWait(driver, Duration.ofSeconds(10)).until(d ->
|
||||||
|
!dropdown.getAttribute("value").equals(currentValue)
|
||||||
|
);
|
||||||
|
Thread.sleep(500);
|
||||||
|
}
|
||||||
|
|
||||||
|
System.out.println("✅ Batch-Abschluss: Alle Seiten wurden verarbeitet.");
|
||||||
|
|
||||||
|
} catch (Exception e) {
|
||||||
|
System.err.println("❌ Fehler im Batch-Prozess: " + e.getMessage());
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void deleteDirectoryRecursive(Path path) {
|
||||||
|
try {
|
||||||
|
Files.walk(path)
|
||||||
|
.sorted((a, b) -> b.compareTo(a)) // erst Dateien, dann Verzeichnisse
|
||||||
|
.forEach(p -> {
|
||||||
|
try {
|
||||||
|
Files.deleteIfExists(p);
|
||||||
|
} catch (Exception e) {
|
||||||
|
System.err.printf("⚠️ Fehler beim Löschen von %s: %s%n", p, e.getMessage());
|
||||||
|
}
|
||||||
|
});
|
||||||
|
} catch (Exception e) {
|
||||||
|
System.err.printf("⚠️ Fehler beim Löschen des Verzeichnisses %s: %s%n", path, e.getMessage());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public int ermittleSeitenAnzahl(String startUrl) throws InterruptedException {
|
||||||
|
driver.get(startUrl);
|
||||||
|
Thread.sleep(2000); // initiale Ladezeit
|
||||||
|
return ermittleSeitenAnzahl();
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Ermittelt die Seitenanzahl aus dem Dropdown-Menü im Archion-Viewer.
|
||||||
|
*/
|
||||||
|
public int ermittleSeitenAnzahl() {
|
||||||
|
try {
|
||||||
|
WebElement dropdown = driver.findElement(By.cssSelector("select.page-select"));
|
||||||
|
List<WebElement> options = dropdown.findElements(By.tagName("option"));
|
||||||
|
int anzahl = options.size();
|
||||||
|
System.out.printf("📄 Seitenanzahl erkannt: %d Seiten%n", anzahl);
|
||||||
|
return anzahl;
|
||||||
|
} catch (Exception e) {
|
||||||
|
System.err.println("❌ Fehler beim Ermitteln der Seitenanzahl: " + e.getMessage());
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -0,0 +1,166 @@
|
|||||||
|
package de.roko.genalogy.downloader.tools;
|
||||||
|
|
||||||
|
import org.openqa.selenium.*;
|
||||||
|
import org.openqa.selenium.interactions.Actions;
|
||||||
|
import org.openqa.selenium.support.ui.ExpectedConditions;
|
||||||
|
import org.openqa.selenium.support.ui.WebDriverWait;
|
||||||
|
|
||||||
|
import javax.imageio.ImageIO;
|
||||||
|
import java.awt.image.BufferedImage;
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.net.URL;
|
||||||
|
import java.nio.file.Files;
|
||||||
|
import java.time.Duration;
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
public class BookTileDownloader {
|
||||||
|
|
||||||
|
private final WebDriver driver;
|
||||||
|
private final JavascriptExecutor js;
|
||||||
|
private final WebDriverWait wait;
|
||||||
|
private final Actions actions;
|
||||||
|
|
||||||
|
public BookTileDownloader(WebDriver driver) {
|
||||||
|
this.driver = driver;
|
||||||
|
this.js = (JavascriptExecutor) driver;
|
||||||
|
this.wait = new WebDriverWait(driver, Duration.ofSeconds(10));
|
||||||
|
this.actions = new Actions(driver);
|
||||||
|
}
|
||||||
|
|
||||||
|
private int extractPixel(String style, String property) {
|
||||||
|
try {
|
||||||
|
for (String part : style.split(";")) {
|
||||||
|
part = part.trim();
|
||||||
|
if (part.startsWith(property)) {
|
||||||
|
return Integer.parseInt(part.replaceAll("[^0-9]", ""));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
System.err.printf("⚠️ Fehler beim Extrahieren von '%s' aus Style: %s%n", property, style);
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void downloadTilesForPage(String viewerUrl, String zielVerzeichnis, int seitenIndex) {
|
||||||
|
try {
|
||||||
|
System.out.printf("🌐 Lade Viewer-Seite: %s%n", viewerUrl);
|
||||||
|
driver.get(viewerUrl);
|
||||||
|
wait.until(ExpectedConditions.presenceOfElementLocated(By.cssSelector(".zoom-holder")));
|
||||||
|
|
||||||
|
// Zoom maximieren
|
||||||
|
for (int z = 0; z < 20; z++) {
|
||||||
|
try {
|
||||||
|
WebElement zoomState = driver.findElement(By.cssSelector("a.zoom-state .current"));
|
||||||
|
String style = zoomState.getAttribute("style");
|
||||||
|
if (style != null && style.contains("left: 120px")) break;
|
||||||
|
WebElement zoomInButton = driver.findElement(By.cssSelector("a.zoom-in"));
|
||||||
|
if (zoomInButton.isDisplayed() && zoomInButton.isEnabled()) {
|
||||||
|
zoomInButton.click();
|
||||||
|
Thread.sleep(400);
|
||||||
|
} else break;
|
||||||
|
} catch (Exception e) {
|
||||||
|
System.err.println("⚠️ Fehler beim Zoom: " + e.getMessage());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
WebElement zoomHolder = driver.findElement(By.cssSelector(".zoom-holder"));
|
||||||
|
File tileFolder = new File(zielVerzeichnis, String.format("seite-%03d-tiles", seitenIndex));
|
||||||
|
if (!tileFolder.exists()) Files.createDirectories(tileFolder.toPath());
|
||||||
|
|
||||||
|
int scrollWidth = 8192;
|
||||||
|
int scrollHeight = 8192;
|
||||||
|
try {
|
||||||
|
scrollWidth = ((Long) js.executeScript("return arguments[0].scrollWidth;", zoomHolder)).intValue();
|
||||||
|
scrollHeight = ((Long) js.executeScript("return arguments[0].scrollHeight;", zoomHolder)).intValue();
|
||||||
|
System.out.printf("📐 Scrollbereich erkannt: %d x %d px%n", scrollWidth, scrollHeight);
|
||||||
|
} catch (Exception e) {
|
||||||
|
System.err.println("⚠️ Fehler beim Auslesen der Scrollgröße: " + e.getMessage());
|
||||||
|
}
|
||||||
|
|
||||||
|
Set<String> seenSrcs = new HashSet<>();
|
||||||
|
int step = 256;
|
||||||
|
int tilesSaved = 0;
|
||||||
|
int tilesSkipped = 0;
|
||||||
|
int errors = 0;
|
||||||
|
long startTime = System.currentTimeMillis();
|
||||||
|
|
||||||
|
for (int y = 0; y <= scrollHeight; y += step) {
|
||||||
|
for (int x = 0; x <= scrollWidth; x += step) {
|
||||||
|
try {
|
||||||
|
System.out.printf("🔄 Scrolle zu Position x=%d, y=%d...%n", x, y);
|
||||||
|
js.executeScript("arguments[0].scrollTo(arguments[1], arguments[2]);", zoomHolder, x, y);
|
||||||
|
actions.moveToElement(zoomHolder, x % 200 + 20, y % 200 + 20).perform();
|
||||||
|
Thread.sleep(800);
|
||||||
|
} catch (Exception e) {
|
||||||
|
System.err.printf("⚠️ Fehler beim Scrollen zu (%d,%d): %s%n", x, y, e.getMessage());
|
||||||
|
errors++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
List<WebElement> tiles;
|
||||||
|
try {
|
||||||
|
tiles = driver.findElements(By.cssSelector(".zoom-tiles img"));
|
||||||
|
} catch (Exception e) {
|
||||||
|
System.err.println("⚠️ Fehler beim Finden der Tiles: " + e.getMessage());
|
||||||
|
errors++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
System.out.printf("🔍 %d Tiles gefunden an Position (%d,%d)%n", tiles.size(), x, y);
|
||||||
|
|
||||||
|
for (WebElement img : tiles) {
|
||||||
|
try {
|
||||||
|
String src = img.getAttribute("_src");
|
||||||
|
if (src == null) {
|
||||||
|
tilesSkipped++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (src.startsWith("/")) {
|
||||||
|
src = "https://www.archion.de" + src;
|
||||||
|
}
|
||||||
|
if (seenSrcs.contains(src)) {
|
||||||
|
tilesSkipped++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
String style = img.getAttribute("style");
|
||||||
|
int left = extractPixel(style, "left");
|
||||||
|
int top = extractPixel(style, "top");
|
||||||
|
String filename = String.format("tile_%d_%d.png", left, top);
|
||||||
|
File tileFile = new File(tileFolder, filename);
|
||||||
|
|
||||||
|
try (InputStream in = new URL(src).openStream()) {
|
||||||
|
BufferedImage tile = ImageIO.read(in);
|
||||||
|
if (tile != null) {
|
||||||
|
ImageIO.write(tile, "png", tileFile);
|
||||||
|
tilesSaved++;
|
||||||
|
System.out.printf("💾 Gespeichert: %s%n", filename);
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
System.err.printf("❌ Fehler beim Laden oder Schreiben von: %s%n", src);
|
||||||
|
errors++;
|
||||||
|
}
|
||||||
|
|
||||||
|
seenSrcs.add(src);
|
||||||
|
} catch (Exception e) {
|
||||||
|
System.err.println("⚠️ Fehler beim Verarbeiten eines Tiles: " + e.getMessage());
|
||||||
|
errors++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
long elapsed = (System.currentTimeMillis() - startTime) / 1000;
|
||||||
|
System.out.printf("📊 Gesamt: %d gespeichert, %d übersprungen, %d Fehler, %ds seit Start%n%n",
|
||||||
|
tilesSaved, tilesSkipped, errors, elapsed);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
System.out.printf("✅ Alle Tiles für Seite %d gespeichert (%d Tiles) in %s%n",
|
||||||
|
seitenIndex, tilesSaved, tileFolder.getAbsolutePath());
|
||||||
|
|
||||||
|
} catch (Exception e) {
|
||||||
|
System.err.println("❌ Schwerer Fehler: " + e.getMessage());
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -0,0 +1,122 @@
|
|||||||
|
package de.roko.genalogy.downloader.tools;
|
||||||
|
|
||||||
|
import org.openqa.selenium.*;
|
||||||
|
import org.openqa.selenium.interactions.Actions;
|
||||||
|
import org.openqa.selenium.support.ui.WebDriverWait;
|
||||||
|
import org.openqa.selenium.support.ui.ExpectedConditions;
|
||||||
|
|
||||||
|
import javax.imageio.ImageIO;
|
||||||
|
import java.awt.image.BufferedImage;
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.net.URL;
|
||||||
|
import java.nio.file.Files;
|
||||||
|
import java.time.Duration;
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
public class BookTileDownloaderLite {
|
||||||
|
|
||||||
|
private final WebDriver driver;
|
||||||
|
private final JavascriptExecutor js;
|
||||||
|
private final WebDriverWait wait;
|
||||||
|
private final Actions actions;
|
||||||
|
|
||||||
|
public BookTileDownloaderLite(WebDriver driver) {
|
||||||
|
this.driver = driver;
|
||||||
|
this.js = (JavascriptExecutor) driver;
|
||||||
|
this.wait = new WebDriverWait(driver, Duration.ofSeconds(10));
|
||||||
|
this.actions = new Actions(driver);
|
||||||
|
}
|
||||||
|
|
||||||
|
private int extractPixel(String style, String property) {
|
||||||
|
try {
|
||||||
|
for (String part : style.split(";")) {
|
||||||
|
part = part.trim();
|
||||||
|
if (part.startsWith(property)) {
|
||||||
|
return Integer.parseInt(part.replaceAll("[^0-9]", ""));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
System.err.printf("⚠️ Fehler beim Extrahieren von '%s' aus Style: %s%n", property, style);
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void downloadVisibleTiles(String viewerUrlOrNull, String zielVerzeichnis, int seitenIndex) {
|
||||||
|
try {
|
||||||
|
if (viewerUrlOrNull != null) {
|
||||||
|
driver.get(viewerUrlOrNull);
|
||||||
|
wait.until(ExpectedConditions.presenceOfElementLocated(By.cssSelector(".zoom-holder")));
|
||||||
|
Thread.sleep(1000);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Zoom unabhängig vom Seitenaufruf
|
||||||
|
try {
|
||||||
|
for (int z = 0; z < 20; z++) {
|
||||||
|
WebElement zoomState = driver.findElement(By.cssSelector("a.zoom-state .current"));
|
||||||
|
String style = zoomState.getAttribute("style");
|
||||||
|
if (style != null && style.contains("left: 120px")) break;
|
||||||
|
WebElement zoomInButton = driver.findElement(By.cssSelector("a.zoom-in"));
|
||||||
|
if (zoomInButton.isDisplayed() && zoomInButton.isEnabled()) {
|
||||||
|
zoomInButton.click();
|
||||||
|
Thread.sleep(400);
|
||||||
|
} else break;
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
System.err.println("⚠️ Fehler beim Zoom: " + e.getMessage());
|
||||||
|
}
|
||||||
|
|
||||||
|
WebElement zoomHolder = driver.findElement(By.cssSelector(".zoom-holder"));
|
||||||
|
js.executeScript("arguments[0].scrollTo(0, 0);", zoomHolder);
|
||||||
|
actions.moveToElement(zoomHolder, 100, 100).perform();
|
||||||
|
Thread.sleep(800); // Lazy loading
|
||||||
|
|
||||||
|
File tileFolder = new File(zielVerzeichnis, String.format("seite-%03d-tiles", seitenIndex));
|
||||||
|
if (!tileFolder.exists()) Files.createDirectories(tileFolder.toPath());
|
||||||
|
|
||||||
|
Set<String> seenSrcs = new HashSet<>();
|
||||||
|
int tilesSaved = 0;
|
||||||
|
int errors = 0;
|
||||||
|
|
||||||
|
List<WebElement> tiles = driver.findElements(By.cssSelector(".zoom-tiles img"));
|
||||||
|
System.out.printf("🔍 Seite %d: %d sichtbare Tiles gefunden%n", seitenIndex, tiles.size());
|
||||||
|
|
||||||
|
for (WebElement img : tiles) {
|
||||||
|
try {
|
||||||
|
String src = img.getAttribute("_src");
|
||||||
|
if (src == null) continue;
|
||||||
|
if (src.startsWith("/")) src = "https://www.archion.de" + src;
|
||||||
|
if (!seenSrcs.add(src)) continue;
|
||||||
|
|
||||||
|
String style = img.getAttribute("style");
|
||||||
|
int left = extractPixel(style, "left");
|
||||||
|
int top = extractPixel(style, "top");
|
||||||
|
String filename = String.format("tile_%d_%d.png", left, top);
|
||||||
|
File tileFile = new File(tileFolder, filename);
|
||||||
|
|
||||||
|
try (InputStream in = new URL(src).openStream()) {
|
||||||
|
BufferedImage tile = ImageIO.read(in);
|
||||||
|
if (tile != null) {
|
||||||
|
ImageIO.write(tile, "png", tileFile);
|
||||||
|
tilesSaved++;
|
||||||
|
System.out.printf("💾 %s gespeichert%n", filename);
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
System.err.printf("❌ Fehler beim Speichern von: %s%n", src);
|
||||||
|
errors++;
|
||||||
|
}
|
||||||
|
|
||||||
|
} catch (Exception e) {
|
||||||
|
System.err.println("⚠️ Fehler beim Tile-Handling: " + e.getMessage());
|
||||||
|
errors++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
System.out.printf("✅ Seite %d: %d Tiles gespeichert, %d Fehler%n", seitenIndex, tilesSaved, errors);
|
||||||
|
|
||||||
|
} catch (Exception e) {
|
||||||
|
System.err.println("❌ Schwerer Fehler beim Download der Seite: " + e.getMessage());
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -0,0 +1,124 @@
|
|||||||
|
package de.roko.genalogy.downloader.tools;
|
||||||
|
|
||||||
|
import javax.imageio.ImageIO;
|
||||||
|
import java.awt.*;
|
||||||
|
import java.awt.image.BufferedImage;
|
||||||
|
import java.io.File;
|
||||||
|
import java.util.*;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.regex.Matcher;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
/**
 * Stitches tile images named {@code tile_<x>_<y>.png} into one PNG.
 *
 * <p>The numbers in the file names are only used to order the tiles: the distinct x values
 * become columns and the distinct y values become rows (both ascending). Each column is as
 * wide as its widest tile and each row as tall as its tallest tile, so smaller tiles in the
 * last column or row do not distort the layout.
 */
public class TileStitcherFixedGrid {

    private static final Pattern TILE_PATTERN = Pattern.compile("tile_(\\d+)_(\\d+)\\.png");

    /**
     * Combines all matching tiles of {@code inputFolder} into {@code outputFile}.
     *
     * <p>Problems are reported on standard error and never thrown. Nothing is written when
     * the folder is missing or empty, contains no matching file names, or none of the
     * matching files is a readable image. Tiles that fail to decode are skipped.
     *
     * @param inputFolder folder containing the {@code tile_<x>_<y>.png} files
     * @param outputFile  PNG file to write
     */
    public static void stitch(File inputFolder, File outputFile) {
        File[] files = inputFolder.listFiles();
        if (files == null || files.length == 0) {
            System.err.println("❌ Keine Tiles im Ordner: " + inputFolder.getAbsolutePath());
            return;
        }

        List<Tile> tiles = new ArrayList<Tile>();
        TreeSet<Integer> colSet = new TreeSet<Integer>();
        TreeSet<Integer> rowSet = new TreeSet<Integer>();

        for (File file : files) {
            Matcher matcher = TILE_PATTERN.matcher(file.getName());
            if (!matcher.matches()) {
                continue;
            }
            int x = Integer.parseInt(matcher.group(1));
            int y = Integer.parseInt(matcher.group(2));
            tiles.add(new Tile(file, x, y));
            colSet.add(x);
            rowSet.add(y);
        }

        if (tiles.isEmpty()) {
            System.err.println("❌ Keine gültigen Tiles gefunden.");
            return;
        }

        Map<Integer, Integer> colIndexMap = indexByPosition(colSet);
        Map<Integer, Integer> rowIndexMap = indexByPosition(rowSet);
        int cols = colSet.size();
        int rows = rowSet.size();

        // First pass: measure every tile so the grid does not depend on which file
        // listFiles() happens to return first.
        int[] colWidths = new int[cols];
        int[] rowHeights = new int[rows];
        int tileWidth = 0;
        int tileHeight = 0;
        for (Tile tile : tiles) {
            Dimension size = readSize(tile.file);
            if (size == null) {
                continue;
            }
            int colIndex = colIndexMap.get(tile.x);
            int rowIndex = rowIndexMap.get(tile.y);
            colWidths[colIndex] = Math.max(colWidths[colIndex], size.width);
            rowHeights[rowIndex] = Math.max(rowHeights[rowIndex], size.height);
            tileWidth = Math.max(tileWidth, size.width);
            tileHeight = Math.max(tileHeight, size.height);
        }

        if (tileWidth == 0 || tileHeight == 0) {
            System.err.println("❌ Keine lesbare Kachel gefunden.");
            return;
        }

        // Columns/rows without any measurable tile keep the largest tile size as a placeholder.
        int[] colOffsets = offsets(colWidths, tileWidth);
        int[] rowOffsets = offsets(rowHeights, tileHeight);
        int fullWidth = colOffsets[cols];
        int fullHeight = rowOffsets[rows];

        System.out.printf("📐 %d×%d Tiles à %dx%d → Bildgröße: %dx%d px%n",
                cols, rows, tileWidth, tileHeight, fullWidth, fullHeight);

        BufferedImage result = new BufferedImage(fullWidth, fullHeight, BufferedImage.TYPE_INT_RGB);
        Graphics2D g = result.createGraphics();

        // Second pass: decode and draw.
        int count = 0;
        try {
            for (Tile tile : tiles) {
                try {
                    BufferedImage img = ImageIO.read(tile.file);
                    if (img == null) {
                        continue;
                    }
                    int dx = colOffsets[colIndexMap.get(tile.x)];
                    int dy = rowOffsets[rowIndexMap.get(tile.y)];
                    g.drawImage(img, dx, dy, null);
                    count++;
                } catch (Exception e) {
                    System.err.printf("⚠️ Fehler bei %s: %s%n", tile.file.getName(), e.getMessage());
                }
            }
        } finally {
            g.dispose();
        }

        try {
            if (!ImageIO.write(result, "png", outputFile)) {
                System.err.println("❌ Fehler beim Speichern: kein PNG-Writer verfügbar");
                return;
            }
            System.out.printf("✅ Gesamtbild gespeichert: %s (%d Tiles verwendet)%n",
                    outputFile.getAbsolutePath(), count);
        } catch (Exception e) {
            System.err.println("❌ Fehler beim Speichern: " + e.getMessage());
        }
    }

    /** Maps each position to its 0-based rank in ascending order. */
    private static Map<Integer, Integer> indexByPosition(SortedSet<Integer> positions) {
        Map<Integer, Integer> indexMap = new HashMap<Integer, Integer>();
        int index = 0;
        for (Integer position : positions) {
            indexMap.put(position, index++);
        }
        return indexMap;
    }

    /**
     * Turns per-cell sizes into start offsets; the extra last element is the total size.
     * Cells with size 0 are given {@code fallback} instead.
     */
    private static int[] offsets(int[] sizes, int fallback) {
        int[] result = new int[sizes.length + 1];
        for (int i = 0; i < sizes.length; i++) {
            result[i + 1] = result[i] + (sizes[i] > 0 ? sizes[i] : fallback);
        }
        return result;
    }

    /**
     * Reads the pixel dimensions of an image file through an {@code ImageReader}, without
     * decoding it into a {@code BufferedImage}.
     *
     * @return the size, or {@code null} if the file is not a readable image
     */
    private static Dimension readSize(File file) {
        try (javax.imageio.stream.ImageInputStream in = ImageIO.createImageInputStream(file)) {
            if (in == null) {
                return null;
            }
            Iterator<javax.imageio.ImageReader> readers = ImageIO.getImageReaders(in);
            if (!readers.hasNext()) {
                return null;
            }
            javax.imageio.ImageReader reader = readers.next();
            try {
                reader.setInput(in, true, true);
                return new Dimension(reader.getWidth(0), reader.getHeight(0));
            } finally {
                reader.dispose();
            }
        } catch (Exception e) {
            // An unreadable file is not fatal here: it is left out of the measurement.
            return null;
        }
    }

    // Helper class (Java 8 compatible, no record)
    private static class Tile {
        final File file;
        final int x;
        final int y;

        Tile(File file, int x, int y) {
            this.file = file;
            this.x = x;
            this.y = y;
        }
    }
}
|
||||||
0
src/main/resources/archive.csv
Normal file
0
src/main/resources/archive.csv
Normal file
|
|
@ -0,0 +1,59 @@
|
|||||||
|
package de.roko.genalogy.downloader.util;
|
||||||
|
|
||||||
|
import de.roko.genalogy.downloader.archion.ArchionLoginHelper;
|
||||||
|
import de.roko.genalogy.downloader.tools.ArchionBatchDownloader;
|
||||||
|
import org.junit.jupiter.api.AfterEach;
|
||||||
|
import org.junit.jupiter.api.BeforeAll;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
import org.openqa.selenium.WebDriver;
|
||||||
|
import org.openqa.selenium.chrome.ChromeDriver;
|
||||||
|
import org.openqa.selenium.support.ui.WebDriverWait;
|
||||||
|
|
||||||
|
import java.time.Duration;
|
||||||
|
|
||||||
|
class ArchionBatchDownloaderTest {
|
||||||
|
|
||||||
|
public static WebDriver driver;
|
||||||
|
|
||||||
|
@BeforeAll
|
||||||
|
static void setup() {
|
||||||
|
String username = "robatkoch";
|
||||||
|
String password = "PaLiNa2016$$";
|
||||||
|
driver = new ChromeDriver();
|
||||||
|
|
||||||
|
ArchionLoginHelper archionLoginHelper = new ArchionLoginHelper(driver);
|
||||||
|
|
||||||
|
archionLoginHelper.login(username, password);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void downloadAndStitchPages() {
|
||||||
|
|
||||||
|
WebDriverWait wait = new WebDriverWait(driver, Duration.ofSeconds(10));
|
||||||
|
|
||||||
|
String url = "https://www.archion.de/de/viewer/churchRegister/287339?cHash=ca3bf31106a5081448b44947b5d5bd95";
|
||||||
|
String zielPfad = System.getProperty("user.home") + "/Dokumente/archion";
|
||||||
|
|
||||||
|
ArchionBatchDownloader batch = new ArchionBatchDownloader(driver);
|
||||||
|
batch.downloadAndStitchPages(
|
||||||
|
url, // Startseite im Viewer
|
||||||
|
zielPfad, // Zielverzeichnis
|
||||||
|
8 // Anzahl Seiten
|
||||||
|
);
|
||||||
|
|
||||||
|
}
|
||||||
|
@Test
|
||||||
|
void ermittleSeitenAnzahl() throws InterruptedException {
|
||||||
|
|
||||||
|
String startUrl = "https://www.archion.de/de/viewer/churchRegister/287339?cHash=ca3bf31106a5081448b44947b5d5bd95";
|
||||||
|
ArchionBatchDownloader batch = new ArchionBatchDownloader(driver);
|
||||||
|
System.out.println("Seitenanzahl: " + batch.ermittleSeitenAnzahl(startUrl));
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@AfterEach
|
||||||
|
void tearDown() {
|
||||||
|
driver.quit();
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -0,0 +1,34 @@
|
|||||||
|
package de.roko.genalogy.downloader.util;
|
||||||
|
|
||||||
|
import de.roko.genalogy.downloader.archion.ArchionLoginHelper;
|
||||||
|
import de.roko.genalogy.downloader.tools.BookTileDownloaderLite;
|
||||||
|
import org.junit.jupiter.api.BeforeAll;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
import org.openqa.selenium.WebDriver;
|
||||||
|
import org.openqa.selenium.chrome.ChromeDriver;
|
||||||
|
|
||||||
|
class BookTileDownloaderLiteTest {
|
||||||
|
public static WebDriver driver;
|
||||||
|
|
||||||
|
@BeforeAll
|
||||||
|
static void setup() {
|
||||||
|
String username = "robatkoch";
|
||||||
|
String password = "PaLiNa2016$$";
|
||||||
|
driver = new ChromeDriver();
|
||||||
|
|
||||||
|
ArchionLoginHelper archionLoginHelper = new ArchionLoginHelper(driver);
|
||||||
|
|
||||||
|
archionLoginHelper.login(username, password);
|
||||||
|
}
|
||||||
|
@Test
|
||||||
|
void downloadVisibleTiles() {
|
||||||
|
|
||||||
|
BookTileDownloaderLite downloader = new BookTileDownloaderLite(driver);
|
||||||
|
|
||||||
|
String url = "https://www.archion.de/de/viewer/churchRegister/287339?cHash=ca3bf31106a5081448b44947b5d5bd95";
|
||||||
|
String kachelOrdner = System.getProperty("user.home") + "/Dokumente/archion/seite-001";
|
||||||
|
String zielPfad = System.getProperty("user.home") + "/Dokumente/archion";
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -0,0 +1,22 @@
|
|||||||
|
package de.roko.genalogy.downloader.util;
|
||||||
|
|
||||||
|
import de.roko.genalogy.downloader.tools.TileStitcherFixedGrid;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
|
||||||
|
class TileStitcherFixedGridTest {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void stitch() {
|
||||||
|
|
||||||
|
String kachelOrdner = System.getProperty("user.home") + "/Dokumente/archion/seite-001-tiles";
|
||||||
|
String zielPfad = System.getProperty("user.home") + "/Dokumente/archion";
|
||||||
|
|
||||||
|
|
||||||
|
File inputFolder = new File(kachelOrdner);
|
||||||
|
File outputImage = new File(System.getProperty("user.home") + "/Dokumente/archion/seite-001.png");
|
||||||
|
|
||||||
|
TileStitcherFixedGrid.stitch(inputFolder, outputImage);
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
x
Reference in New Issue
Block a user