folder downloader ready

This commit is contained in:
Robert Koch 2025-05-20 18:52:56 +02:00
parent d8165cd91c
commit a94597964a
20 changed files with 1190 additions and 61 deletions

13
.idea/easycode.ignore generated Normal file
View File

@ -0,0 +1,13 @@
.idea
.vscode
node_modules/
dist/
vendor/
cache/
.*/
*.min.*
*.test.*
*.spec.*
*.bundle.*
*.bundle-min.*
*.log

6
.idea/easycode/codebase-v2.xml generated Normal file
View File

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="com.obiscr.chatgpt.settings.EasyCodeState">
<option name="projectFiles" value="$PROJECT_DIR$/src/main/java/de/roko/genalogy/downloader/archion/ArchionArchiveToCSV.java;/Users/robertkoch/dev/arch/src/main/java/de/roko/genalogy/downloader/archion/ArchionLoginHelper.java;/Users/robertkoch/dev/arch/src/main/java/de/roko/genalogy/downloader/archiv/buch/Bild.java;/Users/robertkoch/dev/arch/src/main/java/de/roko/genalogy/downloader/archiv/buch/Buch.java;/Users/robertkoch/dev/arch/src/main/java/de/roko/genalogy/downloader/archiv/buch/Seite.java;/Users/robertkoch/dev/arch/src/main/java/de/roko/genalogy/downloader/archiv/Archiv.java;/Users/robertkoch/dev/arch/src/main/java/de/roko/genalogy/downloader/archiv/Kirchenkreis.java;/Users/robertkoch/dev/arch/src/main/java/de/roko/genalogy/downloader/archiv/Ort.java;/Users/robertkoch/dev/arch/src/main/java/de/roko/genalogy/downloader/database/ArchionDatabaseSetup.java;/Users/robertkoch/dev/arch/src/main/java/de/roko/genalogy/downloader/database/ArchiveInserter.java;/Users/robertkoch/dev/arch/src/main/java/de/roko/genalogy/downloader/database/ArchiveReader.java;/Users/robertkoch/dev/arch/src/main/java/de/roko/genalogy/downloader/database/ArchivStrukturParser.java;/Users/robertkoch/dev/arch/src/main/java/de/roko/genalogy/downloader/database/KirchenkreisExtractor.java;/Users/robertkoch/dev/arch/src/main/java/de/roko/genalogy/downloader/database/KreisOrtExtractor.java;/Users/robertkoch/dev/arch/src/main/java/de/roko/genalogy/downloader/database/OrtExtractor.java;/Users/robertkoch/dev/arch/src/main/java/de/roko/genalogy/downloader/ImageDownloader.java;/Users/robertkoch/dev/arch/src/main/java/de/roko/genalogy/downloader/Main.java;/Users/robertkoch/dev/arch/src/test/java/de/roko/genalogy/downloader/database/KreisOrtExtractorTest.java" />
</component>
</project>

2
.idea/misc.xml generated
View File

@ -8,7 +8,7 @@
</list> </list>
</option> </option>
</component> </component>
<component name="ProjectRootManager" version="2" languageLevel="JDK_23" default="true" project-jdk-name="openjdk-23" project-jdk-type="JavaSDK"> <component name="ProjectRootManager" version="2" languageLevel="JDK_21_PREVIEW" project-jdk-name="21" project-jdk-type="JavaSDK">
<output url="file://$PROJECT_DIR$/out" /> <output url="file://$PROJECT_DIR$/out" />
</component> </component>
</project> </project>

124
.idea/uiDesigner.xml generated Normal file
View File

@ -0,0 +1,124 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="Palette2">
<group name="Swing">
<item class="com.intellij.uiDesigner.HSpacer" tooltip-text="Horizontal Spacer" icon="/com/intellij/uiDesigner/icons/hspacer.svg" removable="false" auto-create-binding="false" can-attach-label="false">
<default-constraints vsize-policy="1" hsize-policy="6" anchor="0" fill="1" />
</item>
<item class="com.intellij.uiDesigner.VSpacer" tooltip-text="Vertical Spacer" icon="/com/intellij/uiDesigner/icons/vspacer.svg" removable="false" auto-create-binding="false" can-attach-label="false">
<default-constraints vsize-policy="6" hsize-policy="1" anchor="0" fill="2" />
</item>
<item class="javax.swing.JPanel" icon="/com/intellij/uiDesigner/icons/panel.svg" removable="false" auto-create-binding="false" can-attach-label="false">
<default-constraints vsize-policy="3" hsize-policy="3" anchor="0" fill="3" />
</item>
<item class="javax.swing.JScrollPane" icon="/com/intellij/uiDesigner/icons/scrollPane.svg" removable="false" auto-create-binding="false" can-attach-label="true">
<default-constraints vsize-policy="7" hsize-policy="7" anchor="0" fill="3" />
</item>
<item class="javax.swing.JButton" icon="/com/intellij/uiDesigner/icons/button.svg" removable="false" auto-create-binding="true" can-attach-label="false">
<default-constraints vsize-policy="0" hsize-policy="3" anchor="0" fill="1" />
<initial-values>
<property name="text" value="Button" />
</initial-values>
</item>
<item class="javax.swing.JRadioButton" icon="/com/intellij/uiDesigner/icons/radioButton.svg" removable="false" auto-create-binding="true" can-attach-label="false">
<default-constraints vsize-policy="0" hsize-policy="3" anchor="8" fill="0" />
<initial-values>
<property name="text" value="RadioButton" />
</initial-values>
</item>
<item class="javax.swing.JCheckBox" icon="/com/intellij/uiDesigner/icons/checkBox.svg" removable="false" auto-create-binding="true" can-attach-label="false">
<default-constraints vsize-policy="0" hsize-policy="3" anchor="8" fill="0" />
<initial-values>
<property name="text" value="CheckBox" />
</initial-values>
</item>
<item class="javax.swing.JLabel" icon="/com/intellij/uiDesigner/icons/label.svg" removable="false" auto-create-binding="false" can-attach-label="false">
<default-constraints vsize-policy="0" hsize-policy="0" anchor="8" fill="0" />
<initial-values>
<property name="text" value="Label" />
</initial-values>
</item>
<item class="javax.swing.JTextField" icon="/com/intellij/uiDesigner/icons/textField.svg" removable="false" auto-create-binding="true" can-attach-label="true">
<default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1">
<preferred-size width="150" height="-1" />
</default-constraints>
</item>
<item class="javax.swing.JPasswordField" icon="/com/intellij/uiDesigner/icons/passwordField.svg" removable="false" auto-create-binding="true" can-attach-label="true">
<default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1">
<preferred-size width="150" height="-1" />
</default-constraints>
</item>
<item class="javax.swing.JFormattedTextField" icon="/com/intellij/uiDesigner/icons/formattedTextField.svg" removable="false" auto-create-binding="true" can-attach-label="true">
<default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1">
<preferred-size width="150" height="-1" />
</default-constraints>
</item>
<item class="javax.swing.JTextArea" icon="/com/intellij/uiDesigner/icons/textArea.svg" removable="false" auto-create-binding="true" can-attach-label="true">
<default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
<preferred-size width="150" height="50" />
</default-constraints>
</item>
<item class="javax.swing.JTextPane" icon="/com/intellij/uiDesigner/icons/textPane.svg" removable="false" auto-create-binding="true" can-attach-label="true">
<default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
<preferred-size width="150" height="50" />
</default-constraints>
</item>
<item class="javax.swing.JEditorPane" icon="/com/intellij/uiDesigner/icons/editorPane.svg" removable="false" auto-create-binding="true" can-attach-label="true">
<default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
<preferred-size width="150" height="50" />
</default-constraints>
</item>
<item class="javax.swing.JComboBox" icon="/com/intellij/uiDesigner/icons/comboBox.svg" removable="false" auto-create-binding="true" can-attach-label="true">
<default-constraints vsize-policy="0" hsize-policy="2" anchor="8" fill="1" />
</item>
<item class="javax.swing.JTable" icon="/com/intellij/uiDesigner/icons/table.svg" removable="false" auto-create-binding="true" can-attach-label="false">
<default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
<preferred-size width="150" height="50" />
</default-constraints>
</item>
<item class="javax.swing.JList" icon="/com/intellij/uiDesigner/icons/list.svg" removable="false" auto-create-binding="true" can-attach-label="false">
<default-constraints vsize-policy="6" hsize-policy="2" anchor="0" fill="3">
<preferred-size width="150" height="50" />
</default-constraints>
</item>
<item class="javax.swing.JTree" icon="/com/intellij/uiDesigner/icons/tree.svg" removable="false" auto-create-binding="true" can-attach-label="false">
<default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
<preferred-size width="150" height="50" />
</default-constraints>
</item>
<item class="javax.swing.JTabbedPane" icon="/com/intellij/uiDesigner/icons/tabbedPane.svg" removable="false" auto-create-binding="true" can-attach-label="false">
<default-constraints vsize-policy="3" hsize-policy="3" anchor="0" fill="3">
<preferred-size width="200" height="200" />
</default-constraints>
</item>
<item class="javax.swing.JSplitPane" icon="/com/intellij/uiDesigner/icons/splitPane.svg" removable="false" auto-create-binding="false" can-attach-label="false">
<default-constraints vsize-policy="3" hsize-policy="3" anchor="0" fill="3">
<preferred-size width="200" height="200" />
</default-constraints>
</item>
<item class="javax.swing.JSpinner" icon="/com/intellij/uiDesigner/icons/spinner.svg" removable="false" auto-create-binding="true" can-attach-label="true">
<default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1" />
</item>
<item class="javax.swing.JSlider" icon="/com/intellij/uiDesigner/icons/slider.svg" removable="false" auto-create-binding="true" can-attach-label="false">
<default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1" />
</item>
<item class="javax.swing.JSeparator" icon="/com/intellij/uiDesigner/icons/separator.svg" removable="false" auto-create-binding="false" can-attach-label="false">
<default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3" />
</item>
<item class="javax.swing.JProgressBar" icon="/com/intellij/uiDesigner/icons/progressbar.svg" removable="false" auto-create-binding="true" can-attach-label="false">
<default-constraints vsize-policy="0" hsize-policy="6" anchor="0" fill="1" />
</item>
<item class="javax.swing.JToolBar" icon="/com/intellij/uiDesigner/icons/toolbar.svg" removable="false" auto-create-binding="false" can-attach-label="false">
<default-constraints vsize-policy="0" hsize-policy="6" anchor="0" fill="1">
<preferred-size width="-1" height="20" />
</default-constraints>
</item>
<item class="javax.swing.JToolBar$Separator" icon="/com/intellij/uiDesigner/icons/toolbarSeparator.svg" removable="false" auto-create-binding="false" can-attach-label="false">
<default-constraints vsize-policy="0" hsize-policy="0" anchor="0" fill="1" />
</item>
<item class="javax.swing.JScrollBar" icon="/com/intellij/uiDesigner/icons/scrollbar.svg" removable="false" auto-create-binding="true" can-attach-label="false">
<default-constraints vsize-policy="6" hsize-policy="0" anchor="0" fill="2" />
</item>
</group>
</component>
</project>

Binary file not shown.

21
pom.xml
View File

@ -9,8 +9,8 @@
<version>1.0-SNAPSHOT</version> <version>1.0-SNAPSHOT</version>
<properties> <properties>
<maven.compiler.source>23</maven.compiler.source> <maven.compiler.source>21</maven.compiler.source>
<maven.compiler.target>23</maven.compiler.target> <maven.compiler.target>21</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties> </properties>
@ -18,7 +18,12 @@
<dependency> <dependency>
<groupId>org.seleniumhq.selenium</groupId> <groupId>org.seleniumhq.selenium</groupId>
<artifactId>selenium-java</artifactId> <artifactId>selenium-java</artifactId>
<version>4.21.0</version> <version>4.15.0</version>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>31.1-jre</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>io.github.bonigarcia</groupId> <groupId>io.github.bonigarcia</groupId>
@ -41,6 +46,16 @@
<version>5.10.0</version> <version>5.10.0</version>
<scope>test</scope> <scope>test</scope>
</dependency> </dependency>
<dependency>
<groupId>net.lightbody.bmp</groupId>
<artifactId>browsermob-core</artifactId>
<version>2.1.5</version>
</dependency>
<dependency>
<groupId>org.seleniumhq.selenium</groupId>
<artifactId>selenium-java</artifactId>
<version>4.15.0</version>
</dependency>
</dependencies> </dependencies>
</project> </project>

View File

@ -0,0 +1,32 @@
package de.roko.genalogy.downloader;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.nio.file.*;
/**
 * Small utility that downloads a single file from a URL into a local directory.
 */
public class ImageDownloader {

    /**
     * Downloads the Archion logo into the {@code Pictures} folder below the user's home
     * directory and reports a failure on standard error.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String imageUrl = "https://www.archion.de/typo3conf/ext/archion_sitepackage/Resources/Public/Images/logo_quer_weiss.svg";
        String targetDirectory = System.getProperty("user.home") + "/Pictures";

        try {
            downloadImage(imageUrl, targetDirectory);
        } catch (IOException e) {
            System.err.println("❌ Fehler beim Herunterladen: " + e.getMessage());
        }
    }

    /**
     * Copies the resource behind {@code imageUrl} into {@code targetDirPath}, using the last
     * segment of the URL path as file name. The target directory is created when missing and
     * an existing file of the same name is replaced.
     *
     * @param imageUrl      URL of the resource to download
     * @param targetDirPath directory the file is written to
     * @throws IOException if the URL is malformed, its path has no usable file name, or the
     *                     transfer fails
     */
    public static void downloadImage(String imageUrl, String targetDirPath) throws IOException {
        URL url = new URL(imageUrl);

        // getFileName() is null for a path such as "/", and "", "." or ".." would make the
        // copy below target a directory instead of a file, so reject all of them up front.
        Path lastSegment = Paths.get(url.getPath()).getFileName();
        String fileName = lastSegment == null ? "" : lastSegment.toString();
        if (fileName.isEmpty() || fileName.equals(".") || fileName.equals("..")) {
            throw new IOException("URL enthält keinen Dateinamen: " + imageUrl);
        }

        Path targetDir = Paths.get(targetDirPath);
        Path targetPath = targetDir.resolve(fileName);

        Files.createDirectories(targetDir);
        try (InputStream in = url.openStream()) {
            Files.copy(in, targetPath, StandardCopyOption.REPLACE_EXISTING);
            System.out.println("✅ Bild gespeichert unter: " + targetPath);
        }
    }
}

View File

@ -0,0 +1,81 @@
package de.roko.genalogy.downloader;
import de.roko.genalogy.downloader.archion.ArchionLoginHelper;
import de.roko.genalogy.downloader.database.*;
import io.github.bonigarcia.wdm.WebDriverManager;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeOptions;
import java.io.File;
/**
 * Command-line entry point: starts a Chrome session, logs in through
 * {@link ArchionLoginHelper} and, when the login succeeded, resets the database and reads
 * archives, church districts and places in that order.
 */
public class Main {

    /** Browser session created in {@link #main(String[])} and used by {@link #archiveAuslesen()}. */
    public static ChromeDriver driver;

    /**
     * Runs the complete import.
     *
     * <p>The credentials are read from the environment variables {@code ARCHION_USERNAME} and
     * {@code ARCHION_PASSWORD} so that no secret is stored in the source tree.
     *
     * @param args ignored
     * @throws IllegalStateException if one of the credential variables is missing or blank
     * @throws Exception             if the login or one of the import steps fails
     */
    public static void main(String[] args) throws Exception {
        String username = requireEnv("ARCHION_USERNAME");
        String password = requireEnv("ARCHION_PASSWORD");

        String userHome = System.getProperty("user.home");
        String downloadFolder = userHome + "/Pictures/archion";
        new File(downloadFolder).mkdirs();

        // Let WebDriverManager resolve the ChromeDriver binary.
        WebDriverManager.chromedriver().setup();

        ChromeOptions options = new ChromeOptions();
        options.addArguments("--remote-allow-origins=*");

        driver = new ChromeDriver(options);
        try {
            ArchionLoginHelper archionLoginHelper = new ArchionLoginHelper(driver);
            archionLoginHelper.login(username, password);

            if (archionLoginHelper.isLoggedIn()) {
                DatenbankReset.reset();
                DatenbankDebugger debugger = new DatenbankDebugger();

                archiveAuslesen();
                debugger.printTable("archive");

                kirchenkreiseAuslesen();
                debugger.printTable("kreis");

                orteAuslesen();
                System.out.println("break");
            }
        } finally {
            // Always end the browser session, otherwise the Chrome process outlives the JVM.
            driver.quit();
        }
    }

    /**
     * Reads the archives using the shared {@link #driver}.
     *
     * @throws RuntimeException wrapping any exception raised by {@code ArchiveInserter.run}
     */
    public static void archiveAuslesen() {
        ArchiveInserter archiveInserter = new ArchiveInserter();
        try {
            archiveInserter.run(driver);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Runs {@code KirchenkreisExtractor.readNwrite()}.
     *
     * @throws Exception if the extractor fails
     */
    public static void kirchenkreiseAuslesen() throws Exception {
        KirchenkreisExtractor kirchenkreisExtractor = new KirchenkreisExtractor();
        kirchenkreisExtractor.readNwrite();
    }

    /**
     * Runs {@code KreisOrtExtractor.readNwrite()}.
     *
     * @throws Exception if the extractor fails
     */
    public static void orteAuslesen() throws Exception {
        KreisOrtExtractor kreisOrtExtractor = new KreisOrtExtractor();
        kreisOrtExtractor.readNwrite();
    }

    /** Returns the value of the environment variable {@code name} or fails when it is unset or blank. */
    private static String requireEnv(String name) {
        String value = System.getenv(name);
        if (value == null || value.isBlank()) {
            throw new IllegalStateException("Umgebungsvariable " + name + " ist nicht gesetzt.");
        }
        return value;
    }
}

View File

@ -36,21 +36,33 @@ public class ArchionDatabaseSetup {
stmt.execute(""" stmt.execute("""
CREATE TABLE IF NOT EXISTS ort ( CREATE TABLE IF NOT EXISTS ort (
id INTEGER PRIMARY KEY AUTOINCREMENT, id INTEGER PRIMARY KEY AUTOINCREMENT,
kreis_id INTEGER, -- NULL, wenn Ort direkt unter Archiv hängt kreis_id INTEGER,
archiv_id INTEGER NOT NULL, archiv_id INTEGER NOT NULL,
name TEXT NOT NULL, name TEXT NOT NULL,
link TEXT NOT NULL, link TEXT NOT NULL,
UNIQUE(kreis_id, archiv_id, name) -- Diese Kombination muss zum ON CONFLICT passen UNIQUE(archiv_id, name),
FOREIGN KEY (kreis_id) REFERENCES kreis(id),
FOREIGN KEY (archiv_id) REFERENCES archive(id)
); );
"""); """);
stmt.execute(""" stmt.execute("""
CREATE TABLE IF NOT EXISTS buch ( CREATE TABLE IF NOT EXISTS buch (
id INTEGER PRIMARY KEY AUTOINCREMENT, id INTEGER PRIMARY KEY AUTOINCREMENT, -- technische ID
ort_id INTEGER NOT NULL,
titel TEXT NOT NULL, ort_id INTEGER NOT NULL, -- Bezug zum Ort
zeitraum TEXT, titel TEXT NOT NULL, -- Titel des Buchs
FOREIGN KEY (ort_id) REFERENCES ort(id) typ TEXT, -- Symboltyp (z.B. taufbuch, beerdigungsbuch)
zeitraum TEXT, -- z.B. 17001750
enthaelt TEXT, -- Zusatzangaben, z.B. auch Konfirmationen
anmerkung TEXT, -- redaktionelle Hinweise
signatur TEXT, -- Signatur lokal
archivname TEXT, -- Archivname aus Detailseite
link TEXT NOT NULL, -- Detail-Link auf Archion
viewer_link TEXT, -- direkter Link zum Viewer
FOREIGN KEY (ort_id) REFERENCES ort(id),
UNIQUE (ort_id, titel) -- wichtig für conflict-handling
); );
"""); """);

View File

@ -44,7 +44,25 @@ public class ArchivStrukturParser {
String name = a.text().trim(); String name = a.text().trim();
String href = a.absUrl("href").trim(); String href = a.absUrl("href").trim();
if (name.toLowerCase().contains("kirchenkreis") || name.toLowerCase().contains("dekanat")) { if (name.toLowerCase().contains("kirchenkreis")
|| name.toLowerCase().contains("dekanat")
|| name.toLowerCase().contains("juden")
|| name.toLowerCase().contains("mennoiten")
|| name.toLowerCase().contains("militärseelsorge")
|| name.toLowerCase().contains("reformierte kirche")
|| name.toLowerCase().contains("auslandsgemeinde")
|| name.toLowerCase().contains("thüringen")
|| name.toLowerCase().contains("israeliten")
|| name.toLowerCase().contains("krankenhausseelsorge")
|| name.toLowerCase().contains("kreis")
|| name.toLowerCase().contains("reformierter kirchenkreis")
|| name.toLowerCase().contains("sonderbestände")
|| name.toLowerCase().contains("allgemeine hilfsmittel")
|| name.toLowerCase().contains("allgemeines ortschaftsverzeichnis")
|| name.toLowerCase().contains("kirchenbezirk")
|| name.toLowerCase().contains("hinterpommern")
|| name.toLowerCase().contains("militärkirchenbücher")
){
kreise.add(new Kirchenkreis(archivId, name, href)); kreise.add(new Kirchenkreis(archivId, name, href));
} else { } else {
orte.add(new Ort(null, archivId, name, href)); orte.add(new Ort(null, archivId, name, href));

View File

@ -0,0 +1,43 @@
package de.roko.genalogy.downloader.database;
import java.sql.*;
/**
 * Debugging aid that prints every row of a table of the SQLite file {@code archion.db}
 * to standard output as tab-separated text.
 */
public class DatenbankDebugger {

    private static final String DB = "archion.db";

    public DatenbankDebugger() {
    }

    /**
     * Prints a title line, the column names, a separator and then all rows of the table.
     * A {@link SQLException} is not propagated; it is reported on standard error instead.
     *
     * @param tableName name of the table; it is inserted into the query text unchanged
     * @throws Exception declared for callers; SQL errors are handled inside the method
     */
    public void printTable(String tableName) throws Exception {
        System.out.println("\n📋 Inhalt der Tabelle: " + tableName);

        final String query = "SELECT * FROM " + tableName;
        try (Connection connection = DriverManager.getConnection("jdbc:sqlite:" + DB);
             Statement statement = connection.createStatement();
             ResultSet rows = statement.executeQuery(query)) {

            ResultSetMetaData metaData = rows.getMetaData();
            final int columns = metaData.getColumnCount();

            printHeader(metaData, columns);
            while (rows.next()) {
                printRow(rows, columns);
            }
        } catch (SQLException ex) {
            System.err.println("⚠️ Fehler beim Zugriff auf Tabelle '" + tableName + "': " + ex.getMessage());
        }
    }

    /** Writes the tab-separated column names followed by a 60-character dashed rule. */
    private static void printHeader(ResultSetMetaData metaData, int columns) throws SQLException {
        int column = 1;
        while (column <= columns) {
            System.out.print(metaData.getColumnName(column) + "\t");
            column++;
        }
        System.out.println("\n" + "-".repeat(60));
    }

    /** Writes the current row tab-separated; SQL NULL values are shown as the text NULL. */
    private static void printRow(ResultSet rows, int columns) throws SQLException {
        int column = 1;
        while (column <= columns) {
            Object cell = rows.getObject(column);
            String text;
            if (cell == null) {
                text = "NULL";
            } else {
                text = cell.toString();
            }
            System.out.print(text + "\t");
            column++;
        }
        System.out.println();
    }
}

View File

@ -0,0 +1,29 @@
package de.roko.genalogy.downloader.database;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.Statement;
/**
 * Empties the tables {@code ort}, {@code kreis} and {@code archive} of the SQLite file
 * {@code archion.db} and clears their AUTOINCREMENT counters.
 */
public class DatenbankReset {

    private static final String DB = "archion.db";

    /**
     * Runs {@link #reset()} and prints a confirmation.
     *
     * @param args ignored
     * @throws Exception if the reset fails
     */
    public static void main(String[] args) throws Exception {
        reset();
        System.out.println("✅ Datenbank erfolgreich geleert.");
    }

    /**
     * Executes the clean-up statements one after another on a single connection.
     *
     * @throws Exception if the connection cannot be opened or a statement fails
     */
    public static void reset() throws Exception {
        // Child tables first, because of the foreign-key relations between them;
        // the last statement resets the AUTOINCREMENT counters of the three tables.
        final String[] cleanup = {
            "DELETE FROM ort",
            "DELETE FROM kreis",
            "DELETE FROM archive",
            "DELETE FROM sqlite_sequence WHERE name IN ('archive', 'kreis', 'ort')"
        };

        try (Connection connection = DriverManager.getConnection("jdbc:sqlite:" + DB);
             Statement statement = connection.createStatement()) {
            for (String command : cleanup) {
                statement.executeUpdate(command);
            }
        }
    }
}

View File

@ -0,0 +1,367 @@
package de.roko.genalogy.downloader.database;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.File;
import java.sql.*;
import java.util.ArrayList;
import java.util.List;
import java.util.function.Function;
/**
 * Reads the book entries listed on the page of each place ({@code ort}) stored in the SQLite
 * database, enriches them with the details of the book page and stores them in the table
 * {@code buch}. It can also create one picture folder per stored book.
 */
public class DokumentExtractor {

    // NOTE(review): absolute, machine-specific path; other classes in this commit use the
    // relative name "archion.db" — confirm which file is intended.
    private static final String DB = "/Users/robertkoch/dev/arch/archion.db";

    /**
     * Processes every row of the table {@code ort}, ordered by id.
     *
     * @throws Exception if the database cannot be read or written
     */
    public void run() throws Exception {
        String sql = "SELECT id, name, link FROM ort ORDER BY id";
        try (Connection conn = DriverManager.getConnection("jdbc:sqlite:" + DB);
             PreparedStatement stmt = conn.prepareStatement(sql);
             ResultSet rs = stmt.executeQuery()) {
            while (rs.next()) {
                verarbeiteOrt(rs, conn, false);
            }
        }
    }

    /**
     * Processes the single place with the given id. Before that, the database file in use and
     * the columns and indexes of the table {@code buch} are printed for diagnosis.
     *
     * @param id primary key of the row in {@code ort}
     * @throws Exception if the database cannot be read or written
     */
    public void run(int id) throws Exception {
        try (Connection conn = DriverManager.getConnection("jdbc:sqlite:" + DB)) {
            System.out.println("📂 Aktive DB-Datei: " + new File(DB).getAbsolutePath());
            printBuchSchema(conn);

            // Bind the id instead of concatenating it into the statement text.
            String sql = "SELECT id, name, link FROM ort WHERE id = ?";
            try (PreparedStatement stmt = conn.prepareStatement(sql)) {
                stmt.setInt(1, id);
                try (ResultSet rs = stmt.executeQuery()) {
                    while (rs.next()) {
                        verarbeiteOrt(rs, conn, true);
                    }
                }
            }
        }
    }

    /** Prints the column names and the indexes of the table {@code buch}. */
    private void printBuchSchema(Connection conn) throws SQLException {
        try (Statement stmt = conn.createStatement()) {
            try (ResultSet spalten = stmt.executeQuery("PRAGMA table_info(buch)")) {
                System.out.println("📋 Spalten in 'buch':");
                while (spalten.next()) {
                    System.out.println(" - " + spalten.getString("name"));
                }
            }
            try (ResultSet indizes = stmt.executeQuery("PRAGMA index_list('buch')")) {
                System.out.println("📊 Indizes auf 'buch':");
                while (indizes.next()) {
                    System.out.println(" - " + indizes.getString("name") + ", unique: " + indizes.getBoolean("unique"));
                }
            }
        }
    }

    /** Extracts and stores the books of the place the result set is currently positioned on. */
    private void verarbeiteOrt(ResultSet rs, Connection conn, boolean linkAusgeben) throws SQLException {
        int ortId = rs.getInt("id");
        String ortName = rs.getString("name");
        String ortLink = rs.getString("link");

        System.out.println("\n📘 Lese Bücher für Ort: " + ortName);
        if (linkAusgeben) {
            System.out.println("Link: " + ortLink);
        }
        List<Buch> buecher = extractBuecher(ortId, ortLink);
        saveBuecher(buecher, conn);
        System.out.println(buecher.size() + " Bücher gespeichert.");
    }

    /**
     * Loads the page of a place and builds one {@link Buch} per {@code #archive-nav li.item}
     * entry. Errors are reported on standard error; the books collected so far are returned.
     *
     * @param ortId   id of the place the books belong to
     * @param ortLink URL of the place page
     * @return the extracted books, possibly empty
     */
    private List<Buch> extractBuecher(int ortId, String ortLink) {
        List<Buch> list = new ArrayList<>();
        try {
            Document doc = Jsoup.connect(ortLink).get();
            Elements buchEintraege = doc.select("#archive-nav li.item");

            for (Element li : buchEintraege) {
                Element a = li.selectFirst("a");
                if (a == null) {
                    continue;
                }

                Element span = a.selectFirst("span");
                if (span == null) {
                    // Without the <span> there is no title to store.
                    System.err.println("⚠️ Kein <span> in: " + a);
                    continue;
                }

                String titel = span.text().trim();
                String link = a.absUrl("href").trim();

                // The book type is the file name of the entry's icon without its extension.
                String typ = null;
                Element img = a.selectFirst("img");
                if (img != null) {
                    String src = img.attr("src");
                    int lastSlash = src.lastIndexOf('/');
                    int dot = src.lastIndexOf('.');
                    if (lastSlash != -1 && dot != -1 && dot > lastSlash) {
                        typ = src.substring(lastSlash + 1, dot);
                    }
                }

                list.add(extractBuchDetails(ortId, titel, link, typ));
            }
        } catch (Exception e) {
            System.err.println("⚠️ Fehler beim Ort-Link " + ortLink + ": " + e.getMessage());
        }
        return list;
    }

    /**
     * Loads the detail page of a book and reads the viewer link and the labelled values of
     * the {@code #steckbrief} definition list. When the page cannot be read, the error is
     * reported on standard error and the detail fields stay {@code null}.
     *
     * @param ortId id of the place the book belongs to
     * @param titel title taken from the place page
     * @param link  URL of the detail page
     * @param typ   book type taken from the icon, may be {@code null}
     * @return the book with all values that could be read
     */
    private Buch extractBuchDetails(int ortId, String titel, String link, String typ) {
        String zeitraum = null;
        String enthaelt = null;
        String anmerkung = null;
        String signatur = null;
        String archivname = null;
        String viewerLink = null;

        try {
            Document detailDoc = Jsoup.connect(link).get();

            Element viewerEl = detailDoc.selectFirst("#steckbrief a[href*=\"/viewer/\"]");
            if (viewerEl != null) {
                viewerLink = viewerEl.absUrl("href");
            }

            Elements dl = detailDoc.select("#steckbrief dl");
            for (Element dt : dl.select("dt")) {
                // Locale.ROOT keeps the label matching independent of the JVM's default locale.
                String label = dt.text().trim().toLowerCase(java.util.Locale.ROOT);
                Element dd = dt.nextElementSibling();
                if (dd == null) {
                    continue;
                }
                String value = dd.text().trim();

                switch (label) {
                    case "zeitraum" -> zeitraum = value;
                    case "enthält auch" -> enthaelt = value;
                    case "anmerkung" -> anmerkung = value;
                    case "signatur lokal" -> signatur = value;
                    case "archiv" -> archivname = value;
                    default -> { /* other labels are not stored */ }
                }
            }
        } catch (Exception e) {
            System.err.println("⚠️ Fehler beim Detail-Link " + link + ": " + e.getMessage());
        }

        // The argument order must follow the component order of the record Buch; all
        // components after ortId are Strings, so a mix-up would compile without complaint.
        return new Buch(ortId, titel, typ, zeitraum, enthaelt, anmerkung, signatur, archivname, link, viewerLink);
    }

    /**
     * Stores those books of {@code list} whose title is not yet present for the place of the
     * first list element; books that already exist are skipped, as announced in the log output.
     *
     * @param list books of a single place
     * @param conn open database connection
     * @throws SQLException if reading the existing titles or writing the batch fails
     */
    private void saveBuecher(List<Buch> list, Connection conn) throws SQLException {
        if (list.isEmpty()) {
            return;
        }

        List<String> vorhandeneTitel = getGespeicherteTitel(list.get(0).ortId(), conn);

        List<Buch> neu = list.stream()
                .filter(b -> !vorhandeneTitel.contains(b.titel()))
                .toList();

        System.out.println("🧮 " + (list.size() - neu.size()) + " Bücher existieren bereits und werden übersprungen.");
        System.out.println(neu.size() + " neue Bücher werden gespeichert.");

        if (neu.isEmpty()) {
            return;
        }

        // ON CONFLICT still matters when the same title occurs twice within one batch.
        String sql = """
                INSERT INTO buch (
                    ort_id, titel, typ, zeitraum, enthaelt,
                    anmerkung, signatur, archivname, link, viewer_link
                ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                ON CONFLICT(ort_id, titel)
                DO UPDATE SET
                    typ = excluded.typ,
                    zeitraum = excluded.zeitraum,
                    enthaelt = excluded.enthaelt,
                    anmerkung = excluded.anmerkung,
                    signatur = excluded.signatur,
                    archivname = excluded.archivname,
                    link = excluded.link,
                    viewer_link = excluded.viewer_link;
                """;

        try (PreparedStatement stmt = conn.prepareStatement(sql)) {
            // Only the filtered list is written; the unfiltered one contains the skipped books.
            for (Buch b : neu) {
                stmt.setInt(1, b.ortId());
                stmt.setString(2, b.titel());
                stmt.setString(3, b.typ());
                stmt.setString(4, b.zeitraum());
                stmt.setString(5, b.enthaelt());
                stmt.setString(6, b.anmerkung());
                stmt.setString(7, b.signatur());
                stmt.setString(8, b.archivname());
                stmt.setString(9, b.link());
                stmt.setString(10, b.viewerLink());
                stmt.addBatch();
            }
            stmt.executeBatch();
        }
    }

    /**
     * Returns the titles already stored in {@code buch} for the given place.
     *
     * @param ortId id of the place
     * @param conn  open database connection
     * @return the stored titles, possibly empty
     * @throws SQLException if the query fails
     */
    private List<String> getGespeicherteTitel(int ortId, Connection conn) throws SQLException {
        List<String> vorhandeneTitel = new ArrayList<>();
        String sql = "SELECT titel FROM buch WHERE ort_id = ?";
        try (PreparedStatement stmt = conn.prepareStatement(sql)) {
            stmt.setInt(1, ortId);
            try (ResultSet rs = stmt.executeQuery()) {
                while (rs.next()) {
                    vorhandeneTitel.add(rs.getString("titel"));
                }
            }
        }
        return vorhandeneTitel;
    }

    /**
     * Processes all places of the archive with the given name. The archive is resolved through
     * {@code ort.archiv_id}, so places without a {@code kreis} row are included as well.
     *
     * @param archivName value of {@code archive.name}
     * @throws Exception if the database cannot be read or written
     */
    public void runForArchiv(String archivName) throws Exception {
        try (Connection conn = DriverManager.getConnection("jdbc:sqlite:" + DB)) {
            String sql = """
                    SELECT ort.id, ort.name, ort.link
                    FROM ort
                    JOIN archive ON ort.archiv_id = archive.id
                    WHERE archive.name = ?
                    ORDER BY ort.id
                    """;
            try (PreparedStatement stmt = conn.prepareStatement(sql)) {
                stmt.setString(1, archivName);
                try (ResultSet rs = stmt.executeQuery()) {
                    runForOrtResultSet(rs, conn, archivName);
                }
            }
        }
    }

    /**
     * Processes all places of the archive with the given id, including places that have no
     * {@code kreis} row.
     *
     * @param archivId value of {@code ort.archiv_id}
     * @throws Exception if the database cannot be read or written
     */
    public void runForArchiv(int archivId) throws Exception {
        try (Connection conn = DriverManager.getConnection("jdbc:sqlite:" + DB)) {
            String sql = """
                    SELECT ort.id, ort.name, ort.link
                    FROM ort
                    WHERE ort.archiv_id = ?
                    ORDER BY ort.id
                    """;
            try (PreparedStatement stmt = conn.prepareStatement(sql)) {
                stmt.setInt(1, archivId);
                try (ResultSet rs = stmt.executeQuery()) {
                    runForOrtResultSet(rs, conn, "ID=" + archivId);
                }
            }
        }
    }

    /**
     * Processes all places of every archive whose {@code bundesland} equals the given value,
     * including places that have no {@code kreis} row.
     *
     * @param bundesland value of {@code archive.bundesland}
     * @throws Exception if the database cannot be read or written
     */
    public void runForBundesland(String bundesland) throws Exception {
        try (Connection conn = DriverManager.getConnection("jdbc:sqlite:" + DB)) {
            String sql = """
                    SELECT ort.id, ort.name, ort.link
                    FROM ort
                    JOIN archive ON ort.archiv_id = archive.id
                    WHERE archive.bundesland = ?
                    ORDER BY ort.id
                    """;
            try (PreparedStatement stmt = conn.prepareStatement(sql)) {
                stmt.setString(1, bundesland);
                try (ResultSet rs = stmt.executeQuery()) {
                    runForOrtResultSet(rs, conn, "Bundesland=" + bundesland);
                }
            }
        }
    }

    /**
     * Processes every place of the result set and prints a warning when it was empty.
     * The caller keeps ownership of the result set and closes it.
     *
     * @param rs    rows with the columns {@code id}, {@code name} and {@code link}
     * @param conn  open database connection
     * @param label text used in the warning to name the selection
     * @throws Exception if the database cannot be read or written
     */
    private void runForOrtResultSet(ResultSet rs, Connection conn, String label) throws Exception {
        int count = 0;
        while (rs.next()) {
            verarbeiteOrt(rs, conn, false);
            count++;
        }
        if (count == 0) {
            System.out.println("⚠️ Keine Orte für Archiv '" + label + "' gefunden.");
        }
    }

    /**
     * Builds {@code baseDir/bundesland/archivname[/kreisname]/ort/buchTitel} and creates the
     * folder when it does not exist. Every name is reduced to word characters, German umlauts,
     * hyphens and whitespace; a missing name, or one that is empty after that reduction,
     * becomes {@code unbekannt} so that no path segment is lost.
     *
     * @param baseDir    root directory, used unchanged
     * @param bundesland federal state, may be {@code null}
     * @param archivname archive name, may be {@code null}
     * @param kreisname  district name; the segment is left out when {@code null} or blank
     * @param ort        place name, may be {@code null}
     * @param buchTitel  book title, may be {@code null}
     * @return the folder, created if necessary
     */
    private File buildBildOrdnerPfad(
            String baseDir,
            String bundesland,
            String archivname,
            String kreisname,
            String ort,
            String buchTitel
    ) {
        Function<String, String> safe = s -> {
            if (s == null) {
                return "unbekannt";
            }
            String bereinigt = s.replaceAll("[^\\wäöüÄÖÜß\\-\\s]", "").trim();
            return bereinigt.isEmpty() ? "unbekannt" : bereinigt;
        };

        List<String> pfad = new ArrayList<>();
        pfad.add(baseDir);
        pfad.add(safe.apply(bundesland));
        pfad.add(safe.apply(archivname));
        if (kreisname != null && !kreisname.isBlank()) {
            pfad.add(safe.apply(kreisname));
        }
        pfad.add(safe.apply(ort));
        pfad.add(safe.apply(buchTitel));

        File ordner = new File(String.join(File.separator, pfad));
        if (!ordner.exists()) {
            ordner.mkdirs();
        }
        return ordner;
    }

    /**
     * Creates one picture folder below {@code basisPfad} for every row of the table
     * {@code buch} and prints each folder path.
     *
     * @param basisPfad root directory for the picture folders
     * @throws Exception if the database cannot be read
     */
    public void erstelleBildOrdnerFuerAlleBuecher(String basisPfad) throws Exception {
        String sql = """
                SELECT
                    buch.id AS buch_id,
                    buch.titel AS buch_titel,
                    ort.name AS ort_name,
                    kreis.name AS kreis_name,
                    archive.name AS archiv_name,
                    archive.bundesland AS bundesland
                FROM buch
                JOIN ort ON buch.ort_id = ort.id
                LEFT JOIN kreis ON ort.kreis_id = kreis.id
                JOIN archive ON ort.archiv_id = archive.id
                ORDER BY archive.bundesland, archive.name, kreis.name, ort.name, buch.titel
                """;

        try (Connection conn = DriverManager.getConnection("jdbc:sqlite:" + DB);
             PreparedStatement stmt = conn.prepareStatement(sql);
             ResultSet rs = stmt.executeQuery()) {

            int count = 0;
            while (rs.next()) {
                String buchTitel = rs.getString("buch_titel");
                String ort = rs.getString("ort_name");
                String kreis = rs.getString("kreis_name");
                String archiv = rs.getString("archiv_name");
                String bundesland = rs.getString("bundesland");

                File ordner = buildBildOrdnerPfad(basisPfad, bundesland, archiv, kreis, ort, buchTitel);
                System.out.println("📁 Ordner: " + ordner.getAbsolutePath());
                count++;
            }
            System.out.println(count + " Bildordner vorbereitet.");
        }
    }

    /**
     * One row of the table {@code buch}. All components except {@code ortId} are Strings, so
     * positional construction must follow exactly this order.
     *
     * @param ortId      id of the place the book belongs to
     * @param titel      title of the book
     * @param typ        book type derived from the icon file name, may be {@code null}
     * @param zeitraum   value of the label "Zeitraum", may be {@code null}
     * @param enthaelt   value of the label "Enthält auch", may be {@code null}
     * @param anmerkung  value of the label "Anmerkung", may be {@code null}
     * @param signatur   value of the label "Signatur lokal", may be {@code null}
     * @param archivname value of the label "Archiv", may be {@code null}
     * @param link       URL of the detail page
     * @param viewerLink URL of the viewer, may be {@code null}
     */
    public record Buch(
            int ortId,
            String titel,
            String typ,
            String zeitraum,
            String enthaelt,
            String anmerkung,
            String signatur,
            String archivname,
            String link,
            String viewerLink
    ) {}
}

View File

@ -15,7 +15,6 @@ public class KirchenkreisExtractor {
public void readNwrite() throws Exception { public void readNwrite() throws Exception {
try (Connection conn = DriverManager.getConnection("jdbc:sqlite:" + DB)) { try (Connection conn = DriverManager.getConnection("jdbc:sqlite:" + DB)) {
String sql = "SELECT id, name, link FROM archive ORDER BY id"; String sql = "SELECT id, name, link FROM archive ORDER BY id";
try (PreparedStatement stmt = conn.prepareStatement(sql); try (PreparedStatement stmt = conn.prepareStatement(sql);
ResultSet rs = stmt.executeQuery()) { ResultSet rs = stmt.executeQuery()) {
@ -24,33 +23,60 @@ public class KirchenkreisExtractor {
int id = rs.getInt("id"); int id = rs.getInt("id");
String name = rs.getString("name"); String name = rs.getString("name");
String link = rs.getString("link"); String link = rs.getString("link");
System.out.println("\n🔍 Lade Kirchenkreise f\u00fcr Archiv: " + name);
List<Kirchenkreis> kreise = extractKirchenkreise(id, link); System.out.println("\n🔍 Lade Struktur für Archiv: " + name);
saveKirchenkreise(kreise, conn);
System.out.println("" + kreise.size() + " Kirchenkreise gespeichert.");
}
}
}
}
public static List<Kirchenkreis> extractKirchenkreise(int archivId, String url) { Document doc = Jsoup.connect(link).get();
List<Kirchenkreis> list = new ArrayList<>();
try {
Document doc = Jsoup.connect(url).get();
Elements items = doc.select("#archive-nav li.item a"); Elements items = doc.select("#archive-nav li.item a");
for (Element link : items) { List<Kirchenkreis> kreise = new ArrayList<>();
String name = link.text().trim(); List<Ort> orte = new ArrayList<>();
String href = link.absUrl("href").trim();
if (!name.isEmpty() && !href.isEmpty()) { for (Element linkEl : items) {
list.add(new Kirchenkreis(archivId, name, href)); String eintragName = linkEl.text().trim();
String href = linkEl.absUrl("href").trim();
if (eintragName.isEmpty() || href.isEmpty()) continue;
if (isKirchenkreisebene(eintragName)) {
kreise.add(new Kirchenkreis(id, eintragName, href));
} else {
orte.add(new Ort(id, null, eintragName, href));
} }
} }
} catch (Exception e) {
System.err.println("⚠️ Fehler bei URL " + url + ": " + e.getMessage()); saveKirchenkreise(kreise, conn);
saveOrte(orte, conn);
System.out.println("" + kreise.size() + " Kirchenkreise gespeichert.");
System.out.println("" + orte.size() + " direkte Orte gespeichert.");
} }
return list; }
}
}
private boolean isKirchenkreisebene(String name) {
String n = name.toLowerCase();
return n.contains("kirchenkreis") ||
n.contains("dekanat") ||
n.contains("juden") ||
n.contains("mennoiten") ||
n.contains("militärseelsorge") ||
n.contains("reformierte kirche") ||
n.contains("auslandsgemeinde") ||
n.contains("thüringen") ||
n.contains("israeliten") ||
n.contains("krankenhausseelsorge") ||
n.contains("kreis") ||
n.contains("reformierter kirchenkreis") ||
n.contains("sonderbestände") ||
n.contains("allgemeine hilfsmittel") ||
n.contains("allgemeines ortschaftsverzeichnis") ||
n.contains("kirchenbezirk") ||
n.contains("hinterpommern") ||
n.contains("Kirchenbücher der Garnisonen und Militärgemeinden") ||
n.contains("Regimentskirchenbücher") ||
n.contains("Zivilregister") ||
n.contains("militärkirchenbücher");
} }
public static void saveKirchenkreise(List<Kirchenkreis> list, Connection conn) throws SQLException { public static void saveKirchenkreise(List<Kirchenkreis> list, Connection conn) throws SQLException {
@ -72,6 +98,30 @@ public class KirchenkreisExtractor {
} }
} }
public record Kirchenkreis(int archivId, String name, String link) {} public static void saveOrte(List<Ort> list, Connection conn) throws SQLException {
String sql = """
INSERT INTO ort (archiv_id, kreis_id, name, link)
VALUES (?, ?, ?, ?)
ON CONFLICT(archiv_id, name)
DO UPDATE SET link = excluded.link;
""";
try (PreparedStatement stmt = conn.prepareStatement(sql)) {
for (Ort o : list) {
stmt.setInt(1, o.archivId());
if (o.kreisId() != null)
stmt.setInt(2, o.kreisId());
else
stmt.setNull(2, Types.INTEGER);
stmt.setString(3, o.name());
stmt.setString(4, o.link());
stmt.addBatch();
}
stmt.executeBatch();
}
} }
/**
 * One district-level entry taken from an archive's navigation list.
 *
 * @param archivId id of the archive row the entry was listed under
 * @param name     trimmed link text of the entry
 * @param link     absolute URL the entry points to
 */
public record Kirchenkreis(
        int archivId,
        String name,
        String link
) {}
/**
 * One place entry taken from an archive's navigation list.
 *
 * @param archivId id of the archive row the entry was listed under
 * @param kreisId  id of the owning district, or {@code null} when the place
 *                 hangs directly under the archive (stored as SQL NULL)
 * @param name     trimmed link text of the entry
 * @param link     absolute URL the entry points to
 */
public record Ort(
        int archivId,
        Integer kreisId,
        String name,
        String link
) {}
}

View File

@ -15,6 +15,7 @@ public class KreisOrtExtractor {
public void readNwrite() throws Exception { public void readNwrite() throws Exception {
try (Connection conn = DriverManager.getConnection("jdbc:sqlite:" + DB)) { try (Connection conn = DriverManager.getConnection("jdbc:sqlite:" + DB)) {
String sql = "SELECT id, archiv_id, name, link FROM kreis ORDER BY id"; String sql = "SELECT id, archiv_id, name, link FROM kreis ORDER BY id";
try (PreparedStatement stmt = conn.prepareStatement(sql); try (PreparedStatement stmt = conn.prepareStatement(sql);
ResultSet rs = stmt.executeQuery()) { ResultSet rs = stmt.executeQuery()) {
@ -25,45 +26,47 @@ public class KreisOrtExtractor {
String name = rs.getString("name"); String name = rs.getString("name");
String link = rs.getString("link"); String link = rs.getString("link");
System.out.println("\n🔍 Lade Orte für Kirchenkreis/Dekanat: " + name); System.out.println("\n🔎 Lade Orte unter Kirchenkreis: " + name);
List<Ort> orte = extractOrte(kreisId, archivId, link);
List<Ort> orte = extractOrte(archivId, kreisId, link);
saveOrte(orte, conn); saveOrte(orte, conn);
System.out.println("" + orte.size() + " Orte gespeichert."); System.out.println("" + orte.size() + " Orte gespeichert.");
} }
} }
} }
} }
private static List<Ort> extractOrte(int kreisId, int archivId, String url) { private List<Ort> extractOrte(int archivId, int kreisId, String url) {
List<Ort> list = new ArrayList<>(); List<Ort> orte = new ArrayList<>();
try { try {
Document doc = Jsoup.connect(url).get(); Document doc = Jsoup.connect(url).get();
Elements items = doc.select(".list li a"); Elements items = doc.select("#archive-nav li.item a");
for (Element link : items) {
String name = link.text().trim(); for (Element linkEl : items) {
String href = link.absUrl("href").trim(); String name = linkEl.text().trim();
String href = linkEl.absUrl("href").trim();
if (!name.isEmpty() && !href.isEmpty()) { if (!name.isEmpty() && !href.isEmpty()) {
list.add(new Ort(kreisId, archivId, name, href)); orte.add(new Ort(archivId, kreisId, name, href));
} }
} }
} catch (Exception e) { } catch (Exception e) {
System.err.println("⚠️ Fehler bei URL " + url + ": " + e.getMessage()); System.err.println("⚠️ Fehler bei URL " + url + ": " + e.getMessage());
} }
return list; return orte;
} }
private static void saveOrte(List<Ort> list, Connection conn) throws SQLException { private void saveOrte(List<Ort> list, Connection conn) throws SQLException {
String sql = """ String sql = """
INSERT INTO ort (kreis_id, archiv_id, name, link) INSERT INTO ort (archiv_id, kreis_id, name, link)
VALUES (?, ?, ?, ?) VALUES (?, ?, ?, ?)
ON CONFLICT(kreis_id, archiv_id, name) ON CONFLICT(archiv_id, name)
DO UPDATE SET link = excluded.link; DO UPDATE SET link = excluded.link, kreis_id = excluded.kreis_id;
"""; """;
try (PreparedStatement stmt = conn.prepareStatement(sql)) { try (PreparedStatement stmt = conn.prepareStatement(sql)) {
for (Ort o : list) { for (Ort o : list) {
stmt.setInt(1, o.kreisId()); stmt.setInt(1, o.archivId());
stmt.setInt(2, o.archivId()); stmt.setInt(2, o.kreisId());
stmt.setString(3, o.name()); stmt.setString(3, o.name());
stmt.setString(4, o.link()); stmt.setString(4, o.link());
stmt.addBatch(); stmt.addBatch();
@ -72,6 +75,5 @@ public class KreisOrtExtractor {
} }
} }
public record Ort(int archivId, int kreisId, String name, String link) {}
public record Ort(int kreisId, int archivId, String name, String link) {}
} }

View File

@ -0,0 +1,71 @@
package de.roko.genalogy.downloader.viewer;
import org.openqa.selenium.*;
import org.openqa.selenium.support.ui.ExpectedConditions;
import org.openqa.selenium.support.ui.WebDriverWait;
import java.time.Duration;
import java.util.ArrayList;
import java.util.List;
/**
 * Selenium helpers for the Archion viewer: opening a viewer URL (following
 * the "no access" interstitial if it appears) and collecting the image URLs
 * referenced by the viewer page.
 */
public class ViewerBildParser {

    /** Origin prepended to relative {@code data-src} paths. */
    private static final String BASE_URL = "https://www.archion.de";

    /**
     * One page image of a book.
     *
     * @param seite   1-based running number in document order
     * @param bildUrl absolute image URL without query string
     */
    public record BildInfo(int seite, String bildUrl) {}

    /**
     * Opens {@code viewerUrl} and returns the URL the browser ends up on.
     *
     * <p>If the page contains the text "Kein Zugang zum Viewer", the login
     * button on that page is clicked and the method waits (up to 10 s) until
     * the browser is on a {@code /viewer/churchRegister/} URL.
     *
     * @param driver    browser session to drive
     * @param viewerUrl viewer URL to open
     * @return the current URL after any redirect, or {@code viewerUrl} itself
     *         when waiting or clicking failed or the thread was interrupted
     */
    public String resolveViewerRedirect(WebDriver driver, String viewerUrl) {
        driver.get(viewerUrl);

        try {
            // Fixed grace period so the interstitial (if any) has rendered.
            Thread.sleep(1000);

            boolean keinZugang = driver.getPageSource().contains("Kein Zugang zum Viewer");
            if (keinZugang) {
                System.out.println("🔐 Kein Zugang Weiterleitung wird ausgelöst ...");

                WebElement loginButton = new WebDriverWait(driver, Duration.ofSeconds(5))
                        .until(ExpectedConditions.elementToBeClickable(
                                By.cssSelector("a.btn.btn-primary[href*='/de/login']")));
                loginButton.click();

                new WebDriverWait(driver, Duration.ofSeconds(10))
                        .until(d -> d.getCurrentUrl().contains("/viewer/churchRegister/"));

                String redirectedUrl = driver.getCurrentUrl();
                System.out.println("✅ Weitergeleitet zum Viewer: " + redirectedUrl);
                return redirectedUrl;
            }

            System.out.println("✅ Direktzugriff auf Viewer ohne Zwischenseite.");
            return driver.getCurrentUrl();

        } catch (InterruptedException e) {
            // Restore the flag so callers further up can still observe the interrupt.
            Thread.currentThread().interrupt();
            System.err.println("❌ Fehler bei der Weiterleitung: " + e.getMessage());
            return viewerUrl;
        } catch (Exception e) {
            // Best effort: fall back to the requested URL.
            System.err.println("❌ Fehler bei der Weiterleitung: " + e.getMessage());
            return viewerUrl;
        }
    }

    /**
     * Collects the image URLs of all {@code img[data-src]} elements inside
     * {@code .dvpages .dvpage} on the page currently loaded in {@code driver}.
     *
     * <p>The query string is stripped from every {@code data-src}. Relative
     * paths are prefixed with the Archion origin; values that are already
     * absolute are used as they are. Entries that are blank or consist of a
     * query string only are skipped and do not consume a page number.
     *
     * @param driver browser session positioned on a viewer page
     * @return images in document order; empty (or partial) if extraction failed
     */
    public List<BildInfo> extractBildUrlsWithSelenium(WebDriver driver) {
        List<BildInfo> bilder = new ArrayList<>();

        try {
            List<WebElement> seiten =
                    driver.findElements(By.cssSelector(".dvpages .dvpage img[data-src]"));

            int seiteNr = 1;
            for (WebElement img : seiten) {
                String src = img.getAttribute("data-src");
                if (src == null || src.isBlank()) {
                    continue;
                }

                // indexOf instead of split: "?".split("\\?") yields an empty array.
                int queryStart = src.indexOf('?');
                String path = queryStart >= 0 ? src.substring(0, queryStart) : src;
                if (path.isEmpty()) {
                    continue;
                }

                boolean absolute = path.startsWith("http://") || path.startsWith("https://");
                String fullUrl = absolute ? path : BASE_URL + path;
                bilder.add(new BildInfo(seiteNr++, fullUrl));
            }

            System.out.println("" + bilder.size() + " Bild-URLs extrahiert.");

        } catch (Exception e) {
            System.err.println("❌ Fehler beim Extrahieren der Bilder: " + e.getMessage());
        }

        return bilder;
    }
}

View File

@ -0,0 +1,17 @@
package de.roko.genalogy.downloader.database;
import org.junit.jupiter.api.Test;
import static org.junit.jupiter.api.Assertions.*;
/**
 * Manual helper test that invokes {@code DatenbankDebugger#printTable} for a
 * single table; it makes no assertions.
 */
class DatenbankDebuggerTest {

    /** Calls {@code printTable("buch")} on a fresh debugger instance. */
    @Test
    void printTable() throws Exception {
        // Pass "ort" instead to inspect that table.
        new DatenbankDebugger().printTable("buch");
    }
}

View File

@ -0,0 +1,81 @@
package de.roko.genalogy.downloader.database;
import de.roko.genalogy.downloader.archion.ArchionLoginHelper;
import org.junit.jupiter.api.*;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeOptions;
import java.io.File;
import static org.junit.jupiter.api.Assertions.*;
/**
 * Manual integration tests for {@code DokumentExtractor}. Every test starts a
 * Chrome session and logs in to Archion first.
 *
 * <p>Credentials are read from the environment variables
 * {@code ARCHION_USERNAME} and {@code ARCHION_PASSWORD}; they must never be
 * committed to source control. Tests are skipped when the variables are unset.
 */
class DokumentExtractorTest {

    /** Archion account name, taken from {@code ARCHION_USERNAME}. */
    public String username = System.getenv("ARCHION_USERNAME");
    /** Archion password, taken from {@code ARCHION_PASSWORD}. */
    public String password = System.getenv("ARCHION_PASSWORD");
    /** Browser session of the current test; {@code null} until {@link #login()} created it. */
    public ChromeDriver driver;

    /** Makes sure the download folder {@code ~/Pictures/archion} exists. */
    @BeforeAll
    static void setUp() {
        String userHome = System.getProperty("user.home");
        String downloadFolder = userHome + "/Pictures/archion";
        new File(downloadFolder).mkdirs();
    }

    /** Starts Chrome and logs in; skips the test when no credentials are configured. */
    @BeforeEach
    void login() throws InterruptedException {
        Assumptions.assumeTrue(
                username != null && !username.isBlank() && password != null && !password.isBlank(),
                "ARCHION_USERNAME / ARCHION_PASSWORD not set");

        ChromeOptions options = new ChromeOptions();
        options.addArguments("--remote-allow-origins=*");

        // Start the WebDriver
        driver = new ChromeDriver(options);
        ArchionLoginHelper archionLoginHelper = new ArchionLoginHelper(driver);
        archionLoginHelper.login(username, password);
    }

    /** Runs the extractor over everything; the browser is closed by {@link #quit()}. */
    @Test
    void run() throws Exception {
        DokumentExtractor dokumentExtractor = new DokumentExtractor();
        dokumentExtractor.run();
    }

    /**
     * Runs the extractor for one archive and then prepares the image folders.
     * Other archive names that have been used here:
     * "Landeskirchenarchiv der Evangelischen Kirche Mitteldeutschland/Eisenach",
     * "Landeskirchliches Archiv der Evangelisch-Lutherischen Kirche in Norddeutschland",
     * "Archiv der Evangelischen Landeskirche Anhalts",
     * "Landeskirchenarchiv der Evangelischen Kirche Mitteldeutschland/Magdeburg".
     */
    @Test
    void runForArchiv() throws Exception {
        new DokumentExtractor().runForArchiv("Landeskirchliches Archiv der Evangelisch-Lutherischen Landeskirche Sachsens");
        new DokumentExtractor().erstelleBildOrdnerFuerAlleBuecher("/Users/robertkoch/archion_bilder");
    }

    @Test
    void erstelleBildOrdnerFuerAlleBuecher() throws Exception {
        new DokumentExtractor().erstelleBildOrdnerFuerAlleBuecher("/Users/robertkoch/archion_bilder");
    }

    @Test
    void runForBundesland() throws Exception {
        new DokumentExtractor().runForBundesland("Thüringen");
    }

    /** Closes the browser if one was started (login may have been skipped or failed). */
    @AfterEach
    void quit() {
        if (driver != null) {
            driver.quit();
            driver = null;
        }
    }
}

View File

@ -0,0 +1,56 @@
package de.roko.genalogy.downloader.database;
import de.roko.genalogy.downloader.archion.ArchionLoginHelper;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeOptions;
import java.io.File;
import static org.junit.jupiter.api.Assertions.*;
class KreisOrtExtractorTest {
public String username = "robatkoch";
public String password = "PaLiNa2016$$";
public ChromeDriver driver;
@BeforeAll
static void setUp() {
String userHome = System.getProperty("user.home");
String downloadFolder = userHome + "/Pictures/archion";
new File(downloadFolder).mkdirs();
}
@BeforeEach void login() {
ChromeOptions options = new ChromeOptions();
options.addArguments("--remote-allow-origins=*");
// WebDriver starten
driver = new ChromeDriver(options);
ArchionLoginHelper archionLoginHelper = new ArchionLoginHelper(driver);
archionLoginHelper.login(username, password);
assertFalse(archionLoginHelper.isLoggedIn(), "Login nicht möglich");
}
@Test
void read() throws Exception {
KreisOrtExtractor kreisOrtExtractor = new KreisOrtExtractor();
//kreisOrtExtractor.read();
}
}

View File

@ -0,0 +1,112 @@
package de.roko.genalogy.downloader.viewer;
import de.roko.genalogy.downloader.archion.ArchionLoginHelper;
import net.lightbody.bmp.BrowserMobProxy;
import net.lightbody.bmp.BrowserMobProxyServer;
import net.lightbody.bmp.client.ClientUtil;
import net.lightbody.bmp.core.har.HarEntry;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.openqa.selenium.*;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeOptions;
import java.io.File;
import java.util.ArrayList;
import java.util.List;
import static org.junit.jupiter.api.Assertions.*;
class ViewerBildParserTest {
public String username = "robatkoch";
public String password = "PaLiNa2016$$";
public ChromeDriver driver;
@BeforeAll
static void setUp() {
String userHome = System.getProperty("user.home");
String downloadFolder = userHome + "/Pictures/archion";
new File(downloadFolder).mkdirs();
}
@BeforeEach
void login() throws InterruptedException {
/*
ChromeOptions options = new ChromeOptions();
options.addArguments("--remote-allow-origins=*");
// WebDriver starten
driver = new ChromeDriver(options);
ArchionLoginHelper archionLoginHelper = new ArchionLoginHelper(driver);
archionLoginHelper.login(username, password);
//assertFalse(archionLoginHelper.isLoggedIn(), "Login nicht möglich");
*/
}
@Test
void parse() throws InterruptedException {
String viewerUrl = "https://www.archion.de/de/viewer/churchRegister/287040?cHash=c61b3fc9f95353f6ba795fe0b90b3288";
// Proxy starten
BrowserMobProxy proxy = new BrowserMobProxyServer();
proxy.start(0); // auf freiem Port starten
// Proxy in Selenium einbinden
Proxy seleniumProxy = ClientUtil.createSeleniumProxy(proxy);
ChromeOptions options = new ChromeOptions();
options.setProxy(seleniumProxy);
options.addArguments("--start-maximized");
WebDriver driver = new ChromeDriver(options);
// HAR-Protokoll starten
proxy.newHar("archion-view");
// Viewer-URL aufrufen (muss eingeloggt sein!)
ArchionLoginHelper archionLoginHelper = new ArchionLoginHelper(driver);
archionLoginHelper.login(username, password);
driver.get(viewerUrl);
// Warten und Seiten durchblättern (optional)
for (int i = 0; i < 10; i++) {
try {
Thread.sleep(1500);
WebElement next = driver.findElement(By.cssSelector("a.dvnavnext"));
if (next != null && next.isDisplayed()) {
next.click();
} else {
break;
}
} catch (Exception e) {
break; // Ende erreicht oder Fehler
}
}
// Alle Bild-URLs auslesen
List<String> imageUrls = new ArrayList<>();
for (HarEntry entry : proxy.getHar().getLog().getEntries()) {
String url = entry.getRequest().getUrl();
if (url.contains("/si/") && url.endsWith("/image.jpg")) {
imageUrls.add(url);
}
}
// Ausgabe
System.out.println("\n🔍 Gefundene Bild-URLs:");
for (int i = 0; i < imageUrls.size(); i++) {
System.out.printf("Bild %03d: %s%n", i + 1, imageUrls.get(i));
}
// Aufräumen
driver.quit();
proxy.stop();
}
}