folder downloader ready
This commit is contained in:
parent
d8165cd91c
commit
a94597964a
13
.idea/easycode.ignore
generated
Normal file
13
.idea/easycode.ignore
generated
Normal file
@ -0,0 +1,13 @@
|
||||
.idea
|
||||
.vscode
|
||||
node_modules/
|
||||
dist/
|
||||
vendor/
|
||||
cache/
|
||||
.*/
|
||||
*.min.*
|
||||
*.test.*
|
||||
*.spec.*
|
||||
*.bundle.*
|
||||
*.bundle-min.*
|
||||
*.log
|
||||
6
.idea/easycode/codebase-v2.xml
generated
Normal file
6
.idea/easycode/codebase-v2.xml
generated
Normal file
@ -0,0 +1,6 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="com.obiscr.chatgpt.settings.EasyCodeState">
|
||||
<option name="projectFiles" value="$PROJECT_DIR$/src/main/java/de/roko/genalogy/downloader/archion/ArchionArchiveToCSV.java;/Users/robertkoch/dev/arch/src/main/java/de/roko/genalogy/downloader/archion/ArchionLoginHelper.java;/Users/robertkoch/dev/arch/src/main/java/de/roko/genalogy/downloader/archiv/buch/Bild.java;/Users/robertkoch/dev/arch/src/main/java/de/roko/genalogy/downloader/archiv/buch/Buch.java;/Users/robertkoch/dev/arch/src/main/java/de/roko/genalogy/downloader/archiv/buch/Seite.java;/Users/robertkoch/dev/arch/src/main/java/de/roko/genalogy/downloader/archiv/Archiv.java;/Users/robertkoch/dev/arch/src/main/java/de/roko/genalogy/downloader/archiv/Kirchenkreis.java;/Users/robertkoch/dev/arch/src/main/java/de/roko/genalogy/downloader/archiv/Ort.java;/Users/robertkoch/dev/arch/src/main/java/de/roko/genalogy/downloader/database/ArchionDatabaseSetup.java;/Users/robertkoch/dev/arch/src/main/java/de/roko/genalogy/downloader/database/ArchiveInserter.java;/Users/robertkoch/dev/arch/src/main/java/de/roko/genalogy/downloader/database/ArchiveReader.java;/Users/robertkoch/dev/arch/src/main/java/de/roko/genalogy/downloader/database/ArchivStrukturParser.java;/Users/robertkoch/dev/arch/src/main/java/de/roko/genalogy/downloader/database/KirchenkreisExtractor.java;/Users/robertkoch/dev/arch/src/main/java/de/roko/genalogy/downloader/database/KreisOrtExtractor.java;/Users/robertkoch/dev/arch/src/main/java/de/roko/genalogy/downloader/database/OrtExtractor.java;/Users/robertkoch/dev/arch/src/main/java/de/roko/genalogy/downloader/ImageDownloader.java;/Users/robertkoch/dev/arch/src/main/java/de/roko/genalogy/downloader/Main.java;/Users/robertkoch/dev/arch/src/test/java/de/roko/genalogy/downloader/database/KreisOrtExtractorTest.java" />
|
||||
</component>
|
||||
</project>
|
||||
2
.idea/misc.xml
generated
2
.idea/misc.xml
generated
@ -8,7 +8,7 @@
|
||||
</list>
|
||||
</option>
|
||||
</component>
|
||||
<component name="ProjectRootManager" version="2" languageLevel="JDK_23" default="true" project-jdk-name="openjdk-23" project-jdk-type="JavaSDK">
|
||||
<component name="ProjectRootManager" version="2" languageLevel="JDK_21_PREVIEW" project-jdk-name="21" project-jdk-type="JavaSDK">
|
||||
<output url="file://$PROJECT_DIR$/out" />
|
||||
</component>
|
||||
</project>
|
||||
124
.idea/uiDesigner.xml
generated
Normal file
124
.idea/uiDesigner.xml
generated
Normal file
@ -0,0 +1,124 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="Palette2">
|
||||
<group name="Swing">
|
||||
<item class="com.intellij.uiDesigner.HSpacer" tooltip-text="Horizontal Spacer" icon="/com/intellij/uiDesigner/icons/hspacer.svg" removable="false" auto-create-binding="false" can-attach-label="false">
|
||||
<default-constraints vsize-policy="1" hsize-policy="6" anchor="0" fill="1" />
|
||||
</item>
|
||||
<item class="com.intellij.uiDesigner.VSpacer" tooltip-text="Vertical Spacer" icon="/com/intellij/uiDesigner/icons/vspacer.svg" removable="false" auto-create-binding="false" can-attach-label="false">
|
||||
<default-constraints vsize-policy="6" hsize-policy="1" anchor="0" fill="2" />
|
||||
</item>
|
||||
<item class="javax.swing.JPanel" icon="/com/intellij/uiDesigner/icons/panel.svg" removable="false" auto-create-binding="false" can-attach-label="false">
|
||||
<default-constraints vsize-policy="3" hsize-policy="3" anchor="0" fill="3" />
|
||||
</item>
|
||||
<item class="javax.swing.JScrollPane" icon="/com/intellij/uiDesigner/icons/scrollPane.svg" removable="false" auto-create-binding="false" can-attach-label="true">
|
||||
<default-constraints vsize-policy="7" hsize-policy="7" anchor="0" fill="3" />
|
||||
</item>
|
||||
<item class="javax.swing.JButton" icon="/com/intellij/uiDesigner/icons/button.svg" removable="false" auto-create-binding="true" can-attach-label="false">
|
||||
<default-constraints vsize-policy="0" hsize-policy="3" anchor="0" fill="1" />
|
||||
<initial-values>
|
||||
<property name="text" value="Button" />
|
||||
</initial-values>
|
||||
</item>
|
||||
<item class="javax.swing.JRadioButton" icon="/com/intellij/uiDesigner/icons/radioButton.svg" removable="false" auto-create-binding="true" can-attach-label="false">
|
||||
<default-constraints vsize-policy="0" hsize-policy="3" anchor="8" fill="0" />
|
||||
<initial-values>
|
||||
<property name="text" value="RadioButton" />
|
||||
</initial-values>
|
||||
</item>
|
||||
<item class="javax.swing.JCheckBox" icon="/com/intellij/uiDesigner/icons/checkBox.svg" removable="false" auto-create-binding="true" can-attach-label="false">
|
||||
<default-constraints vsize-policy="0" hsize-policy="3" anchor="8" fill="0" />
|
||||
<initial-values>
|
||||
<property name="text" value="CheckBox" />
|
||||
</initial-values>
|
||||
</item>
|
||||
<item class="javax.swing.JLabel" icon="/com/intellij/uiDesigner/icons/label.svg" removable="false" auto-create-binding="false" can-attach-label="false">
|
||||
<default-constraints vsize-policy="0" hsize-policy="0" anchor="8" fill="0" />
|
||||
<initial-values>
|
||||
<property name="text" value="Label" />
|
||||
</initial-values>
|
||||
</item>
|
||||
<item class="javax.swing.JTextField" icon="/com/intellij/uiDesigner/icons/textField.svg" removable="false" auto-create-binding="true" can-attach-label="true">
|
||||
<default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1">
|
||||
<preferred-size width="150" height="-1" />
|
||||
</default-constraints>
|
||||
</item>
|
||||
<item class="javax.swing.JPasswordField" icon="/com/intellij/uiDesigner/icons/passwordField.svg" removable="false" auto-create-binding="true" can-attach-label="true">
|
||||
<default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1">
|
||||
<preferred-size width="150" height="-1" />
|
||||
</default-constraints>
|
||||
</item>
|
||||
<item class="javax.swing.JFormattedTextField" icon="/com/intellij/uiDesigner/icons/formattedTextField.svg" removable="false" auto-create-binding="true" can-attach-label="true">
|
||||
<default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1">
|
||||
<preferred-size width="150" height="-1" />
|
||||
</default-constraints>
|
||||
</item>
|
||||
<item class="javax.swing.JTextArea" icon="/com/intellij/uiDesigner/icons/textArea.svg" removable="false" auto-create-binding="true" can-attach-label="true">
|
||||
<default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
|
||||
<preferred-size width="150" height="50" />
|
||||
</default-constraints>
|
||||
</item>
|
||||
<item class="javax.swing.JTextPane" icon="/com/intellij/uiDesigner/icons/textPane.svg" removable="false" auto-create-binding="true" can-attach-label="true">
|
||||
<default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
|
||||
<preferred-size width="150" height="50" />
|
||||
</default-constraints>
|
||||
</item>
|
||||
<item class="javax.swing.JEditorPane" icon="/com/intellij/uiDesigner/icons/editorPane.svg" removable="false" auto-create-binding="true" can-attach-label="true">
|
||||
<default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
|
||||
<preferred-size width="150" height="50" />
|
||||
</default-constraints>
|
||||
</item>
|
||||
<item class="javax.swing.JComboBox" icon="/com/intellij/uiDesigner/icons/comboBox.svg" removable="false" auto-create-binding="true" can-attach-label="true">
|
||||
<default-constraints vsize-policy="0" hsize-policy="2" anchor="8" fill="1" />
|
||||
</item>
|
||||
<item class="javax.swing.JTable" icon="/com/intellij/uiDesigner/icons/table.svg" removable="false" auto-create-binding="true" can-attach-label="false">
|
||||
<default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
|
||||
<preferred-size width="150" height="50" />
|
||||
</default-constraints>
|
||||
</item>
|
||||
<item class="javax.swing.JList" icon="/com/intellij/uiDesigner/icons/list.svg" removable="false" auto-create-binding="true" can-attach-label="false">
|
||||
<default-constraints vsize-policy="6" hsize-policy="2" anchor="0" fill="3">
|
||||
<preferred-size width="150" height="50" />
|
||||
</default-constraints>
|
||||
</item>
|
||||
<item class="javax.swing.JTree" icon="/com/intellij/uiDesigner/icons/tree.svg" removable="false" auto-create-binding="true" can-attach-label="false">
|
||||
<default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
|
||||
<preferred-size width="150" height="50" />
|
||||
</default-constraints>
|
||||
</item>
|
||||
<item class="javax.swing.JTabbedPane" icon="/com/intellij/uiDesigner/icons/tabbedPane.svg" removable="false" auto-create-binding="true" can-attach-label="false">
|
||||
<default-constraints vsize-policy="3" hsize-policy="3" anchor="0" fill="3">
|
||||
<preferred-size width="200" height="200" />
|
||||
</default-constraints>
|
||||
</item>
|
||||
<item class="javax.swing.JSplitPane" icon="/com/intellij/uiDesigner/icons/splitPane.svg" removable="false" auto-create-binding="false" can-attach-label="false">
|
||||
<default-constraints vsize-policy="3" hsize-policy="3" anchor="0" fill="3">
|
||||
<preferred-size width="200" height="200" />
|
||||
</default-constraints>
|
||||
</item>
|
||||
<item class="javax.swing.JSpinner" icon="/com/intellij/uiDesigner/icons/spinner.svg" removable="false" auto-create-binding="true" can-attach-label="true">
|
||||
<default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1" />
|
||||
</item>
|
||||
<item class="javax.swing.JSlider" icon="/com/intellij/uiDesigner/icons/slider.svg" removable="false" auto-create-binding="true" can-attach-label="false">
|
||||
<default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1" />
|
||||
</item>
|
||||
<item class="javax.swing.JSeparator" icon="/com/intellij/uiDesigner/icons/separator.svg" removable="false" auto-create-binding="false" can-attach-label="false">
|
||||
<default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3" />
|
||||
</item>
|
||||
<item class="javax.swing.JProgressBar" icon="/com/intellij/uiDesigner/icons/progressbar.svg" removable="false" auto-create-binding="true" can-attach-label="false">
|
||||
<default-constraints vsize-policy="0" hsize-policy="6" anchor="0" fill="1" />
|
||||
</item>
|
||||
<item class="javax.swing.JToolBar" icon="/com/intellij/uiDesigner/icons/toolbar.svg" removable="false" auto-create-binding="false" can-attach-label="false">
|
||||
<default-constraints vsize-policy="0" hsize-policy="6" anchor="0" fill="1">
|
||||
<preferred-size width="-1" height="20" />
|
||||
</default-constraints>
|
||||
</item>
|
||||
<item class="javax.swing.JToolBar$Separator" icon="/com/intellij/uiDesigner/icons/toolbarSeparator.svg" removable="false" auto-create-binding="false" can-attach-label="false">
|
||||
<default-constraints vsize-policy="0" hsize-policy="0" anchor="0" fill="1" />
|
||||
</item>
|
||||
<item class="javax.swing.JScrollBar" icon="/com/intellij/uiDesigner/icons/scrollbar.svg" removable="false" auto-create-binding="true" can-attach-label="false">
|
||||
<default-constraints vsize-policy="6" hsize-policy="0" anchor="0" fill="2" />
|
||||
</item>
|
||||
</group>
|
||||
</component>
|
||||
</project>
|
||||
BIN
archion.db
BIN
archion.db
Binary file not shown.
21
pom.xml
21
pom.xml
@ -9,8 +9,8 @@
|
||||
<version>1.0-SNAPSHOT</version>
|
||||
|
||||
<properties>
|
||||
<maven.compiler.source>23</maven.compiler.source>
|
||||
<maven.compiler.target>23</maven.compiler.target>
|
||||
<maven.compiler.source>21</maven.compiler.source>
|
||||
<maven.compiler.target>21</maven.compiler.target>
|
||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||
</properties>
|
||||
|
||||
@ -18,7 +18,12 @@
|
||||
<dependency>
|
||||
<groupId>org.seleniumhq.selenium</groupId>
|
||||
<artifactId>selenium-java</artifactId>
|
||||
<version>4.21.0</version>
|
||||
<version>4.15.0</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.google.guava</groupId>
|
||||
<artifactId>guava</artifactId>
|
||||
<version>31.1-jre</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>io.github.bonigarcia</groupId>
|
||||
@ -41,6 +46,16 @@
|
||||
<version>5.10.0</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>net.lightbody.bmp</groupId>
|
||||
<artifactId>browsermob-core</artifactId>
|
||||
<version>2.1.5</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.seleniumhq.selenium</groupId>
|
||||
<artifactId>selenium-java</artifactId>
|
||||
<version>4.15.0</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
</project>
|
||||
@ -0,0 +1,32 @@
|
||||
package de.roko.genalogy.downloader;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.net.URL;
|
||||
import java.nio.file.*;
|
||||
|
||||
/**
 * Downloads a single file from a URL into a local directory.
 */
public class ImageDownloader {

    /**
     * Demo entry point: fetches the Archion logo into the {@code Pictures} folder below
     * the user's home directory. Download failures are reported on stderr.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String imageUrl = "https://www.archion.de/typo3conf/ext/archion_sitepackage/Resources/Public/Images/logo_quer_weiss.svg";
        String targetDirectory = System.getProperty("user.home") + "/Pictures";

        try {
            downloadImage(imageUrl, targetDirectory);
        } catch (IOException e) {
            System.err.println("❌ Fehler beim Herunterladen: " + e.getMessage());
        }
    }

    /**
     * Copies the resource behind {@code imageUrl} into {@code targetDirPath}, using the last
     * segment of the URL path as the file name. An existing file of that name is replaced.
     *
     * @param imageUrl      URL of the resource to download
     * @param targetDirPath directory to store the file in; created if it does not exist
     * @throws IOException if the URL is malformed, its path has no file name, or reading
     *                     or writing fails
     */
    public static void downloadImage(String imageUrl, String targetDirPath) throws IOException {
        URL url = new URL(imageUrl);

        // getFileName() is null for a path of "/" and empty for a URL without any path;
        // in both cases there is nothing sensible to name the target file.
        Path fileNamePart = Paths.get(url.getPath()).getFileName();
        if (fileNamePart == null || fileNamePart.toString().isEmpty()) {
            throw new IOException("Kein Dateiname in URL: " + imageUrl);
        }

        Path targetDir = Paths.get(targetDirPath);
        Path targetPath = targetDir.resolve(fileNamePart.toString());

        try (InputStream in = url.openStream()) {
            Files.createDirectories(targetDir); // make sure the target directory exists
            Files.copy(in, targetPath, StandardCopyOption.REPLACE_EXISTING);
            System.out.println("✅ Bild gespeichert unter: " + targetPath);
        }
    }
}
|
||||
81
src/main/java/de/roko/genalogy/downloader/Main.java
Normal file
81
src/main/java/de/roko/genalogy/downloader/Main.java
Normal file
@ -0,0 +1,81 @@
|
||||
package de.roko.genalogy.downloader;
|
||||
|
||||
import de.roko.genalogy.downloader.archion.ArchionLoginHelper;
|
||||
import de.roko.genalogy.downloader.database.*;
|
||||
import io.github.bonigarcia.wdm.WebDriverManager;
|
||||
import org.openqa.selenium.chrome.ChromeDriver;
|
||||
import org.openqa.selenium.chrome.ChromeOptions;
|
||||
|
||||
import java.io.File;
|
||||
|
||||
|
||||
|
||||
public class Main {
|
||||
|
||||
public static ChromeDriver driver;
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
|
||||
String username = "robatkoch";
|
||||
String password = "PaLiNa2016$$";
|
||||
|
||||
String userHome = System.getProperty("user.home");
|
||||
String downloadFolder = userHome + "/Pictures/archion";
|
||||
new File(downloadFolder).mkdirs();
|
||||
|
||||
// ChromeDriver automatisch verwalten
|
||||
WebDriverManager.chromedriver().setup();
|
||||
|
||||
|
||||
ChromeOptions options = new ChromeOptions();
|
||||
options.addArguments("--remote-allow-origins=*");
|
||||
|
||||
// WebDriver starten
|
||||
driver = new ChromeDriver(options);
|
||||
|
||||
ArchionLoginHelper archionLoginHelper = new ArchionLoginHelper(driver);
|
||||
|
||||
archionLoginHelper.login(username, password);
|
||||
|
||||
if(archionLoginHelper.isLoggedIn()) {
|
||||
|
||||
|
||||
DatenbankReset.reset();
|
||||
DatenbankDebugger datenbankDebuggerdebugger = new DatenbankDebugger();
|
||||
|
||||
//Abgleich
|
||||
archiveAuslesen();
|
||||
datenbankDebuggerdebugger.printTable("archive");
|
||||
|
||||
kirchenkreiseAuslesen();
|
||||
datenbankDebuggerdebugger.printTable("kreis");
|
||||
|
||||
orteAuslesen();
|
||||
|
||||
|
||||
|
||||
|
||||
System.out.println("break");
|
||||
}
|
||||
}
|
||||
|
||||
public static void archiveAuslesen() {
|
||||
//Lese Archive
|
||||
ArchiveInserter archiveInserter = new ArchiveInserter();
|
||||
try {
|
||||
archiveInserter.run(driver);
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
public static void kirchenkreiseAuslesen() throws Exception {
|
||||
KirchenkreisExtractor kirchenkreisExtractor = new KirchenkreisExtractor();
|
||||
kirchenkreisExtractor.readNwrite();
|
||||
}
|
||||
|
||||
public static void orteAuslesen() throws Exception {
|
||||
KreisOrtExtractor kreisOrtExtractor = new KreisOrtExtractor();
|
||||
kreisOrtExtractor.readNwrite();
|
||||
}
|
||||
}
|
||||
@ -35,23 +35,35 @@ public class ArchionDatabaseSetup {
|
||||
|
||||
stmt.execute("""
|
||||
CREATE TABLE IF NOT EXISTS ort (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
kreis_id INTEGER, -- NULL, wenn Ort direkt unter Archiv hängt
|
||||
archiv_id INTEGER NOT NULL,
|
||||
name TEXT NOT NULL,
|
||||
link TEXT NOT NULL,
|
||||
UNIQUE(kreis_id, archiv_id, name) -- ← Diese Kombination muss zum ON CONFLICT passen
|
||||
);
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
kreis_id INTEGER,
|
||||
archiv_id INTEGER NOT NULL,
|
||||
name TEXT NOT NULL,
|
||||
link TEXT NOT NULL,
|
||||
UNIQUE(archiv_id, name),
|
||||
FOREIGN KEY (kreis_id) REFERENCES kreis(id),
|
||||
FOREIGN KEY (archiv_id) REFERENCES archive(id)
|
||||
);
|
||||
""");
|
||||
|
||||
stmt.execute("""
|
||||
CREATE TABLE IF NOT EXISTS buch (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
ort_id INTEGER NOT NULL,
|
||||
titel TEXT NOT NULL,
|
||||
zeitraum TEXT,
|
||||
FOREIGN KEY (ort_id) REFERENCES ort(id)
|
||||
);
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT, -- technische ID
|
||||
|
||||
ort_id INTEGER NOT NULL, -- Bezug zum Ort
|
||||
titel TEXT NOT NULL, -- Titel des Buchs
|
||||
typ TEXT, -- Symboltyp (z. B. taufbuch, beerdigungsbuch)
|
||||
zeitraum TEXT, -- z. B. 1700–1750
|
||||
enthaelt TEXT, -- Zusatzangaben, z. B. auch Konfirmationen
|
||||
anmerkung TEXT, -- redaktionelle Hinweise
|
||||
signatur TEXT, -- Signatur lokal
|
||||
archivname TEXT, -- Archivname aus Detailseite
|
||||
link TEXT NOT NULL, -- Detail-Link auf Archion
|
||||
viewer_link TEXT, -- direkter Link zum Viewer
|
||||
|
||||
FOREIGN KEY (ort_id) REFERENCES ort(id),
|
||||
UNIQUE (ort_id, titel) -- wichtig für conflict-handling
|
||||
);
|
||||
""");
|
||||
|
||||
stmt.execute("""
|
||||
|
||||
@ -44,7 +44,25 @@ public class ArchivStrukturParser {
|
||||
String name = a.text().trim();
|
||||
String href = a.absUrl("href").trim();
|
||||
|
||||
if (name.toLowerCase().contains("kirchenkreis") || name.toLowerCase().contains("dekanat")) {
|
||||
if (name.toLowerCase().contains("kirchenkreis")
|
||||
|| name.toLowerCase().contains("dekanat")
|
||||
|| name.toLowerCase().contains("juden")
|
||||
|| name.toLowerCase().contains("mennoiten")
|
||||
|| name.toLowerCase().contains("militärseelsorge")
|
||||
|| name.toLowerCase().contains("reformierte kirche")
|
||||
|| name.toLowerCase().contains("auslandsgemeinde")
|
||||
|| name.toLowerCase().contains("thüringen")
|
||||
|| name.toLowerCase().contains("israeliten")
|
||||
|| name.toLowerCase().contains("krankenhausseelsorge")
|
||||
|| name.toLowerCase().contains("kreis")
|
||||
|| name.toLowerCase().contains("reformierter kirchenkreis")
|
||||
|| name.toLowerCase().contains("sonderbestände")
|
||||
|| name.toLowerCase().contains("allgemeine hilfsmittel")
|
||||
|| name.toLowerCase().contains("allgemeines ortschaftsverzeichnis")
|
||||
|| name.toLowerCase().contains("kirchenbezirk")
|
||||
|| name.toLowerCase().contains("hinterpommern")
|
||||
|| name.toLowerCase().contains("militärkirchenbücher")
|
||||
){
|
||||
kreise.add(new Kirchenkreis(archivId, name, href));
|
||||
} else {
|
||||
orte.add(new Ort(null, archivId, name, href));
|
||||
|
||||
@ -0,0 +1,43 @@
|
||||
package de.roko.genalogy.downloader.database;
|
||||
|
||||
import java.sql.*;
|
||||
|
||||
/**
 * Debug helper that dumps the content of a table of the SQLite database to stdout.
 */
public class DatenbankDebugger {

    private static final String DB = "archion.db";

    public DatenbankDebugger() {

    }

    /**
     * Prints the column names and all rows of {@code tableName}, tab-separated.
     * SQL errors are reported on stderr and not propagated.
     *
     * @param tableName name of the table to print; must be a plain SQL identifier
     *                  (letters, digits, underscore, not starting with a digit)
     * @throws IllegalArgumentException if {@code tableName} is null or not a plain identifier
     * @throws Exception                declared for compatibility with existing callers
     */
    public void printTable(String tableName) throws Exception {
        // Table names cannot be bound as statement parameters, so the name is validated
        // before it is concatenated into the query text.
        if (tableName == null || !tableName.matches("[A-Za-z_][A-Za-z0-9_]*")) {
            throw new IllegalArgumentException("Ungültiger Tabellenname: " + tableName);
        }

        System.out.println("\n📋 Inhalt der Tabelle: " + tableName);

        try (Connection conn = DriverManager.getConnection("jdbc:sqlite:" + DB);
             Statement stmt = conn.createStatement();
             ResultSet rs = stmt.executeQuery("SELECT * FROM " + tableName)) {

            ResultSetMetaData meta = rs.getMetaData();
            int columnCount = meta.getColumnCount();

            // Header line with the column names.
            for (int i = 1; i <= columnCount; i++) {
                System.out.print(meta.getColumnName(i) + "\t");
            }
            System.out.println("\n" + "-".repeat(60));

            // One output line per row; SQL NULL is printed as the text NULL.
            while (rs.next()) {
                for (int i = 1; i <= columnCount; i++) {
                    Object val = rs.getObject(i);
                    System.out.print((val != null ? val.toString() : "NULL") + "\t");
                }
                System.out.println();
            }

        } catch (SQLException e) {
            System.err.println("⚠️ Fehler beim Zugriff auf Tabelle '" + tableName + "': " + e.getMessage());
        }
    }
}
|
||||
@ -0,0 +1,29 @@
|
||||
package de.roko.genalogy.downloader.database;
|
||||
|
||||
import java.sql.Connection;
|
||||
import java.sql.DriverManager;
|
||||
import java.sql.Statement;
|
||||
|
||||
/**
 * Empties the {@code ort}, {@code kreis} and {@code archive} tables of the SQLite database
 * and resets their AUTOINCREMENT counters.
 */
public class DatenbankReset {

    private static final String DB = "archion.db";

    /**
     * Stand-alone entry point: performs the reset and prints a confirmation.
     *
     * @param args ignored
     * @throws Exception if the reset fails
     */
    public static void main(String[] args) throws Exception {
        reset();
        System.out.println("✅ Datenbank erfolgreich geleert.");
    }

    /**
     * Deletes all rows from {@code ort}, {@code kreis} and {@code archive} and removes their
     * entries from {@code sqlite_sequence}. All statements run in one transaction, so a
     * failure leaves the database unchanged instead of partially cleared.
     *
     * @throws Exception if connecting or any statement fails; the transaction is rolled back
     */
    public static void reset() throws Exception {
        try (Connection conn = DriverManager.getConnection("jdbc:sqlite:" + DB)) {
            conn.setAutoCommit(false);

            try (Statement stmt = conn.createStatement()) {

                // Children first, in case foreign keys are enforced.
                stmt.executeUpdate("DELETE FROM ort");
                stmt.executeUpdate("DELETE FROM kreis");
                stmt.executeUpdate("DELETE FROM archive");

                // Restart the generated ids for the cleared tables.
                stmt.executeUpdate("DELETE FROM sqlite_sequence WHERE name IN ('archive', 'kreis', 'ort')");

                conn.commit();
            } catch (Exception e) {
                conn.rollback();
                throw e;
            }
        }
    }
}
|
||||
@ -0,0 +1,367 @@
|
||||
package de.roko.genalogy.downloader.database;
|
||||
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.select.Elements;
|
||||
|
||||
import java.io.File;
|
||||
import java.sql.*;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.function.Function;
|
||||
|
||||
public class DokumentExtractor {
|
||||
|
||||
private static final String DB = "/Users/robertkoch/dev/arch/archion.db";
|
||||
|
||||
public void run() throws Exception {
|
||||
try (Connection conn = DriverManager.getConnection("jdbc:sqlite:" + DB)) {
|
||||
String sql = "SELECT id, name, link FROM ort ORDER BY id";
|
||||
try (PreparedStatement stmt = conn.prepareStatement(sql);
|
||||
ResultSet rs = stmt.executeQuery()) {
|
||||
|
||||
while (rs.next()) {
|
||||
int ortId = rs.getInt("id");
|
||||
String ortName = rs.getString("name");
|
||||
String ortLink = rs.getString("link");
|
||||
|
||||
System.out.println("\n📘 Lese Bücher für Ort: " + ortName);
|
||||
List<Buch> buecher = extractBuecher(ortId, ortLink);
|
||||
saveBuecher(buecher, conn);
|
||||
System.out.println("→ " + buecher.size() + " Bücher gespeichert.");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void run(int id) throws Exception {
|
||||
try (Connection conn = DriverManager.getConnection("jdbc:sqlite:" + DB)) {
|
||||
System.out.println("📂 Aktive DB-Datei: " + new java.io.File(DB).getAbsolutePath());
|
||||
|
||||
try (Statement stmt = conn.createStatement()) {
|
||||
ResultSet rs = stmt.executeQuery("PRAGMA table_info(buch)");
|
||||
System.out.println("📋 Spalten in 'buch':");
|
||||
while (rs.next()) {
|
||||
System.out.println(" - " + rs.getString("name"));
|
||||
}
|
||||
|
||||
rs = stmt.executeQuery("PRAGMA index_list('buch')");
|
||||
System.out.println("📊 Indizes auf 'buch':");
|
||||
while (rs.next()) {
|
||||
System.out.println(" - " + rs.getString("name") + ", unique: " + rs.getBoolean("unique"));
|
||||
}
|
||||
}
|
||||
|
||||
String sql = "SELECT id, name, link FROM ort WHERE id = " + id;
|
||||
try (PreparedStatement stmt = conn.prepareStatement(sql);
|
||||
ResultSet rs = stmt.executeQuery()) {
|
||||
|
||||
while (rs.next()) {
|
||||
int ortId = rs.getInt("id");
|
||||
String ortName = rs.getString("name");
|
||||
String ortLink = rs.getString("link");
|
||||
|
||||
System.out.println("\n📘 Lese Bücher für Ort: " + ortName);
|
||||
System.out.println("Link: " + ortLink);
|
||||
List<Buch> buecher = extractBuecher(ortId, ortLink);
|
||||
saveBuecher(buecher, conn);
|
||||
System.out.println("→ " + buecher.size() + " Bücher gespeichert.");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private List<Buch> extractBuecher(int ortId, String ortLink) {
|
||||
List<Buch> list = new ArrayList<>();
|
||||
try {
|
||||
Document doc = Jsoup.connect(ortLink).get();
|
||||
Elements buchEintraege = doc.select("#archive-nav li.item");
|
||||
|
||||
for (Element li : buchEintraege) {
|
||||
Element a = li.selectFirst("a");
|
||||
if (a == null) continue;
|
||||
|
||||
|
||||
Element span = a.selectFirst("span");
|
||||
if (span == null) { // 🔧 NEU: Schutz gegen fehlendes <span>
|
||||
System.err.println("⚠️ Kein <span> in: " + a);
|
||||
continue;
|
||||
}
|
||||
|
||||
String titel = span.text().trim();
|
||||
String link = a.absUrl("href").trim();
|
||||
|
||||
String typ = null;
|
||||
Element img = a.selectFirst("img");
|
||||
|
||||
if (img != null) {
|
||||
String src = img.attr("src");
|
||||
int lastSlash = src.lastIndexOf('/');
|
||||
int dot = src.lastIndexOf('.');
|
||||
if (lastSlash != -1 && dot != -1 && dot > lastSlash) {
|
||||
typ = src.substring(lastSlash + 1, dot);
|
||||
}
|
||||
}
|
||||
|
||||
Buch buch = extractBuchDetails(ortId, titel, link, typ);
|
||||
list.add(buch);
|
||||
}
|
||||
|
||||
} catch (Exception e) {
|
||||
System.err.println("⚠️ Fehler beim Ort-Link " + ortLink + ": " + e.getMessage());
|
||||
}
|
||||
return list;
|
||||
}
|
||||
|
||||
private Buch extractBuchDetails(int ortId, String titel, String link, String typ) {
|
||||
String zeitraum = null, enthaelt = null, anmerkung = null, signatur = null, archivname = null, viewerLink = null;
|
||||
|
||||
try {
|
||||
Document detailDoc = Jsoup.connect(link).get();
|
||||
|
||||
Element viewerEl = detailDoc.selectFirst("#steckbrief a[href*=\"/viewer/\"]");
|
||||
if (viewerEl != null) {
|
||||
viewerLink = viewerEl.absUrl("href");
|
||||
}
|
||||
|
||||
Elements dl = detailDoc.select("#steckbrief dl");
|
||||
for (Element dt : dl.select("dt")) {
|
||||
String label = dt.text().trim().toLowerCase();
|
||||
Element dd = dt.nextElementSibling();
|
||||
if (dd == null) continue;
|
||||
String value = dd.text().trim();
|
||||
|
||||
switch (label) {
|
||||
case "zeitraum" -> zeitraum = value;
|
||||
case "enthält auch" -> enthaelt = value;
|
||||
case "anmerkung" -> anmerkung = value;
|
||||
case "signatur lokal" -> signatur = value;
|
||||
case "archiv" -> archivname = value;
|
||||
}
|
||||
}
|
||||
|
||||
} catch (Exception e) {
|
||||
System.err.println("⚠️ Fehler beim Detail-Link " + link + ": " + e.getMessage());
|
||||
}
|
||||
|
||||
return new Buch(ortId, titel, link, typ, zeitraum, enthaelt, anmerkung, signatur, archivname, viewerLink);
|
||||
}
|
||||
|
||||
private void saveBuecher(List<Buch> list, Connection conn) throws SQLException {
|
||||
|
||||
if (list.isEmpty()) return;
|
||||
|
||||
List<String> vorhandeneTitel = getGespeicherteTitel(list.get(0).ortId(), conn); // 🔧 NEU
|
||||
List<Buch> neu = list.stream()
|
||||
.filter(b -> !vorhandeneTitel.contains(b.titel())) // 🔧 NEU: Duplikate überspringen
|
||||
.toList();
|
||||
|
||||
System.out.println("🧮 " + (list.size() - neu.size()) + " Bücher existieren bereits und werden übersprungen."); // 🔧 NEU
|
||||
System.out.println("✅ " + neu.size() + " neue Bücher werden gespeichert."); // 🔧 NEU
|
||||
|
||||
if (neu.isEmpty()) return;
|
||||
|
||||
|
||||
String sql = """
|
||||
INSERT INTO buch (
|
||||
ort_id, titel, typ, zeitraum, enthaelt,
|
||||
anmerkung, signatur, archivname, link, viewer_link
|
||||
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
ON CONFLICT(ort_id, titel)
|
||||
DO UPDATE SET
|
||||
typ = excluded.typ,
|
||||
zeitraum = excluded.zeitraum,
|
||||
enthaelt = excluded.enthaelt,
|
||||
anmerkung = excluded.anmerkung,
|
||||
signatur = excluded.signatur,
|
||||
archivname = excluded.archivname,
|
||||
link = excluded.link,
|
||||
viewer_link = excluded.viewer_link;
|
||||
""";
|
||||
|
||||
try (PreparedStatement stmt = conn.prepareStatement(sql)) {
|
||||
for (Buch b : list) {
|
||||
stmt.setInt(1, b.ortId());
|
||||
stmt.setString(2, b.titel());
|
||||
stmt.setString(3, b.typ());
|
||||
stmt.setString(4, b.zeitraum());
|
||||
stmt.setString(5, b.enthaelt());
|
||||
stmt.setString(6, b.anmerkung());
|
||||
stmt.setString(7, b.signatur());
|
||||
stmt.setString(8, b.archivname());
|
||||
stmt.setString(9, b.link());
|
||||
stmt.setString(10, b.viewerLink());
|
||||
stmt.addBatch();
|
||||
}
|
||||
stmt.executeBatch();
|
||||
}
|
||||
}
|
||||
|
||||
private List<String> getGespeicherteTitel(int ortId, Connection conn) throws SQLException {
|
||||
List<String> vorhandeneTitel = new ArrayList<>();
|
||||
String sql = "SELECT titel FROM buch WHERE ort_id = ?";
|
||||
try (PreparedStatement stmt = conn.prepareStatement(sql)) {
|
||||
stmt.setInt(1, ortId);
|
||||
try (ResultSet rs = stmt.executeQuery()) {
|
||||
while (rs.next()) {
|
||||
vorhandeneTitel.add(rs.getString("titel"));
|
||||
}
|
||||
}
|
||||
}
|
||||
return vorhandeneTitel;
|
||||
}
|
||||
|
||||
public void runForArchiv(String archivName) throws Exception {
|
||||
try (Connection conn = DriverManager.getConnection("jdbc:sqlite:" + DB)) {
|
||||
String sql = """
|
||||
SELECT ort.id, ort.name, ort.link
|
||||
FROM ort
|
||||
JOIN kreis ON ort.kreis_id = kreis.id
|
||||
JOIN archive ON kreis.archiv_id = archive.id
|
||||
WHERE archive.name = ?
|
||||
ORDER BY ort.id
|
||||
""";
|
||||
try (PreparedStatement stmt = conn.prepareStatement(sql)) {
|
||||
stmt.setString(1, archivName);
|
||||
runForOrtResultSet(stmt.executeQuery(), conn, archivName);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public void runForArchiv(int archivId) throws Exception {
|
||||
try (Connection conn = DriverManager.getConnection("jdbc:sqlite:" + DB)) {
|
||||
String sql = """
|
||||
SELECT ort.id, ort.name, ort.link
|
||||
FROM ort
|
||||
JOIN kreis ON ort.kreis_id = kreis.id
|
||||
WHERE kreis.archiv_id = ?
|
||||
ORDER BY ort.id
|
||||
""";
|
||||
try (PreparedStatement stmt = conn.prepareStatement(sql)) {
|
||||
stmt.setInt(1, archivId);
|
||||
runForOrtResultSet(stmt.executeQuery(), conn, "ID=" + archivId);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void runForBundesland(String bundesland) throws Exception {
|
||||
try (Connection conn = DriverManager.getConnection("jdbc:sqlite:" + DB)) {
|
||||
String sql = """
|
||||
SELECT ort.id, ort.name, ort.link
|
||||
FROM ort
|
||||
JOIN kreis ON ort.kreis_id = kreis.id
|
||||
JOIN archive ON kreis.archiv_id = archive.id
|
||||
WHERE archive.bundesland = ?
|
||||
ORDER BY ort.id
|
||||
""";
|
||||
try (PreparedStatement stmt = conn.prepareStatement(sql)) {
|
||||
stmt.setString(1, bundesland);
|
||||
runForOrtResultSet(stmt.executeQuery(), conn, "Bundesland=" + bundesland);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// 🔧 Hilfsmethode zum Verarbeiten des ResultSets
|
||||
private void runForOrtResultSet(ResultSet rs, Connection conn, String label) throws Exception {
|
||||
int count = 0;
|
||||
while (rs.next()) {
|
||||
int ortId = rs.getInt("id");
|
||||
String ortName = rs.getString("name");
|
||||
String ortLink = rs.getString("link");
|
||||
|
||||
System.out.println("\n📘 Lese Bücher für Ort: " + ortName);
|
||||
List<Buch> buecher = extractBuecher(ortId, ortLink);
|
||||
saveBuecher(buecher, conn);
|
||||
System.out.println("→ " + buecher.size() + " Bücher gespeichert.");
|
||||
count++;
|
||||
}
|
||||
|
||||
if (count == 0) {
|
||||
System.out.println("⚠️ Keine Orte für Archiv '" + label + "' gefunden.");
|
||||
}
|
||||
}
|
||||
|
||||
private File buildBildOrdnerPfad(
|
||||
String baseDir,
|
||||
String bundesland,
|
||||
String archivname,
|
||||
String kreisname,
|
||||
String ort,
|
||||
String buchTitel
|
||||
) {
|
||||
// Hilfsfunktion für Dateisystem-sichere Namen
|
||||
Function<String, String> safe = s ->
|
||||
s == null ? "unbekannt" : s.replaceAll("[^\\wäöüÄÖÜß\\-\\s]", "").trim();
|
||||
|
||||
List<String> pfad = new ArrayList<>();
|
||||
pfad.add(baseDir);
|
||||
pfad.add(safe.apply(bundesland));
|
||||
pfad.add(safe.apply(archivname));
|
||||
if (kreisname != null && !kreisname.isBlank()) {
|
||||
pfad.add(safe.apply(kreisname));
|
||||
}
|
||||
pfad.add(safe.apply(ort));
|
||||
pfad.add(safe.apply(buchTitel));
|
||||
|
||||
File ordner = new File(String.join(File.separator, pfad));
|
||||
if (!ordner.exists()) {
|
||||
ordner.mkdirs();
|
||||
}
|
||||
return ordner;
|
||||
}
|
||||
|
||||
public void erstelleBildOrdnerFuerAlleBuecher(String basisPfad) throws Exception {
|
||||
try (Connection conn = DriverManager.getConnection("jdbc:sqlite:" + DB)) {
|
||||
String sql = """
|
||||
SELECT
|
||||
buch.id AS buch_id,
|
||||
buch.titel AS buch_titel,
|
||||
ort.name AS ort_name,
|
||||
kreis.name AS kreis_name,
|
||||
archive.name AS archiv_name,
|
||||
archive.bundesland AS bundesland
|
||||
FROM buch
|
||||
JOIN ort ON buch.ort_id = ort.id
|
||||
LEFT JOIN kreis ON ort.kreis_id = kreis.id
|
||||
JOIN archive ON ort.archiv_id = archive.id
|
||||
ORDER BY archive.bundesland, archive.name, kreis.name, ort.name, buch.titel
|
||||
""";
|
||||
|
||||
try (PreparedStatement stmt = conn.prepareStatement(sql);
|
||||
ResultSet rs = stmt.executeQuery()) {
|
||||
|
||||
int count = 0;
|
||||
while (rs.next()) {
|
||||
String buchTitel = rs.getString("buch_titel");
|
||||
String ort = rs.getString("ort_name");
|
||||
String kreis = rs.getString("kreis_name");
|
||||
String archiv = rs.getString("archiv_name");
|
||||
String bundesland = rs.getString("bundesland");
|
||||
|
||||
File ordner = buildBildOrdnerPfad(basisPfad, bundesland, archiv, kreis, ort, buchTitel);
|
||||
System.out.println("📁 Ordner: " + ordner.getAbsolutePath());
|
||||
count++;
|
||||
}
|
||||
|
||||
System.out.println("✅ " + count + " Bildordner vorbereitet.");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
 * Immutable description of one book belonging to a place.
 *
 * <p>The German component names presumably mean: title, type, period covered, contents,
 * remark, shelf mark and archive name — confirm against the extractor that fills them.
 *
 * @param ortId      id of the place the book belongs to
 * @param titel      book title
 * @param typ        book type
 * @param zeitraum   period covered
 * @param enthaelt   contents description
 * @param anmerkung  remark
 * @param signatur   shelf mark
 * @param archivname name of the holding archive
 * @param link       link to the book
 * @param viewerLink link to the book's viewer
 */
public record Buch(
        int ortId, String titel, String typ, String zeitraum, String enthaelt,
        String anmerkung, String signatur, String archivname, String link, String viewerLink) {
}
|
||||
}
|
||||
@ -15,7 +15,6 @@ public class KirchenkreisExtractor {
|
||||
|
||||
public void readNwrite() throws Exception {
|
||||
try (Connection conn = DriverManager.getConnection("jdbc:sqlite:" + DB)) {
|
||||
|
||||
String sql = "SELECT id, name, link FROM archive ORDER BY id";
|
||||
try (PreparedStatement stmt = conn.prepareStatement(sql);
|
||||
ResultSet rs = stmt.executeQuery()) {
|
||||
@ -24,33 +23,60 @@ public class KirchenkreisExtractor {
|
||||
int id = rs.getInt("id");
|
||||
String name = rs.getString("name");
|
||||
String link = rs.getString("link");
|
||||
System.out.println("\n🔍 Lade Kirchenkreise f\u00fcr Archiv: " + name);
|
||||
|
||||
List<Kirchenkreis> kreise = extractKirchenkreise(id, link);
|
||||
System.out.println("\n🔍 Lade Struktur für Archiv: " + name);
|
||||
|
||||
Document doc = Jsoup.connect(link).get();
|
||||
Elements items = doc.select("#archive-nav li.item a");
|
||||
|
||||
List<Kirchenkreis> kreise = new ArrayList<>();
|
||||
List<Ort> orte = new ArrayList<>();
|
||||
|
||||
for (Element linkEl : items) {
|
||||
String eintragName = linkEl.text().trim();
|
||||
String href = linkEl.absUrl("href").trim();
|
||||
if (eintragName.isEmpty() || href.isEmpty()) continue;
|
||||
|
||||
if (isKirchenkreisebene(eintragName)) {
|
||||
kreise.add(new Kirchenkreis(id, eintragName, href));
|
||||
} else {
|
||||
orte.add(new Ort(id, null, eintragName, href));
|
||||
}
|
||||
}
|
||||
|
||||
saveKirchenkreise(kreise, conn);
|
||||
saveOrte(orte, conn);
|
||||
|
||||
System.out.println("→ " + kreise.size() + " Kirchenkreise gespeichert.");
|
||||
System.out.println("→ " + orte.size() + " direkte Orte gespeichert.");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public static List<Kirchenkreis> extractKirchenkreise(int archivId, String url) {
|
||||
List<Kirchenkreis> list = new ArrayList<>();
|
||||
try {
|
||||
Document doc = Jsoup.connect(url).get();
|
||||
Elements items = doc.select("#archive-nav li.item a");
|
||||
|
||||
for (Element link : items) {
|
||||
String name = link.text().trim();
|
||||
String href = link.absUrl("href").trim();
|
||||
if (!name.isEmpty() && !href.isEmpty()) {
|
||||
list.add(new Kirchenkreis(archivId, name, href));
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
System.err.println("⚠️ Fehler bei URL " + url + ": " + e.getMessage());
|
||||
}
|
||||
return list;
|
||||
private boolean isKirchenkreisebene(String name) {
|
||||
String n = name.toLowerCase();
|
||||
return n.contains("kirchenkreis") ||
|
||||
n.contains("dekanat") ||
|
||||
n.contains("juden") ||
|
||||
n.contains("mennoiten") ||
|
||||
n.contains("militärseelsorge") ||
|
||||
n.contains("reformierte kirche") ||
|
||||
n.contains("auslandsgemeinde") ||
|
||||
n.contains("thüringen") ||
|
||||
n.contains("israeliten") ||
|
||||
n.contains("krankenhausseelsorge") ||
|
||||
n.contains("kreis") ||
|
||||
n.contains("reformierter kirchenkreis") ||
|
||||
n.contains("sonderbestände") ||
|
||||
n.contains("allgemeine hilfsmittel") ||
|
||||
n.contains("allgemeines ortschaftsverzeichnis") ||
|
||||
n.contains("kirchenbezirk") ||
|
||||
n.contains("hinterpommern") ||
|
||||
n.contains("Kirchenbücher der Garnisonen und Militärgemeinden") ||
|
||||
n.contains("Regimentskirchenbücher") ||
|
||||
n.contains("Zivilregister") ||
|
||||
n.contains("militärkirchenbücher");
|
||||
}
|
||||
|
||||
public static void saveKirchenkreise(List<Kirchenkreis> list, Connection conn) throws SQLException {
|
||||
@ -72,6 +98,30 @@ public class KirchenkreisExtractor {
|
||||
}
|
||||
}
|
||||
|
||||
public record Kirchenkreis(int archivId, String name, String link) {}
|
||||
}
|
||||
public static void saveOrte(List<Ort> list, Connection conn) throws SQLException {
|
||||
String sql = """
|
||||
INSERT INTO ort (archiv_id, kreis_id, name, link)
|
||||
VALUES (?, ?, ?, ?)
|
||||
ON CONFLICT(archiv_id, name)
|
||||
DO UPDATE SET link = excluded.link;
|
||||
""";
|
||||
|
||||
try (PreparedStatement stmt = conn.prepareStatement(sql)) {
|
||||
for (Ort o : list) {
|
||||
stmt.setInt(1, o.archivId());
|
||||
if (o.kreisId() != null)
|
||||
stmt.setInt(2, o.kreisId());
|
||||
else
|
||||
stmt.setNull(2, Types.INTEGER);
|
||||
stmt.setString(3, o.name());
|
||||
stmt.setString(4, o.link());
|
||||
stmt.addBatch();
|
||||
}
|
||||
stmt.executeBatch();
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * One district-level navigation entry of an archive.
 *
 * @param archivId id of the archive the entry was found under
 * @param name     entry text
 * @param link     absolute URL of the entry
 */
public record Kirchenkreis(
        int archivId,
        String name,
        String link
) {
}
|
||||
|
||||
/**
 * One place entry of an archive.
 *
 * @param archivId id of the archive the place belongs to
 * @param kreisId  id of the district, or {@code null} for a place listed directly under the archive
 * @param name     entry text
 * @param link     absolute URL of the entry
 */
public record Ort(
        int archivId,
        Integer kreisId,
        String name,
        String link
) {
}
|
||||
}
|
||||
|
||||
@ -15,6 +15,7 @@ public class KreisOrtExtractor {
|
||||
|
||||
public void readNwrite() throws Exception {
|
||||
try (Connection conn = DriverManager.getConnection("jdbc:sqlite:" + DB)) {
|
||||
|
||||
String sql = "SELECT id, archiv_id, name, link FROM kreis ORDER BY id";
|
||||
try (PreparedStatement stmt = conn.prepareStatement(sql);
|
||||
ResultSet rs = stmt.executeQuery()) {
|
||||
@ -25,45 +26,47 @@ public class KreisOrtExtractor {
|
||||
String name = rs.getString("name");
|
||||
String link = rs.getString("link");
|
||||
|
||||
System.out.println("\n🔍 Lade Orte für Kirchenkreis/Dekanat: " + name);
|
||||
List<Ort> orte = extractOrte(kreisId, archivId, link);
|
||||
System.out.println("\n🔎 Lade Orte unter Kirchenkreis: " + name);
|
||||
|
||||
List<Ort> orte = extractOrte(archivId, kreisId, link);
|
||||
saveOrte(orte, conn);
|
||||
|
||||
System.out.println("→ " + orte.size() + " Orte gespeichert.");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static List<Ort> extractOrte(int kreisId, int archivId, String url) {
|
||||
List<Ort> list = new ArrayList<>();
|
||||
private List<Ort> extractOrte(int archivId, int kreisId, String url) {
|
||||
List<Ort> orte = new ArrayList<>();
|
||||
try {
|
||||
Document doc = Jsoup.connect(url).get();
|
||||
Elements items = doc.select(".list li a");
|
||||
for (Element link : items) {
|
||||
String name = link.text().trim();
|
||||
String href = link.absUrl("href").trim();
|
||||
Elements items = doc.select("#archive-nav li.item a");
|
||||
|
||||
for (Element linkEl : items) {
|
||||
String name = linkEl.text().trim();
|
||||
String href = linkEl.absUrl("href").trim();
|
||||
if (!name.isEmpty() && !href.isEmpty()) {
|
||||
list.add(new Ort(kreisId, archivId, name, href));
|
||||
orte.add(new Ort(archivId, kreisId, name, href));
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
System.err.println("⚠️ Fehler bei URL " + url + ": " + e.getMessage());
|
||||
}
|
||||
return list;
|
||||
return orte;
|
||||
}
|
||||
|
||||
private static void saveOrte(List<Ort> list, Connection conn) throws SQLException {
|
||||
private void saveOrte(List<Ort> list, Connection conn) throws SQLException {
|
||||
String sql = """
|
||||
INSERT INTO ort (kreis_id, archiv_id, name, link)
|
||||
VALUES (?, ?, ?, ?)
|
||||
ON CONFLICT(kreis_id, archiv_id, name)
|
||||
DO UPDATE SET link = excluded.link;
|
||||
""";
|
||||
|
||||
INSERT INTO ort (archiv_id, kreis_id, name, link)
|
||||
VALUES (?, ?, ?, ?)
|
||||
ON CONFLICT(archiv_id, name)
|
||||
DO UPDATE SET link = excluded.link, kreis_id = excluded.kreis_id;
|
||||
""";
|
||||
try (PreparedStatement stmt = conn.prepareStatement(sql)) {
|
||||
for (Ort o : list) {
|
||||
stmt.setInt(1, o.kreisId());
|
||||
stmt.setInt(2, o.archivId());
|
||||
stmt.setInt(1, o.archivId());
|
||||
stmt.setInt(2, o.kreisId());
|
||||
stmt.setString(3, o.name());
|
||||
stmt.setString(4, o.link());
|
||||
stmt.addBatch();
|
||||
@ -72,6 +75,5 @@ public class KreisOrtExtractor {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public record Ort(int kreisId, int archivId, String name, String link) {}
|
||||
public record Ort(int archivId, int kreisId, String name, String link) {}
|
||||
}
|
||||
|
||||
@ -0,0 +1,71 @@
|
||||
|
||||
package de.roko.genalogy.downloader.viewer;
|
||||
|
||||
import org.openqa.selenium.*;
|
||||
import org.openqa.selenium.support.ui.ExpectedConditions;
|
||||
import org.openqa.selenium.support.ui.WebDriverWait;
|
||||
|
||||
import java.time.Duration;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
public class ViewerBildParser {
|
||||
|
||||
public record BildInfo(int seite, String bildUrl) {}
|
||||
|
||||
public String resolveViewerRedirect(WebDriver driver, String viewerUrl) {
|
||||
driver.get(viewerUrl);
|
||||
|
||||
try {
|
||||
Thread.sleep(1000);
|
||||
|
||||
boolean keinZugang = driver.getPageSource().contains("Kein Zugang zum Viewer");
|
||||
|
||||
if (keinZugang) {
|
||||
System.out.println("🔐 Kein Zugang – Weiterleitung wird ausgelöst ...");
|
||||
|
||||
WebElement loginButton = new WebDriverWait(driver, Duration.ofSeconds(5))
|
||||
.until(ExpectedConditions.elementToBeClickable(
|
||||
By.cssSelector("a.btn.btn-primary[href*='/de/login']")));
|
||||
|
||||
loginButton.click();
|
||||
|
||||
new WebDriverWait(driver, Duration.ofSeconds(10))
|
||||
.until(d -> d.getCurrentUrl().contains("/viewer/churchRegister/"));
|
||||
|
||||
String redirectedUrl = driver.getCurrentUrl();
|
||||
System.out.println("✅ Weitergeleitet zum Viewer: " + redirectedUrl);
|
||||
return redirectedUrl;
|
||||
}
|
||||
|
||||
System.out.println("✅ Direktzugriff auf Viewer ohne Zwischenseite.");
|
||||
return driver.getCurrentUrl();
|
||||
|
||||
} catch (Exception e) {
|
||||
System.err.println("❌ Fehler bei der Weiterleitung: " + e.getMessage());
|
||||
return viewerUrl;
|
||||
}
|
||||
}
|
||||
|
||||
public List<BildInfo> extractBildUrlsWithSelenium(WebDriver driver) {
|
||||
List<BildInfo> bilder = new ArrayList<>();
|
||||
try {
|
||||
List<WebElement> seiten = driver.findElements(By.cssSelector(".dvpages .dvpage img[data-src]"));
|
||||
|
||||
int seiteNr = 1;
|
||||
for (WebElement img : seiten) {
|
||||
String relativeUrl = img.getAttribute("data-src");
|
||||
if (relativeUrl == null || relativeUrl.isBlank()) continue;
|
||||
|
||||
String fullUrl = "https://www.archion.de" + relativeUrl.split("\\?")[0];
|
||||
bilder.add(new BildInfo(seiteNr++, fullUrl));
|
||||
}
|
||||
|
||||
System.out.println("✅ " + bilder.size() + " Bild-URLs extrahiert.");
|
||||
} catch (Exception e) {
|
||||
System.err.println("❌ Fehler beim Extrahieren der Bilder: " + e.getMessage());
|
||||
}
|
||||
|
||||
return bilder;
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,17 @@
|
||||
package de.roko.genalogy.downloader.database;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
|
||||
class DatenbankDebuggerTest {
|
||||
|
||||
|
||||
|
||||
@Test
|
||||
void printTable() throws Exception {
|
||||
DatenbankDebugger dbdebugger = new DatenbankDebugger();
|
||||
//dbdebugger.printTable("ort");
|
||||
dbdebugger.printTable("buch");
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,81 @@
|
||||
package de.roko.genalogy.downloader.database;
|
||||
|
||||
import de.roko.genalogy.downloader.archion.ArchionLoginHelper;
|
||||
import org.junit.jupiter.api.*;
|
||||
import org.openqa.selenium.chrome.ChromeDriver;
|
||||
import org.openqa.selenium.chrome.ChromeOptions;
|
||||
|
||||
import java.io.File;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
|
||||
class DokumentExtractorTest {
|
||||
|
||||
public String username = "robatkoch";
|
||||
public String password = "PaLiNa2016$$";
|
||||
|
||||
public ChromeDriver driver;
|
||||
|
||||
@BeforeAll
|
||||
static void setUp() {
|
||||
|
||||
String userHome = System.getProperty("user.home");
|
||||
String downloadFolder = userHome + "/Pictures/archion";
|
||||
new File(downloadFolder).mkdirs();
|
||||
|
||||
|
||||
}
|
||||
|
||||
@BeforeEach void login() throws InterruptedException {
|
||||
|
||||
ChromeOptions options = new ChromeOptions();
|
||||
options.addArguments("--remote-allow-origins=*");
|
||||
|
||||
// WebDriver starten
|
||||
driver = new ChromeDriver(options);
|
||||
|
||||
ArchionLoginHelper archionLoginHelper = new ArchionLoginHelper(driver);
|
||||
archionLoginHelper.login(username, password);
|
||||
//assertFalse(archionLoginHelper.isLoggedIn(), "Login nicht möglich");
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
void run() throws Exception {
|
||||
try {
|
||||
DokumentExtractor dokumentExtractor = new DokumentExtractor();
|
||||
dokumentExtractor.run();
|
||||
} finally {
|
||||
driver.quit();
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void runForArchiv() throws Exception {
|
||||
//new DokumentExtractor().runForArchiv("Landeskirchenarchiv der Evangelischen Kirche Mitteldeutschland/Eisenach");
|
||||
//new DokumentExtractor().runForArchiv("Landeskirchliches Archiv der Evangelisch-Lutherischen Kirche in Norddeutschland");
|
||||
//new DokumentExtractor().runForArchiv("Archiv der Evangelischen Landeskirche Anhalts");
|
||||
//new DokumentExtractor().runForArchiv("Landeskirchenarchiv der Evangelischen Kirche Mitteldeutschland/Magdeburg");
|
||||
new DokumentExtractor().runForArchiv("Landeskirchliches Archiv der Evangelisch-Lutherischen Landeskirche Sachsens");
|
||||
new DokumentExtractor().erstelleBildOrdnerFuerAlleBuecher("/Users/robertkoch/archion_bilder");
|
||||
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
void erstelleBildOrdnerFuerAlleBuecher() throws Exception {
|
||||
new DokumentExtractor().erstelleBildOrdnerFuerAlleBuecher("/Users/robertkoch/archion_bilder");
|
||||
}
|
||||
|
||||
@Test
|
||||
void runForBundesland() throws Exception {
|
||||
new DokumentExtractor().runForBundesland("Thüringen");
|
||||
}
|
||||
|
||||
@AfterEach void quit() {
|
||||
driver.quit();
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,56 @@
|
||||
package de.roko.genalogy.downloader.database;
|
||||
|
||||
import de.roko.genalogy.downloader.archion.ArchionLoginHelper;
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
import org.junit.jupiter.api.BeforeAll;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.openqa.selenium.WebDriver;
|
||||
import org.openqa.selenium.chrome.ChromeDriver;
|
||||
import org.openqa.selenium.chrome.ChromeOptions;
|
||||
|
||||
import java.io.File;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
|
||||
class KreisOrtExtractorTest {
|
||||
|
||||
public String username = "robatkoch";
|
||||
public String password = "PaLiNa2016$$";
|
||||
|
||||
public ChromeDriver driver;
|
||||
|
||||
|
||||
|
||||
@BeforeAll
|
||||
static void setUp() {
|
||||
|
||||
String userHome = System.getProperty("user.home");
|
||||
String downloadFolder = userHome + "/Pictures/archion";
|
||||
new File(downloadFolder).mkdirs();
|
||||
|
||||
|
||||
}
|
||||
|
||||
@BeforeEach void login() {
|
||||
|
||||
ChromeOptions options = new ChromeOptions();
|
||||
options.addArguments("--remote-allow-origins=*");
|
||||
|
||||
// WebDriver starten
|
||||
driver = new ChromeDriver(options);
|
||||
|
||||
ArchionLoginHelper archionLoginHelper = new ArchionLoginHelper(driver);
|
||||
archionLoginHelper.login(username, password);
|
||||
assertFalse(archionLoginHelper.isLoggedIn(), "Login nicht möglich");
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
void read() throws Exception {
|
||||
|
||||
KreisOrtExtractor kreisOrtExtractor = new KreisOrtExtractor();
|
||||
//kreisOrtExtractor.read();
|
||||
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,112 @@
|
||||
package de.roko.genalogy.downloader.viewer;
|
||||
|
||||
import de.roko.genalogy.downloader.archion.ArchionLoginHelper;
|
||||
import net.lightbody.bmp.BrowserMobProxy;
|
||||
import net.lightbody.bmp.BrowserMobProxyServer;
|
||||
import net.lightbody.bmp.client.ClientUtil;
|
||||
import net.lightbody.bmp.core.har.HarEntry;
|
||||
import org.junit.jupiter.api.BeforeAll;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.openqa.selenium.*;
|
||||
import org.openqa.selenium.chrome.ChromeDriver;
|
||||
import org.openqa.selenium.chrome.ChromeOptions;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
|
||||
class ViewerBildParserTest {
|
||||
|
||||
public String username = "robatkoch";
|
||||
public String password = "PaLiNa2016$$";
|
||||
|
||||
public ChromeDriver driver;
|
||||
|
||||
@BeforeAll
|
||||
static void setUp() {
|
||||
|
||||
String userHome = System.getProperty("user.home");
|
||||
String downloadFolder = userHome + "/Pictures/archion";
|
||||
new File(downloadFolder).mkdirs();
|
||||
|
||||
|
||||
}
|
||||
|
||||
@BeforeEach
|
||||
void login() throws InterruptedException {
|
||||
/*
|
||||
ChromeOptions options = new ChromeOptions();
|
||||
options.addArguments("--remote-allow-origins=*");
|
||||
|
||||
// WebDriver starten
|
||||
driver = new ChromeDriver(options);
|
||||
|
||||
ArchionLoginHelper archionLoginHelper = new ArchionLoginHelper(driver);
|
||||
archionLoginHelper.login(username, password);
|
||||
//assertFalse(archionLoginHelper.isLoggedIn(), "Login nicht möglich");
|
||||
*/
|
||||
}
|
||||
|
||||
@Test
|
||||
void parse() throws InterruptedException {
|
||||
|
||||
String viewerUrl = "https://www.archion.de/de/viewer/churchRegister/287040?cHash=c61b3fc9f95353f6ba795fe0b90b3288";
|
||||
|
||||
// Proxy starten
|
||||
BrowserMobProxy proxy = new BrowserMobProxyServer();
|
||||
proxy.start(0); // auf freiem Port starten
|
||||
|
||||
// Proxy in Selenium einbinden
|
||||
Proxy seleniumProxy = ClientUtil.createSeleniumProxy(proxy);
|
||||
ChromeOptions options = new ChromeOptions();
|
||||
options.setProxy(seleniumProxy);
|
||||
options.addArguments("--start-maximized");
|
||||
|
||||
WebDriver driver = new ChromeDriver(options);
|
||||
|
||||
// HAR-Protokoll starten
|
||||
proxy.newHar("archion-view");
|
||||
|
||||
// Viewer-URL aufrufen (muss eingeloggt sein!)
|
||||
ArchionLoginHelper archionLoginHelper = new ArchionLoginHelper(driver);
|
||||
archionLoginHelper.login(username, password);
|
||||
driver.get(viewerUrl);
|
||||
|
||||
// Warten und Seiten durchblättern (optional)
|
||||
for (int i = 0; i < 10; i++) {
|
||||
try {
|
||||
Thread.sleep(1500);
|
||||
WebElement next = driver.findElement(By.cssSelector("a.dvnavnext"));
|
||||
if (next != null && next.isDisplayed()) {
|
||||
next.click();
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
} catch (Exception e) {
|
||||
break; // Ende erreicht oder Fehler
|
||||
}
|
||||
}
|
||||
|
||||
// Alle Bild-URLs auslesen
|
||||
List<String> imageUrls = new ArrayList<>();
|
||||
for (HarEntry entry : proxy.getHar().getLog().getEntries()) {
|
||||
String url = entry.getRequest().getUrl();
|
||||
if (url.contains("/si/") && url.endsWith("/image.jpg")) {
|
||||
imageUrls.add(url);
|
||||
}
|
||||
}
|
||||
|
||||
// Ausgabe
|
||||
System.out.println("\n🔍 Gefundene Bild-URLs:");
|
||||
for (int i = 0; i < imageUrls.size(); i++) {
|
||||
System.out.printf("Bild %03d: %s%n", i + 1, imageUrls.get(i));
|
||||
}
|
||||
|
||||
// Aufräumen
|
||||
driver.quit();
|
||||
proxy.stop();
|
||||
}
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user