arch/src/main/java/de/roko/genalogy/downloader/tools/BookTileDownloaderLite.java
2025-05-20 14:05:28 +02:00

123 lines
4.9 KiB
Java

package de.roko.genalogy.downloader.tools;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.net.URLConnection;
import java.nio.file.Files;
import java.time.Duration;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.imageio.ImageIO;
import org.openqa.selenium.*;
import org.openqa.selenium.interactions.Actions;
import org.openqa.selenium.support.ui.ExpectedConditions;
import org.openqa.selenium.support.ui.WebDriverWait;
/**
 * Saves the image tiles that a zoomable book viewer currently has in its DOM.
 *
 * <p>The class drives an already-created {@link WebDriver}: it optionally opens a viewer URL,
 * clicks the viewer's zoom-in control repeatedly, and then downloads every
 * {@code .zoom-tiles img} element's {@code _src} image as a PNG file named after the tile's
 * {@code left}/{@code top} CSS offsets.
 */
public class BookTileDownloaderLite {

    /** Host prepended to tile URLs that are given as an absolute path ({@code /...}). */
    private static final String BASE_URL = "https://www.archion.de";

    /** Upper bound on zoom-in clicks so a viewer that never reports the end state cannot loop forever. */
    private static final int MAX_ZOOM_CLICKS = 20;

    /** Network timeouts for a single tile request; without them a stalled server blocks the run. */
    private static final int CONNECT_TIMEOUT_MS = 15_000;
    private static final int READ_TIMEOUT_MS = 30_000;

    /** Optional sign, digits, optional fraction at the start of a CSS value such as {@code -256px}. */
    private static final Pattern LEADING_NUMBER = Pattern.compile("^-?\\d+(?:\\.\\d+)?");

    private final WebDriver driver;
    private final JavascriptExecutor js;
    private final WebDriverWait wait;
    private final Actions actions;

    /**
     * Creates a downloader on top of an existing browser session.
     *
     * @param driver the driver to use; it must also implement {@link JavascriptExecutor}
     */
    public BookTileDownloaderLite(WebDriver driver) {
        this.driver = driver;
        this.js = (JavascriptExecutor) driver;
        this.wait = new WebDriverWait(driver, Duration.ofSeconds(10));
        this.actions = new Actions(driver);
    }

    /**
     * Reads a numeric CSS property from an inline {@code style} attribute value.
     *
     * <p>The property name must match exactly (case-insensitively), the sign is preserved and a
     * fractional value is rounded to the nearest integer. Keeping the sign matters because the
     * result is used in tile file names: dropping it would make tiles at {@code -256px} and
     * {@code 256px} overwrite each other.
     *
     * @param style    the raw inline style, e.g. {@code "left: -256px; top: 0px"}; may be null
     * @param property the CSS property name to look for, e.g. {@code "left"}
     * @return the parsed value, or 0 when the property is absent or cannot be parsed
     */
    private int extractPixel(String style, String property) {
        if (style == null) {
            System.err.printf("⚠️ Fehler beim Extrahieren von '%s' aus Style: %s%n", property, style);
            return 0;
        }
        for (String declaration : style.split(";")) {
            int colon = declaration.indexOf(':');
            if (colon < 0) {
                continue;
            }
            String name = declaration.substring(0, colon).trim();
            if (!name.equalsIgnoreCase(property)) {
                continue;
            }
            String value = declaration.substring(colon + 1).trim();
            Matcher number = LEADING_NUMBER.matcher(value);
            if (number.find()) {
                return (int) Math.round(Double.parseDouble(number.group()));
            }
            // Property found but its value is not numeric (e.g. "auto").
            System.err.printf("⚠️ Fehler beim Extrahieren von '%s' aus Style: %s%n", property, style);
            return 0;
        }
        return 0;
    }

    /**
     * Downloads all tiles currently present in the viewer into a per-page folder.
     *
     * <p>Tiles are written to {@code <zielVerzeichnis>/seite-NNN-tiles/tile_<left>_<top>.png}.
     * Failures of individual tiles are logged and counted; they do not abort the page. Any other
     * failure is logged and the method returns normally. If the calling thread is interrupted the
     * method stops early and leaves the thread's interrupt flag set.
     *
     * @param viewerUrlOrNull URL to open first, or null to work on the page the driver is already on
     * @param zielVerzeichnis parent directory in which the per-page tile folder is created
     * @param seitenIndex     page number used in the folder name and in log output
     */
    public void downloadVisibleTiles(String viewerUrlOrNull, String zielVerzeichnis, int seitenIndex) {
        try {
            if (viewerUrlOrNull != null) {
                driver.get(viewerUrlOrNull);
                wait.until(ExpectedConditions.presenceOfElementLocated(By.cssSelector(".zoom-holder")));
                Thread.sleep(1000);
            }

            // Zoom regardless of whether a page was just loaded.
            zoomInFully();

            WebElement zoomHolder = driver.findElement(By.cssSelector(".zoom-holder"));
            js.executeScript("arguments[0].scrollTo(0, 0);", zoomHolder);
            actions.moveToElement(zoomHolder, 100, 100).perform();
            Thread.sleep(800); // presumably gives lazily loaded tiles time to appear

            File tileFolder = new File(zielVerzeichnis, String.format("seite-%03d-tiles", seitenIndex));
            Files.createDirectories(tileFolder.toPath()); // no-op when the directory already exists

            Set<String> seenSrcs = new HashSet<>();
            int tilesSaved = 0;
            int errors = 0;

            List<WebElement> tiles = driver.findElements(By.cssSelector(".zoom-tiles img"));
            System.out.printf("🔍 Seite %d: %d sichtbare Tiles gefunden%n", seitenIndex, tiles.size());

            for (WebElement img : tiles) {
                try {
                    String rawSrc = img.getAttribute("_src");
                    if (rawSrc == null) {
                        continue;
                    }
                    String src = toAbsoluteUrl(rawSrc);
                    if (!seenSrcs.add(src)) {
                        continue; // same image referenced by more than one element
                    }

                    String style = img.getAttribute("style");
                    int left = extractPixel(style, "left");
                    int top = extractPixel(style, "top");
                    String filename = String.format("tile_%d_%d.png", left, top);

                    if (downloadTile(src, new File(tileFolder, filename))) {
                        tilesSaved++;
                        System.out.printf("💾 %s gespeichert%n", filename);
                    } else {
                        errors++;
                    }
                } catch (RuntimeException e) {
                    // e.g. the element went stale between findElements and getAttribute
                    System.err.println("⚠️ Fehler beim Tile-Handling: " + e.getMessage());
                    errors++;
                }
            }

            System.out.printf("✅ Seite %d: %d Tiles gespeichert, %d Fehler%n", seitenIndex, tilesSaved, errors);
        } catch (InterruptedException e) {
            // Restore the flag so callers can observe the interruption and stop as well.
            Thread.currentThread().interrupt();
            System.err.printf("⚠️ Download der Seite %d unterbrochen%n", seitenIndex);
        } catch (Exception e) {
            System.err.println("❌ Schwerer Fehler beim Download der Seite: " + e.getMessage());
            e.printStackTrace();
        }
    }

    /**
     * Clicks the viewer's zoom-in control until the zoom indicator reports {@code left: 120px}
     * (presumably the maximum zoom level), the control becomes unavailable, or
     * {@link #MAX_ZOOM_CLICKS} clicks were made. WebDriver failures are logged and otherwise ignored
     * so that tiles can still be downloaded at the current zoom level.
     *
     * @throws InterruptedException if the thread is interrupted while pausing between clicks
     */
    private void zoomInFully() throws InterruptedException {
        try {
            for (int click = 0; click < MAX_ZOOM_CLICKS; click++) {
                WebElement zoomState = driver.findElement(By.cssSelector("a.zoom-state .current"));
                String style = zoomState.getAttribute("style");
                if (style != null && style.contains("left: 120px")) {
                    return;
                }
                WebElement zoomInButton = driver.findElement(By.cssSelector("a.zoom-in"));
                if (!zoomInButton.isDisplayed() || !zoomInButton.isEnabled()) {
                    return;
                }
                zoomInButton.click();
                Thread.sleep(400);
            }
        } catch (RuntimeException e) {
            System.err.println("⚠️ Fehler beim Zoom: " + e.getMessage());
        }
    }

    /**
     * Turns a tile reference into a URL that can be opened directly.
     *
     * @param src the raw {@code _src} attribute value
     * @return {@code src} with a scheme added for protocol-relative references ({@code //host/...}),
     *         with {@link #BASE_URL} prepended for absolute paths ({@code /...}), unchanged otherwise
     */
    private String toAbsoluteUrl(String src) {
        if (src.startsWith("//")) {
            return "https:" + src;
        }
        if (src.startsWith("/")) {
            return BASE_URL + src;
        }
        return src;
    }

    /**
     * Fetches one tile image and re-encodes it as PNG.
     *
     * @param src      absolute URL of the tile image
     * @param tileFile destination file; overwritten if it exists
     * @return true if the file was written; false if the request failed, the response could not be
     *         decoded as an image, or no PNG writer was available (each case is logged)
     */
    private boolean downloadTile(String src, File tileFile) {
        try {
            URLConnection connection = new URL(src).openConnection();
            connection.setConnectTimeout(CONNECT_TIMEOUT_MS);
            connection.setReadTimeout(READ_TIMEOUT_MS);
            try (InputStream in = connection.getInputStream()) {
                BufferedImage tile = ImageIO.read(in);
                if (tile == null) {
                    // ImageIO.read returns null when no registered reader understands the data.
                    System.err.printf("❌ Kein lesbares Bild unter: %s%n", src);
                    return false;
                }
                if (!ImageIO.write(tile, "png", tileFile)) {
                    System.err.printf("❌ Fehler beim Speichern von: %s (%s)%n", src, "kein PNG-Writer");
                    return false;
                }
                return true;
            }
        } catch (IOException | RuntimeException e) {
            System.err.printf("❌ Fehler beim Speichern von: %s (%s)%n", src, e);
            return false;
        }
    }
}