commit 5de0c22d41611748c0e9bf660517a830343b75c6
Author: Radek Davidek
Date:   Tue Mar 31 20:02:58 2026 +0200

    first commit

diff --git a/.codex b/.codex
new file mode 100644
index 0000000..e69de29
diff --git a/.gitignore b/.gitignore
new file mode 100755
index 0000000..7b12add
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,50 @@
+### Eclipse
+.classpath
+.project
+.settings/
+
+HELP.md
+target/
+!.mvn/wrapper/maven-wrapper.jar
+!**/src/main/**
+!**/src/test/**
+
+### STS ###
+.mvn
+.apt_generated
+.classpath
+.factorypath
+.project
+.settings
+.springBeans
+.sts4-cache
+mvnw
+mvnw.cmd
+
+### IntelliJ IDEA ###
+.idea
+*.iws
+*.iml
+*.ipr
+
+### NetBeans ###
+/nbproject/private/
+/nbbuild/
+/dist/
+/nbdist/
+/.nb-gradle/
+build/
+
+### VS Code ###
+.vscode/
+
+### Log4j2 ###
+logs
+*.log
+
+# Ignore Mac DS_Store files
+.DS_Store
+
+dependency-reduced-pom.xml
+
+cert.pem
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..50ff75d
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,8 @@
+FROM eclipse-temurin:11-jre-alpine
+WORKDIR /app
+
+COPY target/ybcl-parser-1.0-SNAPSHOT.jar app.jar
+
+# NOTE(review): `java -jar` needs a Main-Class manifest entry and the dependency jars; the
+# pom.xml in this commit appears to configure neither (no jar/shade plugin) - confirm the image starts.
+ENTRYPOINT ["java", "-jar", "/app/app.jar"]
diff --git a/pom.xml b/pom.xml
new file mode 100644
index 0000000..b9a7f8a
--- /dev/null
+++ b/pom.xml
@@ -0,0 +1,38 @@
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+
+    <groupId>cz.kamma.ybclparser</groupId>
+    <artifactId>ybcl-parser</artifactId>
+    <version>1.0-SNAPSHOT</version>
+
+    <properties>
+        <maven.compiler.source>11</maven.compiler.source>
+        <maven.compiler.target>11</maven.compiler.target>
+    </properties>
+
+    <dependencies>
+        <!-- Jackson databind -->
+        <dependency>
+            <groupId>com.fasterxml.jackson.core</groupId>
+            <artifactId>jackson-databind</artifactId>
+            <version>2.17.0</version>
+        </dependency>
+
+        <!-- Jakarta Mail -->
+        <dependency>
+            <groupId>com.sun.mail</groupId>
+            <artifactId>jakarta.mail</artifactId>
+            <version>2.0.1</version>
+        </dependency>
+
+        <!-- Lombok -->
+        <dependency>
+            <groupId>org.projectlombok</groupId>
+            <artifactId>lombok</artifactId>
+            <version>1.18.32</version>
+            <scope>provided</scope>
+        </dependency>
+    </dependencies>
+</project>
diff --git a/src/main/java/cz/kamma/ybclparser/YbclParser.java b/src/main/java/cz/kamma/ybclparser/YbclParser.java
new file mode 100644
index 0000000..c7dfe2f
--- /dev/null
+++ b/src/main/java/cz/kamma/ybclparser/YbclParser.java
@@ -0,0 +1,403 @@
+package cz.kamma.ybclparser;
+
+import java.io.IOException;
+import java.net.URI;
+import java.net.http.HttpClient;
+import java.net.http.HttpRequest;
+import java.net.http.HttpResponse;
+import java.time.Duration;
+import java.time.LocalDateTime;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.HashSet;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Set;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import jakarta.mail.Authenticator;
+import jakarta.mail.Message;
+import jakarta.mail.PasswordAuthentication;
+import jakarta.mail.Session;
+import jakarta.mail.Transport;
+import jakarta.mail.internet.InternetAddress;
+import jakarta.mail.internet.MimeMessage;
+
+/**
+ * Watches the news listing at {@code BASE_URL} and e-mails a notification when an article URL
+ * shows up that was not seen on any earlier check.
+ *
+ * <p>Seen URLs are kept in memory only: the download performed at startup seeds the set without
+ * announcing anything, and a restart begins again from whatever the page lists at that moment.
+ *
+ * <p>The SMTP password comes from the {@code SMTP_APP_PASSWORD} environment variable. When it is
+ * not set, monitoring still runs and logs to the console, but no e-mail is sent.
+ */
+public class YbclParser {
+
+    private static final String BASE_URL = "https://www.championsleague.basketball/en/youth/news";
+    private static final int CHECK_INTERVAL_MINUTES = 5;
+    /** Limit for connecting to the site and for receiving one response. */
+    private static final Duration HTTP_TIMEOUT = Duration.ofSeconds(30);
+    private static final HttpClient CLIENT = HttpClient.newBuilder()
+            .connectTimeout(HTTP_TIMEOUT)
+            .followRedirects(HttpClient.Redirect.NORMAL)
+            .build();
+    /** Article URLs seen so far; seeded by main() before scheduling, then updated by the scheduler thread. */
+    private static final Set<String> knownArticles = new HashSet<>();
+    private static final String SMTP_HOST = "smtp.gmail.com";
+    private static final String SMTP_PORT = "587";
+    private static final String SMTP_USER = "kamma.cz@gmail.com";
+    /** Read from the environment so that the secret is never stored in source control. */
+    private static final String SMTP_APP_PASSWORD = System.getenv("SMTP_APP_PASSWORD");
+    private static final String EMAIL_FROM = SMTP_USER;
+    private static final String EMAIL_TO = "kamma.cz@gmail.com";
+    /** Explicit charset: the messages contain Czech diacritics and the JVM default may not be UTF-8. */
+    private static final String MAIL_CHARSET = "UTF-8";
+    /** SMTP connect/read timeout in milliseconds, so a stalled server cannot block the scheduler thread forever. */
+    private static final String MAIL_TIMEOUT_MILLIS = "30000";
+
+    /** News objects: captures {@code localized.title} (group 1) and the following {@code slug} (group 2). */
+    private static final Pattern LOCALIZED_TITLE_SLUG_PATTERN = Pattern.compile(
+            "\"localized\"\\s*:\\s*\\{[\\s\\S]*?\"title\"\\s*:\\s*\"((?:\\\\.|[^\"])*)\"[\\s\\S]*?\\}[\\s\\S]*?\"slug\"\\s*:\\s*\"([a-z0-9\\-]+)\"",
+            Pattern.CASE_INSENSITIVE);
+    /** Top-news/carousel entries: captures {@code title} (group 1) and {@code link.url} (group 2). */
+    private static final Pattern TITLE_LINK_PATTERN = Pattern.compile(
+            "\"title\"\\s*:\\s*\"((?:\\\\.|[^\"])*)\"\\s*,\\s*\"link\"\\s*:\\s*\\{\\s*\"url\"\\s*:\\s*\"(https://www\\.championsleague\\.basketball/en/youth/news/[a-z0-9\\-]+)\"",
+            Pattern.CASE_INSENSITIVE);
+    /** Any absolute article URL appearing anywhere in the page. */
+    private static final Pattern LINK_PATTERN = Pattern.compile(
+            "https://www\\.championsleague\\.basketball/en/youth/news/[a-z0-9\\-]+",
+            Pattern.CASE_INSENSITIVE);
+    /** A bare {@code "slug":"..."} pair (group 1 is the slug). */
+    private static final Pattern SLUG_PATTERN = Pattern.compile("\"slug\":\"([a-z0-9\\-]+)\"");
+
+    /** Immutable URL/title pair describing one article. */
+    private static final class ArticleInfo {
+        private final String url;
+        private final String title;
+
+        private ArticleInfo(String url, String title) {
+            this.url = url;
+            this.title = title;
+        }
+
+        private String getUrl() {
+            return url;
+        }
+
+        private String getTitle() {
+            return title;
+        }
+    }
+
+    /**
+     * Seeds the known-article set from the current page, starts the periodic check and sends a
+     * start-up e-mail.
+     *
+     * @param args unused
+     * @throws Exception if the initial download fails; the application then does not start
+     */
+    public static void main(String[] args) throws Exception {
+        String content = download(BASE_URL);
+        Map<String, String> initialArticles = extractArticles(content);
+
+        knownArticles.addAll(initialArticles.keySet());
+        System.out.println("První průchod dokončen (" + LocalDateTime.now() + ").");
+        System.out.println("Uloženo článků do paměti: " + knownArticles.size());
+
+        ScheduledExecutorService scheduler = Executors.newSingleThreadScheduledExecutor();
+        scheduler.scheduleAtFixedRate(YbclParser::checkForNewArticles, CHECK_INTERVAL_MINUTES, CHECK_INTERVAL_MINUTES, TimeUnit.MINUTES);
+
+        Runtime.getRuntime().addShutdownHook(new Thread(() -> {
+            System.out.println("Ukončuji plánovač.");
+            scheduler.shutdownNow();
+        }));
+
+        System.out.println("Monitoring spuštěn. Kontrola každých " + CHECK_INTERVAL_MINUTES + " minut.");
+        sendStartupEmail();
+    }
+
+    /**
+     * Downloads {@code url} and returns the response body as text.
+     *
+     * @throws IOException if the request fails, times out or the server answers with a non-2xx status
+     * @throws InterruptedException if the calling thread is interrupted while waiting
+     */
+    private static String download(String url) throws IOException, InterruptedException {
+        HttpRequest request = HttpRequest.newBuilder()
+                .uri(URI.create(url))
+                .timeout(HTTP_TIMEOUT)
+                .GET()
+                .build();
+
+        HttpResponse<String> response = CLIENT.send(request, HttpResponse.BodyHandlers.ofString());
+
+        // An error page parsed at startup would leave the known set empty, and the next
+        // successful check would then announce every article as new.
+        int status = response.statusCode();
+        if (status < 200 || status >= 300) {
+            throw new IOException("Neočekávaný HTTP status " + status + " pro " + url);
+        }
+        return response.body();
+    }
+
+    /**
+     * Downloads the listing again, announces every URL that is not yet known and remembers it.
+     *
+     * <p>Runs on the scheduler thread. Failures are logged instead of thrown because an exception
+     * escaping a {@code scheduleAtFixedRate} task cancels all later runs.
+     */
+    private static void checkForNewArticles() {
+        try {
+            String content = download(BASE_URL);
+            Map<String, String> currentArticles = extractArticles(content);
+
+            List<ArticleInfo> newArticles = new ArrayList<>();
+            for (Map.Entry<String, String> article : currentArticles.entrySet()) {
+                if (!knownArticles.contains(article.getKey())) {
+                    newArticles.add(new ArticleInfo(article.getKey(), article.getValue()));
+                }
+            }
+
+            if (newArticles.isEmpty()) {
+                System.out.println("[" + LocalDateTime.now() + "] Žádný nový článek.");
+                return;
+            }
+
+            newArticles.sort(Comparator.comparing(ArticleInfo::getUrl));
+            newArticles.forEach(article -> knownArticles.add(article.getUrl()));
+
+            System.out.println("[" + LocalDateTime.now() + "] Nalezeno nových článků: " + newArticles.size());
+            newArticles.forEach(article -> System.out.println(article.getTitle() + " -> " + article.getUrl()));
+
+            sendEmailNotification(newArticles);
+        } catch (InterruptedException e) {
+            // shutdownNow() interrupts a running check: restore the flag and leave quietly.
+            Thread.currentThread().interrupt();
+        } catch (Exception e) {
+            System.err.println("Chyba při kontrole článků: " + e.getMessage());
+            e.printStackTrace();
+        }
+    }
+
+    /** Builds and sends the e-mail that lists the given new articles. */
+    private static void sendEmailNotification(List<ArticleInfo> newArticles) {
+        StringBuilder body = new StringBuilder("Byly nalezeny nové články:\n\n");
+        for (ArticleInfo article : newArticles) {
+            body.append(article.getTitle()).append('\n');
+            body.append(article.getUrl()).append("\n\n");
+        }
+        body.append("\nČas kontroly: ").append(LocalDateTime.now());
+
+        sendEmail(
+                "YBCL: nalezeny nové články (" + newArticles.size() + ")",
+                body.toString()
+        );
+    }
+
+    /** Sends the e-mail confirming that the application has started. */
+    private static void sendStartupEmail() {
+        String body = "Aplikace YBCL parser byla úspěšně spuštěna.\n"
+                + "Čas spuštění: " + LocalDateTime.now() + "\n"
+                + "Interval kontroly: " + CHECK_INTERVAL_MINUTES + " minut.\n"
+                + "Počet načtených článků: " + knownArticles.size() + ".";
+
+        sendEmail("YBCL: aplikace běží", body);
+    }
+
+    /**
+     * Sends a plain-text e-mail over SMTP with STARTTLS; problems are logged, never thrown.
+     *
+     * @param subject subject line
+     * @param body plain-text message body
+     */
+    private static void sendEmail(String subject, String body) {
+        if (isBlank(SMTP_USER) || isBlank(SMTP_APP_PASSWORD) || isBlank(EMAIL_FROM) || isBlank(EMAIL_TO)) {
+            System.err.println("E-mail se neodeslal: nastav proměnnou prostředí SMTP_APP_PASSWORD a zkontroluj konstanty SMTP_USER/EMAIL_FROM/EMAIL_TO.");
+            return;
+        }
+
+        try {
+            Properties props = new Properties();
+            props.put("mail.smtp.auth", "true");
+            props.put("mail.smtp.starttls.enable", "true");
+            // Refuse to continue in plaintext if the server does not offer STARTTLS.
+            props.put("mail.smtp.starttls.required", "true");
+            props.put("mail.smtp.host", SMTP_HOST);
+            props.put("mail.smtp.port", SMTP_PORT);
+            props.put("mail.smtp.connectiontimeout", MAIL_TIMEOUT_MILLIS);
+            props.put("mail.smtp.timeout", MAIL_TIMEOUT_MILLIS);
+
+            Session session = Session.getInstance(props, new Authenticator() {
+                @Override
+                protected PasswordAuthentication getPasswordAuthentication() {
+                    return new PasswordAuthentication(SMTP_USER, SMTP_APP_PASSWORD);
+                }
+            });
+
+            // Declared as MimeMessage because only it offers the charset-aware setters.
+            MimeMessage message = new MimeMessage(session);
+            message.setFrom(new InternetAddress(EMAIL_FROM));
+            message.setRecipients(Message.RecipientType.TO, InternetAddress.parse(EMAIL_TO));
+            message.setSubject(subject, MAIL_CHARSET);
+            message.setText(body, MAIL_CHARSET);
+            Transport.send(message);
+            System.out.println("E-mail odeslán na: " + EMAIL_TO);
+        } catch (Exception e) {
+            System.err.println("Nepodařilo se odeslat e-mail: " + e.getMessage());
+            e.printStackTrace();
+        }
+    }
+
+    /** Returns {@code true} when {@code value} is {@code null}, empty or only whitespace. */
+    private static boolean isBlank(String value) {
+        return value == null || value.trim().isEmpty();
+    }
+
+    /**
+     * Extracts article URLs and titles from the page source.
+     *
+     * <p>Four patterns are applied in turn; patterns 2 to 4 only add URLs that are still missing,
+     * so they never replace a title found by an earlier pattern.
+     *
+     * @param content raw page source
+     * @return article URL mapped to its title, in order of first discovery
+     */
+    private static Map<String, String> extractArticles(String content) {
+        Map<String, String> result = new LinkedHashMap<>();
+
+        // 1) Primary source: "news" objects -> localized.title + slug
+        Matcher localizedTitleSlugMatcher = LOCALIZED_TITLE_SLUG_PATTERN.matcher(content);
+        while (localizedTitleSlugMatcher.find()) {
+            String title = normalizeTitle(localizedTitleSlugMatcher.group(1));
+            String slug = localizedTitleSlugMatcher.group(2);
+            String url = buildArticleUrl(slug);
+            result.put(url, isBlank(title) ? titleFromSlug(slug) : title);
+        }
+
+        // 2) Top news/carousel: title + link.url
+        Matcher titleLinkMatcher = TITLE_LINK_PATTERN.matcher(content);
+        while (titleLinkMatcher.find()) {
+            String title = normalizeTitle(titleLinkMatcher.group(1));
+            String url = titleLinkMatcher.group(2);
+            String slug = extractSlugFromUrl(url);
+            result.putIfAbsent(url, isBlank(title) ? titleFromSlug(slug) : title);
+        }
+
+        // 3) Fallback: direct URLs
+        Matcher matcher = LINK_PATTERN.matcher(content);
+        while (matcher.find()) {
+            String url = matcher.group();
+            result.putIfAbsent(url, titleFromSlug(extractSlugFromUrl(url)));
+        }
+
+        // 4) Fallback: slug only
+        Matcher slugMatcher = SLUG_PATTERN.matcher(content);
+        while (slugMatcher.find()) {
+            String slug = slugMatcher.group(1);
+            String url = buildArticleUrl(slug);
+            result.putIfAbsent(url, titleFromSlug(slug));
+        }
+
+        return result;
+    }
+
+    /** Returns the absolute article URL for {@code slug}. */
+    private static String buildArticleUrl(String slug) {
+        return "https://www.championsleague.basketball/en/youth/news/" + slug;
+    }
+
+    /** Returns the text after the last {@code '/'} of {@code url}, or the whole string if there is none. */
+    private static String extractSlugFromUrl(String url) {
+        int idx = url.lastIndexOf('/');
+        return idx >= 0 ? url.substring(idx + 1) : url;
+    }
+
+    /** Derives a readable title from a slug: hyphens become spaces and each word is capitalised. */
+    private static String titleFromSlug(String slug) {
+        if (isBlank(slug)) {
+            return "Neznámý název";
+        }
+        String[] parts = slug.split("-");
+        StringBuilder title = new StringBuilder();
+        for (String part : parts) {
+            if (part.isBlank()) {
+                continue;
+            }
+            if (title.length() != 0) {
+                title.append(' ');
+            }
+            title.append(part.substring(0, 1).toUpperCase(Locale.ROOT));
+            if (part.length() > 1) {
+                title.append(part.substring(1));
+            }
+        }
+        return title.length() == 0 ? slug : title.toString();
+    }
+
+    /**
+     * Decodes the JSON string escapes in a title captured from the page source.
+     *
+     * <p>Escaped quotes, backslashes, slashes and four-digit Unicode escapes are decoded; escaped
+     * line breaks and tabs become a single space. Unrecognised escapes are kept verbatim.
+     *
+     * @param rawTitle title as it appears between the JSON quotes, may be {@code null}
+     * @return decoded and trimmed title, or {@code null} when {@code rawTitle} is {@code null}
+     */
+    private static String normalizeTitle(String rawTitle) {
+        if (rawTitle == null) {
+            return null;
+        }
+        int length = rawTitle.length();
+        StringBuilder decoded = new StringBuilder(length);
+        int i = 0;
+        while (i < length) {
+            char current = rawTitle.charAt(i);
+            if (current != '\\' || i + 1 >= length) {
+                decoded.append(current);
+                i++;
+                continue;
+            }
+            char escape = rawTitle.charAt(i + 1);
+            int codeUnit = escape == 'u' ? parseHex4(rawTitle, i + 2) : -1;
+            if (escape == '"' || escape == '\\' || escape == '/') {
+                decoded.append(escape);
+                i += 2;
+            } else if (escape == 'n' || escape == 'r' || escape == 't') {
+                decoded.append(' ');
+                i += 2;
+            } else if (codeUnit >= 0) {
+                decoded.append((char) codeUnit);
+                i += 6;
+            } else {
+                // Unknown or malformed escape: keep the backslash as it was.
+                decoded.append(current);
+                i++;
+            }
+        }
+        return decoded.toString().trim();
+    }
+
+    /** Returns the value of the four hex digits starting at {@code start}, or -1 if they are missing or invalid. */
+    private static int parseHex4(String text, int start) {
+        if (start + 4 > text.length()) {
+            return -1;
+        }
+        int value = 0;
+        for (int i = start; i < start + 4; i++) {
+            int digit = Character.digit(text.charAt(i), 16);
+            if (digit < 0) {
+                return -1;
+            }
+            value = value * 16 + digit;
+        }
+        return value;
+    }
+}