first commit
This commit is contained in:
commit
5de0c22d41
50
.gitignore
vendored
Executable file
50
.gitignore
vendored
Executable file
@ -0,0 +1,50 @@
|
|||||||
|
### Eclipse
|
||||||
|
.classpath
|
||||||
|
.project
|
||||||
|
.settings/
|
||||||
|
|
||||||
|
HELP.md
|
||||||
|
target/
|
||||||
|
!.mvn/wrapper/maven-wrapper.jar
|
||||||
|
!**/src/main/**
|
||||||
|
!**/src/test/**
|
||||||
|
|
||||||
|
### STS ###
|
||||||
|
.mvn
|
||||||
|
.apt_generated
|
||||||
|
.classpath
|
||||||
|
.factorypath
|
||||||
|
.project
|
||||||
|
.settings
|
||||||
|
.springBeans
|
||||||
|
.sts4-cache
|
||||||
|
mvnw
|
||||||
|
mvnw.cmd
|
||||||
|
|
||||||
|
### IntelliJ IDEA ###
|
||||||
|
.idea
|
||||||
|
*.iws
|
||||||
|
*.iml
|
||||||
|
*.ipr
|
||||||
|
|
||||||
|
### NetBeans ###
|
||||||
|
/nbproject/private/
|
||||||
|
/nbbuild/
|
||||||
|
/dist/
|
||||||
|
/nbdist/
|
||||||
|
/.nb-gradle/
|
||||||
|
build/
|
||||||
|
|
||||||
|
### VS Code ###
|
||||||
|
.vscode/
|
||||||
|
|
||||||
|
### Log4j2 ###
|
||||||
|
logs
|
||||||
|
*.log
|
||||||
|
|
||||||
|
# Ignore Mac DS_Store files
|
||||||
|
.DS_Store
|
||||||
|
|
||||||
|
dependency-reduced-pom.xml
|
||||||
|
|
||||||
|
cert.pem
|
||||||
6
Dockerfile
Normal file
6
Dockerfile
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
# Runtime image for the YBCL parser: JRE only, the jar is built outside the image.
FROM eclipse-temurin:11-jre-alpine

# Run as an unprivileged user; the application only needs outbound network access.
RUN addgroup -S app && adduser -S -G app app

WORKDIR /app

# Expects the Maven build to have produced this jar in target/ before `docker build`.
COPY target/ybcl-parser-1.0-SNAPSHOT.jar app.jar

USER app

# `java -jar` requires the jar manifest to declare Main-Class and the jar to carry
# its runtime dependencies; verify that the Maven build produces such a jar.
ENTRYPOINT ["java", "-jar", "/app/app.jar"]
|
||||||
38
pom.xml
Normal file
38
pom.xml
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>cz.kamma.ybclparser</groupId>
    <artifactId>ybcl-parser</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <maven.compiler.source>11</maven.compiler.source>
        <maven.compiler.target>11</maven.compiler.target>
        <!-- The sources contain non-ASCII (Czech) literals; do not rely on the platform default. -->
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>

    <dependencies>
        <!-- Jackson -->
        <dependency>
            <groupId>com.fasterxml.jackson.core</groupId>
            <artifactId>jackson-databind</artifactId>
            <version>2.17.0</version>
        </dependency>

        <!-- Email (SMTP) -->
        <dependency>
            <groupId>com.sun.mail</groupId>
            <artifactId>jakarta.mail</artifactId>
            <version>2.0.1</version>
        </dependency>

        <!-- Lombok -->
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <version>1.18.32</version>
            <scope>provided</scope>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <!--
                Builds a self-contained, runnable jar: runtime dependencies are bundled and the
                manifest names the main class, so `java -jar ybcl-parser-1.0-SNAPSHOT.jar` works.
                The shaded jar replaces the main artifact, so the file name stays the same.
            -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>3.5.3</version>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <transformers>
                                <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                    <mainClass>cz.kamma.ybclparser.YbclParser</mainClass>
                                </transformer>
                                <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
                            </transformers>
                            <filters>
                                <!-- Signature files of bundled jars would invalidate the merged jar. -->
                                <filter>
                                    <artifact>*:*</artifact>
                                    <excludes>
                                        <exclude>META-INF/*.SF</exclude>
                                        <exclude>META-INF/*.DSA</exclude>
                                        <exclude>META-INF/*.RSA</exclude>
                                    </excludes>
                                </filter>
                            </filters>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>
|
||||||
271
src/main/java/cz/kamma/ybclparser/YbclParser.java
Normal file
271
src/main/java/cz/kamma/ybclparser/YbclParser.java
Normal file
@ -0,0 +1,271 @@
|
|||||||
|
package cz.kamma.ybclparser;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.net.URI;
|
||||||
|
import java.net.http.HttpClient;
|
||||||
|
import java.net.http.HttpRequest;
|
||||||
|
import java.net.http.HttpResponse;
|
||||||
|
import java.time.LocalDateTime;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Comparator;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.Locale;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Properties;
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.concurrent.Executors;
|
||||||
|
import java.util.concurrent.ScheduledExecutorService;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
import java.util.regex.Matcher;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
import jakarta.mail.Authenticator;
|
||||||
|
import jakarta.mail.Message;
|
||||||
|
import jakarta.mail.PasswordAuthentication;
|
||||||
|
import jakarta.mail.Session;
|
||||||
|
import jakarta.mail.Transport;
|
||||||
|
import jakarta.mail.internet.InternetAddress;
|
||||||
|
import jakarta.mail.internet.MimeMessage;
|
||||||
|
|
||||||
|
public class YbclParser {
|
||||||
|
|
||||||
|
private static final String BASE_URL = "https://www.championsleague.basketball/en/youth/news";
|
||||||
|
private static final int CHECK_INTERVAL_MINUTES = 5;
|
||||||
|
private static final HttpClient CLIENT = HttpClient.newHttpClient();
|
||||||
|
private static final Set<String> knownArticles = new HashSet<>();
|
||||||
|
private static final String SMTP_HOST = "smtp.gmail.com";
|
||||||
|
private static final String SMTP_PORT = "587";
|
||||||
|
private static final String SMTP_USER = "kamma.cz@gmail.com";
|
||||||
|
private static final String SMTP_APP_PASSWORD = "fohq wkzu alnj iepi";
|
||||||
|
private static final String EMAIL_FROM = SMTP_USER;
|
||||||
|
private static final String EMAIL_TO = "kamma.cz@gmail.com";
|
||||||
|
|
||||||
|
private static final class ArticleInfo {
|
||||||
|
private final String url;
|
||||||
|
private final String title;
|
||||||
|
|
||||||
|
private ArticleInfo(String url, String title) {
|
||||||
|
this.url = url;
|
||||||
|
this.title = title;
|
||||||
|
}
|
||||||
|
|
||||||
|
private String getUrl() {
|
||||||
|
return url;
|
||||||
|
}
|
||||||
|
|
||||||
|
private String getTitle() {
|
||||||
|
return title;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void main(String[] args) throws Exception {
|
||||||
|
String content = download(BASE_URL);
|
||||||
|
Map<String, String> initialArticles = extractArticles(content);
|
||||||
|
|
||||||
|
knownArticles.addAll(initialArticles.keySet());
|
||||||
|
System.out.println("První průchod dokončen (" + LocalDateTime.now() + ").");
|
||||||
|
System.out.println("Uloženo článků do paměti: " + knownArticles.size());
|
||||||
|
|
||||||
|
ScheduledExecutorService scheduler = Executors.newSingleThreadScheduledExecutor();
|
||||||
|
scheduler.scheduleAtFixedRate(YbclParser::checkForNewArticles, CHECK_INTERVAL_MINUTES, CHECK_INTERVAL_MINUTES, TimeUnit.MINUTES);
|
||||||
|
|
||||||
|
Runtime.getRuntime().addShutdownHook(new Thread(() -> {
|
||||||
|
System.out.println("Ukončuji plánovač.");
|
||||||
|
scheduler.shutdownNow();
|
||||||
|
}));
|
||||||
|
|
||||||
|
System.out.println("Monitoring spuštěn. Kontrola každých " + CHECK_INTERVAL_MINUTES + " minut.");
|
||||||
|
sendStartupEmail();
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String download(String url) throws IOException, InterruptedException {
|
||||||
|
HttpRequest request = HttpRequest.newBuilder()
|
||||||
|
.uri(URI.create(url))
|
||||||
|
.GET()
|
||||||
|
.build();
|
||||||
|
|
||||||
|
HttpResponse<String> response = CLIENT.send(request, HttpResponse.BodyHandlers.ofString());
|
||||||
|
|
||||||
|
return response.body();
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void checkForNewArticles() {
|
||||||
|
try {
|
||||||
|
String content = download(BASE_URL);
|
||||||
|
Map<String, String> currentArticles = extractArticles(content);
|
||||||
|
|
||||||
|
List<ArticleInfo> newArticles = new ArrayList<>();
|
||||||
|
for (Map.Entry<String, String> article : currentArticles.entrySet()) {
|
||||||
|
if (!knownArticles.contains(article.getKey())) {
|
||||||
|
newArticles.add(new ArticleInfo(article.getKey(), article.getValue()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (newArticles.isEmpty()) {
|
||||||
|
System.out.println("[" + LocalDateTime.now() + "] Žádný nový článek.");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
newArticles.sort(Comparator.comparing(ArticleInfo::getUrl));
|
||||||
|
newArticles.forEach(article -> knownArticles.add(article.getUrl()));
|
||||||
|
|
||||||
|
System.out.println("[" + LocalDateTime.now() + "] Nalezeno nových článků: " + newArticles.size());
|
||||||
|
newArticles.forEach(article -> System.out.println(article.getTitle() + " -> " + article.getUrl()));
|
||||||
|
|
||||||
|
sendEmailNotification(newArticles);
|
||||||
|
} catch (Exception e) {
|
||||||
|
System.err.println("Chyba při kontrole článků: " + e.getMessage());
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void sendEmailNotification(List<ArticleInfo> newArticles) {
|
||||||
|
StringBuilder body = new StringBuilder("Byly nalezeny nové články:\n\n");
|
||||||
|
for (ArticleInfo article : newArticles) {
|
||||||
|
body.append(article.getTitle()).append('\n');
|
||||||
|
body.append(article.getUrl()).append("\n\n");
|
||||||
|
}
|
||||||
|
body.append("\nČas kontroly: ").append(LocalDateTime.now());
|
||||||
|
|
||||||
|
sendEmail(
|
||||||
|
"YBCL: nalezeny nové články (" + newArticles.size() + ")",
|
||||||
|
body.toString()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void sendStartupEmail() {
|
||||||
|
String body = "Aplikace YBCL parser byla úspěšně spuštěna.\n"
|
||||||
|
+ "Čas spuštění: " + LocalDateTime.now() + "\n"
|
||||||
|
+ "Interval kontroly: " + CHECK_INTERVAL_MINUTES + " minut.\n"
|
||||||
|
+ "Počet načtených článků: " + knownArticles.size() + ".";
|
||||||
|
|
||||||
|
sendEmail("YBCL: aplikace běží", body);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void sendEmail(String subject, String body) {
|
||||||
|
if (isBlank(SMTP_USER) || isBlank(SMTP_APP_PASSWORD) || isBlank(EMAIL_FROM) || isBlank(EMAIL_TO)) {
|
||||||
|
System.err.println("E-mail se neodeslal: doplň konstanty SMTP_USER/SMTP_APP_PASSWORD/EMAIL_FROM/EMAIL_TO.");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
Properties props = new Properties();
|
||||||
|
props.put("mail.smtp.auth", "true");
|
||||||
|
props.put("mail.smtp.starttls.enable", "true");
|
||||||
|
props.put("mail.smtp.host", SMTP_HOST);
|
||||||
|
props.put("mail.smtp.port", SMTP_PORT);
|
||||||
|
|
||||||
|
Session session = Session.getInstance(props, new Authenticator() {
|
||||||
|
@Override
|
||||||
|
protected PasswordAuthentication getPasswordAuthentication() {
|
||||||
|
return new PasswordAuthentication(SMTP_USER, SMTP_APP_PASSWORD);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
Message message = new MimeMessage(session);
|
||||||
|
message.setFrom(new InternetAddress(EMAIL_FROM));
|
||||||
|
message.setRecipients(Message.RecipientType.TO, InternetAddress.parse(EMAIL_TO));
|
||||||
|
message.setSubject(subject);
|
||||||
|
message.setText(body);
|
||||||
|
Transport.send(message);
|
||||||
|
System.out.println("E-mail odeslán na: " + EMAIL_TO);
|
||||||
|
} catch (Exception e) {
|
||||||
|
System.err.println("Nepodařilo se odeslat e-mail: " + e.getMessage());
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static boolean isBlank(String value) {
|
||||||
|
return value == null || value.trim().isEmpty();
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Map<String, String> extractArticles(String content) {
|
||||||
|
Map<String, String> result = new java.util.LinkedHashMap<>();
|
||||||
|
|
||||||
|
// 1) Primární zdroj: obsah "news" objektů -> localized.title + slug
|
||||||
|
Pattern localizedTitleSlugPattern = Pattern.compile("\"localized\"\\s*:\\s*\\{[\\s\\S]*?\"title\"\\s*:\\s*\"((?:\\\\.|[^\"])*)\"[\\s\\S]*?\\}[\\s\\S]*?\"slug\"\\s*:\\s*\"([a-z0-9\\-]+)\"", Pattern.CASE_INSENSITIVE);
|
||||||
|
Matcher localizedTitleSlugMatcher = localizedTitleSlugPattern.matcher(content);
|
||||||
|
|
||||||
|
while (localizedTitleSlugMatcher.find()) {
|
||||||
|
String title = normalizeTitle(localizedTitleSlugMatcher.group(1));
|
||||||
|
String slug = localizedTitleSlugMatcher.group(2);
|
||||||
|
String url = buildArticleUrl(slug);
|
||||||
|
result.put(url, isBlank(title) ? titleFromSlug(slug) : title);
|
||||||
|
}
|
||||||
|
|
||||||
|
// 2) TopNews/karusel: title + link.url
|
||||||
|
Pattern titleLinkPattern = Pattern.compile("\"title\"\\s*:\\s*\"((?:\\\\.|[^\"])*)\"\\s*,\\s*\"link\"\\s*:\\s*\\{\\s*\"url\"\\s*:\\s*\"(https://www\\.championsleague\\.basketball/en/youth/news/[a-z0-9\\-]+)\"", Pattern.CASE_INSENSITIVE);
|
||||||
|
Matcher titleLinkMatcher = titleLinkPattern.matcher(content);
|
||||||
|
|
||||||
|
while (titleLinkMatcher.find()) {
|
||||||
|
String title = normalizeTitle(titleLinkMatcher.group(1));
|
||||||
|
String url = titleLinkMatcher.group(2);
|
||||||
|
String slug = extractSlugFromUrl(url);
|
||||||
|
result.putIfAbsent(url, isBlank(title) ? titleFromSlug(slug) : title);
|
||||||
|
}
|
||||||
|
|
||||||
|
// 3) Fallback: přímé URL
|
||||||
|
Pattern linkPattern = Pattern.compile("https://www\\.championsleague\\.basketball/en/youth/news/[a-z0-9\\-]+", Pattern.CASE_INSENSITIVE);
|
||||||
|
Matcher matcher = linkPattern.matcher(content);
|
||||||
|
|
||||||
|
while (matcher.find()) {
|
||||||
|
String url = matcher.group();
|
||||||
|
result.putIfAbsent(url, titleFromSlug(extractSlugFromUrl(url)));
|
||||||
|
}
|
||||||
|
|
||||||
|
// 4) Fallback: pouze slug
|
||||||
|
Pattern slugPattern = Pattern.compile("\"slug\":\"([a-z0-9\\-]+)\"");
|
||||||
|
Matcher slugMatcher = slugPattern.matcher(content);
|
||||||
|
|
||||||
|
while (slugMatcher.find()) {
|
||||||
|
String slug = slugMatcher.group(1);
|
||||||
|
String url = buildArticleUrl(slug);
|
||||||
|
result.putIfAbsent(url, titleFromSlug(slug));
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String buildArticleUrl(String slug) {
|
||||||
|
return "https://www.championsleague.basketball/en/youth/news/" + slug;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String extractSlugFromUrl(String url) {
|
||||||
|
int idx = url.lastIndexOf('/');
|
||||||
|
return idx >= 0 ? url.substring(idx + 1) : url;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String titleFromSlug(String slug) {
|
||||||
|
if (isBlank(slug)) {
|
||||||
|
return "Neznámý název";
|
||||||
|
}
|
||||||
|
String[] parts = slug.split("-");
|
||||||
|
StringBuilder title = new StringBuilder();
|
||||||
|
for (String part : parts) {
|
||||||
|
if (part.isBlank()) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (title.length() != 0) {
|
||||||
|
title.append(' ');
|
||||||
|
}
|
||||||
|
title.append(part.substring(0, 1).toUpperCase(Locale.ROOT));
|
||||||
|
if (part.length() > 1) {
|
||||||
|
title.append(part.substring(1));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return title.length() == 0 ? slug : title.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String normalizeTitle(String rawTitle) {
|
||||||
|
if (rawTitle == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
return rawTitle
|
||||||
|
.replace("\\\"", "\"")
|
||||||
|
.replace("\\/", "/")
|
||||||
|
.replace("\\n", " ")
|
||||||
|
.replace("\\t", " ")
|
||||||
|
.replace("\\u0026", "&")
|
||||||
|
.trim();
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
x
Reference in New Issue
Block a user