first commit
This commit is contained in:
commit
5de0c22d41
50
.gitignore
vendored
Executable file
50
.gitignore
vendored
Executable file
@ -0,0 +1,50 @@
|
||||
### Eclipse ###
|
||||
.classpath
|
||||
.project
|
||||
.settings/
|
||||
|
||||
HELP.md
|
||||
target/
|
||||
!.mvn/wrapper/maven-wrapper.jar
|
||||
!**/src/main/**
|
||||
!**/src/test/**
|
||||
|
||||
### STS ###
|
||||
.mvn
|
||||
.apt_generated
|
||||
.classpath
|
||||
.factorypath
|
||||
.project
|
||||
.settings
|
||||
.springBeans
|
||||
.sts4-cache
|
||||
mvnw
|
||||
mvnw.cmd
|
||||
|
||||
### IntelliJ IDEA ###
|
||||
.idea
|
||||
*.iws
|
||||
*.iml
|
||||
*.ipr
|
||||
|
||||
### NetBeans ###
|
||||
/nbproject/private/
|
||||
/nbbuild/
|
||||
/dist/
|
||||
/nbdist/
|
||||
/.nb-gradle/
|
||||
build/
|
||||
|
||||
### VS Code ###
|
||||
.vscode/
|
||||
|
||||
### Log4j2 ###
|
||||
logs
|
||||
*.log
|
||||
|
||||
# Ignore Mac DS_Store files
|
||||
.DS_Store
|
||||
|
||||
dependency-reduced-pom.xml
|
||||
|
||||
cert.pem
|
||||
6
Dockerfile
Normal file
6
Dockerfile
Normal file
@ -0,0 +1,6 @@
|
||||
# Runtime image for the YBCL parser: a JRE-only Alpine base running the
# application jar produced by the Maven build.
FROM eclipse-temurin:11-jre-alpine
WORKDIR /app

# Create an unprivileged account so the JVM does not run as root.
RUN addgroup -S app && adduser -S -G app app

COPY target/ybcl-parser-1.0-SNAPSHOT.jar app.jar

USER app

ENTRYPOINT ["java", "-jar", "/app/app.jar"]
|
||||
38
pom.xml
Normal file
38
pom.xml
Normal file
@ -0,0 +1,38 @@
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>cz.kamma.ybclparser</groupId>
    <artifactId>ybcl-parser</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <maven.compiler.source>11</maven.compiler.source>
        <maven.compiler.target>11</maven.compiler.target>
        <!-- The sources contain non-ASCII (Czech) string literals; pin the encoding
             so the build does not depend on the platform default. -->
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>

    <dependencies>
        <!-- Jackson -->
        <dependency>
            <groupId>com.fasterxml.jackson.core</groupId>
            <artifactId>jackson-databind</artifactId>
            <version>2.17.0</version>
        </dependency>

        <!-- Email (SMTP) -->
        <dependency>
            <groupId>com.sun.mail</groupId>
            <artifactId>jakarta.mail</artifactId>
            <version>2.0.1</version>
        </dependency>

        <!-- Lombok -->
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <version>1.18.32</version>
            <scope>provided</scope>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <!-- Builds a self-contained, executable jar. The shaded jar replaces the main
                 artifact (target/ybcl-parser-1.0-SNAPSHOT.jar), so "java -jar" works with
                 the runtime dependencies bundled and Main-Class set in the manifest. -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>3.5.1</version>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <transformers>
                                <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                    <mainClass>cz.kamma.ybclparser.YbclParser</mainClass>
                                </transformer>
                                <!-- Merge META-INF/services entries contributed by several jars. -->
                                <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
                            </transformers>
                            <filters>
                                <!-- Signature files of signed dependencies are invalid once
                                     repackaged and would make the JVM reject the jar. -->
                                <filter>
                                    <artifact>*:*</artifact>
                                    <excludes>
                                        <exclude>META-INF/*.SF</exclude>
                                        <exclude>META-INF/*.DSA</exclude>
                                        <exclude>META-INF/*.RSA</exclude>
                                    </excludes>
                                </filter>
                            </filters>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>
|
||||
271
src/main/java/cz/kamma/ybclparser/YbclParser.java
Normal file
271
src/main/java/cz/kamma/ybclparser/YbclParser.java
Normal file
@ -0,0 +1,271 @@
|
||||
package cz.kamma.ybclparser;
|
||||
|
||||
import java.io.IOException;
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.nio.charset.StandardCharsets;
import java.time.Duration;
import java.time.LocalDateTime;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import jakarta.mail.Authenticator;
import jakarta.mail.Message;
import jakarta.mail.PasswordAuthentication;
import jakarta.mail.Session;
import jakarta.mail.Transport;
import jakarta.mail.internet.InternetAddress;
import jakarta.mail.internet.MimeMessage;
|
||||
|
||||
public class YbclParser {
|
||||
|
||||
private static final String BASE_URL = "https://www.championsleague.basketball/en/youth/news";
|
||||
private static final int CHECK_INTERVAL_MINUTES = 5;
|
||||
private static final HttpClient CLIENT = HttpClient.newHttpClient();
|
||||
private static final Set<String> knownArticles = new HashSet<>();
|
||||
private static final String SMTP_HOST = "smtp.gmail.com";
|
||||
private static final String SMTP_PORT = "587";
|
||||
private static final String SMTP_USER = "kamma.cz@gmail.com";
|
||||
private static final String SMTP_APP_PASSWORD = "fohq wkzu alnj iepi";
|
||||
private static final String EMAIL_FROM = SMTP_USER;
|
||||
private static final String EMAIL_TO = "kamma.cz@gmail.com";
|
||||
|
||||
private static final class ArticleInfo {
|
||||
private final String url;
|
||||
private final String title;
|
||||
|
||||
private ArticleInfo(String url, String title) {
|
||||
this.url = url;
|
||||
this.title = title;
|
||||
}
|
||||
|
||||
private String getUrl() {
|
||||
return url;
|
||||
}
|
||||
|
||||
private String getTitle() {
|
||||
return title;
|
||||
}
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
String content = download(BASE_URL);
|
||||
Map<String, String> initialArticles = extractArticles(content);
|
||||
|
||||
knownArticles.addAll(initialArticles.keySet());
|
||||
System.out.println("První průchod dokončen (" + LocalDateTime.now() + ").");
|
||||
System.out.println("Uloženo článků do paměti: " + knownArticles.size());
|
||||
|
||||
ScheduledExecutorService scheduler = Executors.newSingleThreadScheduledExecutor();
|
||||
scheduler.scheduleAtFixedRate(YbclParser::checkForNewArticles, CHECK_INTERVAL_MINUTES, CHECK_INTERVAL_MINUTES, TimeUnit.MINUTES);
|
||||
|
||||
Runtime.getRuntime().addShutdownHook(new Thread(() -> {
|
||||
System.out.println("Ukončuji plánovač.");
|
||||
scheduler.shutdownNow();
|
||||
}));
|
||||
|
||||
System.out.println("Monitoring spuštěn. Kontrola každých " + CHECK_INTERVAL_MINUTES + " minut.");
|
||||
sendStartupEmail();
|
||||
}
|
||||
|
||||
private static String download(String url) throws IOException, InterruptedException {
|
||||
HttpRequest request = HttpRequest.newBuilder()
|
||||
.uri(URI.create(url))
|
||||
.GET()
|
||||
.build();
|
||||
|
||||
HttpResponse<String> response = CLIENT.send(request, HttpResponse.BodyHandlers.ofString());
|
||||
|
||||
return response.body();
|
||||
}
|
||||
|
||||
private static void checkForNewArticles() {
|
||||
try {
|
||||
String content = download(BASE_URL);
|
||||
Map<String, String> currentArticles = extractArticles(content);
|
||||
|
||||
List<ArticleInfo> newArticles = new ArrayList<>();
|
||||
for (Map.Entry<String, String> article : currentArticles.entrySet()) {
|
||||
if (!knownArticles.contains(article.getKey())) {
|
||||
newArticles.add(new ArticleInfo(article.getKey(), article.getValue()));
|
||||
}
|
||||
}
|
||||
|
||||
if (newArticles.isEmpty()) {
|
||||
System.out.println("[" + LocalDateTime.now() + "] Žádný nový článek.");
|
||||
return;
|
||||
}
|
||||
|
||||
newArticles.sort(Comparator.comparing(ArticleInfo::getUrl));
|
||||
newArticles.forEach(article -> knownArticles.add(article.getUrl()));
|
||||
|
||||
System.out.println("[" + LocalDateTime.now() + "] Nalezeno nových článků: " + newArticles.size());
|
||||
newArticles.forEach(article -> System.out.println(article.getTitle() + " -> " + article.getUrl()));
|
||||
|
||||
sendEmailNotification(newArticles);
|
||||
} catch (Exception e) {
|
||||
System.err.println("Chyba při kontrole článků: " + e.getMessage());
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
private static void sendEmailNotification(List<ArticleInfo> newArticles) {
|
||||
StringBuilder body = new StringBuilder("Byly nalezeny nové články:\n\n");
|
||||
for (ArticleInfo article : newArticles) {
|
||||
body.append(article.getTitle()).append('\n');
|
||||
body.append(article.getUrl()).append("\n\n");
|
||||
}
|
||||
body.append("\nČas kontroly: ").append(LocalDateTime.now());
|
||||
|
||||
sendEmail(
|
||||
"YBCL: nalezeny nové články (" + newArticles.size() + ")",
|
||||
body.toString()
|
||||
);
|
||||
}
|
||||
|
||||
private static void sendStartupEmail() {
|
||||
String body = "Aplikace YBCL parser byla úspěšně spuštěna.\n"
|
||||
+ "Čas spuštění: " + LocalDateTime.now() + "\n"
|
||||
+ "Interval kontroly: " + CHECK_INTERVAL_MINUTES + " minut.\n"
|
||||
+ "Počet načtených článků: " + knownArticles.size() + ".";
|
||||
|
||||
sendEmail("YBCL: aplikace běží", body);
|
||||
}
|
||||
|
||||
private static void sendEmail(String subject, String body) {
|
||||
if (isBlank(SMTP_USER) || isBlank(SMTP_APP_PASSWORD) || isBlank(EMAIL_FROM) || isBlank(EMAIL_TO)) {
|
||||
System.err.println("E-mail se neodeslal: doplň konstanty SMTP_USER/SMTP_APP_PASSWORD/EMAIL_FROM/EMAIL_TO.");
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
Properties props = new Properties();
|
||||
props.put("mail.smtp.auth", "true");
|
||||
props.put("mail.smtp.starttls.enable", "true");
|
||||
props.put("mail.smtp.host", SMTP_HOST);
|
||||
props.put("mail.smtp.port", SMTP_PORT);
|
||||
|
||||
Session session = Session.getInstance(props, new Authenticator() {
|
||||
@Override
|
||||
protected PasswordAuthentication getPasswordAuthentication() {
|
||||
return new PasswordAuthentication(SMTP_USER, SMTP_APP_PASSWORD);
|
||||
}
|
||||
});
|
||||
|
||||
Message message = new MimeMessage(session);
|
||||
message.setFrom(new InternetAddress(EMAIL_FROM));
|
||||
message.setRecipients(Message.RecipientType.TO, InternetAddress.parse(EMAIL_TO));
|
||||
message.setSubject(subject);
|
||||
message.setText(body);
|
||||
Transport.send(message);
|
||||
System.out.println("E-mail odeslán na: " + EMAIL_TO);
|
||||
} catch (Exception e) {
|
||||
System.err.println("Nepodařilo se odeslat e-mail: " + e.getMessage());
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
private static boolean isBlank(String value) {
|
||||
return value == null || value.trim().isEmpty();
|
||||
}
|
||||
|
||||
private static Map<String, String> extractArticles(String content) {
|
||||
Map<String, String> result = new java.util.LinkedHashMap<>();
|
||||
|
||||
// 1) Primární zdroj: obsah "news" objektů -> localized.title + slug
|
||||
Pattern localizedTitleSlugPattern = Pattern.compile("\"localized\"\\s*:\\s*\\{[\\s\\S]*?\"title\"\\s*:\\s*\"((?:\\\\.|[^\"])*)\"[\\s\\S]*?\\}[\\s\\S]*?\"slug\"\\s*:\\s*\"([a-z0-9\\-]+)\"", Pattern.CASE_INSENSITIVE);
|
||||
Matcher localizedTitleSlugMatcher = localizedTitleSlugPattern.matcher(content);
|
||||
|
||||
while (localizedTitleSlugMatcher.find()) {
|
||||
String title = normalizeTitle(localizedTitleSlugMatcher.group(1));
|
||||
String slug = localizedTitleSlugMatcher.group(2);
|
||||
String url = buildArticleUrl(slug);
|
||||
result.put(url, isBlank(title) ? titleFromSlug(slug) : title);
|
||||
}
|
||||
|
||||
// 2) TopNews/karusel: title + link.url
|
||||
Pattern titleLinkPattern = Pattern.compile("\"title\"\\s*:\\s*\"((?:\\\\.|[^\"])*)\"\\s*,\\s*\"link\"\\s*:\\s*\\{\\s*\"url\"\\s*:\\s*\"(https://www\\.championsleague\\.basketball/en/youth/news/[a-z0-9\\-]+)\"", Pattern.CASE_INSENSITIVE);
|
||||
Matcher titleLinkMatcher = titleLinkPattern.matcher(content);
|
||||
|
||||
while (titleLinkMatcher.find()) {
|
||||
String title = normalizeTitle(titleLinkMatcher.group(1));
|
||||
String url = titleLinkMatcher.group(2);
|
||||
String slug = extractSlugFromUrl(url);
|
||||
result.putIfAbsent(url, isBlank(title) ? titleFromSlug(slug) : title);
|
||||
}
|
||||
|
||||
// 3) Fallback: přímé URL
|
||||
Pattern linkPattern = Pattern.compile("https://www\\.championsleague\\.basketball/en/youth/news/[a-z0-9\\-]+", Pattern.CASE_INSENSITIVE);
|
||||
Matcher matcher = linkPattern.matcher(content);
|
||||
|
||||
while (matcher.find()) {
|
||||
String url = matcher.group();
|
||||
result.putIfAbsent(url, titleFromSlug(extractSlugFromUrl(url)));
|
||||
}
|
||||
|
||||
// 4) Fallback: pouze slug
|
||||
Pattern slugPattern = Pattern.compile("\"slug\":\"([a-z0-9\\-]+)\"");
|
||||
Matcher slugMatcher = slugPattern.matcher(content);
|
||||
|
||||
while (slugMatcher.find()) {
|
||||
String slug = slugMatcher.group(1);
|
||||
String url = buildArticleUrl(slug);
|
||||
result.putIfAbsent(url, titleFromSlug(slug));
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
private static String buildArticleUrl(String slug) {
|
||||
return "https://www.championsleague.basketball/en/youth/news/" + slug;
|
||||
}
|
||||
|
||||
private static String extractSlugFromUrl(String url) {
|
||||
int idx = url.lastIndexOf('/');
|
||||
return idx >= 0 ? url.substring(idx + 1) : url;
|
||||
}
|
||||
|
||||
private static String titleFromSlug(String slug) {
|
||||
if (isBlank(slug)) {
|
||||
return "Neznámý název";
|
||||
}
|
||||
String[] parts = slug.split("-");
|
||||
StringBuilder title = new StringBuilder();
|
||||
for (String part : parts) {
|
||||
if (part.isBlank()) {
|
||||
continue;
|
||||
}
|
||||
if (title.length() != 0) {
|
||||
title.append(' ');
|
||||
}
|
||||
title.append(part.substring(0, 1).toUpperCase(Locale.ROOT));
|
||||
if (part.length() > 1) {
|
||||
title.append(part.substring(1));
|
||||
}
|
||||
}
|
||||
return title.length() == 0 ? slug : title.toString();
|
||||
}
|
||||
|
||||
private static String normalizeTitle(String rawTitle) {
|
||||
if (rawTitle == null) {
|
||||
return null;
|
||||
}
|
||||
return rawTitle
|
||||
.replace("\\\"", "\"")
|
||||
.replace("\\/", "/")
|
||||
.replace("\\n", " ")
|
||||
.replace("\\t", " ")
|
||||
.replace("\\u0026", "&")
|
||||
.trim();
|
||||
}
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user