diff --git a/composer.development.json b/composer.development.json
index bc3ac47..5f4aad0 100644
--- a/composer.development.json
+++ b/composer.development.json
@@ -21,7 +21,8 @@
         "teewurst\/pipeline": "^3.0",
         "guzzlehttp\/guzzle": "^7.8",
         "micilini\/video-stream": "^1.0",
-        "nesbot\/carbon": "^3.0"
+        "nesbot\/carbon": "^3.0",
+        "ext-iconv": "*"
     },
     "autoload": {
         "psr-4": {
diff --git a/composer.json b/composer.json
index ffd239f..7fd2462 100644
--- a/composer.json
+++ b/composer.json
@@ -21,7 +21,8 @@
         "teewurst/pipeline": "^3.0",
         "guzzlehttp/guzzle": "^7.8",
         "micilini/video-stream": "^1.0",
-        "nesbot/carbon": "^3.0"
+        "nesbot/carbon": "^3.0",
+        "ext-iconv": "*"
     },
     "autoload": {
         "psr-4": {
diff --git a/src/ApiDomain/Console/config/console.php b/src/ApiDomain/Console/config/console.php
index 1c9a59c..bfb7b9d 100644
--- a/src/ApiDomain/Console/config/console.php
+++ b/src/ApiDomain/Console/config/console.php
@@ -1,5 +1,6 @@
 AutoWiringFactory::class,
             RbacUpdateCommand::class => AutoWiringFactory::class,
             AnalyzeVideoTitlesCommand::class => AutoWiringFactory::class,
+            AnalyzeTagsCommand::class => AutoWiringFactory::class,
             ReadUntaggedVideosCommand::class => AutoWiringFactory::class,
         ],
 ];
diff --git a/src/ApiDomain/Console/src/Command/AnalyzeTagsCommand.php b/src/ApiDomain/Console/src/Command/AnalyzeTagsCommand.php
new file mode 100644
index 0000000..d43ecd3
--- /dev/null
+++ b/src/ApiDomain/Console/src/Command/AnalyzeTagsCommand.php
@@ -0,0 +1,160 @@
+getName());
+
+        $this->videoRepository = $this->entityManager->getRepository(Video::class);
+    }
+
+    /**
+     * Counts the normalized words of all video titles and prints every word that
+     * occurs more than three times and for which isTagSubstringRule->appliesTo() is false.
+     *
+     * @return int Command::SUCCESS, or Command::FAILURE when any Throwable was caught
+     */
+    protected function execute(
+        InputInterface $input,
+        OutputInterface $output
+    ): int {
+        $io = new SymfonyStyle($input, $output);
+
+        try {
+            $videos = $this->videoRepository->findAll();
+
+            // Start with an empty list: without it an empty result set would leave
+            // $comments undefined when it reaches array_map() below.
+            $comments = [];
+            /** @var Video $video */
+            foreach ($videos as $video) {
+                $comments[] = $video->getTitle();
+            }
+
+            // Normalize every title (lower case, ASCII letters and whitespace only).
+            $normalized_comments = array_map([$this, 'normalize'], $comments);
+
+            // Tokenize and count words. Splitting on any whitespace run also separates
+            // words joined by tabs or line breaks, which normalize() leaves in place.
+            $word_counts = [];
+            foreach ($normalized_comments as $comment) {
+                $words = preg_split('/\s+/', $comment, -1, PREG_SPLIT_NO_EMPTY) ?: [];
+                foreach ($words as $word) {
+                    $word_counts[$word] = ($word_counts[$word] ?? 0) + 1;
+                }
+            }
+
+            // Consolidate words with their closest dictionary entry (Levenshtein distance <= 2).
+            // NOTE(review): the dictionary is built from the very keys being corrected, so
+            // correct_typo() always reaches an exact match and returns the word unchanged;
+            // as written nothing is merged. Confirm which dictionary was intended before
+            // relying on this step.
+            $corrected_word_counts = [];
+            $dictionary = array_keys($word_counts);
+            foreach ($word_counts as $word => $count) {
+                $correct_word = $this->correct_typo($word, $dictionary);
+                if (!isset($corrected_word_counts[$correct_word])) {
+                    $corrected_word_counts[$correct_word] = 0;
+                }
+                $corrected_word_counts[$correct_word] += $count;
+            }
+
+            // Sort by frequency: arsort() orders descending and array_reverse() flips
+            // that again, so the most frequent words are printed last.
+            arsort($corrected_word_counts);
+            $corrected_word_counts = array_reverse($corrected_word_counts);
+
+            // Print the frequent words through the console style instead of a bare echo,
+            // so the text goes to the OutputInterface this command was given.
+            foreach ($corrected_word_counts as $word => $count) {
+                if ($count > 3 && !$this->isTagSubstringRule->appliesTo($word)) {
+                    $io->writeln($word . ': ' . $count);
+                }
+            }
+
+            $io->success('OK!');
+        } catch (\Throwable $e) {
+            $io->error($e->getMessage());
+            $io->error($e->getTraceAsString());
+            $this->logger->error($e->getMessage(), ['exception' => $e]);
+            return Command::FAILURE;
+        }
+
+        return Command::SUCCESS;
+    }
+
+    /**
+     * Reduces a text to lower-case ASCII letters and whitespace.
+     *
+     * @param string|null $text text to normalize; it is cast to string first
+     * @return string normalized text with surrounding whitespace trimmed
+     */
+    public function normalize($text)
+    {
+        // Lower-case first so the a-z filter below keeps letters that were capitals.
+        $text = mb_strtolower((string) $text);
+        // Transliterate accented characters to ASCII. iconv() returns false when the
+        // conversion fails; fall back to the lower-cased text instead of passing false on.
+        $ascii = iconv('UTF-8', 'ASCII//TRANSLIT', $text);
+        if ($ascii === false) {
+            $ascii = $text;
+        }
+        // Remove everything except a-z and whitespace; preg_replace() yields null on error.
+        $ascii = preg_replace("/[^a-z\s]/", "", $ascii) ?? '';
+
+        return trim($ascii);
+    }
+
+    /**
+     * Typo correction using the Levenshtein distance.
+     *
+     * Returns $word itself when the dictionary contains it or holds no entry within a
+     * distance of 2; otherwise the entry with the smallest distance (first one wins on ties).
+     *
+     * @param string   $word       word to look up
+     * @param string[] $dictionary candidate words
+     * @return string
+     */
+    public function correct_typo($word, $dictionary)
+    {
+        $closest_word = $word;
+        $shortest_distance = -1;
+        foreach ($dictionary as $dict_word) {
+            $lev = levenshtein($word, $dict_word);
+            if ($lev === 0) {
+                // Exact match: the word is already a dictionary entry.
+                return $word;
+            }
+            if ($lev <= 2 && ($lev < $shortest_distance || $shortest_distance < 0)) {
+                $closest_word = $dict_word;
+                $shortest_distance = $lev;
+            }
+        }
+
+        return $closest_word;
+    }
+}
diff --git a/src/HandlingDomain/Analyze/src/Repository/AnalyzeVideoRepository.php b/src/HandlingDomain/Analyze/src/Repository/AnalyzeVideoRepository.php
index 36b4dad..d2ddaef 100644
--- a/src/HandlingDomain/Analyze/src/Repository/AnalyzeVideoRepository.php
+++ b/src/HandlingDomain/Analyze/src/Repository/AnalyzeVideoRepository.php
@@ -10,17 +10,17 @@ use MyTube\Data\Business\Manager\MyTubeEntityManager;
 
 class AnalyzeVideoRepository
 {
-    public function __construct(
-        private readonly MyTubeEntityManager $entityManager
-    ) {
-    }
-
     private const FIELD_MAP = [
         'duration' => 'v.duration',
         'title' => 'v.title',
         'createdAt' => 'v.createdAt'
     ];
 
+    public function __construct(
+        private readonly MyTubeEntityManager $entityManager
+    ) {
+    }
+
     public function findByFilter(
         ?string $query,
         int $page,
diff --git a/src/HandlingDomain/Tag/config/service_manager.php b/src/HandlingDomain/Tag/config/service_manager.php
index 7e62485..a369ed8 100644
--- a/src/HandlingDomain/Tag/config/service_manager.php
+++ b/src/HandlingDomain/Tag/config/service_manager.php
@@ -12,6 +12,7 @@ use MyTube\Handling\Tag\Handler\Query\ReadThumbnail\ReadThumbnailQueryBuilder;
 use MyTube\Handling\Tag\Handler\Query\ReadThumbnail\ReadThumbnailQueryHandler;
 use MyTube\Handling\Tag\Handler\Query\ReadVideoList\ReadVideoListQueryBuilder;
 use MyTube\Handling\Tag\Handler\Query\ReadVideoList\ReadVideoListQueryHandler;
+use MyTube\Handling\Tag\Rule\IsTagSubstringRule;
 use MyTube\Handling\Tag\Rule\TagAliasExistsRule;
 use MyTube\Handling\Tag\Rule\TagExistsRule;
 use Reinfi\DependencyInjection\Factory\AutoWiringFactory;
@@ -24,6 +25,7 @@ return [
         /// Rule
         TagExistsRule::class => InjectionFactory::class,
         TagAliasExistsRule::class => InjectionFactory::class,
+        IsTagSubstringRule::class => InjectionFactory::class,
 
         /// Builder
         TagBuilder::class => AutoWiringFactory::class,
diff --git a/src/HandlingDomain/Tag/src/Rule/IsTagSubstringRule.php b/src/HandlingDomain/Tag/src/Rule/IsTagSubstringRule.php
new file mode 100644
index 0000000..424f820
--- /dev/null
+++ b/src/HandlingDomain/Tag/src/Rule/IsTagSubstringRule.php
@@ -0,0 +1,34 @@
+tagRepository->createQueryBuilder('t')
+            ->where('t.description like :substring')
+            ->setParameter('substring', $substring);
+
+
+        return count($qb->getQuery()->getResult()) !== 0;
+    }
+}