tag analyse v1
This commit is contained in:
parent
9e3e2ce15d
commit
7c62023e6f
@ -21,7 +21,8 @@
|
|||||||
"teewurst\/pipeline": "^3.0",
|
"teewurst\/pipeline": "^3.0",
|
||||||
"guzzlehttp\/guzzle": "^7.8",
|
"guzzlehttp\/guzzle": "^7.8",
|
||||||
"micilini\/video-stream": "^1.0",
|
"micilini\/video-stream": "^1.0",
|
||||||
"nesbot\/carbon": "^3.0"
|
"nesbot\/carbon": "^3.0",
|
||||||
|
"ext-iconv": "*"
|
||||||
},
|
},
|
||||||
"autoload": {
|
"autoload": {
|
||||||
"psr-4": {
|
"psr-4": {
|
||||||
|
|||||||
@ -21,7 +21,8 @@
|
|||||||
"teewurst/pipeline": "^3.0",
|
"teewurst/pipeline": "^3.0",
|
||||||
"guzzlehttp/guzzle": "^7.8",
|
"guzzlehttp/guzzle": "^7.8",
|
||||||
"micilini/video-stream": "^1.0",
|
"micilini/video-stream": "^1.0",
|
||||||
"nesbot/carbon": "^3.0"
|
"nesbot/carbon": "^3.0",
|
||||||
|
"ext-iconv": "*"
|
||||||
},
|
},
|
||||||
"autoload": {
|
"autoload": {
|
||||||
"psr-4": {
|
"psr-4": {
|
||||||
|
|||||||
@ -1,5 +1,6 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
|
use MyTube\API\Console\Command\AnalyzeTagsCommand;
|
||||||
use MyTube\API\Console\Command\AnalyzeVideoTitlesCommand;
|
use MyTube\API\Console\Command\AnalyzeVideoTitlesCommand;
|
||||||
use MyTube\API\Console\Command\InitializeDataCommand;
|
use MyTube\API\Console\Command\InitializeDataCommand;
|
||||||
use MyTube\API\Console\Command\RbacUpdateCommand;
|
use MyTube\API\Console\Command\RbacUpdateCommand;
|
||||||
@ -11,5 +12,6 @@ return [
|
|||||||
RbacUpdateCommand::class,
|
RbacUpdateCommand::class,
|
||||||
AnalyzeVideoTitlesCommand::class,
|
AnalyzeVideoTitlesCommand::class,
|
||||||
ReadUntaggedVideosCommand::class,
|
ReadUntaggedVideosCommand::class,
|
||||||
|
AnalyzeTagsCommand::class,
|
||||||
]
|
]
|
||||||
];
|
];
|
||||||
|
|||||||
@ -1,5 +1,6 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
|
use MyTube\API\Console\Command\AnalyzeTagsCommand;
|
||||||
use MyTube\API\Console\Command\AnalyzeVideoTitlesCommand;
|
use MyTube\API\Console\Command\AnalyzeVideoTitlesCommand;
|
||||||
use MyTube\API\Console\Command\InitializeDataCommand;
|
use MyTube\API\Console\Command\InitializeDataCommand;
|
||||||
use MyTube\API\Console\Command\RbacUpdateCommand;
|
use MyTube\API\Console\Command\RbacUpdateCommand;
|
||||||
@ -11,6 +12,7 @@ return [
|
|||||||
InitializeDataCommand::class => AutoWiringFactory::class,
|
InitializeDataCommand::class => AutoWiringFactory::class,
|
||||||
RbacUpdateCommand::class => AutoWiringFactory::class,
|
RbacUpdateCommand::class => AutoWiringFactory::class,
|
||||||
AnalyzeVideoTitlesCommand::class => AutoWiringFactory::class,
|
AnalyzeVideoTitlesCommand::class => AutoWiringFactory::class,
|
||||||
|
AnalyzeTagsCommand::class => AutoWiringFactory::class,
|
||||||
ReadUntaggedVideosCommand::class => AutoWiringFactory::class,
|
ReadUntaggedVideosCommand::class => AutoWiringFactory::class,
|
||||||
],
|
],
|
||||||
];
|
];
|
||||||
|
|||||||
133
src/ApiDomain/Console/src/Command/AnalyzeTagsCommand.php
Normal file
133
src/ApiDomain/Console/src/Command/AnalyzeTagsCommand.php
Normal file
@ -0,0 +1,133 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
namespace MyTube\API\Console\Command;
|
||||||
|
|
||||||
|
use MyTube\Data\Business\Entity\Tag;
|
||||||
|
use MyTube\Data\Business\Entity\Video;
|
||||||
|
use MyTube\Data\Business\Manager\MyTubeEntityManager;
|
||||||
|
use MyTube\Data\Business\Repository\VideoRepository;
|
||||||
|
use MyTube\Handling\Tag\Rule\IsTagSubstringRule;
|
||||||
|
use MyTube\Handling\Video\Analyzer\VideoDurationAnalyzer;
|
||||||
|
use MyTube\Handling\Video\Analyzer\VideoTitleAnalyzer;
|
||||||
|
use MyTube\Infrastructure\Logging\Logger\Logger;
|
||||||
|
use Symfony\Component\Console\Attribute\AsCommand;
|
||||||
|
use Symfony\Component\Console\Command\Command;
|
||||||
|
use Symfony\Component\Console\Input\InputInterface;
|
||||||
|
use Symfony\Component\Console\Output\OutputInterface;
|
||||||
|
use Symfony\Component\Console\Style\SymfonyStyle;
|
||||||
|
use function Webmozart\Assert\Tests\StaticAnalysis\length;
|
||||||
|
|
||||||
|
#[AsCommand(name: 'analyze:tags', description: 'Analyzes video titles and add tags')]
class AnalyzeTagsCommand extends Command
{
    /** A word is only reported when it occurs more often than this. */
    private const REPORT_THRESHOLD = 3;

    /** Largest Levenshtein distance at which two words are treated as the same word. */
    private const MAX_TYPO_DISTANCE = 2;

    private readonly VideoRepository $videoRepository;

    public function __construct(
        private readonly MyTubeEntityManager $entityManager,
        private readonly IsTagSubstringRule $isTagSubstringRule,
        private readonly Logger $logger,
    ) {
        // Passing no name lets the base command resolve it from the
        // #[AsCommand] attribute; getName() cannot return anything useful
        // before the parent constructor has run.
        parent::__construct();

        $this->videoRepository = $this->entityManager->getRepository(Video::class);
    }

    /**
     * Counts the words used in all video titles and prints those that occur
     * more than REPORT_THRESHOLD times and for which IsTagSubstringRule does
     * not apply.
     *
     * Words are printed in ascending order of frequency, so the most frequent
     * ones end up at the bottom of the output.
     *
     * @return int Command::SUCCESS, or Command::FAILURE when any error was thrown
     */
    protected function execute(
        InputInterface $input,
        OutputInterface $output
    ): int {
        $io = new SymfonyStyle($input, $output);

        try {
            // Start from an empty list so an empty video table is not an error.
            $titles = [];

            /** @var Video $video */
            foreach ($this->videoRepository->findAll() as $video) {
                $titles[] = $video->getTitle();
            }

            $wordCounts = $this->consolidateTypos($this->countWords($titles));

            // Sort by frequency (descending), then flip to ascending order.
            arsort($wordCounts);
            $wordCounts = array_reverse($wordCounts);

            foreach ($wordCounts as $word => $count) {
                if ($count > self::REPORT_THRESHOLD && !$this->isTagSubstringRule->appliesTo((string) $word)) {
                    // Write through the console style instead of echo so the
                    // output honours the configured output and verbosity.
                    $io->writeln($word . ': ' . $count);
                }
            }

            $io->success('OK!');
        } catch (\Throwable $e) {
            $io->error($e->getMessage());
            $io->error($e->getTraceAsString());
            $this->logger->error($e->getMessage(), ['exception' => $e]);

            return Command::FAILURE;
        }

        return Command::SUCCESS;
    }

    /**
     * Reduces a text to lowercase ASCII letters and whitespace.
     *
     * Steps: lowercase, transliterate accented characters to ASCII, drop every
     * character that is not a-z or whitespace, trim.
     *
     * @param mixed $text Text to normalize; it is cast to string
     *
     * @return string The normalized text (may be empty)
     */
    public function normalize($text): string
    {
        $text = mb_strtolower((string) $text);

        // iconv() returns false when it cannot convert the input; keep the
        // lowercased text in that case instead of wiping the whole title.
        // Remaining non-ASCII bytes are removed by the filter below.
        $ascii = iconv('UTF-8', 'ASCII//TRANSLIT', $text);
        if ($ascii !== false) {
            $text = $ascii;
        }

        // preg_replace() returns null on a PCRE error.
        $text = preg_replace('/[^a-z\s]/', '', $text) ?? '';

        return trim($text);
    }

    /**
     * Finds the dictionary entry closest to a word by Levenshtein distance.
     *
     * @param string   $word       Word to look up
     * @param iterable $dictionary Candidate words
     *
     * @return mixed $word itself when the dictionary contains it or when no
     *               candidate is within MAX_TYPO_DISTANCE; otherwise the first
     *               candidate with the smallest distance
     */
    public function correct_typo($word, $dictionary)
    {
        $closestWord = $word;
        $shortestDistance = null;

        foreach ($dictionary as $candidate) {
            $distance = levenshtein($word, $candidate);

            // An exact match always wins, even over a near match seen earlier.
            if ($distance === 0) {
                return $word;
            }

            if (
                $distance <= self::MAX_TYPO_DISTANCE
                && ($shortestDistance === null || $distance < $shortestDistance)
            ) {
                $closestWord = $candidate;
                $shortestDistance = $distance;
            }
        }

        return $closestWord;
    }

    /**
     * Tokenizes the normalized titles and counts each word.
     *
     * @param array $titles Raw video titles
     *
     * @return array<string, int> Occurrences keyed by word
     */
    private function countWords(array $titles): array
    {
        $counts = [];

        foreach ($titles as $title) {
            // normalize() keeps every kind of whitespace, so split on any
            // whitespace run rather than on single spaces only.
            $words = preg_split('/\s+/', $this->normalize($title), -1, PREG_SPLIT_NO_EMPTY);
            if ($words === false) {
                continue;
            }

            foreach ($words as $word) {
                $counts[$word] = ($counts[$word] ?? 0) + 1;
            }
        }

        return $counts;
    }

    /**
     * Merges the counts of words that correct_typo() maps to the same word.
     *
     * NOTE(review): the dictionary is the key set of $wordCounts, so every
     * word matches itself at distance 0 and nothing is merged at the moment.
     * Consolidating real typos needs a dictionary that excludes the word
     * being looked up (for example only more frequent words) - to be decided.
     *
     * @param array<string, int> $wordCounts Occurrences keyed by word
     *
     * @return array<string, int> Occurrences keyed by corrected word
     */
    private function consolidateTypos(array $wordCounts): array
    {
        $dictionary = array_keys($wordCounts);
        $merged = [];

        foreach ($wordCounts as $word => $count) {
            $target = $this->correct_typo((string) $word, $dictionary);
            $merged[$target] = ($merged[$target] ?? 0) + $count;
        }

        return $merged;
    }
}
|
||||||
@ -10,17 +10,17 @@ use MyTube\Data\Business\Manager\MyTubeEntityManager;
|
|||||||
|
|
||||||
class AnalyzeVideoRepository
|
class AnalyzeVideoRepository
|
||||||
{
|
{
|
||||||
public function __construct(
|
|
||||||
private readonly MyTubeEntityManager $entityManager
|
|
||||||
) {
|
|
||||||
}
|
|
||||||
|
|
||||||
private const FIELD_MAP = [
|
private const FIELD_MAP = [
|
||||||
'duration' => 'v.duration',
|
'duration' => 'v.duration',
|
||||||
'title' => 'v.title',
|
'title' => 'v.title',
|
||||||
'createdAt' => 'v.createdAt'
|
'createdAt' => 'v.createdAt'
|
||||||
];
|
];
|
||||||
|
|
||||||
|
public function __construct(
|
||||||
|
private readonly MyTubeEntityManager $entityManager
|
||||||
|
) {
|
||||||
|
}
|
||||||
|
|
||||||
public function findByFilter(
|
public function findByFilter(
|
||||||
?string $query,
|
?string $query,
|
||||||
int $page,
|
int $page,
|
||||||
|
|||||||
@ -12,6 +12,7 @@ use MyTube\Handling\Tag\Handler\Query\ReadThumbnail\ReadThumbnailQueryBuilder;
|
|||||||
use MyTube\Handling\Tag\Handler\Query\ReadThumbnail\ReadThumbnailQueryHandler;
|
use MyTube\Handling\Tag\Handler\Query\ReadThumbnail\ReadThumbnailQueryHandler;
|
||||||
use MyTube\Handling\Tag\Handler\Query\ReadVideoList\ReadVideoListQueryBuilder;
|
use MyTube\Handling\Tag\Handler\Query\ReadVideoList\ReadVideoListQueryBuilder;
|
||||||
use MyTube\Handling\Tag\Handler\Query\ReadVideoList\ReadVideoListQueryHandler;
|
use MyTube\Handling\Tag\Handler\Query\ReadVideoList\ReadVideoListQueryHandler;
|
||||||
|
use MyTube\Handling\Tag\Rule\IsTagSubstringRule;
|
||||||
use MyTube\Handling\Tag\Rule\TagAliasExistsRule;
|
use MyTube\Handling\Tag\Rule\TagAliasExistsRule;
|
||||||
use MyTube\Handling\Tag\Rule\TagExistsRule;
|
use MyTube\Handling\Tag\Rule\TagExistsRule;
|
||||||
use Reinfi\DependencyInjection\Factory\AutoWiringFactory;
|
use Reinfi\DependencyInjection\Factory\AutoWiringFactory;
|
||||||
@ -24,6 +25,7 @@ return [
|
|||||||
/// Rule
|
/// Rule
|
||||||
TagExistsRule::class => InjectionFactory::class,
|
TagExistsRule::class => InjectionFactory::class,
|
||||||
TagAliasExistsRule::class => InjectionFactory::class,
|
TagAliasExistsRule::class => InjectionFactory::class,
|
||||||
|
IsTagSubstringRule::class => InjectionFactory::class,
|
||||||
|
|
||||||
/// Builder
|
/// Builder
|
||||||
TagBuilder::class => AutoWiringFactory::class,
|
TagBuilder::class => AutoWiringFactory::class,
|
||||||
|
|||||||
34
src/HandlingDomain/Tag/src/Rule/IsTagSubstringRule.php
Normal file
34
src/HandlingDomain/Tag/src/Rule/IsTagSubstringRule.php
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
namespace MyTube\Handling\Tag\Rule;
|
||||||
|
|
||||||
|
use MyTube\Data\Business\Repository\TagRepository;
|
||||||
|
use Reinfi\DependencyInjection\Annotation\InjectDoctrineRepository;
|
||||||
|
|
||||||
|
/**
 * Rule that checks whether a string occurs inside the description of any tag.
 */
class IsTagSubstringRule
{
    /** Escape character used for LIKE wildcards in the search term. */
    private const LIKE_ESCAPE = '!';

    /**
     * @InjectDoctrineRepository(
     *     entityManager="MyTube\Data\Business\Manager\MyTubeEntityManager",
     *     entity="MyTube\Data\Business\Entity\Tag"
     * )
     */
    public function __construct(
        private readonly TagRepository $tagRepository,
    ) {
    }

    /**
     * Tells whether at least one tag has a description containing $substring.
     *
     * @param string $substring Text to search for; it is matched literally
     *
     * @return bool true when a matching tag exists
     */
    public function appliesTo(
        string $substring,
    ): bool {
        // Escape LIKE wildcards so "%" and "_" in the input match themselves
        // instead of acting as patterns.
        $escaped = strtr($substring, [
            self::LIKE_ESCAPE => self::LIKE_ESCAPE . self::LIKE_ESCAPE,
            '%' => self::LIKE_ESCAPE . '%',
            '_' => self::LIKE_ESCAPE . '_',
        ]);

        $qb = $this->tagRepository->createQueryBuilder('t')
            ->where("t.description like :substring ESCAPE '" . self::LIKE_ESCAPE . "'")
            ->setParameter('substring', '%' . $escaped . '%')
            // Only existence matters, so one row is enough.
            ->setMaxResults(1);

        return count($qb->getQuery()->getResult()) !== 0;
    }
}
|
||||||
Loading…
Reference in New Issue
Block a user