Skip to content

Commit

Permalink
Migrate spam handling to new frozen mechanism and some more cleanups
Browse files Browse the repository at this point in the history
  • Loading branch information
Seldaek committed Jan 10, 2024
1 parent 9b8cfaa commit 22ada3b
Show file tree
Hide file tree
Showing 16 changed files with 75 additions and 49 deletions.
2 changes: 1 addition & 1 deletion src/Command/ClearVersionsCommand.php
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ protected function execute(InputInterface $input, OutputInterface $output): int
foreach ($packageNames as $name) {
$package = $packageRepo->findOneBy(['name' => $name]);
if ($package !== null) {
$package->setCrawledAt(new \DateTime);
$package->setCrawledAt(new \DateTimeImmutable());
$em->persist($package);
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/Command/DumpPackagesCommand.php
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ protected function execute(InputInterface $input, OutputInterface $output): int
SELECT p.id
FROM package p
LEFT JOIN download d ON (d.id = p.id AND d.type = 1)
WHERE (replacementPackage != "spam/spam" OR replacementPackage IS NULL)
WHERE p.frozen IS NULL
AND (d.total > 1000 OR d.lastUpdated > :date)
ORDER BY p.id ASC
', ['date' => date('Y-m-d H:i:s', strtotime('-4months'))]);
Expand Down
2 changes: 1 addition & 1 deletion src/Command/DumpPackagesV2Command.php
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ protected function execute(InputInterface $input, OutputInterface $output): int
try {
while ($iterations--) {
if ($force) {
$ids = $this->getEM()->getConnection()->fetchFirstColumn('SELECT id FROM package WHERE replacementPackage != "spam/spam" OR replacementPackage IS NULL ORDER BY id ASC');
$ids = $this->getEM()->getConnection()->fetchFirstColumn('SELECT id FROM package WHERE frozen IS NULL ORDER BY id ASC');
} else {
$ids = $this->getEM()->getRepository(Package::class)->getStalePackagesForDumpingV2();
}
Expand Down
3 changes: 2 additions & 1 deletion src/Command/IndexPackagesCommand.php
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

use Algolia\AlgoliaSearch\SearchClient;
use App\Entity\Package;
use App\Entity\PackageFreezeReason;
use App\Model\DownloadManager;
use App\Model\FavoriteManager;
use Composer\Pcre\Preg;
Expand Down Expand Up @@ -138,7 +139,7 @@ protected function execute(InputInterface $input, OutputInterface $output): int
}

// delete spam packages from the search index
if ($package->isAbandoned() && $package->getReplacementPackage() === 'spam/spam') {
if ($package->isFrozen() && $package->getFreezeReason() === PackageFreezeReason::Spam) {
try {
$index->deleteObject($package->getName());
$idsToUpdate[] = $package->getId();
Expand Down
15 changes: 8 additions & 7 deletions src/Controller/PackageController.php
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
namespace App\Controller;

use App\Entity\Dependent;
use App\Entity\PackageFreezeReason;
use App\Entity\PhpStat;
use App\Security\Voter\PackageActions;
use App\SecurityAdvisory\GitHubSecurityAdvisoriesSource;
Expand Down Expand Up @@ -462,7 +463,7 @@ public function markSafeAction(Request $req): RedirectResponse

#[IsGranted('ROLE_ADMIN')]
#[Route(path: '/package/{name}/unfreeze', name: 'unfreeze_package', requirements: ['name' => '[A-Za-z0-9_.-]+/[A-Za-z0-9_.-]+?'], defaults: ['_format' => 'html'], methods: ['POST'])]
public function unfreezePackageAction(Request $req, string $name, CsrfTokenManagerInterface $csrfTokenManager): RedirectResponse
public function unfreezePackageAction(Request $req, string $name, CsrfTokenManagerInterface $csrfTokenManager): Response
{
if (!$this->isCsrfTokenValid('unfreeze', (string) $req->request->get('token'))) {
throw new BadRequestHttpException('Invalid CSRF token');
Expand Down Expand Up @@ -508,7 +509,7 @@ public function viewPackageAction(Request $req, string $name, CsrfTokenManagerIn
return $package;
}

if ($package->isAbandoned() && $package->getReplacementPackage() === 'spam/spam' && !$this->isGranted('ROLE_ADMIN')) {
if ($package->isFrozen() && $package->getFreezeReason() === PackageFreezeReason::Spam && !$this->isGranted('ROLE_ANTISPAM')) {
throw new NotFoundHttpException('This is a spam package');
}

Expand Down Expand Up @@ -818,7 +819,7 @@ public function updatePackageAction(Request $req, string $name): Response
return new JsonResponse(['status' => 'error', 'message' => 'Package not found'], 404);
}

if ($package->isAbandoned() && $package->getReplacementPackage() === 'spam/spam') {
if ($package->isFrozen() && $package->getFreezeReason() === PackageFreezeReason::Spam) {
throw new NotFoundHttpException('This is a spam package');
}

Expand Down Expand Up @@ -1017,8 +1018,8 @@ public function abandonAction(Request $request, #[MapEntity] Package $package, #
$package->setAbandoned(true);
$package->setReplacementPackage(str_replace('https://packagist.org/packages/', '', (string) $form->get('replacement')->getData()));
$package->setIndexedAt(null);
$package->setCrawledAt(new DateTime());
$package->setUpdatedAt(new DateTime());
$package->setCrawledAt(new DateTimeImmutable());
$package->setUpdatedAt(new DateTimeImmutable());
$package->setDumpedAt(null);
$package->setDumpedAtV2(null);

Expand All @@ -1042,8 +1043,8 @@ public function unabandonAction(#[MapEntity] Package $package, #[CurrentUser] ?U
$package->setAbandoned(false);
$package->setReplacementPackage(null);
$package->setIndexedAt(null);
$package->setCrawledAt(new DateTime());
$package->setUpdatedAt(new DateTime());
$package->setCrawledAt(new DateTimeImmutable());
$package->setUpdatedAt(new DateTimeImmutable());
$package->setDumpedAt(null);
$package->setDumpedAtV2(null);

Expand Down
2 changes: 1 addition & 1 deletion src/Controller/UserController.php
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ public function markSpammerAction(Request $req, #[VarName('name')] User $user):

$em->getConnection()->executeStatement(
'UPDATE package p JOIN maintainers_packages mp ON mp.package_id = p.id
SET abandoned = 1, replacementPackage = "spam/spam", suspect = "spam", indexedAt = NULL, dumpedAt = "2100-01-01 00:00:00"
SET frozen = "spam", indexedAt = NULL
WHERE mp.user_id = :userId',
['userId' => $user->getId()]
);
Expand Down
15 changes: 8 additions & 7 deletions src/Entity/Package.php
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ enum PackageFreezeReason: string
{
case Spam = 'spam';
case RemoteIdMismatch = 'remote_id';
case Gone = 'gone';
}

/**
Expand All @@ -52,8 +53,9 @@ enum PackageFreezeReason: string
#[ORM\Index(name: 'dumped2_idx', columns: ['dumpedAtV2'])]
#[ORM\Index(name: 'repository_idx', columns: ['repository'])]
#[ORM\Index(name: 'remoteid_idx', columns: ['remoteId'])]
#[ORM\Index(name: 'dumped2_crawled_idx', columns: ['dumpedAtV2', 'crawledAt'])]
#[ORM\Index(name: 'dumped2_crawled_frozen_idx', columns: ['dumpedAtV2', 'crawledAt', 'frozen'])]
#[ORM\Index(name: 'vendor_idx', columns: ['vendor'])]
#[ORM\Index(name: 'frozen_idx', columns: ['frozen'])]
#[UniquePackage(groups: ['Create'])]
#[VendorWritable(groups: ['Create'])]
#[ValidPackageRepository(groups: ['Update', 'Default'])]
Expand Down Expand Up @@ -590,7 +592,7 @@ public function getUpdatedAt(): ?DateTimeInterface

public function wasUpdatedInTheLast24Hours(): bool
{
return $this->updatedAt && $this->updatedAt > new \DateTime('-24 hours');
return $this->updatedAt && $this->updatedAt > new \DateTimeImmutable('-24 hours');
}

public function setCrawledAt(?DateTimeInterface $crawledAt): void
Expand Down Expand Up @@ -735,9 +737,10 @@ public function getSuspect(): ?string
public function freeze(PackageFreezeReason $reason): void
{
$this->frozen = $reason;
$this->setCrawledAt($dt = new \DateTimeImmutable('2100-01-01 00:00:00'));
$this->setDumpedAt($dt);
$this->setDumpedAtV2($dt);
// force re-indexing for spam packages to ensure they get deleted from the search index
if ($reason === PackageFreezeReason::Spam) {
$this->setIndexedAt(null);
}
}

public function unfreeze(): void
Expand All @@ -747,8 +750,6 @@ public function unfreeze(): void
}
$this->frozen = null;
$this->setCrawledAt(null);
$this->setDumpedAt(null);
$this->setDumpedAtV2(null);
}

public function isFrozen(): bool
Expand Down
23 changes: 12 additions & 11 deletions src/Entity/PackageRepository.php
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ public function getPackageNamesUpdatedSince(\DateTimeInterface $date): array
$query = $this->getEntityManager()
->createQuery("
SELECT p.name FROM App\Entity\Package p
WHERE p.dumpedAt >= :date AND (p.replacementPackage IS NULL OR p.replacementPackage != 'spam/spam')
WHERE p.dumpedAt >= :date AND p.frozen IS NULL
")
->setParameters(['date' => $date]);

Expand All @@ -72,7 +72,7 @@ public function getPackageNamesUpdatedSince(\DateTimeInterface $date): array
public function getPackageNames(): array
{
$query = $this->getEntityManager()
->createQuery("SELECT p.name FROM App\Entity\Package p WHERE p.replacementPackage IS NULL OR p.replacementPackage != 'spam/spam'");
->createQuery("SELECT p.name FROM App\Entity\Package p WHERE p.frozen IS NULL");

$names = $this->getPackageNamesForQuery($query);

Expand Down Expand Up @@ -102,7 +102,7 @@ public function getProvidedNames(): array
public function getPackageNamesByType(string $type): array
{
$query = $this->getEntityManager()
->createQuery("SELECT p.name FROM App\Entity\Package p WHERE p.type = :type AND (p.replacementPackage IS NULL OR p.replacementPackage != 'spam/spam')")
->createQuery("SELECT p.name FROM App\Entity\Package p WHERE p.type = :type AND p.frozen IS NULL")
->setParameters(['type' => $type]);

return $this->getPackageNamesForQuery($query);
Expand All @@ -114,7 +114,7 @@ public function getPackageNamesByType(string $type): array
public function getPackageNamesByVendor(string $vendor): array
{
$query = $this->getEntityManager()
->createQuery("SELECT p.name FROM App\Entity\Package p WHERE p.vendor = :vendor AND (p.replacementPackage IS NULL OR p.replacementPackage != 'spam/spam')")
->createQuery("SELECT p.name FROM App\Entity\Package p WHERE p.vendor = :vendor AND p.frozen IS NULL")
->setParameters(['vendor' => $vendor]);

return $this->getPackageNamesForQuery($query);
Expand Down Expand Up @@ -166,11 +166,10 @@ public function getPackagesWithFields(array $filters, array $fields): array
$selector .= ', p.replacementPackage';
}

$where = '(p.replacementPackage IS NULL OR p.replacementPackage != :replacement)';
$where = 'p.frozen IS NULL';
foreach ($filters as $filter => $val) {
$where .= ' AND p.'.$filter.' = :'.$filter;
}
$filters['replacement'] = "spam/spam";
$query = $this->getEntityManager()
->createQuery("SELECT p.name $selector FROM App\Entity\Package p WHERE $where ORDER BY p.name")
->setParameters($filters);
Expand Down Expand Up @@ -218,6 +217,7 @@ public function getStalePackages(): array
return $conn->fetchAllAssociative(
'SELECT p.id FROM package p
WHERE p.abandoned = false
AND p.frozen IS NULL
AND (
p.crawledAt IS NULL
OR (p.autoUpdated = 0 AND p.crawledAt < :recent AND p.createdAt >= :yesterday)
Expand Down Expand Up @@ -259,6 +259,7 @@ public function getStalePackagesForDumping(): array
FROM package p
LEFT JOIN download d ON (d.id = p.id AND d.type = 1)
WHERE (p.dumpedAt IS NULL OR (p.dumpedAt <= p.crawledAt AND p.crawledAt < NOW()))
AND p.frozen IS NULL
AND (d.total > 1000 OR d.lastUpdated > :date)
ORDER BY p.crawledAt ASC
', ['date' => date('Y-m-d H:i:s', strtotime('-4months'))]);
Expand All @@ -272,7 +273,7 @@ public function isPackageDumpableForV1(Package $package): bool
SELECT p.id
FROM package p
LEFT JOIN download d ON (d.id = p.id AND d.type = 1)
WHERE p.id = :id
WHERE p.id = :id AND p.frozen IS NULL
AND (d.total > 1000 OR d.lastUpdated > :date)
', ['id' => $package->getId(), 'date' => date('Y-m-d H:i:s', strtotime('-4months'))]);
}
Expand All @@ -284,7 +285,7 @@ public function getStalePackagesForDumpingV2(): array
{
$conn = $this->getEntityManager()->getConnection();

return $conn->fetchFirstColumn('SELECT p.id FROM package p WHERE p.dumpedAtV2 IS NULL OR (p.dumpedAtV2 <= p.crawledAt AND p.crawledAt < NOW())');
return $conn->fetchFirstColumn('SELECT p.id FROM package p WHERE (p.dumpedAtV2 IS NULL OR (p.dumpedAtV2 <= p.crawledAt AND p.crawledAt < NOW())) AND p.frozen IS NULL');
}

/**
Expand Down Expand Up @@ -414,7 +415,7 @@ public function getFilteredQueryBuilder(array $filters = [], bool $orderByName =
$qb->leftJoin('v.tags', 't');
}

$qb->andWhere('(p.replacementPackage IS NULL OR p.replacementPackage != \'spam/spam\')');
$qb->andWhere('p.frozen IS NULL');

$qb->orderBy('p.abandoned');
if (true === $orderByName) {
Expand Down Expand Up @@ -466,7 +467,7 @@ public function markPackageSuspect(Package $package): void
*/
public function getSuspectPackageCount(): int
{
$sql = 'SELECT COUNT(*) count FROM package p WHERE p.suspect IS NOT NULL AND (p.replacementPackage IS NULL OR p.replacementPackage != "spam/spam")';
$sql = 'SELECT COUNT(*) count FROM package p WHERE p.suspect IS NOT NULL AND p.frozen IS NULL';

return max(0, (int) $this->getEntityManager()->getConnection()->fetchOne($sql));
}
Expand All @@ -477,7 +478,7 @@ public function getSuspectPackageCount(): int
public function getSuspectPackages(int $offset = 0, int $limit = 15): array
{
$sql = 'SELECT p.id, p.name, p.description, p.language, p.abandoned, p.replacementPackage
FROM package p WHERE p.suspect IS NOT NULL AND (p.replacementPackage IS NULL OR p.replacementPackage != "spam/spam") ORDER BY p.createdAt DESC LIMIT '.((int) $limit).' OFFSET '.((int) $offset);
FROM package p WHERE p.suspect IS NOT NULL AND p.frozen IS NULL ORDER BY p.createdAt DESC LIMIT '.((int) $limit).' OFFSET '.((int) $offset);

return $this->getEntityManager()->getConnection()->fetchAllAssociative($sql);
}
Expand Down
4 changes: 2 additions & 2 deletions src/Entity/VersionRepository.php
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,8 @@ public function remove(Version $version): void
$em = $this->getEntityManager();
$package = $version->getPackage();
$package->getVersions()->removeElement($version);
$package->setCrawledAt(new \DateTime);
$package->setUpdatedAt(new \DateTime);
$package->setCrawledAt(new \DateTimeImmutable());
$package->setUpdatedAt(new \DateTimeImmutable());
$em->persist($package);

$this->versionIdCache->deleteVersion($package, $version);
Expand Down
4 changes: 2 additions & 2 deletions src/Model/PackageManager.php
Original file line number Diff line number Diff line change
Expand Up @@ -181,8 +181,8 @@ public function notifyUpdateFailure(Package $package, \Exception $e, ?string $de
}

// make sure the package crawl time is updated so we avoid retrying failing packages more often than working ones
if (!$package->getCrawledAt() || $package->getCrawledAt() < new \DateTime()) {
$package->setCrawledAt(new \DateTime);
if (!$package->getCrawledAt() || $package->getCrawledAt() < new \DateTimeImmutable()) {
$package->setCrawledAt(new \DateTimeImmutable());
}
$this->doctrine->getManager()->flush();

Expand Down
6 changes: 3 additions & 3 deletions src/Package/SymlinkDumper.php
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

namespace App\Package;

use App\Entity\PackageFreezeReason;
use Composer\Pcre\Preg;
use Doctrine\DBAL\ArrayParameterType;
use Seld\Signal\SignalHandler;
Expand Down Expand Up @@ -193,9 +194,8 @@ public function dump(array $packageIds, bool $force = false, bool $verbose = fal

// prepare packages in memory
foreach ($packages as $package) {
// skip spam packages in the dumper in case we do a forced full dump and prevent them from being dumped for a little while
if ($package->isAbandoned() && $package->getReplacementPackage() === 'spam/spam') {
$dumpTimeUpdates['2100-01-01 00:00:00'][] = $package->getId();
// skip spam packages in the dumper in case one appears due to a race condition
if ($package->isFrozen() && $package->getFreezeReason() === PackageFreezeReason::Spam) {
continue;
}

Expand Down
10 changes: 7 additions & 3 deletions src/Package/Updater.php
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,10 @@ public function update(IOInterface $io, Config $config, Package $package, VcsRep
throw new \RuntimeException('Driver could not be established for package '.$package->getName().' ('.$package->getRepository().')');
}

if ($package->isFrozen()) {
return $package;
}

$remoteId = null;
if ($driver instanceof GitHubDriver) {
$repoData = $driver->getRepoData();
Expand Down Expand Up @@ -303,12 +307,12 @@ public function update(IOInterface $io, Config $config, Package $package, VcsRep
$io->writeError('Updated from '.$package->getRepository().' using ' . $driver::class . $usingDetails);

// make sure the package exists in the package list if for some reason adding it on submit failed
if ($package->getReplacementPackage() !== 'spam/spam' && !$this->providerManager->packageExists($package->getName())) {
if (!$this->providerManager->packageExists($package->getName())) {
$this->providerManager->insertPackage($package);
}

$package->setUpdatedAt(new \DateTime);
$package->setCrawledAt(new \DateTime);
$package->setUpdatedAt(new \DateTimeImmutable());
$package->setCrawledAt(new \DateTimeImmutable());

if ($flags & self::FORCE_DUMP) {
$package->setDumpedAt(null);
Expand Down
6 changes: 3 additions & 3 deletions src/Package/V2Dumper.php
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

namespace App\Package;

use App\Entity\PackageFreezeReason;
use App\Entity\SecurityAdvisory;
use Composer\Pcre\Preg;
use Doctrine\DBAL\ArrayParameterType;
Expand Down Expand Up @@ -111,9 +112,8 @@ public function dump(array $packageIds, bool $force = false, bool $verbose = fal

// prepare packages in memory
foreach ($packages as $package) {
// skip spam packages in the dumper in case we do a forced full dump and prevent them from being dumped for a little while
if ($package->isAbandoned() && $package->getReplacementPackage() === 'spam/spam') {
$dumpTimeUpdates['2100-01-01 00:00:00'][] = $package->getId();
// skip spam packages in the dumper in case one appears due to a race condition
if ($package->isFrozen() && $package->getFreezeReason() === PackageFreezeReason::Spam) {
continue;
}

Expand Down
Loading

0 comments on commit 22ada3b

Please sign in to comment.