Skip to content

Commit

Permalink
Merge pull request #257 from mnapoli/timeout-handling
Browse files Browse the repository at this point in the history
Try to fix #220: recover from timeouts
  • Loading branch information
mnapoli authored Mar 7, 2019
2 parents 4d1c31a + 8d145ee commit be2175b
Show file tree
Hide file tree
Showing 8 changed files with 122 additions and 7 deletions.
10 changes: 7 additions & 3 deletions runtime/php/layers/fpm/bootstrap
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,15 @@ $lambdaRuntime = LambdaRuntime::fromEnvironmentVariable();

$handler = $appRoot . '/' . getenv('_HANDLER');
if (! is_file($handler)) {
echo "Handler `$handler` doesn't exist";
exit(1);
$lambdaRuntime->failInitialization("Handler `$handler` doesn't exist");
}

$phpFpm = new PhpFpm($handler);
$phpFpm->start();
try {
$phpFpm->start();
} catch (\Throwable $e) {
$lambdaRuntime->failInitialization('Error while starting PHP-FPM', $e);
}

while (true) {
$lambdaRuntime->processNextEvent(function ($event) use ($phpFpm): array {
Expand Down
10 changes: 10 additions & 0 deletions src/Runtime/FastCgiCommunicationFailed.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
<?php declare(strict_types=1);

namespace Bref\Runtime;

/**
* There was an error while communicating with FastCGI.
*
* Thrown by PhpFpm::proxy() when sending the request to PHP-FPM fails with a
* Hoa FastCGI or Hoa socket exception; the original exception is attached as
* the "previous" exception.
*/
class FastCgiCommunicationFailed extends \Exception
{
}
33 changes: 33 additions & 0 deletions src/Runtime/LambdaRuntime.php
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,39 @@ private function signalFailure(string $invocationId, \Throwable $error): void
]);
}

/**
 * Abort the lambda and signal to the runtime API that we failed to initialize this instance.
 *
 * The message (plus the error details when an error is provided) is printed to the output,
 * then posted to the runtime API "init/error" endpoint, and finally the process exits with
 * status code 1. This method never returns.
 *
 * @see https://docs.aws.amazon.com/lambda/latest/dg/runtimes-api.html#runtimes-api-initerror
 *
 * @param string $message Description of what failed during initialization.
 * @param \Throwable|null $error Underlying error, if any. It may be omitted when the failure
 *                               is not caused by an exception (e.g. a missing handler file).
 */
public function failInitialization(string $message, ?\Throwable $error = null): void
{
    // Log the failure in CloudWatch
    echo "$message\n";

    // $error is optional: only dereference it when it was actually provided,
    // otherwise we would crash here before reporting the failure to the runtime API.
    if ($error !== null) {
        if ($error instanceof \Exception) {
            $errorMessage = get_class($error) . ': ' . $error->getMessage();
        } else {
            $errorMessage = $error->getMessage();
        }

        printf(
            "Fatal error: %s in %s:%d\nStack trace:\n%s",
            $errorMessage,
            $error->getFile(),
            $error->getLine(),
            $error->getTraceAsString()
        );
    }

    $url = "http://{$this->apiUrl}/2018-06-01/runtime/init/error";
    $this->postJson($url, [
        'errorMessage' => $error !== null ? $message . ' ' . $error->getMessage() : $message,
        // Without an exception there is no class name to report: use a generic type.
        'errorType' => $error !== null ? get_class($error) : 'Internal',
        'stackTrace' => $error !== null ? explode(PHP_EOL, $error->getTraceAsString()) : [],
    ]);

    exit(1);
}

/**
* @param mixed $data
*/
Expand Down
44 changes: 40 additions & 4 deletions src/Runtime/PhpFpm.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@
namespace Bref\Runtime;

use Bref\Http\LambdaResponse;
use Hoa\Fastcgi\Exception\Exception as HoaFastCgiException;
use Hoa\Fastcgi\Responder;
use Hoa\Socket\Client;
use Hoa\Socket\Exception\Exception as HoaSocketException;
use Symfony\Component\Process\Process;

/**
Expand All @@ -24,7 +26,7 @@ class PhpFpm
private const SOCKET = '/tmp/.bref/php-fpm.sock';
private const CONFIG = '/opt/bref/etc/php-fpm.conf';

/** @var Client */
/** @var Client|null */
private $client;
/** @var string */
private $handler;
Expand All @@ -35,7 +37,6 @@ class PhpFpm

public function __construct(string $handler, string $configFile = self::CONFIG)
{
$this->client = new Client('unix://' . self::SOCKET, 30000);
$this->handler = $handler;
$this->configFile = $configFile;
}
Expand All @@ -45,6 +46,10 @@ public function __construct(string $handler, string $configFile = self::CONFIG)
*/
public function start(): void
{
if ($this->isReady()) {
throw new \Exception('PHP-FPM has already been started, aborting');
}

if (! is_dir(dirname(self::SOCKET))) {
mkdir(dirname(self::SOCKET));
}
Expand All @@ -60,13 +65,19 @@ public function start(): void
echo $output;
});

$this->reconnect();

$this->waitForServerReady();
}

public function stop(): void
{
if ($this->fpm && $this->fpm->isRunning()) {
$this->fpm->stop();
$this->client->disconnect();
$this->fpm->stop(2);
if ($this->isReady()) {
throw new \Exception('PHP-FPM cannot be stopped');
}
}
}

Expand Down Expand Up @@ -99,7 +110,18 @@ public function proxy($event): LambdaResponse
[$requestHeaders, $requestBody] = $this->eventToFastCgiRequest($event);

$responder = new Responder($this->client);
$responder->send($requestHeaders, $requestBody);

try {
$responder->send($requestHeaders, $requestBody);
} catch (HoaFastCgiException|HoaSocketException $e) {
// Once the socket gets broken every following request is broken. We need to reconnect.
$this->reconnect();
throw new FastCgiCommunicationFailed(sprintf(
'Error communicating with PHP-FPM to read the HTTP response. A common root cause of this can be that the Lambda (or PHP) timed out, for example when trying to connect to a remote API or database, if this happens continuously check for those! Bref will reconnect to PHP-FPM to clean things up. Original exception message: %s %s',
get_class($e),
$e->getMessage()
), 0, $e);
}

$responseHeaders = $responder->getResponseHeaders();

Expand Down Expand Up @@ -216,4 +238,18 @@ private function eventToFastCgiRequest(array $event): array

return [$requestHeaders, $requestBody];
}

/**
 * Replace the current FastCGI client with a fresh connection to the PHP-FPM socket.
 *
 * If a client already exists it is disconnected first.
 */
private function reconnect(): void
{
    $staleClient = $this->client;

    if ($staleClient) {
        /**
         * Hoa magic
         *
         * @see \Hoa\Socket\Connection\Connection
         */
        $staleClient->disconnect();
    }

    $socketUri = 'unix://' . self::SOCKET;
    $this->client = new Client($socketUri, 30000);
}
}
2 changes: 2 additions & 0 deletions template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ Resources:
Description: 'Bref HTTP demo'
CodeUri: .
Handler: demo/http.php
Timeout: 30 # in seconds (API Gateway has a timeout of 30 seconds)
MemorySize: 1024 # The memory size is related to the pricing and CPU power
Runtime: provided
Layers:
- 'arn:aws:lambda:us-east-2:209497400698:layer:php-72-fpm:1'
Expand Down
3 changes: 3 additions & 0 deletions tests/Runtime/PhpFpm/php-fpm.conf
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,6 @@ catch_workers_output = yes
; New PHP 7.3 option that disables a verbose log prefix
; Disabled for now until we switch to PHP 7.3
;decorate_workers_output = no

; Very short timeout (1 second) so that timeout handling can be tested without slowing down the test suite
request_terminate_timeout = 1
5 changes: 5 additions & 0 deletions tests/Runtime/PhpFpm/timeout.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
<?php declare(strict_types=1);

// Test fixture: when a `timeout` query parameter is present, pause for that many seconds.
$seconds = $_GET['timeout'] ?? null;

if ($seconds !== null) {
    sleep((int) $seconds);
}
22 changes: 22 additions & 0 deletions tests/Runtime/PhpFpmTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
namespace Bref\Test\Runtime;

use Bref\Http\LambdaResponse;
use Bref\Runtime\FastCgiCommunicationFailed;
use Bref\Runtime\PhpFpm;
use Bref\Test\HttpRequestProxyTest;
use PHPUnit\Framework\TestCase;
Expand Down Expand Up @@ -700,6 +701,27 @@ public function test response with error_log()
], $response['headers']);
}

public function test timeouts are recovered from()
{
$this->fpm = new PhpFpm(__DIR__ . '/PhpFpm/timeout.php', __DIR__ . '/PhpFpm/php-fpm.conf');
$this->fpm->start();

try {
$this->fpm->proxy([
'httpMethod' => 'GET',
'queryStringParameters' => [
'timeout' => 10,
],
]);
$this->fail('No exception was thrown');
} catch (FastCgiCommunicationFailed $e) {
// PHP-FPM should work after that
$statusCode = $this->fpm->proxy(['httpMethod' => 'GET'])
->toApiGatewayFormat()['statusCode'];
self::assertEquals(200, $statusCode);
}
}

private function assertGlobalVariables(array $event, array $expectedGlobalVariables): void
{
$this->startFpm(__DIR__ . '/PhpFpm/request.php');
Expand Down

0 comments on commit be2175b

Please sign in to comment.