-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
YSP-672: AI: Add media payloads and pipelines #15
base: main
Are you sure you want to change the base?
Changes from 27 commits
798fac3
5ee2369
4921e6d
bfa07ad
d58ee43
a262224
87076b9
392c61f
cecb5a8
e15ad4c
dc61214
cb6446f
a7c8b11
4764994
8f6d55a
06be214
ae1a5d4
11b81e2
c3eb39a
de852f6
f8811b9
4e88876
df00a1f
2791996
1f2f4a2
98e666c
0d2801b
d7c9387
aec2f45
bc53acc
5f904fd
eb5782e
d870513
3656122
8730ef1
22b41a1
f5e1fed
a868bb9
90ef0c2
a4be970
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
# Create permission for managing AI Engine settings. | ||
administer ai engine: | ||
title: 'Administer AI Engine' | ||
description: 'Enable services and change sensative settings.' | ||
description: 'Enable services and change sensitive settings.' | ||
manage ai engine settings: | ||
title: 'Manage AI Engine Settings' | ||
description: 'Set and update AI Engine content and settings.' |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,20 +2,32 @@ | |
|
||
namespace Drupal\ai_engine_embedding\Service; | ||
|
||
use Drupal\ai_engine_feed\Service\Sources; | ||
use Drupal\Core\Config\ConfigFactoryInterface; | ||
use Drupal\Core\Entity\EntityInterface; | ||
use Drupal\Core\Entity\EntityPublishedInterface; | ||
use Drupal\Core\Http\ClientFactory; | ||
use Drupal\Core\Logger\LoggerChannelInterface; | ||
use Drupal\ai_engine_feed\Service\Sources; | ||
use Drupal\metatag\MetatagManager; | ||
|
||
/** | ||
* Service for updating the vector database as content is updated. | ||
*/ | ||
class EntityUpdate { | ||
/** | ||
* The default chunk size for sending data to the AI Embedding service. | ||
* | ||
* @var int | ||
*/ | ||
const CHUNK_SIZE_DEFAULT = 3000; | ||
|
||
/** | ||
* The allowed entity types for indexing. | ||
* | ||
* @var array | ||
*/ | ||
const ALLOWED_ENTITIES = ['node', 'media']; | ||
|
||
/** | ||
* The configuration factory. | ||
* | ||
|
@@ -150,36 +162,56 @@ public function delete(EntityInterface $entity) { | |
* a cleanup routine to find and delete out of date chunks. | ||
*/ | ||
public function addAllDocuments() { | ||
$docTypes = ['node' => 'text', 'media' => 'media']; | ||
$config = $this->configFactory->get('ai_engine_embedding.settings'); | ||
$data = $this->getData("upsert", $config, [], ""); | ||
$httpClient = $this->httpClientFactory->fromOptions([ | ||
'headers' => [ | ||
'Content-Type' => 'application/json', | ||
], | ||
]); | ||
$endpoint = $config->get('azure_embedding_service_url') . '/api/upsert'; | ||
|
||
try { | ||
$response = $httpClient->post($endpoint, ['json' => $data]); | ||
// Loop through entityTypesToSend and send. | ||
foreach (self::ALLOWED_ENTITIES as $entityType) { | ||
$data = $this->getData("upsert", $config, ['entityType' => $entityType], "", $docTypes[$entityType]); | ||
$endpoint = $config->get('azure_embedding_service_url') . '/api/upsert'; | ||
$response = $this->sendJsonPost($endpoint, $data); | ||
|
||
if ($response->getStatusCode() === 200) { | ||
$responseData = json_decode($response->getBody()->getContents(), TRUE); | ||
$this->logger->notice( | ||
'Removed node @id from vector database. Service response: @response', | ||
'Upserted node @id from vector database. Service response: @response', | ||
['@response' => print_r($responseData, TRUE)] | ||
); | ||
} | ||
else { | ||
$this->logger->notice( | ||
'Unable to remove node @id from vector database. POST failed with status code: @code', | ||
'Unable to upsert node @id to vector database. POST failed with status code: @code', | ||
['@code' => $response->getStatusCode()] | ||
); | ||
return NULL; | ||
} | ||
} | ||
} | ||
|
||
/** | ||
* Sends a post request to an endpoint with data. | ||
* | ||
* @param string $endpoint | ||
* The endpoint to send the data to. | ||
* @param array $data | ||
* The data to send. | ||
* | ||
* @return \Psr\Http\Message\ResponseInterface | ||
* The response from the post request. | ||
*/ | ||
protected function sendJsonPost($endpoint, $data) { | ||
$httpClient = $this->httpClientFactory->fromOptions([ | ||
'headers' => [ | ||
'Content-Type' => 'application/json', | ||
], | ||
]); | ||
|
||
try { | ||
return $httpClient->post($endpoint, ['json' => $data]); | ||
} | ||
catch (\Exception $e) { | ||
$this->logger->error( | ||
'An error occurred while upserting document: @error', | ||
'An error occurred while posting document: @error', | ||
['@error' => $e->getMessage()] | ||
); | ||
return NULL; | ||
|
@@ -198,41 +230,26 @@ public function addAllDocuments() { | |
*/ | ||
public function upsertDocument(EntityInterface $entity) { | ||
$config = $this->configFactory->get('ai_engine_embedding.settings'); | ||
$chunk_size = $config->get('azure_chunk_size') || CHUNK_SIZE_DEFAULT; | ||
$entityTypeId = $entity->getEntityTypeId(); | ||
$route_params = [ | ||
'entityType' => $entity->getEntityTypeId(), | ||
'entityType' => $entityTypeId, | ||
'id' => $entity->id(), | ||
]; | ||
$data = $this->getData("upsert", $config, $route_params, ""); | ||
$httpClient = $this->httpClientFactory->fromOptions([ | ||
'headers' => [ | ||
'Content-Type' => 'application/json', | ||
], | ||
]); | ||
$endpoint = $config->get('azure_embedding_service_url') . '/api/upsert'; | ||
$response = $this->sendJsonPost($endpoint, $data); | ||
|
||
try { | ||
$response = $httpClient->post($endpoint, ['json' => $data]); | ||
|
||
if ($response->getStatusCode() === 200) { | ||
$responseData = json_decode($response->getBody()->getContents(), TRUE); | ||
$this->logger->notice( | ||
'Removed node @id from vector database. Service response: @response', | ||
['@id' => $entity->id(), '@response' => print_r($responseData, TRUE)] | ||
); | ||
} | ||
else { | ||
$this->logger->notice( | ||
'Unable to remove node @id from vector database. POST failed with status code: @code', | ||
['@id' => $entity->id(), '@code' => $response->getStatusCode()] | ||
); | ||
return NULL; | ||
} | ||
if ($response->getStatusCode() === 200) { | ||
$responseData = json_decode($response->getBody()->getContents(), TRUE); | ||
$this->logger->notice( | ||
'Upserted node @id to vector database. Service response: @response', | ||
['@id' => $entity->id(), '@response' => print_r($responseData, TRUE)] | ||
); | ||
} | ||
catch (\Exception $e) { | ||
$this->logger->error( | ||
'An error occurred while upserting document: @error', | ||
['@error' => $e->getMessage()] | ||
else { | ||
$this->logger->notice( | ||
'Unable to upsert node @id to vector database. POST failed with status code: @code', | ||
['@id' => $entity->id(), '@code' => $response->getStatusCode()] | ||
); | ||
return NULL; | ||
} | ||
|
@@ -253,35 +270,20 @@ protected function removeDocument(EntityInterface $entity) { | |
"id_list" => [], | ||
"id_filter_list" => [$this->sources->getSearchIndexId($entity)], | ||
]; | ||
$httpClient = $this->httpClientFactory->fromOptions([ | ||
'headers' => [ | ||
'Content-Type' => 'application/json', | ||
], | ||
]); | ||
$endpoint = $config->get('azure_embedding_service_url') . '/api/deletebyid'; | ||
$response = $this->sendJsonPost($endpoint, $data); | ||
|
||
try { | ||
$response = $httpClient->post($endpoint, ['json' => $data]); | ||
|
||
if ($response->getStatusCode() === 200) { | ||
$responseData = json_decode($response->getBody()->getContents(), TRUE); | ||
$this->logger->notice( | ||
'Removed node @id from vector database. Service response: @response', | ||
['@id' => $entity->id(), '@response' => print_r($responseData, TRUE)] | ||
); | ||
} | ||
else { | ||
$this->logger->notice( | ||
'Unable to remove node @id from vector database. POST failed with status code: @code', | ||
['@id' => $entity->id(), '@code' => $response->getStatusCode()] | ||
); | ||
return NULL; | ||
} | ||
if ($response->getStatusCode() === 200) { | ||
$responseData = json_decode($response->getBody()->getContents(), TRUE); | ||
$this->logger->notice( | ||
'Removed node @id from vector database. Service response: @response', | ||
['@id' => $entity->id(), '@response' => print_r($responseData, TRUE)] | ||
); | ||
} | ||
catch (\Exception $e) { | ||
$this->logger->error( | ||
'An error occurred while deleting document: @error', | ||
['@error' => $e->getMessage()] | ||
else { | ||
$this->logger->notice( | ||
'Unable to remove node @id from vector database. POST failed with status code: @code', | ||
['@id' => $entity->id(), '@code' => $response->getStatusCode()] | ||
); | ||
return NULL; | ||
} | ||
|
@@ -330,7 +332,29 @@ protected function isIndexable(EntityInterface $entity) { | |
* TRUE if the entity should be embedded, FALSE otherwise. | ||
*/ | ||
protected function isSupportedEntityType(EntityInterface $entity) { | ||
return $entity->getEntityTypeId() === 'node'; | ||
$entity_type_id = $entity->getEntityTypeId(); | ||
|
||
if ($entity_type_id == 'media') { | ||
return $this->isSupportedMediaType($entity); | ||
} | ||
else { | ||
return in_array($entity->getEntityTypeId(), self::ALLOWED_ENTITIES); | ||
} | ||
} | ||
|
||
/** | ||
* Checks if a media entity's bundle is in the configured included media types. | ||
* | ||
* @param \Drupal\Core\Entity\EntityInterface $entity | ||
* The entity to check. | ||
* | ||
* @return bool | ||
* TRUE if the entity should be embedded, FALSE otherwise. | ||
*/ | ||
protected function isSupportedMediaType(EntityInterface $entity) { | ||
$config = $this->configFactory->get('ai_engine_embedding.settings'); | ||
$allowed_media_types = $config->get('included_media_types'); | ||
return in_array($entity->bundle(), $allowed_media_types); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm guessing that this will throw an error on sites that already have this module enabled but did not resave the form. Cannot check in_array on NULL. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ah good catch; I've added |
||
} | ||
|
||
/** | ||
|
@@ -386,7 +410,7 @@ public function isIndexingEnabled(EntityInterface $entity) { | |
* @return array | ||
* An array of data to send to the AI Embedding service. | ||
*/ | ||
protected function getData($action = 'upsert', $config, $route_params = [], $data = ""): array { | ||
protected function getData($action = 'upsert', $config = NULL, $route_params = [], $data = "", $doctype = 'text'): array { | ||
$allowed_actions = ['upsert']; | ||
if (!$config) { | ||
throw new \Exception('Missing configuration object.'); | ||
|
@@ -396,7 +420,13 @@ protected function getData($action = 'upsert', $config, $route_params = [], $dat | |
throw new \Exception('Invalid action provided.'); | ||
} | ||
|
||
$chunk_size = $config->get('azure_chunk_size') ?? CHUNK_SIZE_DEFAULT; | ||
$allowed_doctypes = ['text', 'media']; | ||
|
||
if (!in_array($doctype, $allowed_doctypes)) { | ||
throw new \Exception('Invalid doctype provided.'); | ||
} | ||
|
||
$chunk_size = $config->get('azure_chunk_size') ?? self::CHUNK_SIZE_DEFAULT; | ||
|
||
$data_endpoint = ""; | ||
if ($data == "") { | ||
|
@@ -405,7 +435,7 @@ protected function getData($action = 'upsert', $config, $route_params = [], $dat | |
|
||
return [ | ||
"action" => $action, | ||
"doctype" => "text", | ||
"doctype" => $doctype, | ||
"service_name" => $config->get('azure_search_service_name'), | ||
"index_name" => $config->get('azure_search_service_index'), | ||
"data" => $data, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
<?php | ||
|
||
namespace Drupal\ai_engine_feed\Annotation; | ||
|
||
use Drupal\Component\Annotation\Plugin; | ||
|
||
/** | ||
* Defines a content feed plugin annotation object. | ||
* | ||
* @Annotation | ||
*/ | ||
class ContentFeedPlugin extends Plugin { | ||
/** | ||
* The plugin ID. | ||
* | ||
* @var string | ||
*/ | ||
public $id; | ||
|
||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is not a part of your current work, but I'm just now realizing that we should have a config/install for this settings file.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah that makes sense. I can add that as part of this.