Skip to content

Commit

Permalink
content id
Browse files Browse the repository at this point in the history
  • Loading branch information
ponderingdemocritus committed Jan 19, 2025
1 parent fefdb86 commit d20e780
Show file tree
Hide file tree
Showing 2 changed files with 101 additions and 38 deletions.
4 changes: 2 additions & 2 deletions examples/example-twitter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ async function main() {
// Initialize core
const core = new Core(roomManager, vectorDb, processor, {
logging: {
level: LogLevel.DEBUG,
level: LogLevel.ERROR,
enableColors: true,
enableTimestamp: true,
},
Expand All @@ -56,7 +56,7 @@ async function main() {
const consciousness = new Consciousness(llmClient, roomManager, {
intervalMs: 300000, // Think every 5 minutes
minConfidence: 0.7,
logLevel: LogLevel.DEBUG,
logLevel: LogLevel.ERROR,
});

// Register Twitter inputs
Expand Down
135 changes: 99 additions & 36 deletions packages/core/src/core/processor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,16 @@ function hasRoomSupport(vectorDb: VectorDB): vectorDb is VectorDBWithRooms {
return "storeInRoom" in vectorDb && "findSimilarInRoom" in vectorDb;
}

/**
 * Produces a short, deterministic, non-cryptographic digest of a string.
 *
 * Each UTF-16 code unit is folded into a signed 32-bit accumulator with the
 * recurrence `acc * 31 + codeUnit`; the absolute value of the final
 * accumulator is rendered in base 36 to keep the result compact.
 *
 * @param str - Text to digest.
 * @returns Base-36 digits of the absolute 32-bit hash (`"0"` for an empty string).
 */
function hashString(str: string): string {
  let acc = 0;
  let index = 0;
  while (index < str.length) {
    // Math.imul wraps the multiplication to 32 bits; `| 0` wraps the sum.
    acc = (Math.imul(acc, 31) + str.charCodeAt(index)) | 0;
    index += 1;
  }
  const magnitude = acc < 0 ? -acc : acc;
  return magnitude.toString(36);
}

export class Processor {
private logger: Logger;
private availableOutputs: Map<string, Output> = new Map();
Expand Down Expand Up @@ -91,8 +101,15 @@ export class Processor {

// Check if this content was already processed
const contentId = this.generateContentId(content);

console.log("contentId", contentId);
const alreadyProcessed = await this.hasProcessedContent(contentId, room);

this.logger.info("Processor.process", "Already processed", {
contentId,
alreadyProcessed,
});

if (alreadyProcessed) {
return {
content,
Expand Down Expand Up @@ -121,6 +138,11 @@ export class Processor {
contentClassification
);

this.logger.info("Processor.process", "Suggested outputs", {
contentId,
suggestedOutputs,
});

// Store that we've processed this content
await this.markContentAsProcessed(contentId, room);

Expand Down Expand Up @@ -437,47 +459,88 @@ Return only valid JSON, no other text.`;

/**
 * Derives an identifier for a piece of content so repeated content can be
 * recognised.
 *
 * Resolution order:
 *  1. Strings: `content_<hash of the text>`.
 *  2. Arrays: `array_<id>_<id>...`, where each item contributes its `id`,
 *     its `metadata.id`, the first truthy metadata value whose key ends in
 *     "id" (case-insensitive), or a hash of `{ content, type }`.
 *  3. Objects with a truthy `id`: `obj_<id>`.
 *  4. Objects with `type === "internal_thought"` or
 *     `source === "consciousness"`: `thought_<hash of { content, timestamp }>`.
 *  5. Objects with `metadata.id`, or any truthy metadata value whose key ends
 *     in "id": `obj_<value>`.
 *  6. Anything else: `obj_<hash of { content, type, source? }>`.
 *
 * @param content - Arbitrary content; strings, arrays and plain objects are handled.
 * @returns The derived identifier. If derivation throws (for example on
 *   `null` input or a value `JSON.stringify` cannot serialise), the error is
 *   logged and a time-based `fallback_<ms>` ID is returned instead; that
 *   fallback differs between calls made at different times.
 */
private generateContentId(content: any): string {
  try {
    // Strings carry no ID field of their own, so the text itself is hashed.
    if (typeof content === "string") {
      return `content_${hashString(content)}`;
    }

    // Arrays: resolve one ID per element, then join them.
    if (Array.isArray(content)) {
      const ids = content.map((item) => {
        // Prefer an explicit ID on the item or in its metadata.
        if (item.id) return item.id;
        if (item.metadata?.id) return item.metadata.id;

        // Otherwise accept any truthy metadata value whose key ends in "id".
        for (const [key, value] of Object.entries(item.metadata || {})) {
          if (key.toLowerCase().endsWith("id") && value) {
            return value;
          }
        }

        // No ID available: hash the item's content and type.
        const relevantData = {
          content: item.content || item,
          type: item.type,
        };
        return hashString(JSON.stringify(relevantData));
      });

      return `array_${ids.join("_")}`;
    }

    // Single objects: an explicit ID wins.
    if (content.id) {
      return `obj_${content.id}`;
    }

    // Consciousness-generated content is keyed on its content and timestamp.
    if (
      content.type === "internal_thought" ||
      content.source === "consciousness"
    ) {
      const thoughtData = {
        content: content.content,
        timestamp: content.timestamp,
      };
      return `thought_${hashString(JSON.stringify(thoughtData))}`;
    }

    if (content.metadata?.id) {
      return `obj_${content.metadata.id}`;
    }

    // Accept any truthy metadata value whose key ends in "id".
    if (content.metadata) {
      for (const [key, value] of Object.entries(content.metadata)) {
        if (key.toLowerCase().endsWith("id") && value) {
          return `obj_${value}`;
        }
      }
    }

    // No ID found: hash the content, its type and (when present) its source.
    const relevantData = {
      content: content.content || content,
      type: content.type,
      // "consciousness" sources already returned above; the guard is kept so
      // the hashed shape stays the same.
      ...(content.source &&
        content.source !== "consciousness" && { source: content.source }),
    };
    return `obj_${hashString(JSON.stringify(relevantData))}`;
  } catch (error) {
    // JSON.stringify may be what threw above (e.g. circular references), so
    // guard it here to keep the error handler itself from throwing.
    let loggedContent: unknown = content;
    if (typeof content === "object") {
      try {
        loggedContent = JSON.stringify(content);
      } catch {
        loggedContent = "[unserializable content]";
      }
    }

    this.logger.error(
      "Processor.generateContentId",
      "Error generating content ID",
      {
        error,
        content: loggedContent,
      }
    );
    return `fallback_${Date.now()}`;
  }
}

// Check if we've already processed this content
Expand Down

0 comments on commit d20e780

Please sign in to comment.