Skip to content
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ dependencies = [
"tiktoken>=0.12.0",
"groq>=1.1.1",
"scipy>=1.11",
"fastmemory>=0.4.3",
]

[project.scripts]
Expand Down
182 changes: 182 additions & 0 deletions scripts/authentic_atf_benchmark.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
import os
import time
import json
import re
import string
import pandas as pd
from datasets import load_dataset
import fastmemory
import nltk
from nltk.tokenize import word_tokenize
from nltk.tag import pos_tag
from huggingface_hub import hf_hub_download

# Ensure required NLTK packages are available.
# quiet=True suppresses download progress output; failures are deliberately
# non-fatal because extract_concepts() below has a no-NLTK fallback path.
try:
    nltk.download('punkt', quiet=True)
    nltk.download('punkt_tab', quiet=True)
    nltk.download('averaged_perceptron_tagger', quiet=True)
    nltk.download('averaged_perceptron_tagger_eng', quiet=True)
except Exception as e:
    print(f"Warning: NLTK download issues: {e}")

# Common filler words excluded from concept extraction (plus chat role labels).
STOP_WORDS = {"this", "that", "these", "those", "when", "where", "which", "what", "there", "their", "after", "before", "will", "have", "with", "from", "assistant", "user"}

def extract_concepts(text):
    """Extract entity/concept keywords from *text* for topological linking.

    Primary path: NLTK POS tagging — keeps up to 3 common nouns (lowercased,
    stop-words removed) plus up to 2 proper nouns, deduplicated via set()
    (order is therefore not guaranteed).

    Fallback path (when NLTK tagging is unavailable or fails): strips
    punctuation and keeps up to 5 long (>5 chars) non-stop-word tokens.

    Returns:
        A list of concept strings (possibly empty).
    """
    try:
        tokens = word_tokenize(text)
        tagged = pos_tag(tokens)
        nouns = [word.lower() for (word, pos) in tagged
                 if pos.startswith('NN') and word.lower() not in STOP_WORDS]
        proper_nouns = [word for (word, pos) in tagged if pos == 'NNP']
        return list(set(nouns[:3] + proper_nouns[:2]))
    except Exception:
        # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
        # are no longer swallowed; missing NLTK data still falls through here.
        words = text.translate(str.maketrans('', '', string.punctuation)).split()
        return [w.lower() for w in words if len(w) > 5 and w.lower() not in STOP_WORDS][:5]

def generate_atfs(segments, conversation_id):
    """Render conversational segments as a single ATF markdown string."""
    blocks = []
    for idx, segment in enumerate(segments):
        logic_text = segment.strip()
        if not logic_text:
            continue

        concepts = extract_concepts(logic_text)
        node_id = f"{conversation_id}_{idx}"

        # The action label encodes the speaker role and, when available,
        # the lead extracted concept.
        speaker = "Assistant" if "assistant:" in logic_text.lower() else "User"
        if concepts:
            action = f"Logic_{speaker}_{concepts[0].title()}"
        else:
            action = f"Dialogue_{speaker}_{idx}"

        # Edge list: link back to the conversation plus every concept.
        edges = [f"[{conversation_id}]"] + [f"[{c}]" for c in concepts]

        # Escape backslashes/quotes and flatten newlines for the Rust parser.
        safe_logic = (
            logic_text.replace('\\', '\\\\')
                      .replace('"', '\\"')
                      .replace('\n', ' ')
        )

        blocks.append(
            f"## [ID: {node_id}]\n"
            f"**Action:** {action}\n"
            f"**Input:** {{Data}}\n"
            f"**Logic:** {safe_logic}\n"
            f"**Data_Connections:** {', '.join(edges)}\n"
            f"**Access:** Open\n"
            f"**Events:** Ingest\n\n"
        )
    return "".join(blocks)

def run_beam_audit(limit=10):
    """Benchmark fastmemory indexing on the Mohammadta/BEAM "100K" split.

    Args:
        limit: maximum number of conversations to process.

    Returns:
        A list of per-conversation metric dicts (Dataset, Sample_ID, Nodes,
        Clusters, Latency_ms); an empty list if the dataset fails to load.
    """
    from itertools import islice  # local import: keeps the module import block untouched

    print("\n🚀 Initiating BEAM Forensic Audit (Mohammadta/BEAM 100K)...")
    try:
        ds = load_dataset("Mohammadta/BEAM", split="100K")
    except Exception as e:
        print(f"Error loading BEAM: {e}")
        return []

    results = []
    # islice takes the first `limit` rows lazily; the previous
    # list(ds)[:limit] materialized the entire 100K split just to keep a few.
    samples = list(islice(ds, limit))

    for row in samples:
        conv_id = row.get("conversation_id", "unknown")
        chat = row.get("chat", [])

        # Flatten sessions into "Role: content" turn strings
        # (mocking AMB _iter_turns).
        turns = []
        for session in chat:
            if isinstance(session, list):
                for turn in session:
                    role = turn.get("role", "unknown").capitalize()
                    content = turn.get("content", "")
                    turns.append(f"{role}: {content}")

        if not turns:
            continue

        atf_markdown = generate_atfs(turns, conv_id)

        # Time only the engine call; report in milliseconds.
        start_time = time.time()
        json_graph = fastmemory.process_markdown(atf_markdown)
        latency = (time.time() - start_time) * 1000

        # Cluster count approximated by counting "block_type" keys in the raw
        # JSON string, avoiding a full json.loads per sample.
        cluster_count = json_graph.count('"block_type"')
        results.append({
            "Dataset": "BEAM-100K",
            "Sample_ID": conv_id,
            "Nodes": len(turns),
            "Clusters": cluster_count,
            "Latency_ms": latency
        })
        print(f"[BEAM] Processed {conv_id}: {len(turns)} turns -> {cluster_count} clusters in {latency:.2f}ms")

    return results

def run_personamem_audit(limit=10):
    """Benchmark fastmemory indexing on bowen-upenn/PersonaMem 32K contexts.

    Args:
        limit: maximum number of shared contexts to process.

    Returns:
        A list of per-context metric dicts (Dataset, Sample_ID, Nodes,
        Clusters, Latency_ms); an empty list if download or parsing fails.
    """
    print("\n🚀 Initiating PersonaMem Forensic Audit (bowen-upenn/PersonaMem)...")
    try:
        # PersonaMem contexts are in jsonl files in the hub
        local_path = hf_hub_download(repo_id="bowen-upenn/PersonaMem", filename="shared_contexts_32k.jsonl", repo_type="dataset")
        contexts = []
        # JSONL must be decoded as UTF-8 explicitly; the previous open()
        # relied on the platform default encoding and could fail on Windows.
        with open(local_path, "r", encoding="utf-8") as f:
            for line in f:
                # Each line is a single-key object: {context_id: [turns...]}.
                entry = json.loads(line)
                ctx_id, turns = next(iter(entry.items()))
                contexts.append((ctx_id, turns))
                if len(contexts) >= limit:
                    break
    except Exception as e:
        print(f"Error loading PersonaMem: {e}")
        return []

    results = []
    for ctx_id, turns in contexts:
        segments = []
        for t in turns:
            role = t.get("role", "unknown")
            content = t.get("content", "")
            segments.append(f"[{role}] {content}")

        atf_markdown = generate_atfs(segments, ctx_id)

        # Time only the engine call; report in milliseconds.
        start_time = time.time()
        json_graph = fastmemory.process_markdown(atf_markdown)
        latency = (time.time() - start_time) * 1000

        # Cluster count approximated by counting "block_type" keys in the raw
        # JSON string, avoiding a full json.loads per context.
        cluster_count = json_graph.count('"block_type"')
        results.append({
            "Dataset": "PersonaMem-32K",
            "Sample_ID": ctx_id,
            "Nodes": len(turns),
            "Clusters": cluster_count,
            "Latency_ms": latency
        })
        print(f"[PersonaMem] Processed {ctx_id}: {len(turns)} segments -> {cluster_count} clusters in {latency:.2f}ms")

    return results

def main():
    """Run both dataset audits and persist the combined metrics to CSV."""
    print("--- FASTMEMORY AUTHENTIC BEAM SOTA AUDIT ---")

    # Run BEAM Audit (The primary correction)
    metrics = run_beam_audit(limit=15)
    # Run PersonaMem Audit
    metrics += run_personamem_audit(limit=10)

    # Guard clause: nothing collected means both audits failed to load data.
    if not metrics:
        print("\n❌ Audit failed. Check logs.")
        return

    df = pd.DataFrame(metrics)
    df.to_csv("authentic_fastmemory_metrics.csv", index=False)
    print("\n✅ CORRECTED BEAM AUDIT COMPLETE.")
    print("-" * 50)
    print(f"Total Logic Nodes: {df['Nodes'].sum()}")
    print(f"Avg Indexing Latency: {df['Latency_ms'].mean():.2f} ms")
    print(f"Total Topological Clusters: {df['Clusters'].sum()}")
    print("-" * 50)
    print("Final BEAM metrics saved to: authentic_fastmemory_metrics.csv")

# Script entry point: run the full audit when executed directly.
if __name__ == "__main__":
    main()
26 changes: 26 additions & 0 deletions scripts/authentic_fastmemory_metrics.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
Dataset,Sample_ID,Nodes,Clusters,Latency_ms
BEAM-100K,1,188,436,622.4467754364014
BEAM-100K,2,200,397,849.2159843444824
BEAM-100K,3,194,419,703.1517028808594
BEAM-100K,4,212,332,1312.8509521484375
BEAM-100K,5,238,338,1070.5039501190186
BEAM-100K,6,258,506,1182.3718547821045
BEAM-100K,7,260,476,1068.4700012207031
BEAM-100K,8,268,457,1868.4842586517334
BEAM-100K,9,270,485,1223.215103149414
BEAM-100K,10,344,567,1160.5098247528076
BEAM-100K,11,388,549,1108.1349849700928
BEAM-100K,12,392,677,1368.7989711761475
BEAM-100K,13,310,505,1391.3640975952148
BEAM-100K,14,268,497,1322.463035583496
BEAM-100K,15,272,453,1111.0010147094727
PersonaMem-32K,e898d03fec683b1cabf29f57287ff66f8a31842543ecef44b56766844c1c1301,183,305,1889.7819519042969
PersonaMem-32K,1b0b224347aea71887603d63880b90c8d37b1f58073098513b839209034c2f3b,183,289,1499.0079402923584
PersonaMem-32K,ae5c969c32dafa28ff3f884495f4655de811b061007afaf3307d7b858ff7cfae,171,301,1661.374807357788
PersonaMem-32K,5c8fb86fe80da5b203e7926407dc3a35f763d32e5891082aaae632210734b5a5,170,295,921.4968681335449
PersonaMem-32K,aa95cf5880d83a73bb98512a07a64fb873fb24d9dac2bb1862f7c00008632260,160,269,1339.721918106079
PersonaMem-32K,06f12a0c4085193a32bd1658c5f4b8a5e6e7e1f5221d7169f296130c8d69480d,195,310,1159.999132156372
PersonaMem-32K,8c336cac503ae78c7fe58a6aef0965963041cd579d1a885db4709293b1853829,213,340,905.2162170410156
PersonaMem-32K,ad5320ec1416e1e17665cee3d166d459ee29357af2a08f63131443bacc85931a,212,338,1662.8828048706055
PersonaMem-32K,a9f46aff0bd886c1e45562554ffc4d67fcee974f8cdcd41611e465971692a6f5,168,266,1663.3059978485107
PersonaMem-32K,cf26537544446b92554000ab50a3c44983a1e0b3de21e9923099792f103d84ef,161,264,1048.8677024841309
74 changes: 74 additions & 0 deletions scripts/verify_fastmemory.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
import os
import sys
import json
import time

# ZERO DEPENDENCY MOCK MODELS
class Document:
    """Minimal zero-dependency stand-in for a document record."""

    def __init__(self, id, content, user_id):
        # Store all three fields verbatim; assignment order is irrelevant.
        self.user_id = user_id
        self.content = content
        self.id = id

class Query:
    """Minimal zero-dependency stand-in for a retrieval query."""

    def __init__(self, query):
        # Keep the raw query text exactly as supplied.
        self.query = query

# Fail fast with an actionable message when the engine package is missing,
# instead of raising a bare ImportError traceback.
try:
    import fastmemory
except ImportError:
    print("!!! Error: 'fastmemory' package not found.")
    print("Please run: pip install fastmemory>=0.4.3")
    sys.exit(1)

def run_isolated_audit():
    """Smoke-test the fastmemory engine end-to-end with a fixed payload."""
    print("--- [FORENSIC MODE] FastMemory Engine Audit ---")

    # 0. Engine Health Check: a tiny two-topic probe sentence.
    print("[STEP 0] Checking Engine Health...")
    probe = "The quick brown fox jumps over the lazy dog. Cats are independent animals."
    try:
        probe_graph = fastmemory.process_markdown(probe)
        if probe_graph == "[]":
            print("FAILURE: Engine returned empty graph.")
            print("DIAGNOSIS: The embedded rust-louvain binary may not be compatible with your platform.")
            print(f" Platform: {sys.platform}, Python: {sys.version}")
            print("ACTION: pip install --force-reinstall fastmemory>=0.4.3")
            return
        print(f"SUCCESS: Engine is responsive (output: {len(probe_graph)} chars)")
    except Exception as e:
        print(f"CRASH: Engine failed: {e}")
        return

    # 1. Forensic Payload: three small mock documents joined into one text.
    docs = [
        Document("doc_company", "FastBuilder.AI is a leader in Sovereign AI.", "audit_user"),
        Document("doc_tech", "Our topological memory graphs achieve high precision on BEAM.", "audit_user"),
        Document("doc_login", "The master vault code is 1234-AX-99.", "audit_user"),
    ]
    full_text = " ".join(doc.content for doc in docs)

    print("\n[STEP 1] Running Engine Indexing...")
    try:
        json_graph = fastmemory.process_markdown(full_text)
        if json_graph == "[]":
            print("FAILURE: Engine returned empty graph [].")
            return
        print(f"SUCCESS: Graph generated (len: {len(json_graph)})")
    except Exception as e:
        print(f"CRASH: Engine failed to process input: {e}")
        return

    # 2. Content Recovery Check: the output must parse as a JSON list of
    # cluster blocks, each carrying a "nodes" array.
    print("\n[STEP 2] Verifying Topology Structure...")
    try:
        graph = json.loads(json_graph)
        total_nodes = sum(len(block.get("nodes", [])) for block in graph)
        print(f"SUCCESS: {len(graph)} clusters, {total_nodes} total nodes")
    except json.JSONDecodeError:
        print("FAILURE: Engine returned invalid JSON")

# Script entry point: run the isolated engine audit when executed directly.
if __name__ == "__main__":
    run_isolated_audit()
2 changes: 2 additions & 0 deletions src/memory_bench/memory/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from .mem0_cloud import Mem0CloudMemoryProvider
from .hybrid_search import HybridSearchMemoryProvider
from .supermemory import SupermemoryMemoryProvider
from .fastmemory import FastMemoryProvider

REGISTRY: dict[str, type[MemoryProvider]] = {
"bm25": BM25MemoryProvider,
Expand All @@ -22,6 +23,7 @@
"mem0-cloud": Mem0CloudMemoryProvider,
"qdrant": HybridSearchMemoryProvider,
"supermemory": SupermemoryMemoryProvider,
"fastmemory": FastMemoryProvider,
}


Expand Down
2 changes: 2 additions & 0 deletions src/memory_bench/memory/base.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from __future__ import annotations
from __future__ import annotations
import asyncio
from abc import ABC, abstractmethod
from pathlib import Path
Expand Down
Loading