Learning Agent Architecture
Core Learning Loop:
Experience → Learning → Knowledge Update → Improved Performance
Performance Element: executes actions based on current knowledge and policies.
Learning Element: analyzes feedback and updates the knowledge base.
Critic: evaluates performance and provides feedback signals.
Problem Generator: suggests exploratory actions that create learning opportunities.
Knowledge Base: stores learned patterns, rules, and experiences.
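To make the division of labor concrete, here is a minimal sketch of how these components could be wired into the loop above. Every name in it (LearningAgentLoop, suggest, choose, evaluate, update, environment.apply) is an illustrative assumption, not a fixed API:

    class LearningAgentLoop:
        """Illustrative wiring of the four components; all names are hypothetical."""

        def __init__(self, performance_element, learning_element, critic, problem_generator):
            self.performance_element = performance_element  # acts on current knowledge
            self.learning_element = learning_element        # updates the knowledge base
            self.critic = critic                            # scores outcomes into feedback
            self.problem_generator = problem_generator      # proposes exploratory actions

        def step(self, environment, state):
            # The problem generator may override the greedy choice to gather new experience
            action = (self.problem_generator.suggest(state)
                      or self.performance_element.choose(state))
            next_state, outcome = environment.apply(action)
            feedback = self.critic.evaluate(outcome)        # feedback signal
            self.learning_element.update(state, action, feedback, next_state)
            return next_state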
Learning Agent Implementation
import random
import time

class LearningAgent:
    """Generic learning agent built around a Q-value knowledge base.

    Domain-specific hooks (explore_action, best_action, state_to_key,
    extract_patterns, add_to_knowledge_base, evaluate_performance,
    optimize_knowledge_base, is_improving) are left to subclasses.
    """

    def __init__(self, learning_rate=0.1, exploration_rate=0.1, discount_factor=0.9):
        self.knowledge_base = {}       # state key -> {action: Q-value}
        self.experience_buffer = []    # recent experience records
        self.learning_rate = learning_rate
        self.exploration_rate = exploration_rate
        self.discount_factor = discount_factor
        self.performance_history = []

    def act(self, state):
        """Choose an action based on current knowledge and exploration."""
        if random.random() < self.exploration_rate:
            # Exploration: try new actions (problem generator)
            action = self.explore_action(state)
        else:
            # Exploitation: use the best known action (performance element)
            action = self.best_action(state)
        return action

    def learn_from_experience(self, state, action, reward, next_state):
        """Update knowledge based on a single experience."""
        experience = {
            'state': state,
            'action': action,
            'reward': reward,
            'next_state': next_state,
            'timestamp': time.time(),
        }
        self.experience_buffer.append(experience)

        # Update knowledge using several learning methods
        self.update_q_values(state, action, reward, next_state)
        self.update_patterns(experience)
        self.evaluate_performance()

        # Periodic knowledge consolidation
        if len(self.experience_buffer) % 100 == 0:
            self.consolidate_knowledge()

    def update_q_values(self, state, action, reward, next_state):
        """Q-learning update rule."""
        state_key = self.state_to_key(state)
        if state_key not in self.knowledge_base:
            self.knowledge_base[state_key] = {}
        current_q = self.knowledge_base[state_key].get(action, 0)

        # Terminal transitions (next_state is None) contribute no future value
        if next_state is None:
            max_next_q = 0
        else:
            max_next_q = max(
                self.knowledge_base.get(self.state_to_key(next_state), {}).values(),
                default=0,
            )

        # Q-learning formula: Q(s,a) += alpha * (r + gamma * max Q(s',a') - Q(s,a))
        new_q = current_q + self.learning_rate * (
            reward + self.discount_factor * max_next_q - current_q
        )
        self.knowledge_base[state_key][action] = new_q

    def update_patterns(self, experience):
        """Learn patterns from experience."""
        # Pattern recognition and rule extraction
        patterns = self.extract_patterns(experience)
        for pattern in patterns:
            self.add_to_knowledge_base(pattern)

    def consolidate_knowledge(self):
        """Consolidate and optimize learned knowledge."""
        # Drop experiences older than one hour
        cutoff_time = time.time() - 3600
        self.experience_buffer = [
            exp for exp in self.experience_buffer
            if exp['timestamp'] > cutoff_time
        ]
        # Optimize the knowledge representation
        self.optimize_knowledge_base()

    def adapt_learning_rate(self):
        """Dynamically adjust learning parameters based on the critic's feedback."""
        recent_performance = self.performance_history[-10:]
        if len(recent_performance) >= 10:
            if self.is_improving(recent_performance):
                self.exploration_rate *= 0.95  # Improving: exploit more
            else:
                # Stagnating: explore more, capped so the rate stays a probability
                self.exploration_rate = min(self.exploration_rate * 1.05, 1.0)
        # Decay the learning rate over time
        self.learning_rate *= 0.999
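As a quick sanity check on update_q_values: with learning_rate = 0.1, discount_factor = 0.9, a current Q-value of 0, a reward of 1, and a best next-state value of 0.5, the update gives new_q = 0 + 0.1 * (1 + 0.9 * 0.5 - 0) = 0.145, nudging the estimate a fraction of the way toward the observed return.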
Example: Adaptive Web Scraping Agent
    class AdaptiveScrapingAgent(LearningAgent):
        def __init__(self):
            super().__init__()
            self.success_patterns = {}
            self.failure_patterns = {}

        def scrape_page(self, url):
            # Fingerprint the page, pick a scraping strategy, and learn from the outcome
            state = self.analyze_page_structure(url)
            action = self.act(state)
            try:
                result = self.execute_scraping_action(action, url)
                reward = self.calculate_reward(result)
                self.learn_from_experience(state, action, reward, None)
                return result
            except Exception:
                # Penalize failed attempts so the agent avoids this strategy next time
                self.learn_from_experience(state, action, -1, None)
                return None
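The hooks referenced above (analyze_page_structure, execute_scraping_action, calculate_reward, plus the abstract methods inherited from LearningAgent) are left to the implementer. As one hypothetical way to fill them in, the sketch below rewards the agent for the fraction of expected fields a scrape actually recovers; every name in it is an assumption made for illustration:

    class ProductScraper(AdaptiveScrapingAgent):
        """Hypothetical concretization of the domain-specific hooks."""
        STRATEGIES = ['css-selectors', 'xpath', 'regex-fallback']
        EXPECTED_FIELDS = ('title', 'price', 'description')

        # Hooks inherited from LearningAgent
        def state_to_key(self, state):
            return str(state)

        def explore_action(self, state):
            return random.choice(self.STRATEGIES)

        def best_action(self, state):
            q = self.knowledge_base.get(self.state_to_key(state), {})
            return max(q, key=q.get) if q else self.explore_action(state)

        def extract_patterns(self, experience):
            return []                      # pattern learning omitted in this sketch

        def evaluate_performance(self):
            pass

        def optimize_knowledge_base(self):
            pass

        # Hooks referenced by AdaptiveScrapingAgent
        def analyze_page_structure(self, url):
            return 'product-page'          # stand-in for real page fingerprinting

        def execute_scraping_action(self, action, url):
            # Stand-in: would apply the chosen extraction strategy to the page
            return {'title': 'Example item', 'price': '9.99', 'description': ''}

        def calculate_reward(self, result):
            # Fraction of expected fields actually extracted
            found = sum(1 for field in self.EXPECTED_FIELDS if result.get(field))
            return found / len(self.EXPECTED_FIELDS)

    scraper = ProductScraper()
    for url in ['https://example.com/item/1', 'https://example.com/item/2']:
        scraper.scrape_page(url)
    print(scraper.knowledge_base)          # Q-values per page type and strategy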