# -*- coding: utf-8 -*-
import sys
import numpy as np
import os
import shutil
import datetime
import random as rnd
import yaml
import io
# MIE CLASSI
import lib.utilities as ut
import lib.envs.environmentBookIA as book_env_BookIA
from lib.simulation import Experiment
from lib.agents.AgentDDQN import DoubleDQLearningAgent
import lib.tabella as tab
# Make the parent directory importable exactly once (idempotent guard).
_parent_dir = "../"
if _parent_dir not in sys.path:
    sys.path.append(_parent_dir)
def apprendi(file_config='parametri.yaml'):
    """Train a Double-DQN agent on the book environment described by *file_config*.

    Reads the YAML configuration, pre-parses every order file of the training
    set once, then runs ``num_epoche`` epochs of Q-learning, visiting the files
    in a fresh random order each epoch.  Training aborts early once the
    wall-clock deadline ``stop_temporale`` from the config is passed.

    Side effects: writes a parameter snapshot under ``Output\\``, rotating agent
    logs under ``LOG\\``, cumulative results to ``risultati.txt`` and
    ``sintetici.txt``, and publishes the last ``env``, ``agent``, ``experiment``
    and ``state_list`` as module globals.

    :param file_config: path to the YAML parameter file.
    """
    global env
    global agent
    global experiment
    global state_list
    tempo_scaduto = False

    # ---- configuration -----------------------------------------------------
    param = ut.carica_parametri(file_config)
    environment_type = param['environment']
    tag = param['tag']
    config_agente = param['parametri_personalizzati']
    num_epoche = param['epoche']
    agente = param['agente']
    t_stop = param['stop_temporale']  # presumably [year, month, day, hour, minute]
    stop_temporale = datetime.datetime(t_stop[0], t_stop[1], t_stop[2], t_stop[3], t_stop[4])
    # NOTE(review): Windows-only '\\' separators are used throughout; the script
    # is not portable to POSIX as written — consider os.path.join.
    log_path = 'Output\\' + environment_type + '-' + agente + tag + '\\'
    if not os.path.exists(log_path):
        os.makedirs(log_path)
    # Snapshot the exact parameters of this run next to its outputs.
    with io.open(log_path + 'Parametri-' + datetime.datetime.now().strftime("%Y-%m-%d-%H-%M") + '.yaml', 'w',
                 encoding='utf8') as outfile:
        yaml.dump(param, outfile, default_flow_style=False, allow_unicode=True)
    cartella_ordini = param['training_set_ordini']
    training_set = os.listdir(cartella_ordini)
    cartella_eseguiti = param['training_set_eseguiti']
    tot_round = param['tot_round']  # total rounds per epoch, split evenly across files
    num_round = int(tot_round / len(training_set))
    ut.timestamp()
    da_stampare = []
    storico_performance = dict()
    # Truncate the agent log left over from any previous run.
    with open('LOG\\log_agente.txt', 'w') as f:
        f.write('')
    prima_epoca = True

    # ---- pre-parse every order file once -----------------------------------
    # tabelle_elaborate[i] corresponds to training_set[i].
    # BUGFIX: the TrainingSet.txt handle is now managed by `with`, so it is
    # closed even if parsing raises (it was previously held open, unused,
    # until the very end of training).
    tabelle_elaborate = []
    with open('Output\\' + environment_type + '\\TrainingSet.txt', 'w') as file_training_set:
        for file in training_set:
            print(file)
            file_training_set.write(file + '\n')
            tab_stati = tab.TabellaStati(cartella_ordini + '\\' + file, cartella_eseguiti, file_config)
            tabelle_elaborate.append(tab_stati)
    ordine_file = list(range(len(tabelle_elaborate)))

    # ---- training loop -----------------------------------------------------
    totale_step = 0
    for epoca in range(0, num_epoche):
        step_epoca = 0
        if tempo_scaduto is True:
            print('APPRENDIMENTO INTERROTTO')
            break
        print('Epoca: #', epoca, '\n')
        # BUGFIX: shuffle ONE permutation of indices per epoch.  The original
        # code shuffled `training_set` and `ordine_file` independently, so the
        # file name printed/logged (and used as the storico_performance key)
        # did not match the pre-parsed table actually trained on.
        rnd.shuffle(ordine_file)
        ut.timestamp()
        for i, indice in enumerate(ordine_file):
            if datetime.datetime.now() > stop_temporale:
                tempo_scaduto = True
                print('TEMPO SCADUTO')
                break
            file = training_set[indice]
            tab_stati = tabelle_elaborate[indice]
            if environment_type == 'RewardMidAgentSellerFixedStop':
                env = book_env_BookIA.Seller(file_config, tab_stati)
            elif environment_type == 'RewardMidAgentBuyerFixedStop':
                env = book_env_BookIA.Buyer(file_config, tab_stati)
            # NOTE(review): any other environment_type leaves `env` unset here
            # and fails below with a NameError — consider an explicit raise.
            if prima_epoca and i == 0:
                # Create the agent exactly once, on the first file of the run.
                if config_agente is None:
                    config_agente = 'lib\\agents\\AgentDDQN_param.yaml'
                agent = DoubleDQLearningAgent(config_agente, list(range(env.action_space.n)),
                                              env.state_space_dim, path=log_path)
                state_list = ut.leggi_parametro('stati_da_loggare', config_agente)
                if env.state_space_dim == len(state_list[0]):
                    print("\nCurrent initial target QValues:\n")
                    agent.printerTarget.printValues()
            print('File # ', i, ' ', file)
            experiment = Experiment(env, agent)
            risultati = experiment.run_qlearning(num_round)
            print("Last average return: ", risultati, " Last agent epsilon: ", agent.get_epsilon())
            step_epoca += experiment.get_step_num()
            print('Numero step eseguiti: ', experiment.get_step_num(), "\n")
            da_stampare = np.append(da_stampare, risultati)
            # setdefault: only the FIRST result ever recorded for a file is kept.
            storico_performance.setdefault(file, risultati)
            prima_epoca = False
        print("Current target QValues:\n")
        if env.state_space_dim == len(state_list[0]):
            agent.printerTarget.printValues()
        print("Numero step eseguiti nell'epoca: ", step_epoca, "\n")
        totale_step += step_epoca
        if agente == 'DoubleDeepQLearn':
            print("Number of samples stored so far: ", agent.get_number_of_saved_samples())
        # Archive the per-epoch agent log with a timestamp, then truncate it.
        shutil.copy('LOG\\log_agente.txt', 'LOG\\log_agente-' + agent.nome + '-' +
                    datetime.datetime.now().strftime("%Y-%m-%d-%H-%M") + '-funct.txt')
        with open('LOG\\log_agente.txt', 'w') as f:
            f.write('')
        experiment.close()
        del experiment

    # ---- wrap-up -----------------------------------------------------------
    agent.backup()
    with open('risultati.txt', 'a') as f:
        f.write('%s\n' % da_stampare)
    with open('sintetici.txt', 'a') as f:
        sintetici = [np.mean(da_stampare), np.std(da_stampare), len(da_stampare)]
        f.write('%s\n' % sintetici)
    print("Numero totale step eseguiti per training: ", totale_step)
    ut.timestamp()
if __name__ == '__main__':
    # CLI: zero args -> default config; one arg -> explicit config path.
    if len(sys.argv) == 2:
        apprendi(sys.argv[1])
    elif len(sys.argv) == 1:
        # BUGFIX: announce the fallback config BEFORE training starts; the
        # original printed it only after apprendi() returned, i.e. after the
        # entire (potentially hours-long) run, when it was useless.
        print("Using standard configuration file \"parametri.yaml\" ")
        apprendi('parametri.yaml')
    else:
        print("One parameter only!")