2023-03-12 12:02:01 +08:00
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
from orm import db_session, engine, Base, ins
|
2023-03-13 19:47:25 +08:00
|
|
|
from orm import Experiment, Sample, Result
|
2023-03-12 12:02:01 +08:00
|
|
|
from sqlalchemy.exc import OperationalError
|
2023-03-12 22:21:39 +08:00
|
|
|
from sqlalchemy import text
|
2023-03-12 12:02:01 +08:00
|
|
|
import yaml
|
|
|
|
import random
|
2023-03-13 19:47:25 +08:00
|
|
|
import pandas as pd
|
2023-03-12 12:02:01 +08:00
|
|
|
import numpy as np
|
|
|
|
import platform
|
|
|
|
|
|
|
|
|
|
|
|
class ControllerDB:
    """Set up, reset and hand out samples from the experiment database.

    All tables handled by an instance share the table-name prefix passed to
    the constructor (see ``reset_db`` and ``prepare_list_sample``).

    ``Sample.is_done_flag`` values used in this class: ``-1`` is written when
    a sample is inserted or reset, ``0`` is written by ``lock_the_sample``.
    """

    # 'meta_seed' merged with the 'test' / 'not_test' section of
    # conf_experiment.yaml; filled in by __init__.
    dct_parameter = None
    # True when the prefix given to __init__ is exactly 'test'.
    is_test: bool = None
    # Prefix shared by the table names of this experiment set.
    db_name_prefix: str = None
    # 0, not reset; 1, reset self; 2, reset all
    reset_flag: int

    # NOTE(review): the two annotations below are never assigned in this
    # class; they look like leftovers from an earlier layout - confirm before
    # removing.
    lst_saved_s_id_3: list
    lst_saved_s_id_1_2: list
    # Ids of samples still waiting to be run (is_done_flag == -1); filled by
    # prepare_list_sample and consumed by fetch_a_sample.
    lst_saved_s_id: list

    # n_sample_1_2: int

    def __init__(self, prefix, reset_flag=0):
        """Load ``conf_experiment.yaml`` and remember the run settings.

        Args:
            prefix: table-name prefix; the value ``'test'`` selects the
                ``test`` section of the configuration, anything else selects
                ``not_test``.
            reset_flag: 0, not reset; 1, reset self; 2, reset all.
        """
        # NOTE(review): yaml.full_load can build non-trivial Python objects;
        # keep this file trusted or consider yaml.safe_load.
        with open('conf_experiment.yaml') as yaml_file:
            dct_conf_experiment = yaml.full_load(yaml_file)

        self.is_test = prefix == 'test'
        self.db_name_prefix = prefix

        section = 'test' if self.is_test else 'not_test'
        self.dct_parameter = {
            'meta_seed': dct_conf_experiment['meta_seed'],
            **dct_conf_experiment[section],
        }
        print(self.dct_parameter)

        self.reset_flag = reset_flag  # 0, not reset; 1, reset self; 2, reset all
        self.lst_saved_s_id = []

    def init_tables(self):
        """Populate the experiment table, then the sample table."""
        self.fill_experiment_table()
        self.fill_sample_table()

    def fill_experiment_table(self):
        """Insert one experiment per (firm code, product code) pair found.

        Reads ``Firm_amended.csv``; a product column (from column ``'1'``
        onwards) equal to 1 marks a product of the firm in that row.
        """
        df_firm = pd.read_csv("Firm_amended.csv")
        df_firm['Code'] = df_firm['Code'].astype('string')
        df_firm.fillna(0, inplace=True)

        list_dct = []
        for _, row in df_firm.iterrows():
            code = row['Code']
            row = row['1':]
            for product_code in row.index[row == 1].to_list():
                list_dct.append({code: [product_code]})
                # NOTE(review): only the first product of the first firm is
                # registered because of the two breaks - this looks like a
                # debugging limiter; confirm the intended nesting/behaviour.
                break
            break
        # list_dct = [{'140': ['1.4.5.1']}]

        for idx_exp, dct in enumerate(list_dct):
            self.add_experiment_1(idx_exp, self.dct_parameter['n_max_trial'],
                                  dct)
            print(f'Inserted experiment for exp {idx_exp}!')

    def add_experiment_1(self, idx_exp, n_max_trial,
                         dct_list_init_remove_firm_prod):
        """Insert and commit a single ``Experiment`` row.

        Args:
            idx_exp: index of the experiment.
            n_max_trial: stored as-is on the row.
            dct_list_init_remove_firm_prod: mapping of firm code to a list of
                product codes, stored as-is on the row.
        """
        e = Experiment(
            idx_exp=idx_exp,
            n_sample=int(self.dct_parameter['n_sample']),
            n_iter=int(self.dct_parameter['n_iter']),
            n_max_trial=n_max_trial,
            dct_list_init_remove_firm_prod=dct_list_init_remove_firm_prod)
        db_session.add(e)
        db_session.commit()

    def fill_sample_table(self):
        """Insert ``n_sample`` pending samples for every stored experiment.

        Seeds are drawn from a generator seeded with ``meta_seed``, so the
        same configuration always produces the same seeds; sample ``i`` of
        every experiment shares seed ``i``.
        """
        rng = random.Random(self.dct_parameter['meta_seed'])
        lst_seed = [
            rng.getrandbits(32)
            for _ in range(int(self.dct_parameter['n_sample']))
        ]

        lst_sample = [
            Sample(e_id=experiment.id,
                   idx_sample=idx_sample + 1,
                   seed=lst_seed[idx_sample],
                   is_done_flag=-1)
            for experiment in db_session.query(Experiment).all()
            for idx_sample in range(int(experiment.n_sample))
        ]
        db_session.bulk_save_objects(lst_sample)
        db_session.commit()
        print(f'Inserted {len(lst_sample)} samples!')

    def reset_db(self, force_drop=False):
        """Create the tables if missing, otherwise optionally unlock samples.

        Args:
            force_drop: when True, first drop every existing table whose name
                starts with the prefix, so that everything is recreated.

        Raises:
            ValueError: if the tables exist and ``reset_flag`` is positive
                but neither 1 nor 2.
        """
        # first, check if tables exist
        lst_table_obj = [
            Base.metadata.tables[str_table]
            for str_table in ins.get_table_names()
            if str_table.startswith(self.db_name_prefix)
        ]

        if force_drop:
            # Tables are tried in random order until all are gone; a failed
            # drop (presumably because another table still depends on it -
            # TODO confirm) is ignored and retried on a later pass.
            while lst_table_obj:
                a_table = random.choice(lst_table_obj)
                try:
                    Base.metadata.drop_all(bind=engine, tables=[a_table])
                except (KeyError, OperationalError):
                    pass
                else:
                    lst_table_obj.remove(a_table)
                    print(
                        f"Table {a_table.name} is dropped for exp: {self.db_name_prefix}!!!"
                    )

        is_exist = len(lst_table_obj) > 0

        if is_exist:
            print(
                f"All tables exist. No need to reset for exp: {self.db_name_prefix}."
            )
            # change the is_done_flag from 0 to -1, to rerun the in-finished tasks
            if self.reset_flag > 0:
                if self.reset_flag == 2:
                    sample = db_session.query(Sample).filter(
                        Sample.is_done_flag == 0)
                elif self.reset_flag == 1:
                    sample = db_session.query(Sample).filter(
                        Sample.is_done_flag == 0,
                        Sample.computer_name == platform.node())
                else:
                    raise ValueError('Wrong reset flag')

                # Materialise first: the loop commits and changes the very
                # column the query filters on.
                for s in sample.all():
                    qry_result = db_session.query(Result).filter_by(
                        s_id=s.id)
                    if qry_result.count() > 0:
                        # Query.filter() accepts no keyword arguments, so the
                        # stale results are removed through the filter_by
                        # query built above.
                        qry_result.delete()
                        db_session.commit()
                    s.is_done_flag = -1
                    db_session.commit()
                    print(f"Reset the sample id {s.id} flag from 0 to -1")
        else:
            Base.metadata.create_all(bind=engine)
            self.init_tables()
            print(
                f"All tables are just created and initialized for exp: {self.db_name_prefix}."
            )

    def prepare_list_sample(self):
        """Report the sample count and collect the ids of pending samples.

        Ids of samples whose ``is_done_flag`` is -1 are appended to
        ``self.lst_saved_s_id``; the list is not cleared first.
        """
        res = db_session.execute(
            text(f'SELECT count(*) FROM {self.db_name_prefix}_sample s, '
                 f'{self.db_name_prefix}_experiment e WHERE s.e_id=e.id '
                 )).scalar()
        n_sample = 0 if res is None else res
        print(f'There are a total of {n_sample} samples.')

        res = db_session.execute(
            text(
                f'SELECT id FROM {self.db_name_prefix}_sample WHERE is_done_flag = -1'
            ))
        self.lst_saved_s_id.extend(row[0] for row in res)

    @staticmethod
    def select_random_sample(lst_s_id):
        """Pop random ids from ``lst_s_id`` until one is still pending.

        Every id that is tried is removed from ``lst_s_id`` (the list is
        mutated in place), whether or not it is returned.

        Returns:
            The ``Sample`` whose ``is_done_flag`` is still -1, or ``None``
            once the list is exhausted.
        """
        while lst_s_id:
            s_id = random.choice(lst_s_id)
            lst_s_id.remove(s_id)
            res = db_session.query(Sample).filter(Sample.id == int(s_id),
                                                  Sample.is_done_flag == -1)
            if res.count() == 1:
                return res[0]
        return None

    def fetch_a_sample(self, s_id=None):
        """Return the sample with id ``s_id``, or a random pending one.

        Args:
            s_id: id of a specific sample; when ``None`` a random id from
                ``self.lst_saved_s_id`` is used instead.

        Returns:
            A ``Sample`` or ``None`` when nothing matches / nothing is left.
        """
        if s_id is not None:
            return db_session.query(Sample).filter(
                Sample.id == int(s_id)).first()

        return self.select_random_sample(self.lst_saved_s_id)

    @staticmethod
    def lock_the_sample(sample: Sample):
        """Mark ``sample`` as taken by this machine and commit.

        Sets ``is_done_flag`` to 0 and ``computer_name`` to the local host
        name, which is what ``reset_db`` later filters on.
        """
        sample.is_done_flag, sample.computer_name = 0, platform.node()
        db_session.commit()
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Ad-hoc check: print the number of 'first' samples with
    # is_done_flag = 1 divided by the hard-coded total 332750.
    # pprint.pprint(dct_exp_config)
    # pprint.pprint(dct_conf_problem)
    db = ControllerDB('first')
    # Raw SQL strings must be wrapped in text(), as the rest of this file
    # already does; SQLAlchemy 2.0 rejects plain strings in execute().
    ratio = db_session.execute(
        text(
            'SELECT COUNT(*) / 332750 FROM first_sample s WHERE s.is_done_flag = 1'
        )).scalar()
    print(ratio)
    # db.fill_experiment_table()
    # print(db.dct_parameter)
    # db.init_tables()
    # db.fill_sample_table()
    # pprint.pprint(dct_conf_exp)
    # db.update_bi()
    # db.reset_db(force_drop=True)
    # db.prepare_list_sample()
    #
    # for i in range(1000):
    #     if i % 10 == 0:
    #         print(i)
    #         print(len(db.lst_saved_s_id_1_2), len(db.lst_saved_s_id_3))
    #     r = db.fetch_a_sample()
    #     if i % 10 == 0:
    #         print(len(db.lst_saved_s_id_1_2), len(db.lst_saved_s_id_3))
    #         print(r, r.experiment.idx_exp)
    #     if i == 400:
    #         print()
    #     pass
|