Module molcrawl.experiment_tracker
Experiment Tracking System. A module that centrally manages the execution status, results, and logs of each process.
Sub-modules
molcrawl.experiment_tracker.api-
FastAPI-based experiment management API. Works as a backend for molcrawl-web.
molcrawl.experiment_tracker.database-
Experiment management database - SQLite based
molcrawl.experiment_tracker.helpers-
Experiment tracking helper functions. Decorators and context managers that can be easily integrated into existing scripts.
molcrawl.experiment_tracker.models-
Experiment management data model definition
molcrawl.experiment_tracker.tracker-
Experiment Tracker - Main Interface. Simple API to manage experiments from each script.
Classes
class DatasetType (value, names=None, *, module=None, qualname=None, type=None, start=1)-
Expand source code
class DatasetType(str, Enum):
    """Dataset type.

    Each member is also a ``str``, so it compares equal to its raw value.
    """

    COMPOUNDS = "compounds"
    GENOME_SEQUENCE = "genome_sequence"
    MOLECULE_NAT_LANG = "molecule_related_natural_language"
    PROTEIN_SEQUENCE = "protein_sequence"
    RNA = "rna"
    PROTEINGYM = "proteingym"
    CLINVAR = "clinvar"
    OMIM = "omim"
    COSMIC = "cosmic"
    OTHER = "other"
Ancestors
- builtins.str
- enum.Enum
Class variables
var CLINVAR
var COMPOUNDS
var COSMIC
var GENOME_SEQUENCE
var MOLECULE_NAT_LANG
var OMIM
var OTHER
var PROTEINGYM
var PROTEIN_SEQUENCE
var RNA
class Experiment (experiment_id: str,
experiment_name: str,
experiment_type: ExperimentType,
model_type: ModelType,
dataset_type: DatasetType,
status: ExperimentStatus,
created_at: datetime.datetime,
started_at: datetime.datetime | None = None,
completed_at: datetime.datetime | None = None,
total_duration_seconds: float | None = None,
config_path: str | None = None,
config: Dict[str, Any] = <factory>,
results_dir: str | None = None,
results: Dict[str, Any] = <factory>,
metrics: Dict[str, float] = <factory>,
steps: List[ExperimentStep] = <factory>,
logs: List[ExperimentLog] = <factory>,
tags: List[str] = <factory>,
notes: str = '',
environment: Dict[str, Any] = <factory>)-
Expand source code
@dataclass
class Experiment:
    """Overall information about the experiment.

    Holds identity, lifecycle timestamps, configuration, results, steps, logs
    and metadata for one experiment, and converts to and from plain
    dictionaries and JSON strings.
    """

    experiment_id: str
    experiment_name: str
    experiment_type: ExperimentType
    model_type: ModelType
    dataset_type: DatasetType
    status: ExperimentStatus
    created_at: datetime
    started_at: Optional[datetime] = None
    completed_at: Optional[datetime] = None
    total_duration_seconds: Optional[float] = None

    # Setting information
    config_path: Optional[str] = None
    config: Dict[str, Any] = field(default_factory=dict)

    # Results information
    results_dir: Optional[str] = None
    results: Dict[str, Any] = field(default_factory=dict)
    metrics: Dict[str, float] = field(default_factory=dict)

    # Steps and logs
    steps: List[ExperimentStep] = field(default_factory=list)
    logs: List[ExperimentLog] = field(default_factory=list)

    # Metadata
    tags: List[str] = field(default_factory=list)
    notes: str = ""
    environment: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain-dict form of the experiment.

        Enum fields are replaced by their ``.value``, datetimes by ISO-8601
        strings (``None`` when unset), and steps/logs by their own
        ``to_dict()`` output. The ``config``, ``results``, ``metrics``,
        ``tags`` and ``environment`` objects are passed through as-is (not
        copied).
        """
        return {
            "experiment_id": self.experiment_id,
            "experiment_name": self.experiment_name,
            "experiment_type": self.experiment_type.value,
            "model_type": self.model_type.value,
            "dataset_type": self.dataset_type.value,
            "status": self.status.value,
            "created_at": self.created_at.isoformat(),
            "started_at": self.started_at.isoformat() if self.started_at else None,
            "completed_at": self.completed_at.isoformat() if self.completed_at else None,
            "total_duration_seconds": self.total_duration_seconds,
            "config_path": self.config_path,
            "config": self.config,
            "results_dir": self.results_dir,
            "results": self.results,
            "metrics": self.metrics,
            "steps": [step.to_dict() for step in self.steps],
            "logs": [log.to_dict() for log in self.logs],
            "tags": self.tags,
            "notes": self.notes,
            "environment": self.environment,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "Experiment":
        """Build an experiment from the dictionary form produced by ``to_dict``.

        Args:
            data: Mapping with the same keys as ``to_dict`` output. It is not
                modified; conversion happens on a shallow copy.

        Returns:
            The reconstructed ``Experiment``.

        Raises:
            KeyError: If a required key is missing.
            ValueError: If an enum value or ISO timestamp is invalid.
        """
        # Work on a copy so the caller's dict keeps its raw (string) values and
        # can be parsed again or serialised afterwards.
        payload = dict(data)

        payload["experiment_type"] = ExperimentType(payload["experiment_type"])
        payload["model_type"] = ModelType(payload["model_type"])
        payload["dataset_type"] = DatasetType(payload["dataset_type"])
        payload["status"] = ExperimentStatus(payload["status"])
        payload["created_at"] = datetime.fromisoformat(payload["created_at"])

        # Optional timestamps are only parsed when present and non-empty.
        for key in ("started_at", "completed_at"):
            if payload.get(key):
                payload[key] = datetime.fromisoformat(payload[key])

        # Hand each nested parser its own copy so nested dicts owned by the
        # caller are left untouched as well.
        payload["steps"] = [ExperimentStep.from_dict(dict(step)) for step in payload.get("steps", [])]
        payload["logs"] = [ExperimentLog.from_dict(dict(log)) for log in payload.get("logs", [])]

        return cls(**payload)

    def to_json(self) -> str:
        """Convert to JSON string"""
        return json.dumps(self.to_dict(), indent=2, ensure_ascii=False)

    @classmethod
    def from_json(cls, json_str: str) -> "Experiment":
        """Restore from JSON string"""
        return cls.from_dict(json.loads(json_str))
Static methods
def from_dict(data: Dict[str, Any]) ‑> Experiment
def from_json(json_str: str) ‑> Experiment-
Restore from JSON string
Instance variables
var completed_at : datetime.datetime | None
var config : Dict[str, Any]
var config_path : str | None
var created_at : datetime.datetime
var dataset_type : DatasetType
var environment : Dict[str, Any]
var experiment_id : str
var experiment_name : str
var experiment_type : ExperimentType
var logs : List[ExperimentLog]
var metrics : Dict[str, float]
var model_type : ModelType
var notes : str
var results : Dict[str, Any]
var results_dir : str | None
var started_at : datetime.datetime | None
var status : ExperimentStatus
var steps : List[ExperimentStep]
var total_duration_seconds : float | None
Methods
def to_dict(self) ‑> Dict[str, Any]-
Expand source code
def to_dict(self) -> Dict[str, Any]:
    """Return a plain-dict form of the experiment.

    Enum fields become their ``.value``, datetimes become ISO-8601 strings
    (``None`` when unset), and steps/logs are serialised via their own
    ``to_dict()``.
    """
    started = self.started_at.isoformat() if self.started_at else None
    completed = self.completed_at.isoformat() if self.completed_at else None
    step_dicts = [item.to_dict() for item in self.steps]
    log_dicts = [item.to_dict() for item in self.logs]

    return {
        "experiment_id": self.experiment_id,
        "experiment_name": self.experiment_name,
        "experiment_type": self.experiment_type.value,
        "model_type": self.model_type.value,
        "dataset_type": self.dataset_type.value,
        "status": self.status.value,
        "created_at": self.created_at.isoformat(),
        "started_at": started,
        "completed_at": completed,
        "total_duration_seconds": self.total_duration_seconds,
        "config_path": self.config_path,
        "config": self.config,
        "results_dir": self.results_dir,
        "results": self.results,
        "metrics": self.metrics,
        "steps": step_dicts,
        "logs": log_dicts,
        "tags": self.tags,
        "notes": self.notes,
        "environment": self.environment,
    }


def to_json(self) -> str:
    """Convert to JSON string"""
    as_dict = self.to_dict()
    return json.dumps(as_dict, indent=2, ensure_ascii=False)
class ExperimentLog (timestamp: datetime.datetime, level: str, message: str, source: str | None = None)-
Expand source code
@dataclass
class ExperimentLog:
    """Experiment Log Entry"""

    timestamp: datetime
    level: str  # INFO, WARNING, ERROR, DEBUG
    message: str
    source: Optional[str] = None  # Log source (file name, function name, etc.)

    def to_dict(self) -> Dict[str, Any]:
        """Return the entry as a plain dict with an ISO-8601 ``timestamp`` string."""
        return {
            "timestamp": self.timestamp.isoformat(),
            "level": self.level,
            "message": self.message,
            "source": self.source,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "ExperimentLog":
        """Build a log entry from the dictionary form produced by ``to_dict``.

        Args:
            data: Mapping with ``timestamp`` (ISO-8601 string), ``level``,
                ``message`` and optionally ``source``. It is not modified.

        Returns:
            The reconstructed ``ExperimentLog``.
        """
        # Parse on a copy: overwriting data["timestamp"] in place would leave the
        # caller holding a datetime, breaking re-parsing and JSON serialisation.
        fields = dict(data)
        fields["timestamp"] = datetime.fromisoformat(fields["timestamp"])
        return cls(**fields)
Static methods
def from_dict(data: Dict[str, Any]) ‑> ExperimentLog
Instance variables
var level : str
var message : str
var source : str | None
var timestamp : datetime.datetime
Methods
def to_dict(self) ‑> Dict[str, Any]-
Expand source code
def to_dict(self) -> Dict[str, Any]:
    """Return the log entry as a plain dict with an ISO-8601 ``timestamp`` string."""
    stamp = self.timestamp.isoformat()
    return dict(
        timestamp=stamp,
        level=self.level,
        message=self.message,
        source=self.source,
    )
class ExperimentStatus (value, names=None, *, module=None, qualname=None, type=None, start=1)-
Expand source code
class ExperimentStatus(str, Enum):
    """Experiment Status.

    Each member is also a ``str``, so it compares equal to its raw value.
    """

    PENDING = "pending"  # Not executed yet
    RUNNING = "running"  # Running
    COMPLETED = "completed"  # Completed
    FAILED = "failed"  # Failed
    CANCELLED = "cancelled"  # Cancelled
    SKIPPED = "skipped"  # Skipped
Ancestors
- builtins.str
- enum.Enum
Class variables
var CANCELLED
var COMPLETED
var FAILED
var PENDING
var RUNNING
var SKIPPED
class ExperimentStep (step_id: str,
step_name: str,
status: ExperimentStatus,
start_time: datetime.datetime | None = None,
end_time: datetime.datetime | None = None,
duration_seconds: float | None = None,
command: str | None = None,
output_path: str | None = None,
error_message: str | None = None,
metadata: Dict[str, Any] = <factory>)-
Expand source code
@dataclass
class ExperimentStep:
    """Each step of the experiment"""

    step_id: str
    step_name: str
    status: ExperimentStatus
    start_time: Optional[datetime] = None
    end_time: Optional[datetime] = None
    duration_seconds: Optional[float] = None
    command: Optional[str] = None
    output_path: Optional[str] = None
    error_message: Optional[str] = None
    metadata: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return the step as a plain dict.

        ``status`` is replaced by its ``.value`` and the two timestamps by
        ISO-8601 strings (``None`` when unset); the remaining fields come from
        ``dataclasses.asdict``.
        """
        data = asdict(self)
        data["status"] = self.status.value
        data["start_time"] = self.start_time.isoformat() if self.start_time else None
        data["end_time"] = self.end_time.isoformat() if self.end_time else None
        return data

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "ExperimentStep":
        """Build a step from the dictionary form produced by ``to_dict``.

        Args:
            data: Mapping with the same keys as ``to_dict`` output. It is not
                modified; conversion happens on a shallow copy.

        Returns:
            The reconstructed ``ExperimentStep``.
        """
        # Convert on a copy so the caller's dict keeps its raw string values.
        fields = dict(data)
        fields["status"] = ExperimentStatus(fields["status"])

        # Timestamps are optional; parse only the ones that are set.
        for key in ("start_time", "end_time"):
            if fields.get(key):
                fields[key] = datetime.fromisoformat(fields[key])

        return cls(**fields)
Static methods
def from_dict(data: Dict[str, Any]) ‑> ExperimentStep
Instance variables
var command : str | None
var duration_seconds : float | None
var end_time : datetime.datetime | None
var error_message : str | None
var metadata : Dict[str, Any]
var output_path : str | None
var start_time : datetime.datetime | None
var status : ExperimentStatus
var step_id : str
var step_name : str
Methods
def to_dict(self) ‑> Dict[str, Any]-
Expand source code
def to_dict(self) -> Dict[str, Any]:
    """Return the step as a plain dict.

    Starts from ``dataclasses.asdict`` and then overrides ``status`` with its
    ``.value`` and both timestamps with ISO-8601 strings (``None`` when unset).
    """
    payload = asdict(self)
    payload["status"] = self.status.value
    for key in ("start_time", "end_time"):
        moment = getattr(self, key)
        payload[key] = moment.isoformat() if moment else None
    return payload
class ExperimentTracker (db_path: str | None = None)-
Expand source code
class ExperimentTracker:
    """
    Experiment tracker.

    Thin facade over ``ExperimentDatabase``: every state change loads the
    experiment, updates it, saves it back and appends a log entry.

    Usage:
        tracker = ExperimentTracker()
        exp_id = tracker.start_experiment(
            name="GPT2 ProteinGym Training",
            experiment_type=ExperimentType.TRAINING,
            model_type=ModelType.GPT2,
            dataset_type=DatasetType.PROTEINGYM
        )

        step_id = tracker.start_step(exp_id, "data_loading", "Load dataset")
        # ... process ...
        tracker.complete_step(exp_id, step_id)

        tracker.complete_experiment(exp_id, results={"accuracy": 0.95})
    """

    def __init__(self, db_path: Optional[str] = None):
        """
        Args:
            db_path: Database file path. If not specified, defaults to
                ``experiment_data/experiments.db`` under the directory three
                levels above this file; the ``experiment_data`` directory is
                created if it does not exist.
        """
        if db_path is None:
            # Default database location
            project_root = Path(__file__).parent.parent.parent
            db_dir = project_root / "experiment_data"
            db_dir.mkdir(exist_ok=True)
            db_path = str(db_dir / "experiments.db")

        self.db = ExperimentDatabase(db_path)
        # ID of the experiment most recently started through this tracker.
        self.current_experiment_id: Optional[str] = None

    def start_experiment(
        self,
        name: str,
        experiment_type: ExperimentType,
        model_type: ModelType,
        dataset_type: DatasetType,
        config: Optional[Dict[str, Any]] = None,
        config_path: Optional[str] = None,
        tags: Optional[List[str]] = None,
        notes: str = "",
    ) -> str:
        """
        Start experiment

        Args:
            name: Experiment name
            experiment_type: Experiment type
            model_type: Model type
            dataset_type: Dataset type
            config: configuration information
            config_path: configuration file path
            tags: tag list
            notes: notes

        Returns:
            Experiment ID
        """
        # Read the clock once so the ID stamp, created_at and started_at agree.
        now = datetime.now()
        stamp = now.strftime("%Y%m%d_%H%M%S")
        experiment_id = f"{model_type.value}_{dataset_type.value}_{stamp}_{uuid.uuid4().hex[:8]}"

        # Get environment information
        environment = {
            "hostname": os.environ.get("HOSTNAME", "unknown"),
            "user": os.environ.get("USER", "unknown"),
            # NOTE(review): CONDA_PYTHON_EXE looks like an interpreter path
            # rather than a version string - confirm this is intended.
            "python_version": os.environ.get("CONDA_PYTHON_EXE", "unknown"),
            "conda_env": os.environ.get("CONDA_DEFAULT_ENV", "unknown"),
            "learning_source_dir": os.environ.get("LEARNING_SOURCE_DIR", "unknown"),
            "pwd": os.getcwd(),
        }

        experiment = Experiment(
            experiment_id=experiment_id,
            experiment_name=name,
            experiment_type=experiment_type,
            model_type=model_type,
            dataset_type=dataset_type,
            status=ExperimentStatus.RUNNING,
            created_at=now,
            started_at=now,
            config=config or {},
            config_path=config_path,
            tags=tags or [],
            notes=notes,
            environment=environment,
        )

        self.db.save_experiment(experiment)
        self.current_experiment_id = experiment_id

        self.log(experiment_id, "INFO", f"Experiment started: {name}")

        return experiment_id

    def start_step(
        self,
        experiment_id: str,
        step_id: str,
        step_name: str,
        command: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> str:
        """
        Start a step

        Args:
            experiment_id: Experiment ID
            step_id: Step ID
            step_name: Step name
            command: execution command
            metadata: metadata

        Returns:
            Step ID

        Raises:
            ValueError: If the experiment does not exist.
        """
        experiment = self.db.get_experiment(experiment_id)
        if not experiment:
            raise ValueError(f"Experiment {experiment_id} not found")

        step = ExperimentStep(
            step_id=step_id,
            step_name=step_name,
            status=ExperimentStatus.RUNNING,
            start_time=datetime.now(),
            command=command,
            metadata=metadata or {},
        )

        experiment.steps.append(step)
        self.db.save_experiment(experiment)

        self.log(experiment_id, "INFO", f"Step started: {step_name}")

        return step_id

    def complete_step(
        self,
        experiment_id: str,
        step_id: str,
        output_path: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> None:
        """
        Complete the step

        Args:
            experiment_id: Experiment ID
            step_id: Step ID
            output_path: output path
            metadata: additional metadata

        Raises:
            ValueError: If the experiment does not exist.
        """
        experiment = self.db.get_experiment(experiment_id)
        if not experiment:
            raise ValueError(f"Experiment {experiment_id} not found")

        for step in experiment.steps:
            if step.step_id == step_id:
                step.status = ExperimentStatus.COMPLETED
                step.end_time = datetime.now()
                if step.start_time:
                    step.duration_seconds = (step.end_time - step.start_time).total_seconds()
                step.output_path = output_path
                if metadata:
                    step.metadata.update(metadata)
                break
        else:
            # No step matched: do not record a completion that never happened.
            self.log(experiment_id, "WARNING", f"Step not found: {step_id}")
            return

        self.db.save_experiment(experiment)
        self.log(experiment_id, "INFO", f"Step completed: {step_id}")

    def fail_step(self, experiment_id: str, step_id: str, error_message: str) -> None:
        """
        Put a step in a failed state

        Args:
            experiment_id: Experiment ID
            step_id: Step ID
            error_message: Error message

        Raises:
            ValueError: If the experiment does not exist.
        """
        experiment = self.db.get_experiment(experiment_id)
        if not experiment:
            raise ValueError(f"Experiment {experiment_id} not found")

        for step in experiment.steps:
            if step.step_id == step_id:
                step.status = ExperimentStatus.FAILED
                step.end_time = datetime.now()
                if step.start_time:
                    step.duration_seconds = (step.end_time - step.start_time).total_seconds()
                step.error_message = error_message
                break
        else:
            # No step matched: keep the error text in the log, but flag that
            # it could not be attached to any step.
            self.log(experiment_id, "WARNING", f"Step not found: {step_id} - {error_message}")
            return

        self.db.save_experiment(experiment)
        self.log(experiment_id, "ERROR", f"Step failed: {step_id} - {error_message}")

    def complete_experiment(
        self,
        experiment_id: str,
        results: Optional[Dict[str, Any]] = None,
        metrics: Optional[Dict[str, float]] = None,
        results_dir: Optional[str] = None,
    ) -> None:
        """
        Complete the experiment

        Args:
            experiment_id: Experiment ID
            results: Results information
            metrics: metrics
            results_dir: results directory

        Raises:
            ValueError: If the experiment does not exist.
        """
        experiment = self.db.get_experiment(experiment_id)
        if not experiment:
            raise ValueError(f"Experiment {experiment_id} not found")

        experiment.status = ExperimentStatus.COMPLETED
        experiment.completed_at = datetime.now()
        if experiment.started_at:
            experiment.total_duration_seconds = (experiment.completed_at - experiment.started_at).total_seconds()

        # Empty/None arguments leave the stored values untouched.
        if results:
            experiment.results = results
        if metrics:
            experiment.metrics = metrics
        if results_dir:
            experiment.results_dir = results_dir

        self.db.save_experiment(experiment)
        self.log(experiment_id, "INFO", "Experiment completed successfully")

    def fail_experiment(self, experiment_id: str, error_message: str) -> None:
        """
        Put the experiment into a failed state

        Args:
            experiment_id: Experiment ID
            error_message: Error message

        Raises:
            ValueError: If the experiment does not exist.
        """
        experiment = self.db.get_experiment(experiment_id)
        if not experiment:
            raise ValueError(f"Experiment {experiment_id} not found")

        experiment.status = ExperimentStatus.FAILED
        experiment.completed_at = datetime.now()
        if experiment.started_at:
            experiment.total_duration_seconds = (experiment.completed_at - experiment.started_at).total_seconds()

        self.db.save_experiment(experiment)
        self.log(experiment_id, "ERROR", f"Experiment failed: {error_message}")

    def log(self, experiment_id: str, level: str, message: str, source: Optional[str] = None) -> None:
        """
        Add a log entry

        Args:
            experiment_id: Experiment ID
            level: Log level (INFO, WARNING, ERROR, DEBUG)
            message: message
            source: source
        """
        log = ExperimentLog(timestamp=datetime.now(), level=level, message=message, source=source)
        self.db.add_log(experiment_id, log)

    def get_experiment(self, experiment_id: str) -> Optional[Experiment]:
        """Get the experiment"""
        return self.db.get_experiment(experiment_id)

    def list_experiments(
        self,
        status: Optional[ExperimentStatus] = None,
        experiment_type: Optional[ExperimentType] = None,
        model_type: Optional[ModelType] = None,
        dataset_type: Optional[DatasetType] = None,
        limit: int = 100,
        offset: int = 0,
    ) -> List[Experiment]:
        """Get experiment list"""
        return self.db.list_experiments(
            status=status,
            experiment_type=experiment_type,
            model_type=model_type,
            dataset_type=dataset_type,
            limit=limit,
            offset=offset,
        )

    def get_statistics(self) -> Dict[str, Any]:
        """Get statistics"""
        return self.db.get_statistics()

    def export_experiment_json(self, experiment_id: str, output_path: str) -> None:
        """Export experiment in JSON format

        Parent directories of ``output_path`` are created when missing.

        Raises:
            ValueError: If the experiment does not exist.
        """
        experiment = self.db.get_experiment(experiment_id)
        if not experiment:
            raise ValueError(f"Experiment {experiment_id} not found")

        output_file = Path(output_path)
        output_file.parent.mkdir(parents=True, exist_ok=True)
        output_file.write_text(experiment.to_json(), encoding="utf-8")
Usage
tracker = ExperimentTracker() exp_id = tracker.start_experiment( name="GPT2 ProteinGym Training", experiment_type=ExperimentType.TRAINING, model_type=ModelType.GPT2, dataset_type=DatasetType.PROTEINGYM )
step_id = tracker.start_step(exp_id, "data_loading", "Load dataset")
… process …
tracker.complete_step(exp_id, step_id)
tracker.complete_experiment(exp_id, results={"accuracy": 0.95})
Args
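db_path- Database file path. If not specified, defaults to experiment_data/experiments.db under the directory three levels above the tracker module file; the experiment_data directory is created if it does not exist.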
Methods
def complete_experiment(self,
experiment_id: str,
results: Dict[str, Any] | None = None,
metrics: Dict[str, float] | None = None,
results_dir: str | None = None) ‑> None-
Expand source code
def complete_experiment(
    self,
    experiment_id: str,
    results: Optional[Dict[str, Any]] = None,
    metrics: Optional[Dict[str, float]] = None,
    results_dir: Optional[str] = None,
) -> None:
    """
    Complete the experiment

    Marks the experiment COMPLETED, stamps the completion time, derives the
    total duration when a start time exists, and stores any non-empty
    results, metrics and results directory.

    Args:
        experiment_id: Experiment ID
        results: Results information
        metrics: metrics
        results_dir: results directory

    Raises:
        ValueError: If the experiment does not exist.
    """
    exp = self.db.get_experiment(experiment_id)
    if not exp:
        raise ValueError(f"Experiment {experiment_id} not found")

    exp.status = ExperimentStatus.COMPLETED
    exp.completed_at = datetime.now()
    if exp.started_at:
        elapsed = exp.completed_at - exp.started_at
        exp.total_duration_seconds = elapsed.total_seconds()

    # Falsy arguments (None or empty) leave the stored values untouched.
    if results:
        exp.results = results
    if metrics:
        exp.metrics = metrics
    if results_dir:
        exp.results_dir = results_dir

    self.db.save_experiment(exp)
    self.log(experiment_id, "INFO", "Experiment completed successfully")
Args
experiment_id- Experiment ID
results- Results information
metrics- metrics
results_dir- results directory
def complete_step(self,
experiment_id: str,
step_id: str,
output_path: str | None = None,
metadata: Dict[str, Any] | None = None) ‑> None-
Expand source code
def complete_step(
    self,
    experiment_id: str,
    step_id: str,
    output_path: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
) -> None:
    """
    Complete the step

    Marks the first step whose ID matches as COMPLETED, stamps its end time,
    derives its duration when a start time exists, and records the output
    path and extra metadata. If no step matches, a WARNING is logged and
    nothing is saved.

    Args:
        experiment_id: Experiment ID
        step_id: Step ID
        output_path: output path
        metadata: additional metadata

    Raises:
        ValueError: If the experiment does not exist.
    """
    experiment = self.db.get_experiment(experiment_id)
    if not experiment:
        raise ValueError(f"Experiment {experiment_id} not found")

    for step in experiment.steps:
        if step.step_id == step_id:
            step.status = ExperimentStatus.COMPLETED
            step.end_time = datetime.now()
            if step.start_time:
                step.duration_seconds = (step.end_time - step.start_time).total_seconds()
            step.output_path = output_path
            if metadata:
                step.metadata.update(metadata)
            break
    else:
        # No step matched: do not record a completion that never happened.
        self.log(experiment_id, "WARNING", f"Step not found: {step_id}")
        return

    self.db.save_experiment(experiment)
    self.log(experiment_id, "INFO", f"Step completed: {step_id}")
Args
experiment_id- Experiment ID
step_id- Step ID
output_path- output path
metadata- additional metadata
def export_experiment_json(self, experiment_id: str, output_path: str) ‑> None-
Expand source code
def export_experiment_json(self, experiment_id: str, output_path: str) -> None:
    """Export experiment in JSON format

    Writes the experiment's ``to_json()`` text to ``output_path`` as UTF-8,
    creating missing parent directories first.

    Raises:
        ValueError: If the experiment does not exist.
    """
    exp = self.db.get_experiment(experiment_id)
    if not exp:
        raise ValueError(f"Experiment {experiment_id} not found")

    target = Path(output_path)
    target.parent.mkdir(parents=True, exist_ok=True)

    # Serialise before touching the file, as the original expression did.
    document = exp.to_json()
    target.write_text(document, encoding="utf-8")
def fail_experiment(self, experiment_id: str, error_message: str) ‑> None-
Expand source code
def fail_experiment(self, experiment_id: str, error_message: str) -> None:
    """
    Put the experiment into a failed state

    Marks the experiment FAILED, stamps the completion time, derives the
    total duration when a start time exists, saves it and logs the error.

    Args:
        experiment_id: Experiment ID
        error_message: Error message

    Raises:
        ValueError: If the experiment does not exist.
    """
    exp = self.db.get_experiment(experiment_id)
    if not exp:
        raise ValueError(f"Experiment {experiment_id} not found")

    exp.status = ExperimentStatus.FAILED
    exp.completed_at = datetime.now()
    if exp.started_at:
        elapsed = exp.completed_at - exp.started_at
        exp.total_duration_seconds = elapsed.total_seconds()

    self.db.save_experiment(exp)
    self.log(experiment_id, "ERROR", f"Experiment failed: {error_message}")
Args
experiment_id- Experiment ID
error_message- Error message
def fail_step(self, experiment_id: str, step_id: str, error_message: str) ‑> None-
Expand source code
def fail_step(self, experiment_id: str, step_id: str, error_message: str) -> None:
    """
    Put a step in a failed state

    Marks the first step whose ID matches as FAILED, stamps its end time,
    derives its duration when a start time exists, and records the error
    message. If no step matches, a WARNING is logged and nothing is saved.

    Args:
        experiment_id: Experiment ID
        step_id: Step ID
        error_message: Error message

    Raises:
        ValueError: If the experiment does not exist.
    """
    experiment = self.db.get_experiment(experiment_id)
    if not experiment:
        raise ValueError(f"Experiment {experiment_id} not found")

    for step in experiment.steps:
        if step.step_id == step_id:
            step.status = ExperimentStatus.FAILED
            step.end_time = datetime.now()
            if step.start_time:
                step.duration_seconds = (step.end_time - step.start_time).total_seconds()
            step.error_message = error_message
            break
    else:
        # No step matched: keep the error text in the log, but flag that it
        # could not be attached to any step.
        self.log(experiment_id, "WARNING", f"Step not found: {step_id} - {error_message}")
        return

    self.db.save_experiment(experiment)
    self.log(experiment_id, "ERROR", f"Step failed: {step_id} - {error_message}")
Args
experiment_id- Experiment ID
step_id- Step ID
error_message- Error message
def get_experiment(self, experiment_id: str) ‑> Experiment | None-
Expand source code
def get_experiment(self, experiment_id: str) -> Optional[Experiment]:
    """Get the experiment

    Delegates to the database's ``get_experiment`` and returns its result
    unchanged.
    """
    found = self.db.get_experiment(experiment_id)
    return found
def get_statistics(self) ‑> Dict[str, Any]-
Expand source code
def get_statistics(self) -> Dict[str, Any]:
    """Get statistics

    Delegates to the database's ``get_statistics`` and returns its result
    unchanged.
    """
    stats = self.db.get_statistics()
    return stats
def list_experiments(self,
status: ExperimentStatus | None = None,
experiment_type: ExperimentType | None = None,
model_type: ModelType | None = None,
dataset_type: DatasetType | None = None,
limit: int = 100,
offset: int = 0) ‑> List[Experiment]-
Expand source code
def list_experiments(
    self,
    status: Optional[ExperimentStatus] = None,
    experiment_type: Optional[ExperimentType] = None,
    model_type: Optional[ModelType] = None,
    dataset_type: Optional[DatasetType] = None,
    limit: int = 100,
    offset: int = 0,
) -> List[Experiment]:
    """Get experiment list

    Forwards every filter and the paging arguments, by keyword, to the
    database's ``list_experiments`` and returns its result unchanged.
    """
    query = {
        "status": status,
        "experiment_type": experiment_type,
        "model_type": model_type,
        "dataset_type": dataset_type,
        "limit": limit,
        "offset": offset,
    }
    return self.db.list_experiments(**query)
def log(self, experiment_id: str, level: str, message: str, source: str | None = None) ‑> None-
Expand source code
def log(self, experiment_id: str, level: str, message: str, source: Optional[str] = None) -> None:
    """
    Add a log entry

    Builds an ``ExperimentLog`` stamped with the current time and hands it
    to the database's ``add_log``.

    Args:
        experiment_id: Experiment ID
        level: Log level (INFO, WARNING, ERROR, DEBUG)
        message: message
        source: source
    """
    entry = ExperimentLog(
        timestamp=datetime.now(),
        level=level,
        message=message,
        source=source,
    )
    self.db.add_log(experiment_id, entry)
Args
experiment_id- Experiment ID
level- Log level (INFO, WARNING, ERROR, DEBUG)
message- message
source- source
def start_experiment(self,
name: str,
experiment_type: ExperimentType,
model_type: ModelType,
dataset_type: DatasetType,
config: Dict[str, Any] | None = None,
config_path: str | None = None,
tags: List[str] | None = None,
notes: str = '') ‑> str-
Expand source code
def start_experiment(
    self,
    name: str,
    experiment_type: ExperimentType,
    model_type: ModelType,
    dataset_type: DatasetType,
    config: Optional[Dict[str, Any]] = None,
    config_path: Optional[str] = None,
    tags: Optional[List[str]] = None,
    notes: str = "",
) -> str:
    """
    Start experiment

    Creates a RUNNING experiment, saves it, remembers its ID in
    ``current_experiment_id`` and logs the start.

    Args:
        name: Experiment name
        experiment_type: Experiment type
        model_type: Model type
        dataset_type: Dataset type
        config: configuration information
        config_path: configuration file path
        tags: tag list
        notes: notes

    Returns:
        Experiment ID
    """
    # Read the clock once so the ID stamp, created_at and started_at agree.
    now = datetime.now()
    stamp = now.strftime("%Y%m%d_%H%M%S")
    experiment_id = f"{model_type.value}_{dataset_type.value}_{stamp}_{uuid.uuid4().hex[:8]}"

    # Get environment information
    environment = {
        "hostname": os.environ.get("HOSTNAME", "unknown"),
        "user": os.environ.get("USER", "unknown"),
        # NOTE(review): CONDA_PYTHON_EXE looks like an interpreter path rather
        # than a version string - confirm this is intended.
        "python_version": os.environ.get("CONDA_PYTHON_EXE", "unknown"),
        "conda_env": os.environ.get("CONDA_DEFAULT_ENV", "unknown"),
        "learning_source_dir": os.environ.get("LEARNING_SOURCE_DIR", "unknown"),
        "pwd": os.getcwd(),
    }

    experiment = Experiment(
        experiment_id=experiment_id,
        experiment_name=name,
        experiment_type=experiment_type,
        model_type=model_type,
        dataset_type=dataset_type,
        status=ExperimentStatus.RUNNING,
        created_at=now,
        started_at=now,
        config=config or {},
        config_path=config_path,
        tags=tags or [],
        notes=notes,
        environment=environment,
    )

    self.db.save_experiment(experiment)
    self.current_experiment_id = experiment_id

    self.log(experiment_id, "INFO", f"Experiment started: {name}")

    return experiment_id
Args
name- Experiment name
experiment_type- Experiment type
model_type- Model type
dataset_type- Dataset type
config- configuration information
config_path- configuration file path
tags- tag list
notes- notes
Returns
Experiment ID
def start_step(self,
experiment_id: str,
step_id: str,
step_name: str,
command: str | None = None,
metadata: Dict[str, Any] | None = None) ‑> str-
Expand source code
def start_step(
    self,
    experiment_id: str,
    step_id: str,
    step_name: str,
    command: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
) -> str:
    """
    Start a step

    Appends a RUNNING step, stamped with the current time, to the
    experiment, saves the experiment and logs the start.

    Args:
        experiment_id: Experiment ID
        step_id: Step ID
        step_name: Step name
        command: execution command
        metadata: metadata

    Returns:
        Step ID

    Raises:
        ValueError: If the experiment does not exist.
    """
    exp = self.db.get_experiment(experiment_id)
    if not exp:
        raise ValueError(f"Experiment {experiment_id} not found")

    new_step = ExperimentStep(
        step_id=step_id,
        step_name=step_name,
        status=ExperimentStatus.RUNNING,
        start_time=datetime.now(),
        command=command,
        metadata=metadata or {},
    )
    exp.steps.append(new_step)

    self.db.save_experiment(exp)
    self.log(experiment_id, "INFO", f"Step started: {step_name}")
    return step_id
Args
experiment_id- Experiment ID
step_id- Step ID
step_name- Step name
command- execution command
metadata- metadata
Returns
Step ID
class ExperimentType (value, names=None, *, module=None, qualname=None, type=None, start=1)-
Expand source code
class ExperimentType(str, Enum):
    """Experiment type.

    Each member is also a ``str``, so it compares equal to its raw value.
    """

    DATA_PREPARATION = "data_preparation"  # Data preparation
    TRAINING = "training"  # Model training
    EVALUATION = "evaluation"  # Evaluation
    VISUALIZATION = "visualization"  # Visualization
    INFERENCE = "inference"  # Inference
Ancestors
- builtins.str
- enum.Enum
Class variables
var DATA_PREPARATION
var EVALUATION
var INFERENCE
var TRAINING
var VISUALIZATION
class ModelType (value, names=None, *, module=None, qualname=None, type=None, start=1)-
Expand source code
class ModelType(str, Enum):
    """Model type.

    Each member is also a ``str``, so it compares equal to its raw value.
    """

    GPT2 = "gpt2"
    BERT = "bert"
    GPN = "gpn"
    OTHER = "other"
Ancestors
- builtins.str
- enum.Enum
Class variables
var BERT
var GPN
var GPT2
var OTHER