Module molcrawl.compounds.utils.config
Classes
class CompoundConfig (data_preparation: Organix13PreparationConfig = <factory>)-
Expand source code
@dataclass class CompoundConfig(Config): data_preparation: Organix13PreparationConfig = field(default_factory=Organix13PreparationConfig) def __post_init__(self): if not isinstance(self.data_preparation, Organix13PreparationConfig): # type: ignore[misc] self.data_preparation = Organix13PreparationConfig(**self.data_preparation) # type: ignore[arg-type]CompoundConfig(data_preparation: molcrawl.compounds.utils.config.Organix13PreparationConfig =
<factory>)
Ancestors
Instance variables
var data_preparation : Organix13PreparationConfig
Inherited members
class Organix13PreparationConfig (organix13_dataset: str = 'learning_source_dummy/compounds/organix13',
save_path: str = <factory>,
vocab_path: str = 'assets/molecules/vocab.txt',
max_length: int = 256,
raw_data_path: str = 'learning_source_dummy/compounds')-
Expand source code
@dataclass class Organix13PreparationConfig: # Path to save the OrganiX13 dataset once is downloaded and processed by the script organix13_dataset: str = COMPOUNDS_DIR + "/organix13" # Path to save the processed and tokenized dataset save_path: str = field(default_factory=lambda: get_dataset_path("compounds", "organix13_tokenized.parquet")) # Path to the vocabulary vocab_path: str = "assets/molecules/vocab.txt" # Max length of the tokenized sequences max_length: int = 256 # Location to save raw unprocessed datasets raw_data_path: str = COMPOUNDS_DIROrganix13PreparationConfig(organix13_dataset: str = 'learning_source_dummy/compounds/organix13', save_path: str =
<factory>, vocab_path: str = 'assets/molecules/vocab.txt', max_length: int = 256, raw_data_path: str = 'learning_source_dummy/compounds')
Instance variables
var max_length : int
var organix13_dataset : str
var raw_data_path : str
var save_path : str
var vocab_path : str