Module molcrawl.compounds.utils.config

Classes

class CompoundConfig (data_preparation: Organix13PreparationConfig = <factory>)
Expand source code
@dataclass
class CompoundConfig(Config):
    # Dataset-preparation settings; every config gets its own default instance
    # because the default is built through a factory.
    data_preparation: Organix13PreparationConfig = field(default_factory=Organix13PreparationConfig)

    def __post_init__(self):
        """Coerce ``data_preparation`` into an ``Organix13PreparationConfig``.

        A value that is already an instance is kept untouched; anything else
        is unpacked as keyword arguments into the constructor.
        """
        prep = self.data_preparation
        if isinstance(prep, Organix13PreparationConfig):  # type: ignore[misc]
            return
        self.data_preparation = Organix13PreparationConfig(**prep)  # type: ignore[arg-type]

CompoundConfig(data_preparation: molcrawl.compounds.utils.config.Organix13PreparationConfig = <factory>)

Ancestors

Instance variables

var data_preparation : Organix13PreparationConfig

Inherited members

class Organix13PreparationConfig (organix13_dataset: str = 'learning_source_dummy/compounds/organix13',
save_path: str = <factory>,
vocab_path: str = 'assets/molecules/vocab.txt',
max_length: int = 256,
raw_data_path: str = 'learning_source_dummy/compounds')
Expand source code
@dataclass
class Organix13PreparationConfig:
    # Path where the OrganiX13 dataset is saved once the script has
    # downloaded and processed it.
    organix13_dataset: str = f"{COMPOUNDS_DIR}/organix13"

    # Path where the processed, tokenized dataset is saved. Computed by a
    # factory, so it is resolved each time an instance is created.
    save_path: str = field(
        default_factory=lambda: get_dataset_path("compounds", "organix13_tokenized.parquet"),
    )

    # Path to the vocabulary file.
    vocab_path: str = "assets/molecules/vocab.txt"

    # Maximum length of a tokenized sequence.
    max_length: int = 256

    # Location where raw, unprocessed datasets are saved.
    raw_data_path: str = COMPOUNDS_DIR

Organix13PreparationConfig(organix13_dataset: str = 'learning_source_dummy/compounds/organix13', save_path: str = <factory>, vocab_path: str = 'assets/molecules/vocab.txt', max_length: int = 256, raw_data_path: str = 'learning_source_dummy/compounds')

Instance variables

var max_length : int
var organix13_dataset : str
var raw_data_path : str
var save_path : str
var vocab_path : str