Module molcrawl.molecule_nat_lang.utils.config

Classes

class MoleculeNLConfig (data_preparation: MoleculeNLPreparationConfig = <factory>)
Expand source code
@dataclass
class MoleculeNLConfig(Config):
    """Configuration object that holds the data-preparation settings.

    Attributes:
        data_preparation: A ``MoleculeNLPreparationConfig`` instance. When
            omitted, a default-constructed instance is used. A value of any
            other type is converted in ``__post_init__``.
    """

    data_preparation: MoleculeNLPreparationConfig = field(default_factory=MoleculeNLPreparationConfig)

    def __post_init__(self):
        """Ensure ``data_preparation`` ends up as a ``MoleculeNLPreparationConfig``."""
        section = self.data_preparation

        # Already the right type: nothing to convert.
        if isinstance(section, MoleculeNLPreparationConfig):  # type: ignore[misc]
            return

        # Any other value is unpacked as keyword arguments for the config
        # class (presumably a dict of field values -- confirm against callers).
        self.data_preparation = MoleculeNLPreparationConfig(**section)  # type: ignore[arg-type]

MoleculeNLConfig(data_preparation: molcrawl.molecule_nat_lang.utils.config.MoleculeNLPreparationConfig = <factory>)

Ancestors

Instance variables

var data_preparation : MoleculeNLPreparationConfig

Inherited members

class MoleculeNLPreparationConfig (dataset: str = 'molecule_nat_lang/osunlp/SMolInstruct',
save_path: str = 'molecule_nat_lang/molecule_related_natural_language_tokenized.parquet',
num_workers: int = 12)
Expand source code
@dataclass
class MoleculeNLPreparationConfig(Config):
    """Settings for the data preparation step.

    Attributes:
        dataset: Path where the raw data is saved.
        save_path: Path where the processed and tokenized dataset is saved
            (the default is a ``.parquet`` file).
        num_workers: Number of workers to use in the data preparation.
    """

    # Path to save raw data
    dataset: str = "molecule_nat_lang/osunlp/SMolInstruct"

    # Path to save the processed and tokenized dataset
    save_path: str = "molecule_nat_lang/molecule_related_natural_language_tokenized.parquet"

    # Number of workers to use in the data preparation
    num_workers: int = 12

MoleculeNLPreparationConfig(dataset: str = 'molecule_nat_lang/osunlp/SMolInstruct', save_path: str = 'molecule_nat_lang/molecule_related_natural_language_tokenized.parquet', num_workers: int = 12)

Ancestors

Instance variables

var dataset : str
var num_workers : int
var save_path : str

Inherited members