Module molcrawl.utils.image_manager

Image management utility

Utility for unified management of images resulting from dataset preparation and verification

Functions

def get_image_output_dir(model_type: str) ‑> str
Expand source code
def get_image_output_dir(model_type: str) -> str:
    """
    Return the image output directory for a model type, creating it if needed.

    The directory is ``<learning source dir>/<model_type>/image``, where the
    learning source dir is the value returned by ``check_learning_source_dir()``.

    Args:
        model_type (str): Model type ('protein_sequence', 'genome_sequence',
            'compounds', 'rna', 'molecule_nat_lang')

    Returns:
        str: Path of the image output directory.
    """
    base_dir = check_learning_source_dir()
    output_dir = os.path.join(base_dir, model_type, "image")
    # exist_ok=True keeps repeated calls from failing once the directory exists.
    os.makedirs(output_dir, exist_ok=True)
    return output_dir

Get image output directory for specified model type

Args:

model_type (str): Model type ('protein_sequence', 'genome_sequence', 'compounds', 'rna', 'molecule_nat_lang')

Returns:

str: path of image output directory

def get_image_path(model_type: str, filename: str) ‑> str
Expand source code
def get_image_path(model_type: str, filename: str) -> str:
    """
    Build the full path of an image file inside a model type's image directory.

    The directory is obtained from ``get_image_output_dir(model_type)``; the
    file itself is not created or checked for existence here.

    Args:
        model_type (str): Model type
        filename (str): File name (including extension)

    Returns:
        str: Complete image file path.
    """
    return os.path.join(get_image_output_dir(model_type), filename)

Get image path for specified model type and file name

Args:

model_type (str): Model type
filename (str): File name (including extension)

Returns:

str: complete image file path

def list_images_in_model_dir(model_type: str) ‑> list[ImageInfo]
Expand source code
def list_images_in_model_dir(model_type: str) -> list[ImageInfo]:
    """
    List the image files stored in the image directory of a model type.

    An entry is reported only when its name ends with a known image extension
    (case-insensitive) and it is a regular file (or a symlink to one), so a
    sub-directory that happens to be named like an image is not listed.
    Entries that cannot be inspected (``OSError``) are skipped.

    Args:
        model_type (str): Model type

    Returns:
        list[ImageInfo]: One dict per image file with the keys 'filename',
            'path', 'size' (``st_size``) and 'modified' (``st_mtime``), sorted
            by modification time, newest first. Empty when the directory does
            not exist or holds no images.
    """
    image_dir = get_image_output_dir(model_type)

    if not os.path.exists(image_dir):
        return []

    # str.endswith accepts a tuple, so a single call covers every extension.
    image_extensions = (".png", ".jpg", ".jpeg", ".gif", ".bmp", ".svg")
    images: list[ImageInfo] = []

    with os.scandir(image_dir) as entries:
        for entry in entries:
            if not entry.name.lower().endswith(image_extensions):
                continue
            try:
                # Skip directories and other non-file entries.
                if not entry.is_file():
                    continue
                stat = entry.stat()
            except OSError:
                # The entry vanished or is unreadable; listing is best-effort.
                continue
            images.append(
                {
                    "filename": entry.name,
                    "path": entry.path,
                    "size": stat.st_size,
                    "modified": stat.st_mtime,
                }
            )

    # Sort by update date (newest first)
    images.sort(key=lambda info: info["modified"], reverse=True)
    return images

Get a list of image files in the specified model type directory

Args:

model_type (str): Model type

Returns:

list: List of image file information [{'filename': str, 'path': str, 'size': int, 'modified': float}]

def migrate_legacy_images()
Expand source code
def migrate_legacy_images():
    """
    Copy images from the legacy ``assets/img`` directory into the new structure.

    Each file with an image extension is assigned to the first model type whose
    marker substring occurs in the file name, then copied to the path returned
    by ``get_image_path(model_type, filename)``. Files that match no model type
    are left alone, and an existing destination file is never overwritten.
    Copy failures are printed and do not stop the migration.

    Does nothing when ``assets/img`` (relative to the current working
    directory) does not exist.
    """
    legacy_dir = os.path.join("assets", "img")
    if not os.path.exists(legacy_dir):
        return

    import shutil

    image_suffixes = (".png", ".jpg", ".jpeg", ".gif", ".bmp", ".svg")

    # Substrings of the file name that identify each model type; the first
    # model type (in this order) with a matching marker wins.
    name_markers = {
        "protein_sequence": ["protein_sequence_"],
        "genome_sequence": ["genome_sequence_"],
        "compounds": ["compounds_"],
        "rna": ["rna_"],
        "molecule_nat_lang": ["molecule_nat_lang_"],
    }

    for filename in os.listdir(legacy_dir):
        if not filename.lower().endswith(image_suffixes):
            continue

        model_type = next(
            (
                candidate
                for candidate, markers in name_markers.items()
                if any(marker in filename for marker in markers)
            ),
            None,
        )
        if not model_type:
            continue

        source_path = os.path.join(legacy_dir, filename)
        dest_path = get_image_path(model_type, filename)

        # Never overwrite a file that is already present in the new location.
        if os.path.exists(dest_path):
            continue

        try:
            shutil.copy2(source_path, dest_path)
            print(f"Migrated: {source_path} -> {dest_path}")
        except Exception as e:
            print(f"Failed to migrate {source_path}: {e}")

Migrate images from existing assets/img directory to new structure

Classes

class ImageInfo (*args, **kwargs)
Expand source code
class ImageInfo(TypedDict):
    """Metadata for one image file, as produced by ``list_images_in_model_dir``."""

    filename: str  # Name of the file within the image directory
    path: str  # Image directory joined with the file name
    size: int  # ``st_size`` from the file's stat result
    modified: float  # ``st_mtime`` from the file's stat result

dict() -> new empty dictionary
dict(mapping) -> new dictionary initialized from a mapping object's (key, value) pairs
dict(iterable) -> new dictionary initialized as if via: d = {}; for k, v in iterable: d[k] = v
dict(**kwargs) -> new dictionary initialized with the name=value pairs in the keyword argument list. For example: dict(one=1, two=2)

Ancestors

  • builtins.dict

Class variables

var filename : str
var modified : float
var path : str
var size : int