Skip to content

modules

Badfile dataclass

The class that implements the badfiles detection engine.

Attributes:

Name Type Description
zip_rules Optional[str]

The path to yara detection rules for zip files (defaults to ./rules/zip_rules.yara)

tar_rules Optional[str]

The path to yara detection rules for tar files (defaults to ./rules/tar_rules.yara)

csv_rules Optional[str]

The path to yara detection rules for csv files (defaults to ./rules/csv_rules.yara)

is_badfile(self, f)

This function checks for various indicators of potentially malicious content including: - Mime Type confusion; - Zip files with high compression rates and; - Hands f to the proper yara detection rules.

Parameters:

Name Type Description Default
f PathLike

The path of the file to be analyzed

required

Returns:

Type Description
BadfileMsg

The BadfileMsg named tuple

Source code in badfiles/badfiles.py
def is_badfile(self, f: PathLike) -> BadfileMsg:
    """Classify the file at ``f`` by running the detection checks in order.

    The checks, in the order they are applied:
        1. Mime type confusion (extension versus inspected content);
        2. High compression rate, only for files inspected as ``application/zip``;
        3. The result of ``self._rule_factory`` for the inspected mime type.

    Args:
        f (PathLike): The path of the file to be analyzed

    Returns:
        BadfileMsg: The BadfileMsg named tuple
    """
    mime_check = self._mime_type_confusion(f)

    # Index 0 being exactly False marks a mismatch: index 2 is what the
    # extension suggests and index 1 is what inspection shows.
    if mime_check[0] is False:
        suggested, inspected = mime_check[2], mime_check[1]
        return BadfileMsg(
            Classification.UNSAFE.value,
            f"Deceptive extension. File extension suggests {suggested} inspection shows {inspected}",
            pathlib.Path(f).name,
        )

    inspected_mime = mime_check[1]

    # The compression check only runs for zip archives.
    if inspected_mime == "application/zip" and self._high_compression(f):
        return BadfileMsg(
            Classification.UNSAFE.value,
            "high compression rate",
            pathlib.Path(f).name,
        )

    # Nothing conclusive so far: defer to the rules for this mime type.
    return self._rule_factory(f, inspected_mime)

BadfileMsg (tuple)

BadfileMsg(classification, message, file)

__getnewargs__(self) special

Return self as a plain tuple. Used by copy and pickle.

Source code in badfiles/badfiles.py
def __getnewargs__(self):
    """Return self as a plain tuple.  Used by copy and pickle."""
    plain = _tuple(self)
    return plain

__new__(_cls, classification, message, file) special staticmethod

Create new instance of BadfileMsg(classification, message, file)

__repr__(self) special

Return a nicely formatted representation string

Source code in badfiles/badfiles.py
def __repr__(self):
    """Return a nicely formatted representation string."""
    class_name = self.__class__.__name__
    # self is passed as the argument tuple of the %-format held in repr_fmt.
    formatted_fields = repr_fmt % self
    return class_name + formatted_fields

Classification (Enum)

The results returned by the Badfile class

Attributes:

Name Type Description
SAFE str

Nothing malicious was detected.

UNSAFE str

Malicious content was detected.

NOT_IMPLEMENTED str

The file type has not been implemented in the detection engine.

UNKNOWN str

The file type cannot be determined.

find_dde(doc_dir)

Iterates through doc_dir and searches for a directory called externalLinks.

Parameters:

Name Type Description Default
doc_dir PathLike

The target directory to be analyzed.

required

Returns:

Type Description
bool

True if a folder called externalLinks is found, otherwise False.

Source code in badfiles/utils.py
def find_dde(doc_dir: Path) -> bool:
    """Report whether doc_dir contains an entry named externalLinks, then delete doc_dir.

    The whole tree under doc_dir is searched recursively and names are compared
    case-insensitively. Any entry with that name counts as a match, whether it
    is a directory or a file.

    Note:
        doc_dir is removed with ``shutil.rmtree`` before this function returns,
        including when the search itself raises.

    Args:
        doc_dir (Path): The target directory to be analyzed.

    Returns:
        bool: True if an entry called externalLinks is found, otherwise False.
    """
    root = Path(doc_dir)
    try:
        # any() stops at the first match instead of walking the rest of the tree.
        return any(entry.name.lower() == "externallinks" for entry in root.glob("**/*"))
    finally:
        # Always discard the directory, even when the traversal fails, so a
        # failed scan does not leave the extracted tree behind.
        shutil.rmtree(doc_dir)

process_tar(f, chunk=512)

A generator function that yields tar file headers.

Parameters:

Name Type Description Default
f PathLike

The path to the tar file.

required
chunk int

The size of the tarfile chunks. Defaults to 512.

512

Yields

Generator[bytes, None, None]: Tar file header(s).

Source code in badfiles/utils.py
def process_tar(f: str, chunk: int = 512) -> Generator[bytes, None, None]:
    """A generator function that yields tar file headers.

    The file is read in consecutive blocks of ``chunk`` bytes. A block is
    treated as a header when bytes 257-261 hold the ``ustar`` magic and bytes
    125-134 are all ASCII. Blocks are inspected as raw bytes, so a header that
    contains non-ASCII bytes elsewhere (for example a UTF-8 member name) is
    still yielded rather than silently skipped.

    Args:
        f (PathLike): The path to the tar file.
        chunk (int, optional): The size of the tarfile chunks. Defaults to 512.

    Yields:
        bytes: Each block of the file that looks like a tar header.
    """
    with open(f, "rb") as tar_file:
        while True:
            block = tar_file.read(chunk)
            if not block:
                # An empty read means end of file.
                break
            # NOTE(review): the ustar size field is usually described as bytes
            # 124-135; the 125:135 slice is kept exactly as the original code
            # had it - confirm the intended range.
            if block[257:262] == b"ustar" and block[125:135].isascii():
                yield block

unzip_doc(doc, dir=PosixPath('/home/runner/work/badfiles/badfiles/tmp_doc'))

Unzips a document to enable the find_dde function.

Parameters:

Name Type Description Default
doc PathLike

The path to the document to unzip

required
dir str

The directory to unzip the document. Defaults to "./tmp_doc".

PosixPath('/home/runner/work/badfiles/badfiles/tmp_doc')

Returns:

Type Description
PathLike

The directory of the unzipped document.

Source code in badfiles/utils.py
def unzip_doc(doc: PathLike, dir=pathlib.Path(RULE_DIR).parent / "./tmp_doc") -> PathLike:
    """Unzips a document to enable the find_dde function.

    Args:
        doc (PathLike): The path to the document to unzip
        dir (PathLike, optional): The directory to unzip the document into.
            Defaults to the ``tmp_doc`` directory next to ``RULE_DIR``; the
            default is computed once, when the function is defined.

    Returns:
        PathLike: The directory of the unzipped document (``dir``, unchanged).
    """
    # The context manager closes the archive handle whether extraction
    # succeeds or raises, instead of leaving it open until garbage collection.
    with zipfile.ZipFile(doc) as archive:
        archive.extractall(path=dir)
    return dir