Source code for datafuzz.fuzz

# -*- coding: utf-8 -*-
"""
Fuzzer adds basic "dumb" fuzzing strategies to datafuzz.

It will apply a random set of noise and fuzz based on the
column type (or sometimes randomly).
"""
import random
from datafuzz.strategy import Strategy
from datafuzz.utils.fuzz_helpers import add_format, change_encoding, \
        to_bytes, insert_boms, nanify, bigints, hexify,  \
        sql, metachars, files, delimiter, emoji, date_to_str, \
        shift_time


class Fuzzer(Strategy):
    """
    Fuzzer is used as a strategy to add "dumb" fuzzing methods
    (i.e. random bad values).

    These transformations are mainly based on column type.

    see also: `strategy.Strategy`
    """

    def __init__(self, dataset, **kwargs):
        """
        See `strategy.Strategy`

        Additional kwargs:
            columns (list of str): list of indexes or column names

        If no columns are given, a random set will be chosen.
        """
        # `columns` is read before the parent initialiser runs, exactly as
        # in the original statement order.
        self.columns = kwargs.get('columns')
        super().__init__(dataset, **kwargs)
        if not self.columns:
            # presumably `dataset` and `percentage` are set by
            # Strategy.__init__ -- verify against strategy.Strategy
            self.columns = self.dataset.sample(self.percentage, columns=True)
        # NOTE(review): applied to both user-supplied and sampled columns;
        # presumably this normalises column names to numeric indexes --
        # confirm against Strategy.get_numeric_columns.
        self.columns = self.get_numeric_columns(self.columns)

    def run_strategy(self):
        """
        Apply fuzz methods to chosen columns.

        For now, this applies a mixture of random and column type
        based transformations. For each column, a helper is chosen as
        follows:

        - with a small random chance, a helper from `Fuzzer.fuzz_random`
        - datetime columns: a helper from `Fuzzer.fuzz_date`
        - object / str columns: a helper from `Fuzzer.fuzz_str`
        - int / float columns: a helper from `Fuzzer.fuzz_numeric`
        - any other column type: a helper from `Fuzzer.fuzz_random`

        See `Fuzzer.fuzz_str`, `Fuzzer.fuzz_date`, `Fuzzer.fuzz_random`
        and `Fuzzer.fuzz_numeric` for full list of possible
        transformations.
        """
        for column in self.columns:
            col_type = self.dataset.column_dtype(column)
            type_name = str(col_type)
            if random.randint(0, 100) < 20:
                fuzz = self.fuzz_random()
            elif 'datetime' in type_name or '<M8[ns]' in type_name:
                fuzz = self.fuzz_date()
            elif col_type in [object, str]:
                fuzz = self.fuzz_str()
            elif 'int' in type_name or 'float' in type_name:
                fuzz = self.fuzz_numeric()
            else:
                # Without this branch an unrecognised column type left
                # `fuzz` unbound (UnboundLocalError on the first column) or
                # silently reused the helper picked for the previous column.
                # The random helpers are already applied to columns of any
                # type via the first branch, so they are the fallback here.
                fuzz = self.fuzz_random()
            self.apply_func_to_column(fuzz, column)

    def fuzz_str(self):
        """
        Return random choice from string fuzz helpers.

        Possible transformations:
            - add_format: insert format strings
            - change_encoding: decode with possibly bad encoding
            - to_bytes: transform to bytes
            - insert_boms: insert utf-8 boms
        """
        return random.choice([add_format, change_encoding,
                              to_bytes, insert_boms])

    def fuzz_date(self):
        """
        Return random choice from date fuzz helpers.

        Possible transformations:
            - shift_time: shift the time by a random amount
            - date_to_str: transform to string
        """
        return random.choice([shift_time, date_to_str])

    def fuzz_numeric(self):
        """
        Return a random choice from the numeric fuzz helpers.

        Possible transformations:
            - nanify: insert null values (sometimes strs)
            - bigints: return big magic numbers
            - hexify: return hex value
        """
        return random.choice([nanify, bigints, hexify])

    def fuzz_random(self):
        """
        Return a random choice from the random fuzz helpers.

        Possible transformations:
            - sql: returns unkind sql
            - metachars: inserts metacharacters
            - files: returns filepaths or bash
            - delimiter: inserts multiple delimiters
            - emoji: inserts one random emoji
        """
        return random.choice([sql, metachars, files, delimiter, emoji])