Source code for datafuzz.output.core

# -*- coding: utf-8 -*-
"""
Output classes for transforming datasets into proper output.

Supported output: CSV, JSON, SQL, pandas, np 2D array, lists
"""
import json
from csv import DictWriter, writer
import dataset as dataset_db

from datafuzz.settings import HAS_NUMPY

if HAS_NUMPY:
    import numpy as np


class BaseOutput:
    """ Base class for output.

        Arguments:
            dataset (`datafuzz.DataSet` or
            `generators.DatasetGenerator`): dataset or generator to output

        Attributes:
            records     (obj): DataSet or generator records obj / list
            data_type   (str): DataSet or generator data_type string
            output      (str): DataSet or generator output string

        Keyword Arguments:
            filename    (str): DataSet or generator output string
    """

    def __init__(self, dataset, **kwargs):
        self.records = dataset.records
        self.data_type = dataset.data_type
        self.output = dataset.output
        if 'filename' in kwargs:
            self.output = kwargs.get('filename')


[docs]class CSVOutput(BaseOutput): """ CSV output for writing datasets to CSV file. see also: `datafuzz.output.BaseOutput` """
[docs] def to_csv(self): """ Write the CSVOutput to a csv file """ if self.data_type == 'pandas': self.records.to_csv(self.output) elif self.data_type == 'numpy': np.savetxt(self.output, self.records, delimiter=",") elif isinstance(self.records[0], dict): with open(self.output, 'w') as output: wrtr = DictWriter(output, fieldnames=self.records[0].keys()) wrtr.writeheader() wrtr.writerows(self.records) else: with open(self.output, 'w') as output: wrtr = writer(output) wrtr.writerows(self.records) return self.output
[docs]class JSONOutput(BaseOutput): """ JSON output for writing datasets to JSON file. see also: `datafuzz.output.BaseOutput` """
[docs] def to_json(self): """ Write the JSONOutput to a json file """ if self.data_type == 'pandas': self.records.T.to_json(self.output) elif self.data_type == 'numpy': json.dump(self.records.to_list(), open(self.output, 'w', encoding='utf-8'), indent=4) else: json.dump(self.records, open(self.output, 'w', encoding='utf-8'), indent=4) return self.output
[docs]class SQLOutput(BaseOutput): """ Database output for writing datasets to a table. see also: `datafuzz.output.BaseOutput` Extra parameters: db_uri (str): Database URI String table (str): Database table name """ def __init__(self, dataset, **kwargs): super().__init__(dataset, **kwargs) self.db_uri = kwargs.get('db_uri') self.table = kwargs.get('table')
[docs] def to_sql(self): """ Write the dataset records to a sql table """ if self.data_type == 'pandas': return self.records.to_sql(self.table, self.db_uri) with dataset_db.connect(self.db_uri) as db: table = db[self.table] return table.insert_many(self.records)