# Source: investor/DataVisualiser.py (branch: master)

import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mp
mp.use('TkAgg')
class DataVisualiser:
    """Draw simple charts from a property-listing DataFrame.

    The methods read the ``suburb``, ``price`` and ``sold_date`` columns of
    the frame they are given and render with ``matplotlib.pyplot``.
    """

    # Multiplier applied to the ``price`` column for each supported currency
    # code. AUD maps to 1, so prices are presumably stored in AUD.
    CURRENCY_RATES = {
        'AUD': 1,
        'USD': 0.66,
        'INR': 54.25,
        'CNY': 4.72,
        'JPY': 93.87,
        'HKD': 5.12,
        'KRW': 860.92,
        'GBP': 0.51,
        'EUR': 0.60,
        'SGD': 0.88,
    }

    def __init__(self):
        """Create a visualiser; it holds no per-instance state."""
        pass

    @classmethod
    def _converted_prices(cls, dataframe, suburb, target_currency):
        """Return the non-missing prices for *suburb* scaled to *target_currency*.

        Falls back to every row when *suburb* matches no row, and leaves the
        prices unscaled when *target_currency* is not in ``CURRENCY_RATES``;
        both fallbacks print a notice. The result is a NumPy array.
        """
        # One vectorised comparison both answers "does this suburb exist?"
        # and selects its rows, so no groupby is needed.
        in_suburb = dataframe['suburb'] == suburb
        if in_suburb.any():
            prices = dataframe.loc[in_suburb, 'price']
        else:
            print('suburb <{}> is not exist, print all suburb'.format(suburb))
            prices = dataframe['price']

        # dropna() removes missing values without comparing their string form.
        prices = prices.dropna()

        rate = cls.CURRENCY_RATES.get(target_currency)
        if rate is None:
            print('target_currency is not exist, use default value AUD')
        else:
            prices = prices * rate
        return prices.to_numpy()

    @staticmethod
    def _sales_per_year(dataframe):
        """Return ``(years, counts)`` built from the ``sold_date`` column.

        The year is the text after the last ``/`` of each non-missing date.
        ``years`` is sorted ascending (as strings) and ``counts`` holds the
        number of rows for the year at the same position.
        """
        sold_dates = dataframe['sold_date'].dropna().astype(str)
        years = sold_dates.str.rsplit('/', n=1).str[-1]
        per_year = years.value_counts().sort_index()
        return per_year.index.tolist(), per_year.tolist()

    def prop_val_distribution(self, dataframe, suburb, target_currency):
        """Plot, save and show a histogram of property prices.

        Args:
            dataframe: Frame with ``suburb`` and ``price`` columns.
            suburb: Suburb to plot; if no row has this value, all rows are
                used and a notice is printed.
            target_currency: Key of ``CURRENCY_RATES``; if unknown, prices
                are left unconverted and a notice is printed.

        The figure is written to ``./histogram.jpg`` before being shown.
        """
        prices = self._converted_prices(dataframe, suburb, target_currency)
        plt.hist(prices, bins=30, color='skyblue', alpha=0.8)

        plt.title('Histogram')
        plt.xlabel('value')
        plt.ylabel('frequency')

        # Save before show(): the original order is kept on purpose.
        plt.savefig('./histogram.jpg')
        plt.show()

    def sales_trend(self, dataframe):
        """Plot and show the number of sales per year as a line chart.

        Args:
            dataframe: Frame with a ``sold_date`` column whose values end in
                ``/<year>``; missing dates are ignored.
        """
        years, counts = self._sales_per_year(dataframe)
        plt.title('sold line')
        plt.xlabel('year')
        plt.ylabel('sold number')
        plt.plot(years, counts)
        plt.show()

if __name__ == '__main__':
    # Demo run: load the listings CSV from the working directory and draw the
    # price histogram in CNY. 'all' is passed as the suburb name.
    listings = pd.read_csv('./property_information.csv')
    DataVisualiser().prop_val_distribution(listings, 'all', 'CNY')