BlockSci Setup

In [1]:
import blocksci
import matplotlib.pyplot as plt
import matplotlib.ticker
import collections
import pandas as pd
import numpy as np
%matplotlib notebook
In [2]:
# `parser_data_directory` must be defined before this cell runs: set it to the
# data directory that blocksci_parser wrote its output to.
chain = blocksci.Blockchain(parser_data_directory)

Measuring different types of address use

In [3]:
# Map block.net_address_type_value() over the chain's blocks with chain.map_blocks and
# keep the per-block results; %time reports how long the pass takes.
%time net_coins_per_block = chain.map_blocks(lambda block: block.net_address_type_value())
CPU times: user 1min 13s, sys: 836 ms, total: 1min 14s
Wall time: 1min 31s
In [4]:
# Cumulative per-address-type totals (missing entries count as 0), scaled down by 1e8
# (presumably satoshis -> whole coins), then re-indexed from block heights to dates.
df = chain.heights_to_dates(pd.DataFrame(net_coins_per_block).fillna(0).cumsum() / 1e8)
# Replace each address-type column key with its human-readable name.
df = df.rename(columns=blocksci.address_type.pretty_name)
# Plot weekly means, with the y axis anchored at zero.
ax = df.resample("W").mean().plot()
ax.set_ylim(bottom=0)
Out[4]:
(0, 13304531.030172516)

Visualizing Transaction Fees for an individual block

In [5]:
example_block_height = 465100
%time df = pd.DataFrame(chain[example_block_height].fees_per_byte, columns=["Satoshis per byte"])
ax = df.reset_index().plot.scatter(x="index", y="Satoshis per byte")
ax.set_ylim(0)
ax.set_xlim(0)
plt.show()
CPU times: user 0 ns, sys: 0 ns, total: 0 ns
Wall time: 2.05 ms

Identifying Overlay Applications

In [6]:
# Collect the transactions that script_type_txes returns for the nulldata script type,
# with range arguments 0 and len(chain) (presumably the whole chain); %time reports the cost.
%time txes = chain.script_type_txes(0, len(chain), blocksci.nulldata)
CPU times: user 1min 7s, sys: 1.46 s, total: 1min 9s
Wall time: 17.5 s
In [7]:
# This query takes a particularly long time, since Counterparty
# identification requires attempting to decrypt all OP_RETURN data.
# Each result pairs the containing block's time with the application label of the transaction.
%time labels = [(tx.block.time, blocksci.label_application(tx)) for tx in txes]
CPU times: user 9min 17s, sys: 12.6 s, total: 9min 30s
Wall time: 9min 26s
In [8]:
# Count labelled transactions per (block time, application label), then pivot the labels
# into columns; combinations that never occur become 0.
df = pd.DataFrame(labels, columns=["date", "label"])
df = df.reset_index().groupby(["date", "label"]).count().unstack(level=-1).fillna(0)
# count() leaves a two-level column index (counted column, label); keep only the label level.
df.columns = df.columns.droplevel()

# Restrict to activity after 1 January 2016. The filter is computed once and reused below;
# the ISO date string avoids any day/month ambiguity.
recent = df[df.index > pd.Timestamp("2016-01-01")]

# The ten labels with the largest totals in that window (ascending order), excluding any
# label that contains "Address".
important_columns = list(recent.sum().sort_values().iloc[-10:].index)
important_columns = [x for x in important_columns if "Address" not in x]

# Weekly mean of the cumulative counts. "W" is the weekly alias used by the other cells;
# the lower-case spelling is deprecated in recent pandas releases.
ax = recent.cumsum().resample("W").mean()[important_columns].plot()
ax.set_ylim(0)
plt.tight_layout()

Average Fees per Block in 2017 (USD)

In [9]:
# Sum of block.fees for each block in chain.range('2017'). %time covers this line only.
%time fees = [sum(block.fees) for block in chain.range('2017')]
# Block times from a second, untimed pass over the same range; a later cell uses them as the
# index for `fees`, which assumes both passes yield the blocks in the same order.
times = [block.time for block in chain.range('2017')]
CPU times: user 22.5 s, sys: 184 ms, total: 22.7 s
Wall time: 22.7 s
In [10]:
# Converter used by later cells to turn satoshi amounts into a currency value at a given time.
# NOTE(review): the plots label the result as USD — presumably the constructor's default; confirm.
converter = blocksci.CurrencyConverter()
In [11]:
# Index the per-block fee totals by block time and convert them from satoshis with the
# currency converter (the axis label below assumes the converted values are in USD).
df = converter.satoshi_to_currency_df(pd.DataFrame({"Fee": fees}, index=times), chain)
# Weekly mean fee per block, without a legend (there is a single series).
ax = df.resample("W").mean().plot(legend=False)
ax.set_ylabel("Fee per block (USD)")
ax.set_ylim(bottom=0)
plt.tight_layout()

Transactions Paying over 1000 dollars in fees

In [12]:
# Pre-filter: select transactions whose fee exceeds 10,000,000 (i.e. 0.1 BTC, assuming fee(tx)
# is in satoshis), with range arguments 0 and len(chain).
# NOTE(review): the next cell keeps only fees above 1000 in converted currency; this pre-filter
# would miss such transactions whenever one BTC is worth more than 10,000 — confirm this is acceptable.
%time high_fee_txes = chain.cpp.filter_tx("fee(tx) > 10000000", 0, len(chain))
CPU times: user 1min 28s, sys: 4 ms, total: 1min 28s
Wall time: 11.4 s
In [13]:
# One row per pre-filtered transaction: its block height and its fee converted from satoshis
# at the time of its own block. Converting here uses the block time already available on the
# transaction, instead of a row-wise DataFrame.apply that looks each block up again by height.
df = pd.DataFrame(
    [
        (tx.block.height, converter.satoshi_to_currency(tx.fee, tx.block.time))
        for tx in high_fee_txes
    ],
    columns=["height", "fee"],
)
# heights_to_dates works on the index, so index the rows by block height first.
df.index = df["height"]
# Keep only fees above 1000 in the converted currency.
df = df[df["fee"] > 1000]
df = chain.heights_to_dates(df)

# Scatter the remaining fees over time on a log-scaled y axis.
fig, ax = plt.subplots()
# NOTE(review): plot_date is deprecated in recent matplotlib releases; ax.plot with a
# datetime index is the documented replacement — confirm against the pinned version.
ax.plot_date(df.index, df["fee"], fmt="x")
ax.set_yscale("log")
# Show plain numbers on the log axis: no offset and no scientific notation.
formatter = matplotlib.ticker.ScalarFormatter(useOffset=False)
formatter.set_scientific(False)
ax.yaxis.set_major_formatter(formatter)
ax.set_ylabel("Fee (USD)")
plt.tight_layout()