
Enhancing HLS4ML: Accelerating DNNs on FPGA and ASIC for Scientific Computing

This wiki contains our project details, motivation, innovations, and architectural goals.

API Design (WIP)

from deepsocflow import Hardware, Bundle, QInput, BundleModel, QConvCore, QDenseCore, QAdd, QPool, Softmax, QLeakyReLu

'''
0. Specify Hardware
'''
hw = Hardware(
        bits_input       = 8,
        bits_weights     = 4,
        bits_sum         = 24,
        bits_bias        = 16,
        max_kernel_size  = (13, 13),
        max_channels_in  = 512,
        max_channels_out = 512,
        max_image_size   = (32,32),
     )
hw.export() # Generates: config_hw.svh, config_hw.tcl


'''
1. Build Model
'''
# model = Model.from_json('config_model.json')  # Alternatively, can build model from json

x = x_in = QInput( # keep x_in as the model input for BundleModel below
        input_shape= (8,32,32,3),
        hw= hw,
        input_frac_bits= 4
    )
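# Illustration only (not part of the deepsocflow API): with bits_input = 8 and
# input_frac_bits = 4, activations are signed Q4.4 fixed-point values, i.e. a
# step of 2**-4 = 0.0625 spanning [-8.0, +7.9375].
import numpy as np
_q_in = np.arange(-2**7, 2**7) / 2**4        # all 256 representable input values
assert _q_in.min() == -8.0 and _q_in.max() == 7.9375 and _q_in[1] - _q_in[0] == 0.0625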

x = Bundle(
        core= QConvCore(
                filters= 32,
                kernel_size= (7,7),
                strides= (2,2),
                padding= 'same',
                weights_frac_bits= 4,
                bias_frac_bits= 8,
                activation= QLeakyReLu(
                                negative_slope=0.125,
                                frac_bits= 4,
                            ),
                ),
        pool = QPool(
                type= 'max',
                size= (3,3),
                strides= (1,1),
                padding= 'same',
                frac_bits= 4
                )
    )(x)

x_skip = x # save the current tensor for the residual connection (QAdd) below

x = Bundle(
        core= QConvCore(
                filters= 64,
                kernel_size= (3,3),
                weights_frac_bits= 4,
                bias_frac_bits= 8,
                activation= QLeakyReLu(
                                negative_slope=0,
                                frac_bits= 4,
                            ),
                ),
        pool= QAdd(x_skip), # Residual addition
        flatten= True,
    )(x)

x = Bundle(
        dense= QDenseCore(
                outputs= 10,
                weights_frac_bits= 4,
                bias_frac_bits= 8,
                activation= Softmax()
                ),
    )(x)

model = BundleModel(inputs=x_in, outputs=x)


'''
2. Train Model
'''
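# Assumed, illustrative definitions for the names the training snippet below uses.
# Adam, lr_schedule, BATCH_SIZE, NB_EPOCH, callbacks, and the x_/y_ datasets are
# not part of the deepsocflow API; any standard Keras setup works the same way.
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import LearningRateScheduler

BATCH_SIZE  = 64
NB_EPOCH    = 30
lr_schedule = lambda epoch: 1e-3 * (0.1 ** (epoch // 10))   # simple step decay
callbacks   = [LearningRateScheduler(lr_schedule)]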

print(model.summary(expand_nested=True))
model.compile( # QKeras
  loss='categorical_crossentropy',
  optimizer=Adam(learning_rate=lr_schedule(0)), 
  metrics=['acc']
  )

model.fit(x_train, y_train,
    batch_size=BATCH_SIZE,
    epochs=NB_EPOCH,
    validation_data=(x_test, y_test),
    shuffle=True,
    callbacks=callbacks)


'''
Export 
'''
model.export() # Generates: savedmodel, config_model.json
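# The exported config_model.json is what the commented Model.from_json(...) call in
# step 1 would read back to rebuild this model without re-defining it in Python.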


'''
3. Export for inference

- Runs a forward pass in float32 and records intermediate tensors
- Runs a forward pass in integer arithmetic, comparing against the float32 reference to verify zero error
- Generates:
      - config_firmware.h
      - weights.bin
      - expected.bin
'''

example_input = model.get_random_input()
model.export_inference(example_input) # -> config_firmware.h, weights.bin, expected.bin
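# Illustration of the "zero error" idea above (not the library's internals): the
# float32 reference only sees values that already lie on the fixed-point grid, so
# the integer pipeline, rescaled back to float, should match it exactly.
import numpy as np
_frac = 4
_x_f  = np.round(np.random.randn(8, 32, 32, 3) * 2**_frac) / 2**_frac   # on-grid float tensor
_x_i  = np.round(_x_f * 2**_frac).astype(np.int32)                      # the same tensor as integers
assert np.array_equal(_x_i / 2**_frac, _x_f)                            # exact round trip, no error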

'''
FPGA: Run vivado.tcl
ASIC: Set PDK paths, run syn.tcl & pnr.tcl
'''
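# One way to drive the FPGA flow from the same script (assumes Vivado is on PATH and
# vivado.tcl is the script named above). The ASIC syn.tcl / pnr.tcl runs depend on the
# chosen synthesis and place-and-route tools, so they are not shown here.
import subprocess
subprocess.run(['vivado', '-mode', 'batch', '-source', 'vivado.tcl'], check=True)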

System

Link to our IEEE SOC contest submission: this link
