This module contains custom models, custom splitters, etc., for both causal and masked language modeling (MLM) tasks. It covers things like training BERT from scratch or fine-tuning a pre-trained language model on your own corpus.
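The snippets below assume the usual setup has already been run; a minimal sketch might look like the following (the blurr module paths are an assumption based on the pre-1.0 API and may differ between versions):

import torch
import pandas as pd
from functools import partial

from fastai.text.all import *      # untar_data, URLs, DataBlock, ColReader, ColSplitter, Learner, Adam, ...
from blurr.utils import *          # BLURR_MODEL_HELPER, HF_TASKS_AUTO (assumed location)
from blurr.data.all import *       # HF_Seq2SeqBlock, HF_CausalLMBeforeBatchTransform, ... (assumed location)
from blurr.modeling.all import *   # HF_BaseModelWrapper, HF_BaseModelCallback, HF_PreCalculatedLoss, hf_splitter, ... (assumed location)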
torch.cuda.set_device(1)  # pin work to GPU 1; adjust the index for your machine
print(f'Using GPU #{torch.cuda.current_device()}: {torch.cuda.get_device_name()}')
wiki_path = untar_data(URLs.WIKITEXT_TINY)
train_df = pd.read_csv(wiki_path/'train.csv', header=None)  # the csv files have no header row
valid_df = pd.read_csv(wiki_path/'test.csv', header=None)
train_df['is_valid'] = False  # flag each row so ColSplitter can separate train from validation
valid_df['is_valid'] = True
df = pd.concat([train_df, valid_df])
print(len(df))
df.head()
task = HF_TASKS_AUTO.CausalLM
pretrained_model_name = "gpt2"
# grab the architecture name, config, tokenizer, and model in one call
hf_arch, hf_config, hf_tokenizer, hf_model = BLURR_MODEL_HELPER.get_hf_objects(pretrained_model_name, task=task)
if hf_tokenizer.pad_token is None: hf_tokenizer.pad_token = '[PAD]'  # GPT-2 ships without a pad token; add one so batches can be padded
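It can be worth confirming what came back before building the DataBlock (plain Python, no extra API assumed):

print(hf_arch)  # e.g. 'gpt2'
print(type(hf_config).__name__, type(hf_tokenizer).__name__, type(hf_model).__name__)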
blocks = (
    HF_Seq2SeqBlock(before_batch_tfm=HF_CausalLMBeforeBatchTransform(hf_arch, hf_config, hf_tokenizer, hf_model)),
    noop
)
dblock = DataBlock(blocks=blocks, get_x=ColReader(0), splitter=ColSplitter(col='is_valid'))  # the raw text lives in column 0 (no header)
dls = dblock.dataloaders(df, bs=2)
b = dls.one_batch()
b[0]['input_ids'].shape, b[0]['labels'].shape, b[1].shape
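For causal LM the targets are essentially the inputs themselves: following the standard Hugging Face convention, the labels mirror the (padded) input_ids and the model shifts them by one position internally when computing the loss, which is what HF_PreCalculatedLoss relies on further down. A hedged quick check:

# shapes should line up if the labels are just a copy of the (padded) input ids
assert b[0]['input_ids'].shape == b[0]['labels'].shape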
dls.show_batch(dataloaders=dls, max_n=2, input_trunc_at=500, target_trunc_at=500)
model = HF_BaseModelWrapper(hf_model)
learn = Learner(dls,
                model,
                opt_func=partial(Adam),
                loss_func=HF_PreCalculatedLoss(),   # the HF model computes the LM loss itself; this loss just passes it through
                cbs=[HF_BaseModelCallback],
                splitter=hf_splitter)  # .to_fp16()
learn.create_opt() # -> will create your layer groups based on your "splitter" function
learn.freeze()  # only the last layer group (as defined by hf_splitter) stays trainable
# preds = learn.model(b[0])
# len(preds),preds[0], preds[1].shape
print(len(learn.opt.param_groups))
learn.lr_find(suggestions=True)
learn.fit_one_cycle(1, lr_max=3e-3)
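From here the usual fastai workflow applies. A hedged sketch of a possible next stage (learning rates here are illustrative, not tuned) would be to unfreeze and lean on the layer groups created by hf_splitter for discriminative learning rates:

# hypothetical follow-up: unfreeze all layer groups and give earlier groups smaller learning rates than the head
learn.unfreeze()
learn.fit_one_cycle(1, lr_max=slice(1e-5, 1e-4))

And since HF_BaseModelWrapper wraps the same hf_model object (its weights are updated in place during training), text can be generated straight from the plain transformers API; the prompt below is just an illustration:

# minimal generation sketch using the plain transformers API (no blurr helper assumed)
hf_model.eval()
prompt = "The history of the English language"
inputs = hf_tokenizer(prompt, return_tensors='pt').to(dls.device)
output_ids = hf_model.generate(inputs['input_ids'], max_length=60, do_sample=True, top_k=50,
                               pad_token_id=hf_tokenizer.eos_token_id)
print(hf_tokenizer.decode(output_ids[0], skip_special_tokens=True))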