Deploy a trained BERT model in AWS Lambda


I have already trained a BERT model in Python 3.9.16 and saved the .pth files in the models directory (my model is about 417 MB). I also have the following Dockerfile and requirements.txt:

Dockerfile

FROM public.ecr.aws/lambda/python:3.9-x86_64
ENV TRANSFORMERS_CACHE=/tmp/huggingface_cache/
COPY requirements.txt .
#RUN pip install torch==1.10.1+cpu -f https://download.pytorch.org/whl/torch_stable.html
RUN pip install torch==1.9.0
RUN pip install transformers==4.9.2
RUN pip install numpy==1.21.2
RUN pip install pandas==1.3.2
RUN pip install -r requirements.txt --target "${LAMBDA_TASK_ROOT}/dependencies"

COPY app.py ${LAMBDA_TASK_ROOT}

COPY models ${LAMBDA_TASK_ROOT}/dependencies/models

CMD [ "app.handler" ]

requirements.txt

torch==1.9.0
transformers==4.9.2
numpy==1.21.2
pandas==1.3.2

app.py

import torch
from transformers import BertTokenizer, BertForSequenceClassification, BertConfig
#from keras.preprocessing.sequence import pad_sequences
#from keras_preprocessing.sequence import pad_sequences
#from tensorflow.keras.preprocessing.sequence import pad_sequences
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
import numpy as np
import pandas as pd
from typing import Dict
import json

# Path to the directory containing the pre-trained model files
#model_dir = "./models/"
model_dir= "./dependencies/models/"


dict_path = f"{model_dir}/model_BERT_DAVID_v2.pth"
state_dict = torch.load(dict_path,map_location=torch.device('cpu'))
vocab_path=f"{model_dir}/vocab_BERT_DAVID_v2.pth"
vocab = torch.load(vocab_path,map_location=torch.device('cpu'))
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=4, state_dict=state_dict)
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True, vocab=vocab)

def handler(event, context):  # Lambda handlers take (event, context)
    #payload = json.loads(event)
    payload = event  # dict with the text
    text = payload['text']
    df = pd.DataFrame()
    df['TEXT'] = [text]
    sentences = df['TEXT'].values
    sentences = ["[CLS] " + sentence + " [SEP]" for sentence in sentences]
    tokenized_texts = [tokenizer.tokenize(sent) for sent in sentences]
    MAX_LEN = 256
    # Use the BERT tokenizer to convert the tokens to their index numbers in the BERT vocabulary
    input_ids = [tokenizer.convert_tokens_to_ids(x) for x in tokenized_texts]
    # Pad our input tokens
    #input_ids = pad_sequences(input_ids, maxlen=MAX_LEN, dtype="long", truncating="post", padding="post")
    # Pad our input tokens
    input_ids = [torch.tensor(seq)[:MAX_LEN].clone().detach() for seq in input_ids]
    input_ids = torch.nn.utils.rnn.pad_sequence(input_ids, batch_first=True, padding_value=0)
    input_ids = torch.nn.functional.pad(input_ids, (0, MAX_LEN - input_ids.shape[1]), value=0)[:, :MAX_LEN]
    input_ids = input_ids.type(torch.LongTensor)
    # Create attention masks
    attention_masks = []
    # Create a mask of 1s for each token followed by 0s for padding
    for seq in input_ids:
        seq_mask = [float(i > 0) for i in seq]
        attention_masks.append(seq_mask)
    prediction_inputs = input_ids.to('cpu') # cuda
    prediction_masks = torch.tensor(attention_masks, device='cpu') # cuda
    batch_size = 32
    prediction_data = TensorDataset(prediction_inputs, prediction_masks)
    prediction_sampler = SequentialSampler(prediction_data)
    prediction_dataloader = DataLoader(prediction_data, sampler=prediction_sampler, batch_size=batch_size)
    # Prediction 
    # Put model in evaluation mode
    model.eval()
    # Tracking variables 
    predictions = []
    # Predict 
    for batch in prediction_dataloader:
        # No GPU in Lambda, keep the tensors on CPU
        #batch = tuple(t.to(device) for t in batch)
        batch = tuple(t for t in batch)
        # Unpack the inputs from our dataloader
        b_input_ids, b_input_mask = batch
        # Telling the model not to compute or store gradients, saving memory and speeding up prediction
        with torch.no_grad():
            # Forward pass, calculate logit predictions
            logits = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask)
        # Move logits and labels to CPU
        logits = logits['logits'].detach().cpu().numpy()
        #label_ids = b_labels.to('cpu').numpy()
        # Store predictions and true labels
        predictions.append(logits)
        #true_labels.append(label_ids)
    key = {0: 'VERY_NEGATIVE', 1: 'SOMEWHAT_NEGATIVE', 2: 'NEUTRAL', 3: 'POSITIVE'}
    values = np.argmax(predictions[0], axis=1).flatten()  # index of the maximum-likelihood class
    converted_values = [key.get(val) for val in values]  # dict label corresponding to that index
    # Obtain the score for the intensity
    exponents = np.exp(predictions)  # apply softmax to the logits to get probabilities
    softmax = exponents / np.sum(exponents)
    intensity = {'VERY_NEGATIVE': softmax[0][0][0], 'SOMEWHAT_NEGATIVE': softmax[0][0][1],
                 'NEUTRAL': softmax[0][0][2], 'POSITIVE': softmax[0][0][3]}
    score = max(intensity.values())
    return converted_values[0]
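
For local testing I call the handler directly with a made-up payload (the text below is just an example):

if __name__ == "__main__":
    # Local smoke test; Lambda supplies a real context object instead of None
    test_event = {"text": "The delivery was late and the package arrived damaged."}
    print(handler(test_event, None))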

Everything seems correct locally, but when I create the AWS Lambda function with the Python 3.9 runtime I get this error:

{
  "errorMessage": "invalid load key, 'v'.",
  "errorType": "UnpicklingError",
  "requestId": "",
  "stackTrace": [
    "  File \"/var/lang/lib/python3.9/importlib/__init__.py\", line 127, in import_module\n    return _bootstrap._gcd_import(name[level:], package, level)\n",
    "  File \"<frozen importlib._bootstrap>\", line 1030, in _gcd_import\n",
    "  File \"<frozen importlib._bootstrap>\", line 1007, in _find_and_load\n",
    "  File \"<frozen importlib._bootstrap>\", line 986, in _find_and_load_unlocked\n",
    "  File \"<frozen importlib._bootstrap>\", line 680, in _load_unlocked\n",
    "  File \"<frozen importlib._bootstrap_external>\", line 850, in exec_module\n",
    "  File \"<frozen importlib._bootstrap>\", line 228, in _call_with_frames_removed\n",
    "  File \"/var/task/app.py\", line 25, in <module>\n    state_dict = torch.load(dict_path,map_location=torch.device('cpu'))\n",
    "  File \"/var/lang/lib/python3.9/site-packages/torch/serialization.py\", line 608, in load\n    return _legacy_load(opened_file, map_location, pickle_module, **pickle_load_args)\n",
    "  File \"/var/lang/lib/python3.9/site-packages/torch/serialization.py\", line 777, in _legacy_load\n    magic_number = pickle_module.load(f, **pickle_load_args)\n"
  ]
}
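
From the stack trace, torch.load fails on the very first byte of the pickle stream, which might mean the .pth file inside the image is not a real checkpoint (for example, a Git LFS pointer file starts with the text "version", which would explain the load key 'v'). A minimal check of the file header, using the same path as in app.py:

# Sanity check: a real torch checkpoint starts with b'\x80' (legacy pickle format)
# or b'PK' (zip archive format); b'version ' would indicate a Git LFS pointer file
with open("./dependencies/models/model_BERT_DAVID_v2.pth", "rb") as f:
    print(f.read(64))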

I have tried multiple things but found no solution so far. Can anyone help me?

asked a year ago · 306 views
1 Answer

No idea if this is the issue or not, but your model_dir variable includes a trailing slash, and you also include a slash when you construct dict_path and vocab_path. Try removing one of them to see if it helps.
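
For example, building the paths with os.path.join instead of f-strings avoids the doubled slash (a minimal sketch; the variable and file names follow the question's app.py):

import os

model_dir = "./dependencies/models"
# os.path.join inserts exactly one separator between the parts
dict_path = os.path.join(model_dir, "model_BERT_DAVID_v2.pth")
vocab_path = os.path.join(model_dir, "vocab_BERT_DAVID_v2.pth")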

AWS
EXPERT
Uri
answered a year ago
  • Hi, thanks. I already changed the model_dir to: model_dir = "./dependencies/models"

    But it didn't work.
