{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "Cv-9Vzunb_tf"
},
"source": [
"# Import Necessary Library"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"id": "4f-K54nHb-Uq"
},
"outputs": [],
"source": [
"import torch\n",
"import torch.nn as nn\n",
"import torch.nn.functional as F\n",
"import torch.optim as optim\n",
"import torch.utils.data as data\n",
"import math\n",
"import os\n",
"import urllib.request\n",
"import pandas as pd\n",
"from functools import partial\n",
"from urllib.error import HTTPError\n",
"from datetime import datetime"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"id": "XCv8_IzSdut4"
},
"outputs": [],
"source": [
"def scaled_dot_product(q, k, v, mask=None):\n",
" # implemented by the student, you can ignore the mask implementation currently\n",
" # just assignment all the mask is on\n",
"\n",
" shape_len = len(k.shape)\n",
"\n",
" transpose = k.mT\n",
" d = k.shape[-1]\n",
"\n",
" score_scale = torch.matmul(q, transpose)/math.sqrt(d)\n",
"\n",
" attention_weight = torch.nn.functional.softmax(score_scale, 1)\n",
"\n",
" output = torch.matmul(attention_weight, v)\n",
"\n",
" return output, attention_weight"
]
},
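{
"cell_type": "markdown",
"metadata": {},
"source": [
"A quick sanity check of `scaled_dot_product` on random tensors (a minimal sketch; the batch size, sequence length, and feature dimension below are chosen arbitrarily rather than taken from the lab). The attention weights returned should sum to 1 along the last axis."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Illustrative shapes only: (batch, seq_len, d_k)\n",
"q = torch.randn(2, 4, 8)\n",
"k = torch.randn(2, 4, 8)\n",
"v = torch.randn(2, 4, 8)\n",
"\n",
"values, attention = scaled_dot_product(q, k, v)\n",
"print(values.shape)      # torch.Size([2, 4, 8])\n",
"print(attention.shape)   # torch.Size([2, 4, 4])\n",
"print(attention.sum(dim=-1))  # each row of weights should be ~1.0"
]
},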
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"colab": {
"provenance": [],
"toc_visible": true
},
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.7"
}
},
"nbformat": 4,
"nbformat_minor": 4
}