INFO5558 Deep Neural Networks Sol.

Published on
14 min read

会持续更新完本课程所有作业解析。


dnn_assgn_8

https://github.com/jeffheaton/app_deep_learning/blob/main/assignments/assignment_yourname_t81_558_class8.ipynb

使用特征工程预测物品成本,需达到指定精度要求。

使用两个CSV文件:训练文件:含已知成本与特征数据。提交文件:仅含特征数据,需预测成本

特征包括:高度、宽度、深度、形状、质量。目标变量:成本。

对原始特征进行工程处理:包括变换、编码和构造新特征,形状特征为分类变量需专门处理,使用训练数据建模,预测提交文件中的成本数据。

import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
 
# Training data (features + known cost) and the submission set
# (features only; cost must be predicted).
df_train = pd.read_csv("https://data.heatonresearch.com/data/t81-558/datasets/shapes-train.csv")
df_submit = pd.read_csv("https://data.heatonresearch.com/data/t81-558/datasets/shapes-test.csv")
 
def engineer_features(df):
    """Derive geometric, categorical, and interaction features.

    Adds volume, surface area, and pairwise aspect ratios from the raw
    dimensions, one-hot encodes the ``shape`` column, ordinally encodes
    ``quality`` (low/medium/high -> 1/2/3), and builds two size-times-
    quality interaction terms. Returns a new DataFrame; the input frame
    is left unmodified.
    """
    out = df.copy()

    # Basic geometry of a rectangular box.
    h, w, d = out['height'], out['width'], out['depth']
    out['volume'] = h * w * d
    out['surface_area'] = 2 * (h * w + h * d + w * d)

    # Pairwise aspect ratios (note: inf if a dimension is zero).
    out['aspect_ratio_hw'] = h / w
    out['aspect_ratio_hd'] = h / d
    out['aspect_ratio_wd'] = w / d

    # One-hot encode the categorical shape column.
    out = pd.concat(
        [out, pd.get_dummies(out['shape'], prefix='shape')], axis=1
    )

    # Quality has a natural order, so use an ordinal encoding.
    out['quality_encoded'] = out['quality'].map(
        {'low': 1, 'medium': 2, 'high': 3}
    )

    # Interaction terms: size scaled by quality.
    out['volume_quality'] = out['volume'] * out['quality_encoded']
    out['surface_quality'] = out['surface_area'] * out['quality_encoded']

    return out
 
# Engineer identical features for both datasets.
df_train_fe = engineer_features(df_train)
df_submit_fe = engineer_features(df_submit)

# Feature list is derived from the TRAINING frame; the one-hot shape_
# columns present in training define the model's input schema.
feature_columns = ['height', 'width', 'depth', 'volume', 'surface_area',
                  'aspect_ratio_hw', 'aspect_ratio_hd', 'aspect_ratio_wd',
                  'quality_encoded', 'volume_quality', 'surface_quality'] + \
                 [col for col in df_train_fe.columns if col.startswith('shape_')]

X_train = df_train_fe[feature_columns]
y_train = df_train_fe['cost']

# BUG FIX: pd.get_dummies() is applied per-frame, so the submit frame
# can be missing a shape category that occurs only in training (which
# made df_submit_fe[feature_columns] raise KeyError) or carry extras.
# Reindex to the training schema, filling absent one-hot columns with
# 0, so column order and count always match what the model was fit on.
X_submit = df_submit_fe.reindex(columns=feature_columns, fill_value=0)

# Random forest: robust baseline, no feature scaling required.
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_submit)

# Submission table: predicted cost plus the row id from the submit file.
submit_df = pd.DataFrame({
    'cost': y_pred,
    'id': df_submit['id']
})

submit_df = submit_df[['cost', 'id']]

print("Submission dataframe info:")
print(submit_df.info())
print("\nFirst few rows:")
print(submit_df.head())

key = "..."
file = ".../assignment_FangYou_t81_558_class8.ipynb"

submit(source_file=file, data=[submit_df], key=key, no=8, course='t81-558')

dnn_assgn_7

https://github.com/jeffheaton/app_deep_learning/blob/main/assignments/assignment_yourname_t81_558_class7.ipynb

将任意图像转换为正方形画布,通过添加计算出的背景色实现。

方形化方法:

  • 横向图像:在上下添加背景条
  • 纵向图像:在左右添加背景条
  • 原图在正方形画布中居中显示

背景色计算:取原图所有像素RGB值的平均值,结果取整作为填充色。
%matplotlib inline
 
import os
import requests
from io import BytesIO
import numpy as np
from PIL import Image
from matplotlib.pyplot import imshow
 
file = ".../assignment_FangYou_t81_558_class7.ipynb"  # notebook path (redacted)
key = "..."  # submission key (redacted)
 
def load_image(url):
    """Download *url* and return it as an RGB PIL image."""
    resp = requests.get(url)
    image = Image.open(BytesIO(resp.content)).convert("RGB")
    print(f"Loaded image from {url} with size: {image.size}")
    return image
 
def make_square(img: Image.Image) -> Image.Image:
    """Pad *img* onto a centered square canvas filled with its mean color.

    The canvas side equals the larger image dimension; the fill color is
    the per-channel mean of all pixels, truncated to int.
    """
    width, height = img.size
    side = max(width, height)
    # Per-channel mean over every pixel, converted to an int RGB tuple.
    mean_rgb = tuple(int(c) for c in np.array(img).mean(axis=(0, 1)))
    print(f"Original size: ({width},{height}), new size: ({side},{side}), average color: {mean_rgb}")
    canvas = Image.new("RGB", (side, side), color=mean_rgb)
    # Center the original on the square canvas.
    offset_x = round((side - width) / 2)
    offset_y = round((side - height) / 2)
    canvas.paste(img, (offset_x, offset_y))
    print(f"Image pasted at position: ({offset_x},{offset_y})")
    print(f"Corner pixel values: Top-left: {canvas.getpixel((0,0))}, Bottom-right: {canvas.getpixel((side-1,side-1))}")
    return canvas
 
url1 = "https://raw.githubusercontent.com/jeffheaton/t81_558_deep_learning/master/photos/hickory_home.jpg"
url2 = "https://raw.githubusercontent.com/jeffheaton/t81_558_deep_learning/master/photos/landscape.jpg"
 
# Download both test photos.
img1 = load_image(url1)
img2 = load_image(url2)
 
# Square each image on a mean-color canvas.
submit_img1 = make_square(img1)
submit_img2 = make_square(img2)
 
print(f"Submit image 1 size: {submit_img1.size}")
print(f"Submit image 2 size: {submit_img2.size}")
 
# NOTE(review): two consecutive imshow calls in one notebook cell only
# display the last image; use separate cells/figures to inspect both.
imshow(submit_img1)
imshow(submit_img2)
 
submit(source_file=file, data=[submit_img1, submit_img2], key=key, no=7, course='t81-558')

dnn_assgn_6

https://github.com/jeffheaton/app_deep_learning/blob/main/assignments/assignment_yourname_t81_558_class6.ipynb

使用大型语言模型(LLM)从给定的100个英语句子中提取人名。

每个句子对应一个ID,输出结果应为两列:ID 和提取出的 人名(单个词)。

示例说明:

  • 输入句子:"Sarah found an old photograph in the attic."
  • 输出结果:ID=1, name=Sarah

注意: 需处理所有句子,并确保输出格式与示例一致(ID与提取的人名对应)。

import os
import pandas as pd
from langchain.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
 
key = "..."  # course submission key (redacted)
OPENAI_KEY = "..."  # OpenAI API key (redacted)
 
# Sentences to process; per the assignment, each contains one person name.
df = pd.read_csv("https://data.heatonresearch.com/data/t81-558/sentences.csv")
 
MODEL = "gpt-4o-mini"
TEMPERATURE = 0  # deterministic decoding for reproducible extraction
 
llm = ChatOpenAI(
    api_key=OPENAI_KEY,
    model=MODEL,
    temperature=TEMPERATURE,
    n=1,
    max_tokens=256
)
 
# Prompt asks for the names only, one per line, in input order.
prompt = ChatPromptTemplate.from_template(
    "Extract the NAME of the person from each of the following sentences. "
    "Only return the names in order, one per line.\n\n{sentences}"
)
 
def extract_names_batch(sentences, batch_size=10):
    """Extract one person name per sentence via the LLM, in order.

    Sentences are sent in numbered batches of *batch_size* to keep each
    prompt small. Returns a flat list of names aligned positionally
    with *sentences*.

    Raises:
        ValueError: if the model returns a different number of lines
            than the batch has sentences (would silently misalign ids).
    """
    # The chain is loop-invariant; build it once instead of per batch.
    chain = prompt | llm
    results = []
    for start in range(0, len(sentences), batch_size):
        batch = sentences[start:start + batch_size]
        numbered = "\n".join(f"{i+1}. {s}" for i, s in enumerate(batch))
        response = chain.invoke({"sentences": numbered})
        names = [line.strip() for line in response.content.split("\n") if line.strip()]
        # Fail fast on a count mismatch: extending anyway would shift
        # every later name onto the wrong sentence id.
        if len(names) != len(batch):
            raise ValueError(
                f"Batch starting at {start}: expected {len(batch)} names, got {len(names)}"
            )
        results.extend(names)
    return results
 
# One extracted name per row, aligned by position with the sentences.
df["name"] = extract_names_batch(df["sentence"].tolist(), batch_size=10)
df_submit = df[["id", "name"]]
print(df_submit.head(10))
 
file = ".../assignment_FangYou_t81_558_class6.ipynb"
submit(source_file=file, data=[df_submit], key=key, course="t81-558", no=6)

dnn_assgn_5

https://github.com/jeffheaton/app_deep_learning/blob/main/assignments/assignment_yourname_t81_558_class5.ipynb

本次作业用colab。

YOLOv8 目标检测模型对一系列街景图片进行分析,并按要求统计特定物体的数量。

环境搭建与模型准备:首先需要在 Google Colab 环境中安装 ultralytics 库,并挂载 Google Drive 以便后续的文件读取与提交。随后加载预训练的 yolov8n.pt 模型作为核心检测引擎。

图像识别与参数控制:程序需自动处理从 sidewalk1.jpg 到 sidewalk10.jpg 的十张指定街景图片。在执行检测时,必须严格设定置信度阈值 conf=0.1 和交并比阈值 iou=0.8,以确保识别结果与课程标准答案保持一致。

数据解析与目标统计:需要将模型输出的检测框信息转化为 Pandas DataFrame 格式,从中提取出边界框坐标及类别名称。针对每张图片,需专门统计 person(人)、car(车)、bus(公交车) 这三类目标的具体数量。

结果汇总与自动化提交:将识别出的统计数据汇总成一个包含 image、bus、car、person 四列的最终表格,并保存为 CSV 文件。最后通过指定的 submit 函数,将该统计表与 Notebook 源代码一同发送至服务器进行评分。

import pandas as pd
from ultralytics import YOLO
 
# 1. Load the pretrained YOLOv8-nano model used for all detections.
model = YOLO("yolov8n.pt")
 
# 2. Helper: flatten YOLO detections into a tabular form.
def results_to_dataframe(results):
    """Return one row per detected box: class id, class name, bbox corners."""
    names = results[0].names
    boxes = results[0].boxes
    class_ids = [int(c) for c in boxes.cls]
    rows = {
        'class': class_ids,
        'name': [names[c] for c in class_ids],
        'xmin': [int(b[0]) for b in boxes.xyxy],
        'ymin': [int(b[1]) for b in boxes.xyxy],
        'xmax': [int(b[2]) for b in boxes.xyxy],
        'ymax': [int(b[3]) for b in boxes.xyxy],
    }
    return pd.DataFrame(rows)
 
# 3. Helper: count people, cars and buses among one image's detections.
def count_objects(results, image_id):
    """Return {'image', 'bus', 'car', 'person'} counts for *results*."""
    detections = results_to_dataframe(results)
    # Keep only the three classes the assignment asks about.
    wanted = detections[detections['name'].isin(['person', 'car', 'bus'])]
    tally = wanted['name'].value_counts().to_dict()
    return {
        'image': image_id,
        'bus': tally.get('bus', 0),
        'car': tally.get('car', 0),
        'person': tally.get('person', 0),
    }
 
# 4. Batch-detect sidewalk1.jpg through sidewalk10.jpg.
image_urls = [
    f"https://data.heatonresearch.com/data/t81-558/sidewalk/sidewalk{i}.jpg"
    for i in range(1, 11)
]
 
rows = []
print("正在处理图像...")
for i, url in enumerate(image_urls, start=1):
    # conf=0.1 / iou=0.8 are mandated by the assignment so counts match
    # the grader's reference output.
    results = model(url, conf=0.1, iou=0.8, verbose=False)
    row = count_objects(results, i)
    rows.append(row)
 
# 5. Assemble the summary table and save it to Google Drive for submission.
final_df = pd.DataFrame(rows)
output_path = "/content/drive/MyDrive/assignment_FangYou_t81_558_class5.csv"
final_df.to_csv(output_path, index=False)
 
print("\n--- 检测统计结果 ---")
print(final_df)
print(f"\n✅ 结果已保存至: {output_path}")

dnn_assgn_4

https://github.com/jeffheaton/app_deep_learning/blob/main/assignments/assignment_yourname_t81_558_class4.ipynb

使用 crx.csv 数据集,并分别训练两个神经网络来预测并填补其中 a2 和 a14 列的缺失值;训练特征统一选自无缺失且非目标的 s3、a8、a9、a10、a11、a12、a13 和 a15 列,在填补后需提交一个包含全部原始列(a1 至 a16)的完整文件,即使 a14 列的平均值可能因预测存在微小差异。

import os
import pandas as pd
import numpy as np
from scipy.stats import zscore
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn import metrics
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import tqdm
 
key = ""  # submission key (redacted)
file = ".../assignment_FangYou_t81_558_class4.ipynb"
 
# '?' marks missing values in the crx dataset.
df = pd.read_csv(".../crx.csv", na_values=['?'])
 
# Work on a copy; `df` keeps the original missing values for comparison.
df_submit = df.copy()
 
# Predictor columns: per the assignment, these have no missing values
# and are not imputation targets.
feature_columns = ['s3', 'a8', 'a9', 'a10', 'a11', 'a12', 'a13', 'a15']
 
def preprocess_data(df, target_col):
    """Encode categorical features and z-scale numeric features.

    Object-dtype feature columns are label-encoded (missing values
    become the literal category 'MISSING'); the remaining feature
    columns are standardized. Returns the transformed frame, the
    fitted encoders and scaler, and the two column partitions.

    NOTE(review): ``target_col`` is accepted but never used in the
    body. Also, the encoders/scaler are fit on ALL rows, including
    rows whose target is missing — fine for imputation, but strictly
    speaking train/predict leakage.
    """
    data = df.copy()
 
    # Object-dtype feature columns are treated as categorical.
    categorical_cols = data[feature_columns].select_dtypes(include=['object']).columns.tolist()
 
    label_encoders = {}
    for col in categorical_cols:
        le = LabelEncoder()
        data[col] = data[col].fillna('MISSING')
        data[col] = le.fit_transform(data[col].astype(str))
        label_encoders[col] = le
 
    # Everything else among the features is numeric; standardize it.
    numerical_cols = [col for col in feature_columns if col not in categorical_cols]
    scaler = StandardScaler()
    if numerical_cols:
        data[numerical_cols] = scaler.fit_transform(data[numerical_cols])
 
    return data, label_encoders, scaler, categorical_cols, numerical_cols
 
def train_neural_network(X_train, y_train, X_val, y_val, input_dim, epochs=100, batch_size=32):
    """Train a small MLP regressor and return the trained model.

    X_*/y_* are pandas objects converted to float tensors; targets are
    reshaped to (n, 1) for MSELoss. Validation loss is computed every
    epoch but only logged — there is no early stopping, so the
    final-epoch weights are what gets returned.
    """
    X_train_tensor = torch.FloatTensor(X_train.values)
    y_train_tensor = torch.FloatTensor(y_train.values).view(-1, 1)
    X_val_tensor = torch.FloatTensor(X_val.values)
    y_val_tensor = torch.FloatTensor(y_val.values).view(-1, 1)
 
    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
 
    # 64 -> 32 -> 16 -> 1 MLP with ReLU activations and 20% dropout.
    class NeuralNet(nn.Module):
        def __init__(self, input_dim):
            super(NeuralNet, self).__init__()
            self.fc1 = nn.Linear(input_dim, 64)
            self.fc2 = nn.Linear(64, 32)
            self.fc3 = nn.Linear(32, 16)
            self.fc4 = nn.Linear(16, 1)
            self.relu = nn.ReLU()
            self.dropout = nn.Dropout(0.2)
 
        def forward(self, x):
            x = self.relu(self.fc1(x))
            x = self.dropout(x)
            x = self.relu(self.fc2(x))
            x = self.dropout(x)
            x = self.relu(self.fc3(x))
            x = self.fc4(x)  # linear output head for regression
            return x
 
    model = NeuralNet(input_dim)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
 
    for epoch in range(epochs):
        model.train()
        for batch_X, batch_y in train_loader:
            optimizer.zero_grad()
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()
 
        # Per-epoch validation loss (monitoring only; not used to stop).
        model.eval()
        with torch.no_grad():
            val_outputs = model(X_val_tensor)
            val_loss = criterion(val_outputs, y_val_tensor)
 
        if (epoch + 1) % 20 == 0:
            print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}, Val Loss: {val_loss.item():.4f}')
 
    return model
 
def predict_missing_values(model, df_missing, df_processed, target_col, scaler, numerical_cols):
    """Fill NaNs in *target_col* with model predictions; return the series.

    Rows whose target is already present keep their original value.
    ``scaler`` and ``numerical_cols`` are accepted for interface
    compatibility but are not used here — the features in
    *df_processed* are already transformed.
    """
    model.eval()
    with torch.no_grad():
        na_index = df_missing[df_missing[target_col].isna()].index

        # Nothing to impute — return the column untouched.
        if len(na_index) == 0:
            return df_missing[target_col]

        features = torch.FloatTensor(
            df_processed.loc[na_index, feature_columns].values
        )
        filled = model(features).numpy().flatten()

        imputed = df_missing[target_col].copy()
        imputed.loc[na_index] = filled
        return imputed
 
print("Processing data for a2 prediction...")
df_processed_a2, le_dict_a2, scaler_a2, cat_cols_a2, num_cols_a2 = preprocess_data(df, 'a2')
 
# Train only on rows where a2 is known.
a2_not_missing = df_processed_a2[df_processed_a2['a2'].notna()]
X_a2 = a2_not_missing[feature_columns]
y_a2 = a2_not_missing['a2']
 
X_train_a2, X_val_a2, y_train_a2, y_val_a2 = train_test_split(
    X_a2, y_a2, test_size=0.2, random_state=42
)
 
print(f"Training a2 model with {len(X_train_a2)} samples...")
model_a2 = train_neural_network(X_train_a2, y_train_a2, X_val_a2, y_val_a2,
                               input_dim=len(feature_columns), epochs=100)
 
print("Predicting missing a2 values...")
df_submit['a2'] = predict_missing_values(model_a2, df_submit, df_processed_a2, 'a2',
                                        scaler_a2, num_cols_a2)
 
# Same pipeline, second target: a14.
print("\nProcessing data for a14 prediction...")
df_processed_a14, le_dict_a14, scaler_a14, cat_cols_a14, num_cols_a14 = preprocess_data(df, 'a14')
 
a14_not_missing = df_processed_a14[df_processed_a14['a14'].notna()]
X_a14 = a14_not_missing[feature_columns]
y_a14 = a14_not_missing['a14']
 
X_train_a14, X_val_a14, y_train_a14, y_val_a14 = train_test_split(
    X_a14, y_a14, test_size=0.2, random_state=42
)
 
print(f"Training a14 model with {len(X_train_a14)} samples...")
model_a14 = train_neural_network(X_train_a14, y_train_a14, X_val_a14, y_val_a14,
                                input_dim=len(feature_columns), epochs=100)
 
print("Predicting missing a14 values...")
df_submit['a14'] = predict_missing_values(model_a14, df_submit, df_processed_a14, 'a14',
                                         scaler_a14, num_cols_a14)
 
# Sanity checks: the "Filled missing" counts should now be zero.
print("\nMissing values filled successfully!")
print(f"Original missing a2 values: {df['a2'].isna().sum()}")
print(f"Filled missing a2 values: {df_submit['a2'].isna().sum()}")
print(f"Original missing a14 values: {df['a14'].isna().sum()}")
print(f"Filled missing a14 values: {df_submit['a14'].isna().sum()}")
 
print("\nSubmission dataframe columns:", df_submit.columns.tolist())
print("Submission dataframe shape:", df_submit.shape)
 
 
submit(source_file=file, data=[df_submit], key=key, course="t81-558", no=4)

dnn_assgn_3

Assgn3要求:

https://github.com/jeffheaton/app_deep_learning/blob/main/assignments/assignment_yourname_t81_558_class3.ipynb

使用 crx 数据集(信用卡审批数据)构建一个二分类**神经网络**。对数据进行预处理(填补缺失值、编码分类变量),然后使用 **Early Stopping**(早停法)训练模型。最终,预测整个数据集(包括训练集和验证集)中每个样本属于 "+" 或 "-" 的概率。

import os
import pandas as pd
from scipy.stats import zscore
import numpy as np
import torch
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from torch import nn
from torch.autograd import Variable
from torch.utils.data import DataLoader, TensorDataset
from sklearn.impute import SimpleImputer
 
file = ".../assignment_FangYou_t81_558_class3.ipynb"
# '?' marks missing values in the crx credit-approval dataset.
df = pd.read_csv(".../crx.csv", na_values=['?'])
 
key = ""  # submission key (redacted)
 
# a16 is the approval label ('+'/'-'); everything else is a feature.
X = df.drop('a16', axis=1)
y = df['a16']
 
categorical_cols = X.select_dtypes(include=['object']).columns.tolist()
numerical_cols = X.select_dtypes(include=['number']).columns.tolist()
 
# Impute numeric missing values with the column median.
imputer = SimpleImputer(strategy='median')
X_numerical_imputed = imputer.fit_transform(X[numerical_cols])
X[numerical_cols] = X_numerical_imputed
 
# Impute categorical missing values with the column mode.
for col in categorical_cols:
    if X[col].isnull().any():
        mode_val = X[col].mode()[0]
        X.loc[X[col].isnull(), col] = mode_val
 
# One-hot encode categoricals (drop_first avoids redundant columns).
X_encoded = pd.get_dummies(X, columns=categorical_cols, drop_first=True)
 
# NOTE: LabelEncoder sorts classes, so here '+' -> 0 and '-' -> 1.
le = LabelEncoder()
y_encoded = le.fit_transform(y)
 
# Stratified 80/20 split preserves the class balance in both sets.
X_train, X_test, y_train, y_test = train_test_split(
    X_encoded, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
)
 
# Fit scaling on the training split only; apply it to the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
 
X_train_tensor = torch.FloatTensor(X_train_scaled)
y_train_tensor = torch.LongTensor(y_train)
X_test_tensor = torch.FloatTensor(X_test_scaled)
y_test_tensor = torch.LongTensor(y_test)
 
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
 
class CreditNet(nn.Module):
    """Three-layer MLP emitting two raw logits for '+'/'-' approval."""

    def __init__(self, input_size):
        super(CreditNet, self).__init__()
        self.fc1 = nn.Linear(input_size, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 2)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.3)

    def forward(self, x):
        # Two hidden blocks of linear -> ReLU -> dropout, then raw
        # logits (CrossEntropyLoss applies softmax internally).
        for hidden in (self.fc1, self.fc2):
            x = self.dropout(self.relu(hidden(x)))
        return self.fc3(x)
 
input_size = X_train_scaled.shape[1]
model = CreditNet(input_size)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Early stopping: stop after `patience` epochs without validation-loss
# improvement, keeping a snapshot of the best weights seen.
patience = 10
best_loss = float('inf')
counter = 0
epochs = 100
best_model_state = None

for epoch in range(epochs):
    model.train()
    train_loss = 0
    for batch_X, batch_y in train_loader:
        optimizer.zero_grad()
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    # Full-batch validation loss after each epoch.
    model.eval()
    with torch.no_grad():
        val_outputs = model(X_test_tensor)
        val_loss = criterion(val_outputs, y_test_tensor).item()

    if val_loss < best_loss:
        best_loss = val_loss
        counter = 0
        # BUG FIX: state_dict().copy() was a shallow copy — the saved
        # dict still referenced the live parameter tensors, which the
        # optimizer mutates in place, so "best" weights silently became
        # the final weights. Clone each tensor to take a real snapshot.
        best_model_state = {k: v.detach().clone()
                            for k, v in model.state_dict().items()}
    else:
        counter += 1
        if counter >= patience:
            print(f"Early stopping at epoch {epoch+1}")
            break

    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{epochs}], Train Loss: {train_loss/len(train_loader):.4f}, Val Loss: {val_loss:.4f}')

# Restore the best-validation-loss weights before final inference.
if best_model_state is not None:
    model.load_state_dict(best_model_state)
 
# Score every row (train + validation) with the early-stopped model.
X_full = X_encoded.copy()
X_full_scaled = scaler.transform(X_full)
X_full_tensor = torch.FloatTensor(X_full_scaled)

model.eval()
with torch.no_grad():
    outputs = model(X_full_tensor)
    probabilities = torch.softmax(outputs, dim=1).numpy()

# BUG FIX: LabelEncoder sorts its classes, so le.classes_ is
# ['+', '-'] — column 0 of `probabilities` is P('+') and column 1 is
# P('-'). The original hard-coded the opposite mapping, swapping the
# two probabilities. Look the indices up from the fitted encoder so
# the mapping is correct by construction.
class_index = {label: i for i, label in enumerate(le.classes_)}
df_submit = pd.DataFrame({
    '+': probabilities[:, class_index['+']],
    '-': probabilities[:, class_index['-']]
})

print("Submission data shape:", df_submit.shape)
print("First few predictions:")
print(df_submit.head())

submit(source_file=file, data=[df_submit], key=key, course="t81-558", no=3)

dnn_assgn_2

Assgn2要求:https://github.com/jeffheaton/app_deep_learning/blob/main/assignments/assignment_yourname_t81_558_class2.ipynb

加载指定的 CSV 文件并进行数据清洗或转换。

import os
import pandas as pd
from scipy.stats import zscore
 
key = ""  # submission key (redacted)
file='./assignment_FangYou_t81_558_class2.ipynb'
 
# '?'-free regression dataset; path redacted in the write-up.
df = pd.read_csv(".../reg-36-data.csv")
print(len(df))
 
# 1. Engineered ratio feature.
df['ratio'] = df['max'] / df['number']
 
# 2. One-hot encode cat2 and drop the original column.
cat2_dummies = pd.get_dummies(df['cat2'], prefix='cat2')
df = pd.concat([df, cat2_dummies], axis=1)
df.drop('cat2', axis=1, inplace=True)
 
# 3. One-hot encode item and drop the original column.
item_dummies = pd.get_dummies(df['item'], prefix='item')
df = pd.concat([df, item_dummies], axis=1)
df.drop('item', axis=1, inplace=True)
 
# 4. Median-impute length.
length_median = df['length'].median()
df = df.assign(length=df['length'].fillna(length_median))
 
# 5. Median-impute height, then z-score it.
height_median = df['height'].median()
df = df.assign(height=df['height'].fillna(height_median))
df['height'] = zscore(df['height'])
 
# 6. Keep only the expected columns.
columns_to_keep = ['height', 'max', 'number', 'length', 'ratio']
cat2_columns = [col for col in df.columns if col.startswith('cat2_')]
item_columns = [col for col in df.columns if col.startswith('item_')]
 
all_columns_to_keep = columns_to_keep + cat2_columns + item_columns
df = df[all_columns_to_keep]
 
# The grader expects this exact dummy-column set; categories absent
# from this data sample are added below as all-zero columns.
required_cat2_columns = ['cat2_CA-0', 'cat2_CA-1', 'cat2_CA-10', 'cat2_CA-11', 'cat2_CA-12',
                         'cat2_CA-13', 'cat2_CA-14', 'cat2_CA-15', 'cat2_CA-16', 'cat2_CA-17',
                         'cat2_CA-18', 'cat2_CA-19', 'cat2_CA-1A', 'cat2_CA-1B', 'cat2_CA-1C',
                         'cat2_CA-1D', 'cat2_CA-1E', 'cat2_CA-1F', 'cat2_CA-2', 'cat2_CA-20',
                         'cat2_CA-21', 'cat2_CA-22', 'cat2_CA-23', 'cat2_CA-24', 'cat2_CA-25',
                         'cat2_CA-26', 'cat2_CA-27', 'cat2_CA-3', 'cat2_CA-4', 'cat2_CA-5',
                         'cat2_CA-6', 'cat2_CA-7', 'cat2_CA-8', 'cat2_CA-9', 'cat2_CA-A',
                         'cat2_CA-B', 'cat2_CA-C', 'cat2_CA-D', 'cat2_CA-E', 'cat2_CA-F']
 
required_item_columns = ['item_IT-0', 'item_IT-1', 'item_IT-10', 'item_IT-11', 'item_IT-12',
                         'item_IT-13', 'item_IT-14', 'item_IT-15', 'item_IT-16', 'item_IT-17',
                         'item_IT-18', 'item_IT-19', 'item_IT-1A', 'item_IT-1B', 'item_IT-1C',
                         'item_IT-1D', 'item_IT-1E', 'item_IT-2', 'item_IT-3', 'item_IT-4',
                         'item_IT-5', 'item_IT-6', 'item_IT-7', 'item_IT-8', 'item_IT-9',
                         'item_IT-A', 'item_IT-B', 'item_IT-C', 'item_IT-D', 'item_IT-E',
                         'item_IT-F']
 
for col in required_cat2_columns:
    if col not in df.columns:
        df[col] = 0
 
for col in required_item_columns:
    if col not in df.columns:
        df[col] = 0
 
# Final fixed column order expected by the grader.
final_columns = ['height', 'max', 'number', 'length', 'ratio'] + required_cat2_columns + required_item_columns
df = df[final_columns]
 
df.to_csv('2.csv',index=False)
submit(source_file=file,data=[df],key=key,course='t81-558',no=2)

dnn_assgn_1

Assgn1要求:https://github.com/jeffheaton/app_deep_learning/blob/main/assignments/assignment_yourname_t81_558_class1.ipynb

熟悉提交函数与提交作业模式。

我不习惯用colab,基本都用jupyter notebook完成。

所有作业先运行Assignment Submit Function,随后运行如下代码。

# BUG FIX: the snippet used C-style '//' comments, which is a
# SyntaxError in Python ('//' is floor division); use '#' instead.
key = "..."  # replace with your own key; email Jeff if the key is invalid

file = '.../assignment_FangYou_t81_558_class1.ipynb'  # replace with your own path

# Main logic: the XOR truth table as a small DataFrame.
df = pd.DataFrame({'a' : [0, 0, 1, 1], 'b' : [0, 1, 0, 1], 'c' : [0, 1, 1, 0]})

# Call the submission function.
submit(source_file=file, data=[df], key=key, course='t81-558', no=1)

# Tip: comment out submit() first and verify the result locally before submitting.