2023 Anhui Province Big Data and Artificial Intelligence Application Competition: AI Track (Online Round, Undergraduate Group) Answer Sheet

2023-11-04 21:09:17 | Competition | Editor: 黎为乐

2023 Anhui Province Big Data and Artificial Intelligence Application Competition

AI Track (Online Round, Undergraduate Group) Answer Sheet

September 2023

Note: Save the source code you wrote for each question, together with a screenshot of its running result, in the corresponding place on this answer sheet;

name the answer sheet after your team code (队伍编码.docx) and submit it through the system.

Part 1: Basic AI Environment Setup and Deployment (10 points)

Note: Complete either Task 1 or Task 2.

o Task 1

o Task 2
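This part is left blank on the answer card and the original task statements are not reproduced here. Purely as an illustrative sketch rather than the required answer, a short version check such as the one below can document that the Python environment used in Parts 2 to 5 is installed; it assumes only the libraries imported later in this sheet.

import sys
# Minimal environment check: print the versions of the libraries used in Parts 2-5
import numpy, pandas, cv2, sklearn, tensorflow, jieba

print("Python      :", sys.version.split()[0])
print("NumPy       :", numpy.__version__)
print("pandas      :", pandas.__version__)
print("OpenCV      :", cv2.__version__)
print("scikit-learn:", sklearn.__version__)
print("TensorFlow  :", tensorflow.__version__)
print("jieba       :", jieba.__version__)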

Part 2: Sample Data Preprocessing (30 points)

Note: Both Task 1 and Task 2 must be completed.

o Task 1 (15 points).

import pandas as pd

# Load the sample data
data = pd.read_csv("task2_1.csv")

# Drop rows with a missing WORK_PROVINCE value
data = data.dropna(subset=['WORK_PROVINCE'])

# Count samples whose WORK_PROVINCE is not a letter/number combination code
# (note the negation, so that the count matches the printed description)
non_alphanumeric_count = data[~data['WORK_PROVINCE'].str.isalnum()]['WORK_PROVINCE'].count()
print("Number of samples with non-letter-combination codes:", non_alphanumeric_count)

# Mean of the 20 largest values in the AVG_FLIGHT_COUNT column
top_20_avg = data.nlargest(20, 'AVG_FLIGHT_COUNT')['AVG_FLIGHT_COUNT'].mean()
print("Mean of the 20 largest values in AVG_FLIGHT_COUNT:", top_20_avg)

o Task 2 (15 points).

import cv2
import numpy as np

# Read the image, resize it, and keep only the right half
img = cv2.imread('data/task2/task2_2.jpg')
img = cv2.resize(img, (650, 360))
img1 = img[:, 325:]

# Convert to YCrCb and build a binary skin mask: pixels with
# Cr in [133, 173] and Cb in [77, 127] are marked white
img2 = cv2.cvtColor(img1, cv2.COLOR_BGR2YCrCb)
img3 = np.zeros_like(img2)
for i in range(img2.shape[0]):
    for j in range(img2.shape[1]):
        if 133 <= img2[i, j][1] <= 173 and 77 <= img2[i, j][2] <= 127:
            img3[i, j] = [255, 255, 255]

cv2.imshow('img1', img1)
cv2.imshow('img3', img3)
cv2.waitKey(0)
cv2.destroyAllWindows()
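The pixel-by-pixel loop above is correct but slow. As an optional sketch under the same assumptions (same image path and the same Cr/Cb skin thresholds), the mask can be produced in a single call with cv2.inRange:

import cv2
import numpy as np

img = cv2.imread('data/task2/task2_2.jpg')
img = cv2.resize(img, (650, 360))
right = img[:, 325:]

ycrcb = cv2.cvtColor(right, cv2.COLOR_BGR2YCrCb)
# inRange checks all three channels; Y is left unconstrained (0-255)
mask = cv2.inRange(ycrcb, np.array([0, 133, 77]), np.array([255, 173, 127]))

cv2.imshow('right half', right)
cv2.imshow('skin mask', mask)
cv2.waitKey(0)
cv2.destroyAllWindows()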

Part 3: Traditional Machine Learning Algorithm Design and Application (20 points)

import os
import cv2
import numpy as np
from sklearn.cluster import KMeans
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.preprocessing import StandardScaler

# A single SIFT extractor shared by vocabulary building and BoW encoding
sift = cv2.SIFT_create()

def extract_sift_features(images, num_clusters):
    # Collect SIFT descriptors from all images and cluster them into a visual vocabulary
    descriptors = []
    for image in images:
        kp, desc = sift.detectAndCompute(image, None)
        if desc is not None:
            descriptors.extend(desc)
    descriptors = np.array(descriptors)
    kmeans = KMeans(n_clusters=num_clusters)
    kmeans.fit(descriptors)
    return kmeans

data_dir = "data/task3"
categories = ["cat", "dog"]
num_clusters = 100  # size of the visual vocabulary

# Load grayscale images and integer labels (0 = cat, 1 = dog);
# the images have different sizes, so they are kept in a plain list
images = []
labels = []
for category_idx, category in enumerate(categories):
    category_dir = os.path.join(data_dir, category)
    for filename in os.listdir(category_dir):
        if filename.endswith(".jpg"):
            image_path = os.path.join(category_dir, filename)
            image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
            images.append(image)
            labels.append(category_idx)
labels = np.array(labels)

# Build the visual vocabulary
kmeans = extract_sift_features(images, num_clusters)

def image_to_bow(image, kmeans_model):
    # Encode one image as a bag-of-visual-words histogram over the KMeans vocabulary
    kp, desc = sift.detectAndCompute(image, None)
    if desc is not None:
        bow = kmeans_model.predict(desc)
        bow_hist = np.bincount(bow, minlength=num_clusters)
        return bow_hist
    else:
        return np.zeros(num_clusters)

# Encode every image, then split, standardize, and train a linear SVM
X = []
for image in images:
    bow_vector = image_to_bow(image, kmeans)
    X.append(bow_vector)
X = np.array(X)

X_train, X_test, y_train, y_test = train_test_split(X, labels, test_size=0.1, random_state=42)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

svm_model = SVC(kernel='linear')
svm_model.fit(X_train, y_train)

# Evaluate on the held-out split
y_pred = svm_model.predict(X_test)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1-score: {f1:.2f}")


Part 4: Deep Learning Algorithm Design and Application (20 points)

import numpy as np
import cv2
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Flatten
from tensorflow.keras.optimizers import SGD

# Read image paths and labels for the training and test sets
train_path = np.loadtxt('trainlabels.txt', delimiter=' ', encoding='gbk', dtype='str')
test_path = np.loadtxt('testlabels.txt', delimiter=' ', encoding='gbk', dtype='str')
train_labels = train_path[:, 1:]
train_path = train_path[:, 0]
test_labels = test_path[:, 1:]
test_path = test_path[:, 0]

def preprocess_data(img_path, labels):
    # Load an image, resize it to 64x64, and scale pixel values to [0, 1]
    img = cv2.imread(img_path)
    img = cv2.resize(img, (64, 64))
    img = img / 255.0
    return img, labels

train_data, train_labels = zip(*[preprocess_data(train_path[i], train_labels[i]) for i in range(len(train_path))])
test_data, test_labels = zip(*[preprocess_data(test_path[i], test_labels[i]) for i in range(len(test_path))])

train_data = np.array(train_data)
train_labels = np.array(train_labels, dtype='int')
test_data = np.array(test_data)
test_labels = np.array(test_labels, dtype='int')

train_data = train_data.reshape((train_data.shape[0], 64, 64, 3))
test_data = test_data.reshape((test_data.shape[0], 64, 64, 3))

# A small CNN: two conv/pool stages followed by a dense classifier
# with 7 sigmoid outputs, one per label column
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(64, 64, 3)))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(7, activation='sigmoid'))

# MSE loss with SGD; decay is the legacy learning-rate decay argument
model.compile(loss='mean_squared_error', optimizer=SGD(learning_rate=0.001, decay=0.0001), metrics=['accuracy'])

model.fit(train_data, train_labels, epochs=100, batch_size=32)

loss, accuracy = model.evaluate(test_data, test_labels)
print('Test accuracy: %.2f' % (accuracy * 100))
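As an optional follow-up sketch (not part of the required answer), the trained network can be applied to a single preprocessed test image, thresholding each of the 7 sigmoid outputs at 0.5:

# Hypothetical example: predict the label vector for one test image
sample = test_data[:1]                    # first preprocessed test image, shape (1, 64, 64, 3)
probs = model.predict(sample)[0]          # 7 sigmoid outputs in [0, 1]
pred = (probs >= 0.5).astype(int)         # threshold each output at 0.5
print("Predicted label vector:   ", pred)
print("Ground-truth label vector:", test_labels[0])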

Part 5: Comprehensive Application of Artificial Intelligence Technology (20 points)

import jieba
from jieba.analyse import extract_tags
import codecs

# Read the raw text
with codecs.open('task5data.txt', 'r', encoding='utf-8') as file:
    text = file.read()

# Extract the top 30 keywords with their TF-IDF weights
keywords = extract_tags(text, topK=30, withWeight=True)
keyword_dict = {key: weight for key, weight in keywords}

# Split the text into sentences and score each one by the
# total weight of the keywords it contains
sentences = text.split('。')
sentence_importance = []
for sentence in sentences:
    words = jieba.lcut(sentence)
    importance = sum(keyword_dict.get(word, 0) for word in words)
    sentence_importance.append((sentence, importance))

# Take the three highest-scoring sentences as the summary
sorted_sentences = sorted(sentence_importance, key=lambda x: x[1], reverse=True)
print("summary:")
print('\n'.join([i[0] for i in sorted_sentences[:3]]))
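One optional refinement, sketched here rather than taken from the original answer: print the three selected sentences in the order they appear in the source text, which usually reads more naturally as a summary.

# Keep the top-3 sentences but restore their original order in the text
top3 = set(s for s, _ in sorted_sentences[:3])
ordered_summary = [s for s in sentences if s in top3]
print('。'.join(ordered_summary) + '。')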
