@tankle 2015-09-13T13:37:22.000000Z 字数 10088 阅读 2628

2015年网易游戏在线笔试

面试

2015年网易游戏在线笔试

题目4 : 好师父推算

时间限制:5000ms
单点时限:1000ms
内存限制:256MB
描述
师徒系统普遍存在于各类网络游戏中，对于游戏促进新手留存具有重要意义，现在采集到如下信息：

好友个数	聊天次数	是否是好师父
1	3	1
2	1	2

希望你用naïve bayes算法基于“好友个数”和“聊天次数”推算某玩家是好师父的概率，以方便产品优化匹配规则。

输入
输入数据由多行构成，每行中的数据用“\t”分隔。第1行是1~3个用“\t”分隔的数字，表示输出第几个问题的答案，第2行是属性名称，包括fchatnum，cchatnum和remark三个属性，分别代表好友个数、聊天次数和是否是好师父。从第3行开始为训练数据，含义与第2行的属性名称相对应。好友个数和聊天次数取值都是1~10的整数，是否是好师父取值是1~2的整数，其中2表示好师父。

输出
根据第1行输入数据指定的编号输出以下3个小题的答案，多个小题答案使用换行“\n”分割。
第1题：输出好师父的先验概率。
第2题：输出好师父群体中好友个数取值的概率分布，依次对应1~10的概率取值，零值也要输出，中间用逗号分隔。
第3题：输出给定fchatnum=9，cchatnum=9的玩家是好师父的概率。
输出结果统一四舍五入保留小数点后3位。
完整样例输入下载
总计1000条数据，请在这里下载。
样例输入

1       2       3
fchatnum    cchatnum    remark
1       2       1
3       3       1
1       1       1
6       9       2
3       7       2
4       6       2
4       2       2
3       8       2
1       1       1
8       4       2
……
样例输出
0.320
0.034,0.091,0.075,0.144,0.100,0.106,0.119,0.134,0.100,0.097
0.691

#include <iostream>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <vector>
#include <climits>
#include <cmath>
#include <map>
using namespace std;
void count(vector<int>& fchat, vector<int>& remark, vector<float>& num,int target){
    for(int i=0; i<remark.size(); i++){
        if(remark[i] == target){
            num[fchat[i]]++;
        }
    }
}
void output(int n, float goodrate, vector<float>& num, float threeres){
    switch (n) {
        case 1:
            printf("%.3f\n", goodrate);
            break;
        case 2:
            for(int i=1; i<num.size(); i++){
                if(i == num.size()-1)
                    printf("%.3f\n", num[i]);
                else
                    printf("%.3f,", num[i]);
            }
            break;
        case 3:
            printf("%.3f\n", threeres);
            break;
    }
}
int main(){
    vector<int> fchat;
    vector<int> cchat;
    vector<int> remark;
    int a, b,c;
    int one, two, three;
    scanf("%d %d %d", &one, &two, &three);
    map<string, vector<int>> str2vec;
    string s1, s2, s3;
    cin>>s1>>s2>>s3;
    while(cin>>a>>b>>c){
        // if(a == -1)
            // break;
        str2vec[s1].push_back(a);
        str2vec[s2].push_back(b);
        str2vec[s3].push_back(c);
    }
    fchat = str2vec["fchatnum"];
    cchat = str2vec["cchatnum"];
    remark = str2vec["remark"];
    int good = 0;
    for(int i=0; i<remark.size(); i++)
        if(remark[i] == 2)
            good++;
    float goodrate = good * 1.0 / remark.size();
    vector<float> num(11, 0);
    count(fchat, remark, num, 2);
    //好师傅的前提下，好友个数的概率分布
    for(int i=1; i<num.size(); i++){
        num[i] = num[i] * 1.0 / good;
    }
    vector<float> cnum(11, 0);
    count(cchat, remark, cnum, 2);
    for(int i=1; i<cnum.size(); i++) {
        cnum[i] = cnum[i] * 1.0 / good;
    }
    vector<float> num1(11,0);
    count(fchat, remark, num1, 1);
    vector<float> cnum1(11, 0);
    count(cchat, remark, cnum1, 1);
    float goodres = goodrate * num[9] * cnum[9];
    int notgood = remark.size() - good;
    float notres = (1 - goodrate) * num1[9] * cnum1[9] / (notgood * notgood);
    //NB公式 
    float threeres = goodres / (goodres + notres);
    output(one, goodrate, num, threeres);
    output(two, goodrate, num, threeres);
    output(three, goodrate, num, threeres);
    return 0;
}

题目3 : 游戏玩家分类

时间限制:25000ms
单点时限:5000ms
内存限制:256MB
描述
理查德•巴图博士通过对游戏中玩家固定的行为模式进行观察，于1996年提出了巴图模型，尝试把玩家的不同行为模式进行分类。他将游戏玩家分成了成就型、探索型、社交型和杀手型。该分类方式本质上从玩家在游戏中的需求出发，根据具体的行为表现对其进行分类。推断玩家所属类型，对于游戏用户研究，精准营销投放都有非常重要的意义，因此对不同玩家进行分类是一项重要研究工作。为了实现分类模型，通过收集玩家在游戏中的不同行为数据并进行归一化，可以得到玩家的特征向量以及已知类型玩家的标签，如：

副本参与次数	竞技场参与次数	任务完成次数	登陆频率	充值额度	玩家类型
0.8	0.5	0.6	0.9	0.2	A
0.4	0.8	0.1	0.2	0.1	B
0.9	0.1	0.5	0.6	0.9	C
0.5	0.2	0.1	0.3	0.0	D

(其中前五列数字为玩家的特征向量，最后一列字母是玩家类型，有A、B、C、D四种取值)

分类问题有多种解决算法，其中K最近邻(k-Nearest Neighbor，KNN)分类算法是最简单的机器学习算法之一，其思想是：如果一个样本在特征空间中的k个最相似的样本中的大多数属于某一个类别，则该样本也属于这个类别。请用该方法实现游戏玩家分类，距离度量函数采用欧氏距离。

输入
每个输入数据包含一组训练数据和一组测试数据。

第一行第一个数为KNN算法的k(k<=10000)值，第二个数为特征向量的长度L(L<=100)，第三个数M(M>k, M<=10000)为训练数据行数，第四个数N(N<=10000)为测试数据行数。之后是M行训练数据和N行测试数据。每行中数据使用空格分隔。

输出
对于每行测试数据，输出该玩家的类型，例如“A”。如果前K个相似类型中，出现数量最多的类型有重复，则一起输出，以ABCD升序排列，例如“AC”。

样例输入

3 5 16 2
0.19 0.04 0.06 0.22 0.11 A
0.28 0.42 0.38 0.39 0.44 B
0.71 0.61 0.54 0.52 0.54 C
0.98 0.82 0.92 0.98 0.97 D
0.05 0.03 0.15 0.01 0.11 A
0.33 0.29 0.33 0.47 0.27 B
0.72 0.52 0.61 0.71 0.68 C
0.78 0.86 0.91 1.0 0.76 D
0.01 0.17 0.14 0.15 0.2 A
0.44 0.36 0.32 0.32 0.35 B
0.67 0.65 0.57 0.58 0.52 C
0.87 0.92 0.8 0.83 0.77 D
0.01 0.11 0.14 0.12 0.07 A
0.33 0.43 0.43 0.45 0.38 B
0.57 0.54 0.75 0.7 0.64 C
0.9 0.94 0.83 0.96 0.77 D
0.29 0.29 0.42 0.36 0.27
0.56 0.67 0.71 0.66 0.7
样例输出
B
C

#include <iostream>
#include <vector>
#include <math.h>
#include <map>
#include <algorithm>
using namespace std;
struct sample {
    char label;
    double distance;
};
bool cmp1(sample a, sample b)
{
    if (a.distance == b.distance)
        return a.label < b.label;
    return a.distance < b.distance;
}
bool cmp2(pair<char, int> a, pair<char, int> b)
{
    if (a.second == b.second)
        return a.first < b.first;
    return a.second > b.second;
}
void readTrainData(vector<vector<double> > &trainData, vector<char> &trainLabel, int L, int M)
{
    for (int i = 0; i < M; ++i) {
        // 读入训练数据特征
        vector<double> lineData;
        double tmpData;
        for (int j = 0; j < L; ++j) {
            cin >> tmpData;
            lineData.push_back(tmpData);
        }
        trainData.push_back(lineData);
        // 读入训练数据类别
        char label;
        cin >> label;
        trainLabel.push_back(label);
    }
}
void readTestData(vector<vector<double> > &testData, int L, int N)
{
    for (int i = 0; i < N; ++i) {
        vector<double> lineData;
        double tmpData;
        for (int j = 0; j < L; ++j) {
            cin >> tmpData;
            lineData.push_back(tmpData);
        }
        testData.push_back(lineData);
    }
}
double calcDistance(vector<double> data1, vector<double> data2)
{
    int length = data1.size();
    double distance = 0.0;
    for (int i = 0; i < length; ++i)
        distance += pow(data1[i] - data2[i], 2);
    return sqrt(distance);
}
void KNN(vector<vector<double> > trainData, vector<vector<double> > testData,
         vector<char> trainLabel, vector<char> &testLabel, int k, int L, int M, int N)
{
    for (int i = 0; i < N; ++i) {
        // 计算每一个测试样本与所有训练样本的距离，并排序。
        vector<sample> distances;
        for (int j = 0; j < M; ++j) {
            sample tmpDistance;
            tmpDistance.distance = calcDistance(testData[i], trainData[j]);
            tmpDistance.label = trainLabel[j];
            distances.push_back(tmpDistance);
        }
        sort(distances.begin(), distances.end(), cmp1);
        //for (int j = 0; j < M; ++j)
        //    cout << distances[j].distance << " " << distances[j].label << endl;
        //cout << "-------" << endl;
        // 选择前k个样本的距离，投票选择类别。
        map<char, int> labelMap;
        for (int j = 0; j < k; ++j) {
            if (!labelMap[distances[j].label])
                labelMap[distances[j].label] = 0;
            labelMap[distances[j].label] += 1;
        }
        //for (map<char, int>::iterator it = labelMap.begin(); it != labelMap.end(); ++it)
        //    cout << (*it).first << " " << (*it).second << endl;
        //cout << "--------" << endl;
        for (int j = k; j < M; ++j) {
            if (distances[j].distance == distances[k-1].distance) {
                if (!labelMap[distances[j].label])
                    labelMap[distances[j].label] = 0;
                labelMap[distances[j].label] += 1;
            }
        }
        //for (map<char, int>::iterator it = labelMap.begin(); it != labelMap.end(); ++it)
        //    cout << (*it).first << " " << (*it).second << endl;
        //cout << "--------" << endl;
        vector<pair<char, int> > labelVec;
        for (map<char, int>::iterator it = labelMap.begin(); it != labelMap.end(); ++it)
            labelVec.push_back(make_pair((*it).first, (*it).second));
        sort(labelVec.begin(), labelVec.end(), cmp2);
        //for (int j = 0; j < labelVec.size(); ++j)
        //    cout << labelVec[j].first << " " << labelVec[j].second << endl;
        //cout << "--------" << endl;
        string res = "";
        for (int j = 0; j < labelVec.size(); ++j) {
            res += labelVec[j].first;
            if (j + 1 >= labelVec.size() || labelVec[j].second != labelVec[j+1].second)
                break;
        }
        if (i == 0)
            cout << res;
        else
            cout << "\n" << res;
    }
}
int main()
{
    int k, L, M, N;
    cin >> k >> L >> M >> N;
    vector<vector<double> > trainData;
    vector<vector<double> > testData;
    vector<char> trainLabel;
    vector<char> testLabel;
    readTrainData(trainData, trainLabel, L, M);
    readTestData(testData, L, N);
    KNN(trainData, testData, trainLabel, testLabel, k, L, M, N);
    return 0;
}

题目2 : 理解SQL

时间限制:5000ms
单点时限:1000ms
内存限制:256MB
描述
作为一名游戏行业的数据分析师，每天都会面对大量的游戏数据，对游戏数据进行统计分析必不可少。有效地使用SQL进行数据统计，可以帮助你在工作中获得事半功倍的效果。

请阅读以下SQL，理解SQL的语法含义，根据输入数据，编码输出该SQL的执行结果。

SELECT
    ctype,
    COUNT(DISTINCT pid) AS pcnt,
    SUM(cash) AS cash
FROM
(
    SELECT
        player_channel.pid AS pid,
        player_channel.ctype AS ctype,
        prepaid.cash AS cash
    FROM
    (
        SELECT
            players.pid AS pid,
            IF(channels.ctype IS NULL, -1, channels.ctype) AS ctype
        FROM
            channels
        RIGHT OUTER JOIN
            players
        ON
            channels.cid=players.cid
    ) player_channel
    LEFT OUTER JOIN
        prepaid
    ON
        player_channel.pid=prepaid.pid
) channel_prepaid
GROUP BY
    ctype
ORDER BY
    cash DESC, ctype ASC
;

输入
包含SQL语句中涉及的表数据，每个表的相关数据由以下内容组成：

第一行包含两个数据：tname(字符串类型，长度不超过10，保证为有效单词)和m(0<=m<=1000)，用空格分隔。其中tname表示该表的名字，m表示该表有m行数据；

第二行为3个字符串(保证为有效单词，长度不超过10)，用空格分隔，表示该表的字段名，每个表的字段顺序如下，

channels 表包含字段：cid，name，ctype，其中cid为主键

players 表包含字段：pid，name，cid，其中pid为主键

prepaids 表包含字段：id，pid，cash，其中id为主键

接下来m行，每行3个用空格分隔的数据，仅name字段的数据为字符串类型(长度不超过10，保证为有效单词)，其他均为非负整型。

输出
根据输入数据，输出SQL的执行结果。

样例输入
channels 3
cid name ctype
1 netease 0
2 iplay 0
3 appstore 1
players 7
pid name cid
1 Smith 1
2 Carter 2
3 Linda 2
4 Bush 3
5 Adam 4
6 Gates 5
7 Hill 1
prepaids 8
id pid cash
1 3 1000
2 4 3000
3 1 2000
4 6 1000
5 7 2000
6 2 2000
7 5 6000
8 3 2000
样例输出
0 4 9000
-1 2 7000
1 1 3000

题解

import re
while True:
    try:
        data = {"channels": {"cid":[], "name":[], "ctype":[]},
               "players": {"pid":[], "name":[], "cid":[]},
               "prepaids": {"id":[], "pid":[], "cash":[]}}
        for i in range(3):
            name, num = input().split()
            num = int(num)
            a, b, c = input().split()
            for j in range(num):
                n1, n2, n3 = input().split()
                data[name][a].append(n1)
                data[name][b].append(n2)
                data[name][c].append(n3)
        player_channels = {"pid":[], "ctype":[], "cash":[]}
        channels = data["channels"]
        players = data["players"]
        prepaids = data["prepaids"]
        for i in range(len(players["pid"])):
            pid = players["pid"][i]
            player_channels["pid"].append(pid)
            cid = players["cid"][i]
            if cid not in channels["cid"]:
                type = "-1"
            else:
                type = channels["ctype"][channels["cid"].index(cid)]
            player_channels["ctype"].append(type)
            flag = 0
            for idx in range(len(prepaids["pid"])):
                pidd = prepaids["pid"][idx]
                if pidd == pid:
                    if flag == 0:
                        player_channels["cash"].append(prepaids["cash"][idx])
                        flag  = 1
                    else:
                        player_channels["pid"].append(pid)
                        player_channels["ctype"].append(type)
                        player_channels["cash"].append(prepaids["cash"][idx])
        # print(player_channels)
        results = {}
        ctypes = player_channels["ctype"]
        for i in range(len(ctypes)):
            ctype = ctypes[i]
            if ctype not in results:
                results[ctype] = {"pid":set(),"cash":0}
            results[ctype]["pid"].add(player_channels["pid"][i])
            results[ctype]["cash"] += int(player_channels["cash"][i])
        # print(results)
        res = []
        for key, value in results.items():
            res.append((int(key), len(value["pid"]), value["cash"]))
        from operator import itemgetter, attrgetter
        res = sorted(res,key=itemgetter(2, 0),reverse=True)
        for item in res:
            print(item[0],item[1],item[2])
    except EOFError:
        break

题目1 : 虚拟游戏世界实体分析

时间限制:5000ms
单点时限:1000ms
内存限制:256MB
描述
虚拟游戏世界里面有很多实体，实体可能由很多子实体或者子属性构成。由于实体之间可能有非常之多的嵌套，查询某个实体或者属性属于第几层嵌套，以便将来对虚拟世界的修改和展示是一项待解决的问题，作为未来的虚拟世界分析员，你能用程序解决这个问题吗？

输入
输入数据可能由多组数据构成，每组数据由两行构成：

第一行是对虚拟世界的描述,实体或者属性由英文字母或者数字构成，子实体和子属性紧接着父实体嵌套在{}中，兄弟实体或者属性用“,”分隔。

第二行是要查询的属性或者实体，有且仅有一个。

注意数据输入可能很大。

输出
输出为查询的实体或者属性的层数；如果属性不存在，输出-1；如果有多个结果满足查询，请从小到大输出所有去重之后的结果，用”,”分隔

样例输入
Fruit{apple{shape,color},orange{taste,price}}
Fruit
Fruit{apple{shape,color},orange{taste,price}}
orange
Fruit{apple{shape,color},orange{color,price},color}
color
样例输出
1
2
2,3

题解

import re
while True:
    try:
        data = raw_input()
        sstr = raw_input()
        sstr = sstr.strip()
        res = re.findall(sstr, data)
        if len(res) == 0:
            print(-1)
        else:
            index = 1
            #将单词之间插入空格，方便后面的切分
            data = data.replace("{", " { ").replace("}", " } ").replace(",", " , ")
            data = data.split(" ")
            ress = set()
            level = 1
            for w in data:
                w = w.strip()
                if w == '{':
                    level += 1
                elif w == '}':
                    level -= 1
                elif w == sstr:
                    ress.add(level)
            ress = list(ress)
            sorted(ress)
            out = ",".join([str(i) for i in ress ])
            print(out)
    except EOFError:
        break