## 数据清洗
### 准备工作
#### 查看数据格式
```json
{"events":"1473367236143\u00010\u0001connectByQRCode\u0001\u00010\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u00011609072239570000027\u0001\n1473367261933\u00010\u0001AppLaunch\u0001\u00010\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u00011609072239570000028\u0001\n1473367280349\u00010\u0001connectByQRCode\u0001\u00010\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u00011609072239570000029\u0001\n1473367331326\u00010\u0001AppLaunch\u0001\u00010\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u00011609072239570000030\u0001\n1473367353310\u00010\u0001connectByQRCode\u0001\u00010\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u00011609072239570000031\u0001\n1473367387087\u00010\u0001AppLaunch\u0001\u00010\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u00011609072239570000032\u0001\n1473367402167\u00010\u0001connectByQRCode\u0001\u00010\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u00011609072239570000033\u0001\n1473367451994\u00010\u0001AppLaunch\u0001\u00010\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u00011609072239570000034\u0001\n1473367474316\u00010\u0001connectByQRCode\u0001\u00010\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u00011609072239570000035\u0001\n1473367564181\u00010\u0001AppLaunch\u0001\u00010\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u00011609072239570000036\u0001\n1473367589527\u00010\u0001connectByQRCode\u0001\u00010\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u00011609072239570000037\u0001\n1473367610310\u00010\u0001AppLaunch\u0001\u00010\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u00011609072239570000038\u0001\n1473367624647\u00010\u0001connectByQRCode\u0001\u00010\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u00011609072239570000039\u0001\n1473368004298\u00010\u0001AppLaunch\u0001\u00010\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u00011609072239570000040\u0001\n1473368017851\u00010\u0001connectByQRCode\u0001\u00010\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u00011609072239570000041\u0001\n1473369599067\u00010\u0001AppLaunch\u0001\u00010\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u00011609072239570000042\u0001\n1473369622274\u00010\u0001connectByQRCode\u0001\u00010\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u0001\u00011609072239570000043\u0001\n","header":{"cid_sn":"1501004207EE98AA","mobile_data_type":"","os_ver":"22","mac":"1c:77:f6:78:f5:75","resolution":"1080x1920","commit_time":"1502686418952","sdk_ver":"103","device_id_type":"mac","city":"江门市","device_model":"HUAWEI VNS-AL00","android_id":"867830021735040","carrier":"中国xx","promotion_channel":"1","app_ver_name":"1.4","imei":"867830021735040","app_ver_code":"4010104","pid":"pid","net_type":"3","device_id":"m.1c:77:f6:78:f5:75","app_device_id":"m.1c:77:f6:78:f5:75","release_channel":"1009","country":"CN","time_zone":"28800000","os_name":"android","manufacture":"OPPO","commit_id":"fde7ee2e48494b24bf3599771d7c2a78","account":"none","app_token":"XIAONIU_A","app_id":"com.appid.xiaoniu","language":"zh","build_num":"YVF6R16303000403"}}
```
#### Json 数据生成实体类
```
https://www.json.cn/
```
<img src="E:\md文件图片\屏幕截图 2021-04-10 132700.png" style="zoom:60%;" />
### 编写Map-reduce程序
#### 引入依赖
```java
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>org.example</groupId>
<artifactId>map-reduce-demo</artifactId>
<version>1.0-SNAPSHOT</version>
<properties>
<hadoop.version>2.6.0-cdh5.7.0</hadoop.version>
<maven.compiler.source>8</maven.compiler.source>
<maven.compiler.target>8</maven.compiler.target>
</properties>
<repositories>
<repository>
<id>cloudera</id>
<url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
</repository>
</repositories>
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>